1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00

[ARM] Add larger than legal ICmp costs

A v8i32 compare will produce a v8i1 predicate, but during codegen the
v8i32 will be split into two v4i32, potentially requiring two v4i1
predicates to be merged into a single v8i1. Because this merging of two
v4i1's into a v8i1 is very expensive, we need to make the cost of the
compare equally high.

This patch adds the cost of that to ARMTTIImpl::getCmpSelInstrCost.
Because we don't know whether the user of the predicate can be split,
and the cost model is mostly pre-instruction, we may be pessimistic but
that should only be for larger and legal types. This also adds min/max
detection to the costmodel where it can be detected, to keep those in
line with the cost of simple min/max instructions. Otherwise for the
most part, costs that were already expensive have become more expensive.

Differential Revision: https://reviews.llvm.org/D96692
This commit is contained in:
David Green 2021-02-18 11:42:17 +00:00
parent a1577dbc3a
commit 20c6684726
10 changed files with 236 additions and 142 deletions

View File

@ -859,6 +859,51 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
return Cost;
}
// If this is a vector min/max/abs, use the cost of that intrinsic directly
// instead. Hopefully when min/max intrinsics are more prevalent this code
// will not be needed.
const Instruction *Sel = I;
if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
Sel->hasOneUse())
Sel = cast<Instruction>(Sel->user_back());
if (Sel && ValTy->isVectorTy()) {
const Value *LHS, *RHS;
SelectPatternFlavor SPF = matchSelectPattern(Sel, LHS, RHS).Flavor;
unsigned IID = 0;
switch (SPF) {
case SPF_ABS:
IID = Intrinsic::abs;
break;
case SPF_SMIN:
IID = Intrinsic::smin;
break;
case SPF_SMAX:
IID = Intrinsic::smax;
break;
case SPF_UMIN:
IID = Intrinsic::umin;
break;
case SPF_UMAX:
IID = Intrinsic::umax;
break;
case SPF_FMINNUM:
IID = Intrinsic::minnum;
break;
case SPF_FMAXNUM:
IID = Intrinsic::maxnum;
break;
default:
break;
}
if (IID) {
// The ICmp is free, the select gets the cost of the min/max/etc
if (Sel != I)
return 0;
IntrinsicCostAttributes CostAttrs(IID, ValTy, {ValTy, ValTy});
return getIntrinsicInstrCost(CostAttrs, CostKind);
}
}
// On NEON a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
// Lowering of some vector selects is currently far from perfect.
@ -881,6 +926,41 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
return LT.first;
}
if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
(Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
cast<FixedVectorType>(ValTy)->getNumElements() > 1) {
FixedVectorType *VecValTy = cast<FixedVectorType>(ValTy);
FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy);
if (!VecCondTy)
VecCondTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(VecValTy));
// If we don't have mve.fp any fp operations will need to be scalarized.
if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
// One scalaization insert, one scalarization extract and the cost of the
// fcmps.
return BaseT::getScalarizationOverhead(VecValTy, false, true) +
BaseT::getScalarizationOverhead(VecCondTy, true, false) +
VecValTy->getNumElements() *
getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
CondTy->getScalarType(), VecPred, CostKind,
I);
}
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
int BaseCost = ST->getMVEVectorCostFactor(CostKind);
// There are two types - the input that specifies the type of the compare
// and the output vXi1 type. Because we don't know how the output will be
// split, we may need an expensive shuffle to get two in sync. This has the
// effect of making larger than legal compares (v8i32 for example)
// expensive.
if (LT.second.getVectorNumElements() > 2) {
if (LT.first > 1)
return LT.first * BaseCost +
BaseT::getScalarizationOverhead(VecCondTy, true, false);
return BaseCost;
}
}
// Default to cheap (throughput/size of 1 instruction) but adjust throughput
// for "multiple beats" potentially needed by MVE instructions.
int BaseCost = 1;

View File

@ -72,16 +72,16 @@ define i32 @sadd(i32 %arg) {
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 866 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 306 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1122 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1890 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1106 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4260 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1874 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7332 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1098 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4244 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16680 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7316 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28968 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; V8M-SIZE-LABEL: 'sadd'
@ -129,16 +129,16 @@ define i32 @sadd(i32 %arg) {
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 787 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 779 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3092 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3084 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12310 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
@ -230,16 +230,16 @@ define i32 @uadd(i32 %arg) {
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1040 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4112 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; V8M-SIZE-LABEL: 'uadd'
@ -287,16 +287,16 @@ define i32 @uadd(i32 %arg) {
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1028 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4104 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
@ -388,16 +388,16 @@ define i32 @ssub(i32 %arg) {
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 866 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 306 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1122 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1890 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1106 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4260 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1874 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7332 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1098 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4244 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16680 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7316 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28968 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; V8M-SIZE-LABEL: 'ssub'
@ -445,16 +445,16 @@ define i32 @ssub(i32 %arg) {
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 787 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 779 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3092 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3084 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12310 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
@ -546,16 +546,16 @@ define i32 @usub(i32 %arg) {
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1040 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4112 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; V8M-SIZE-LABEL: 'usub'
@ -603,16 +603,16 @@ define i32 @usub(i32 %arg) {
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1028 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4104 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
@ -704,16 +704,16 @@ define i32 @smul(i32 %arg) {
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1208 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 380 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1464 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1512 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 616 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1252 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4712 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; V8M-SIZE-LABEL: 'smul'
@ -761,16 +761,16 @@ define i32 @smul(i32 %arg) {
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 325 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 557 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 332 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1174 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 275 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1164 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4374 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
@ -862,16 +862,16 @@ define i32 @umul(i32 %arg) {
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1200 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1456 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1504 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1248 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4704 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; V8M-SIZE-LABEL: 'umul'
@ -919,16 +919,16 @@ define i32 @umul(i32 %arg) {
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 553 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 812 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1170 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 271 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1162 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4370 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef)

View File

@ -225,7 +225,7 @@ define void @ctlz(i32 %a, <16 x i32> %va) {
define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) {
; THRU-LABEL: 'fshl'
; THRU-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; THRU-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; THRU-NEXT: Cost Model: Found an estimated cost of 832 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'fshl'
@ -235,12 +235,12 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x
;
; SIZE-LABEL: 'fshl'
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; SIZE-NEXT: Cost Model: Found an estimated cost of 546 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 805 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'fshl'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 564 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 826 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
@ -292,7 +292,7 @@ define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) {
define void @reduce_fmax(<16 x float> %va) {
; THRU-LABEL: 'reduce_fmax'
; THRU-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; THRU-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'reduce_fmax'
@ -300,11 +300,11 @@ define void @reduce_fmax(<16 x float> %va) {
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fmax'
; SIZE-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; SIZE-NEXT: Cost Model: Found an estimated cost of 685 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'reduce_fmax'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 628 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 694 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK
; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK
; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP
define void @icmp() {
; CHECK-LABEL: 'icmp'
@ -8,15 +8,15 @@ define void @icmp() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = icmp slt <4 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = icmp slt <8 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = icmp slt <16 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8 = icmp slt <32 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1028 for instruction: %v32i8 = icmp slt <32 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i16 = icmp slt <2 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = icmp slt <4 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = icmp slt <8 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = icmp slt <16 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16i16 = icmp slt <16 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i32 = icmp slt <2 x i32> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = icmp slt <4 x i32> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = icmp slt <8 x i32> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = icmp slt <16 x i32> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v8i32 = icmp slt <8 x i32> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16i32 = icmp slt <16 x i32> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i64 = icmp slt <2 x i64> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4i64 = icmp slt <4 x i64> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v8i64 = icmp slt <8 x i64> undef, undef
@ -46,19 +46,33 @@ define void @icmp() {
}
define void @fcmp() {
; CHECK-LABEL: 'fcmp'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; CHECK-MVE-LABEL: 'fcmp'
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEFP-LABEL: 'fcmp'
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%v2f16 = fcmp olt <2 x half> undef, undef
%v4f16 = fcmp olt <4 x half> undef, undef

View File

@ -57,8 +57,8 @@ define i32 @reduce_i32(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
@ -93,8 +93,8 @@ define i32 @reduce_i16(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
@ -133,8 +133,8 @@ define i32 @reduce_i8(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)

View File

@ -57,8 +57,8 @@ define i32 @reduce_i32(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
@ -93,8 +93,8 @@ define i32 @reduce_i16(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
@ -133,8 +133,8 @@ define i32 @reduce_i8(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)

View File

@ -57,8 +57,8 @@ define i32 @reduce_i32(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
@ -93,8 +93,8 @@ define i32 @reduce_i16(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
@ -133,8 +133,8 @@ define i32 @reduce_i8(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)

View File

@ -57,8 +57,8 @@ define i32 @reduce_i32(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
@ -93,8 +93,8 @@ define i32 @reduce_i16(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
@ -133,8 +133,8 @@ define i32 @reduce_i8(i32 %arg) {
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)

View File

@ -21,8 +21,8 @@ define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
; CHECK: vmin.s16
; CHECK: vmin.s16
; COST: func_blend10
; COST: cost of 2 {{.*}} icmp
; COST: cost of 2 {{.*}} select
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select
%r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
store %T0_10 %r, %T0_10* %storeaddr
ret void
@ -38,8 +38,8 @@ define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
; CHECK: vmin.s32
; CHECK: vmin.s32
; COST: func_blend14
; COST: cost of 2 {{.*}} icmp
; COST: cost of 2 {{.*}} select
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select
%r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
store %T0_14 %r, %T0_14* %storeaddr
ret void
@ -55,8 +55,8 @@ define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
%v1 = load %T0_15, %T0_15* %loadaddr2
%c = icmp slt %T0_15 %v0, %v1
; COST: func_blend15
; COST: cost of 4 {{.*}} icmp
; COST: cost of 4 {{.*}} select
; COST: cost of 0 {{.*}} icmp
; COST: cost of 8 {{.*}} select
%r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
store %T0_15 %r, %T0_15* %storeaddr
ret void
@ -130,8 +130,8 @@ define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
%v1 = load %T0_18, %T0_18* %loadaddr2
%c = icmp slt %T0_18 %v0, %v1
; COST: func_blend18
; COST: cost of 2 {{.*}} icmp
; COST: cost of 19 {{.*}} select
; COST: cost of 0 {{.*}} icmp
; COST: cost of 21 {{.*}} select
%r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
store %T0_18 %r, %T0_18* %storeaddr
ret void
@ -260,8 +260,8 @@ define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
%v1 = load %T0_19, %T0_19* %loadaddr2
%c = icmp slt %T0_19 %v0, %v1
; COST: func_blend19
; COST: cost of 4 {{.*}} icmp
; COST: cost of 50 {{.*}} select
; COST: cost of 0 {{.*}} icmp
; COST: cost of 54 {{.*}} select
%r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
store %T0_19 %r, %T0_19* %storeaddr
ret void
@ -516,8 +516,8 @@ define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
%v1 = load %T0_20, %T0_20* %loadaddr2
%c = icmp slt %T0_20 %v0, %v1
; COST: func_blend20
; COST: cost of 8 {{.*}} icmp
; COST: cost of 100 {{.*}} select
; COST: cost of 0 {{.*}} icmp
; COST: cost of 108 {{.*}} select
%r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
store %T0_20 %r, %T0_20* %storeaddr
ret void

View File

@ -50,7 +50,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %1 = load i16, i16* %arrayidx, align 2
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv = sext i16 %1 to i32
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1
; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %cmp2, label %if.then, label %for.inc
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv6 = add i16 %1, %0
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016
@ -59,8 +59,8 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %inc = add nuw nsw i32 %i.016, 1
; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %exitcond.not = icmp eq i32 %inc, %n
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
; CHECK: LV: Vector loop of width 8 costs: 1.
; CHECK: LV: Selecting VF: 8.
; CHECK: LV: Vector loop of width 8 costs: 9.
; CHECK: LV: Selecting VF: 4.
define void @expensive_icmp(i16* noalias nocapture %d, i16* nocapture readonly %s, i32 %n, i16 zeroext %m) #0 {
entry:
%cmp15 = icmp sgt i32 %n, 0
@ -127,8 +127,8 @@ for.inc: ; preds = %for.body, %if.then
; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction: %conv3 = sext i8 %1 to i32
; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: %mul = mul nsw i32 %conv3, %conv1
; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: %shr = ashr i32 %mul, 7
; CHECK: LV: Found an estimated cost of 12 for VF 2 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 12 for VF 2 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 24 for VF 2 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv4 = trunc i32 %spec.select.i to i8
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %conv4, i8* %pDst.addr.010, align 1
@ -148,7 +148,7 @@ for.inc: ; preds = %for.body, %if.then
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv3 = sext i8 %1 to i32
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %mul = mul nsw i32 %conv3, %conv1
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %shr = ashr i32 %mul, 7
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv4 = trunc i32 %spec.select.i to i8
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
@ -156,7 +156,7 @@ for.inc: ; preds = %for.body, %if.then
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %dec = add i32 %blkCnt.012, -1
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %cmp.not = icmp eq i32 %dec, 0
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body
; CHECK: LV: Vector loop of width 4 costs: 4.
; CHECK: LV: Vector loop of width 4 costs: 3.
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
@ -169,7 +169,7 @@ for.inc: ; preds = %for.body, %if.then
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv3 = sext i8 %1 to i32
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %mul = mul nsw i32 %conv3, %conv1
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %shr = ashr i32 %mul, 7
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv4 = trunc i32 %spec.select.i to i8
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
@ -190,7 +190,7 @@ for.inc: ; preds = %for.body, %if.then
; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %conv3 = sext i8 %1 to i32
; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %mul = mul nsw i32 %conv3, %conv1
; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %shr = ashr i32 %mul, 7
; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %conv4 = trunc i32 %spec.select.i to i8
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1