mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[ARM][CostModel] Improve getCastInstrCost
- Specifically check for sext/zext users which have 'long' form NEON instructions. - Add more entries to the table for sext/zexts so that we can more accurately report the number of vmovls required for NEON. - Pass the instruction to the base implementation. Differential Revision: https://reviews.llvm.org/D79561
This commit is contained in:
parent
e6a74d424e
commit
35eefb1191
@ -191,7 +191,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
||||
EVT DstTy = TLI->getValueType(DL, Dst);
|
||||
|
||||
if (!SrcTy.isSimple() || !DstTy.isSimple())
|
||||
return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
|
||||
return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
|
||||
|
||||
// The extend of a load is free
|
||||
if (I && isa<LoadInst>(I->getOperand(0))) {
|
||||
@ -229,18 +229,53 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
||||
}
|
||||
}
|
||||
|
||||
// NEON vector operations that can extend their inputs.
|
||||
if ((ISD == ISD::SIGN_EXTEND || ISD == ISD::ZERO_EXTEND) &&
|
||||
I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
|
||||
static const TypeConversionCostTblEntry NEONDoubleWidthTbl[] = {
|
||||
// vaddl
|
||||
{ ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
|
||||
{ ISD::ADD, MVT::v8i16, MVT::v8i8, 0 },
|
||||
// vsubl
|
||||
{ ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
|
||||
{ ISD::SUB, MVT::v8i16, MVT::v8i8, 0 },
|
||||
// vmull
|
||||
{ ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
|
||||
{ ISD::MUL, MVT::v8i16, MVT::v8i8, 0 },
|
||||
// vshll
|
||||
{ ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
|
||||
{ ISD::SHL, MVT::v8i16, MVT::v8i8, 0 },
|
||||
};
|
||||
|
||||
auto *User = cast<Instruction>(*I->user_begin());
|
||||
int UserISD = TLI->InstructionOpcodeToISD(User->getOpcode());
|
||||
if (auto *Entry = ConvertCostTableLookup(NEONDoubleWidthTbl, UserISD,
|
||||
DstTy.getSimpleVT(),
|
||||
SrcTy.getSimpleVT())) {
|
||||
return Entry->Cost;
|
||||
}
|
||||
}
|
||||
|
||||
// Some arithmetic, load and store operations have specific instructions
|
||||
// to cast up/down their types automatically at no extra cost.
|
||||
// TODO: Get these tables to know at least what the related operations are.
|
||||
static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
|
||||
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
|
||||
|
||||
// The number of vmovl instructions for the extension.
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
|
||||
@ -422,7 +457,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
||||
int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
|
||||
? ST->getMVEVectorCostFactor()
|
||||
: 1;
|
||||
return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
|
||||
return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
|
||||
}
|
||||
|
||||
int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
|
||||
|
114
test/Analysis/CostModel/ARM/add-cast-vect.ll
Normal file
114
test/Analysis/CostModel/ARM/add-cast-vect.ll
Normal file
@ -0,0 +1,114 @@
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
|
||||
; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
|
||||
; ASM lines below are only for reference; tests in that direction should go in test/CodeGen/ARM
|
||||
|
||||
; ModuleID = 'arm.ll'
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
|
||||
target triple = "armv7--linux-gnueabihf"
|
||||
|
||||
%T216 = type <2 x i16>
|
||||
%T232 = type <2 x i32>
|
||||
%T264 = type <2 x i64>
|
||||
|
||||
%T416 = type <4 x i16>
|
||||
%T432 = type <4 x i32>
|
||||
%T464 = type <4 x i64>
|
||||
|
||||
define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
|
||||
; COST: function 'direct':
|
||||
%v0 = load %T432, %T432* %loadaddr
|
||||
; ASM: vld1.64
|
||||
%v1 = load %T432, %T432* %loadaddr2
|
||||
; ASM: vld1.64
|
||||
%r3 = add %T432 %v0, %v1
|
||||
; COST: cost of 1 for instruction: {{.*}} add <4 x i32>
|
||||
; ASM: vadd.i32
|
||||
store %T432 %r3, %T432* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
|
||||
; COST: function 'ups1632':
|
||||
%v0 = load %T416, %T416* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T416, %T416* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r1 = sext %T416 %v0 to %T432
|
||||
%r2 = sext %T416 %v1 to %T432
|
||||
; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
|
||||
%r3 = add %T432 %r1, %r2
|
||||
; COST: cost of 1 for instruction: {{.*}} add <4 x i32>
|
||||
; ASM: vaddl.s16
|
||||
store %T432 %r3, %T432* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
|
||||
; COST: function 'upu1632':
|
||||
%v0 = load %T416, %T416* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T416, %T416* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r1 = zext %T416 %v0 to %T432
|
||||
%r2 = zext %T416 %v1 to %T432
|
||||
; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
|
||||
%r3 = add %T432 %r1, %r2
|
||||
; COST: cost of 1 for instruction: {{.*}} add <4 x i32>
|
||||
; ASM: vaddl.u16
|
||||
store %T432 %r3, %T432* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
|
||||
; COST: function 'ups3264':
|
||||
%v0 = load %T232, %T232* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T232, %T232* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r3 = add %T232 %v0, %v1
|
||||
; ASM: vadd.i32
|
||||
; COST: cost of 1 for instruction: {{.*}} add <2 x i32>
|
||||
%st = sext %T232 %r3 to %T264
|
||||
; ASM: vmovl.s32
|
||||
; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
|
||||
store %T264 %st, %T264* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
|
||||
; COST: function 'upu3264':
|
||||
%v0 = load %T232, %T232* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T232, %T232* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r3 = add %T232 %v0, %v1
|
||||
; ASM: vadd.i32
|
||||
; COST: cost of 1 for instruction: {{.*}} add <2 x i32>
|
||||
%st = zext %T232 %r3 to %T264
|
||||
; ASM: vmovl.u32
|
||||
; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
|
||||
store %T264 %st, %T264* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
|
||||
; COST: function 'dn3216':
|
||||
%v0 = load %T432, %T432* %loadaddr
|
||||
; ASM: vld1.64
|
||||
%v1 = load %T432, %T432* %loadaddr2
|
||||
; ASM: vld1.64
|
||||
%r3 = add %T432 %v0, %v1
|
||||
; ASM: vadd.i32
|
||||
; COST: cost of 1 for instruction: {{.*}} add <4 x i32>
|
||||
%st = trunc %T432 %r3 to %T416
|
||||
; ASM: vmovn.i32
|
||||
; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
|
||||
store %T416 %st, %T416* %storeaddr
|
||||
; ASM: vstr
|
||||
ret void
|
||||
}
|
@ -77,14 +77,14 @@ define i32 @casts() {
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q70 = sext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q70 = sext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s70 = sext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s70 = sext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q72 = zext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q72 = zext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s72 = zext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s72 = zext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64>
|
||||
@ -93,10 +93,10 @@ define i32 @casts() {
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8>
|
||||
@ -1145,14 +1145,14 @@ define i32 @casts() {
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q70 = sext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q70 = sext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q71 = sext <8 x i8> undef to <8 x i16>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s70 = sext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s70 = sext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q72 = zext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q72 = zext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q73 = zext <8 x i8> undef to <8 x i16>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s72 = zext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s72 = zext <4 x i8> undef to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64>
|
||||
@ -1161,10 +1161,10 @@ define i32 @casts() {
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8>
|
||||
@ -1668,14 +1668,14 @@ define i32 @load_extends() {
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64>
|
||||
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
@ -1746,7 +1746,7 @@ define i32 @load_extends() {
|
||||
; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64>
|
||||
; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64>
|
||||
; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64>
|
||||
@ -1782,7 +1782,7 @@ define i32 @load_extends() {
|
||||
; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64>
|
||||
; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64>
|
||||
; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64>
|
||||
@ -1812,14 +1812,14 @@ define i32 @load_extends() {
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext <8 x i8> %loadv8i8 to <8 x i16>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64>
|
||||
; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
|
114
test/Analysis/CostModel/ARM/shl-cast-vect.ll
Normal file
114
test/Analysis/CostModel/ARM/shl-cast-vect.ll
Normal file
@ -0,0 +1,114 @@
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
|
||||
; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
|
||||
; ASM lines below are only for reference; tests in that direction should go in test/CodeGen/ARM
|
||||
|
||||
; ModuleID = 'arm.ll'
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
|
||||
target triple = "armv7--linux-gnueabihf"
|
||||
|
||||
%T216 = type <2 x i16>
|
||||
%T232 = type <2 x i32>
|
||||
%T264 = type <2 x i64>
|
||||
|
||||
%T416 = type <4 x i16>
|
||||
%T432 = type <4 x i32>
|
||||
%T464 = type <4 x i64>
|
||||
|
||||
define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
|
||||
; COST: function 'direct':
|
||||
%v0 = load %T432, %T432* %loadaddr
|
||||
; ASM: vld1.64
|
||||
%v1 = load %T432, %T432* %loadaddr2
|
||||
; ASM: vld1.64
|
||||
%r3 = shl %T432 %v0, %v1
|
||||
; COST: cost of 2 for instruction: {{.*}} shl <4 x i32>
|
||||
; ASM: vshl.i32
|
||||
store %T432 %r3, %T432* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
|
||||
; COST: function 'ups1632':
|
||||
%v0 = load %T416, %T416* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T416, %T416* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r1 = sext %T416 %v0 to %T432
|
||||
%r2 = sext %T416 %v1 to %T432
|
||||
; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
|
||||
%r3 = shl %T432 %r1, %r2
|
||||
; COST: cost of 2 for instruction: {{.*}} shl <4 x i32>
|
||||
; ASM: vshll.s16
|
||||
store %T432 %r3, %T432* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
|
||||
; COST: function 'upu1632':
|
||||
%v0 = load %T416, %T416* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T416, %T416* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r1 = zext %T416 %v0 to %T432
|
||||
%r2 = zext %T416 %v1 to %T432
|
||||
; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
|
||||
%r3 = shl %T432 %r1, %r2
|
||||
; COST: cost of 2 for instruction: {{.*}} shl <4 x i32>
|
||||
; ASM: vshll.u16
|
||||
store %T432 %r3, %T432* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
|
||||
; COST: function 'ups3264':
|
||||
%v0 = load %T232, %T232* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T232, %T232* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r3 = shl %T232 %v0, %v1
|
||||
; ASM: vshl.i32
|
||||
; COST: cost of 2 for instruction: {{.*}} shl <2 x i32>
|
||||
%st = sext %T232 %r3 to %T264
|
||||
; ASM: vmovl.s32
|
||||
; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
|
||||
store %T264 %st, %T264* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
|
||||
; COST: function 'upu3264':
|
||||
%v0 = load %T232, %T232* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T232, %T232* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r3 = shl %T232 %v0, %v1
|
||||
; ASM: vshl.i32
|
||||
; COST: cost of 2 for instruction: {{.*}} shl <2 x i32>
|
||||
%st = zext %T232 %r3 to %T264
|
||||
; ASM: vmovl.u32
|
||||
; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
|
||||
store %T264 %st, %T264* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
|
||||
; COST: function 'dn3216':
|
||||
%v0 = load %T432, %T432* %loadaddr
|
||||
; ASM: vld1.64
|
||||
%v1 = load %T432, %T432* %loadaddr2
|
||||
; ASM: vld1.64
|
||||
%r3 = shl %T432 %v0, %v1
|
||||
; ASM: vshl.i32
|
||||
; COST: cost of 2 for instruction: {{.*}} shl <4 x i32>
|
||||
%st = trunc %T432 %r3 to %T416
|
||||
; ASM: vmovn.i32
|
||||
; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
|
||||
store %T416 %st, %T416* %storeaddr
|
||||
; ASM: vstr
|
||||
ret void
|
||||
}
|
114
test/Analysis/CostModel/ARM/sub-cast-vect.ll
Normal file
114
test/Analysis/CostModel/ARM/sub-cast-vect.ll
Normal file
@ -0,0 +1,114 @@
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
|
||||
; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
|
||||
; ASM lines below are only for reference; tests in that direction should go in test/CodeGen/ARM
|
||||
|
||||
; ModuleID = 'arm.ll'
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
|
||||
target triple = "armv7--linux-gnueabihf"
|
||||
|
||||
%T216 = type <2 x i16>
|
||||
%T232 = type <2 x i32>
|
||||
%T264 = type <2 x i64>
|
||||
|
||||
%T416 = type <4 x i16>
|
||||
%T432 = type <4 x i32>
|
||||
%T464 = type <4 x i64>
|
||||
|
||||
define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
|
||||
; COST: function 'direct':
|
||||
%v0 = load %T432, %T432* %loadaddr
|
||||
; ASM: vld1.64
|
||||
%v1 = load %T432, %T432* %loadaddr2
|
||||
; ASM: vld1.64
|
||||
%r3 = sub %T432 %v0, %v1
|
||||
; COST: cost of 1 for instruction: {{.*}} sub <4 x i32>
|
||||
; ASM: vsub.i32
|
||||
store %T432 %r3, %T432* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
|
||||
; COST: function 'ups1632':
|
||||
%v0 = load %T416, %T416* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T416, %T416* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r1 = sext %T416 %v0 to %T432
|
||||
%r2 = sext %T416 %v1 to %T432
|
||||
; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
|
||||
%r3 = sub %T432 %r1, %r2
|
||||
; COST: cost of 1 for instruction: {{.*}} sub <4 x i32>
|
||||
; ASM: vsubl.s16
|
||||
store %T432 %r3, %T432* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
|
||||
; COST: function 'upu1632':
|
||||
%v0 = load %T416, %T416* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T416, %T416* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r1 = zext %T416 %v0 to %T432
|
||||
%r2 = zext %T416 %v1 to %T432
|
||||
; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
|
||||
%r3 = sub %T432 %r1, %r2
|
||||
; COST: cost of 1 for instruction: {{.*}} sub <4 x i32>
|
||||
; ASM: vsubl.u16
|
||||
store %T432 %r3, %T432* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
|
||||
; COST: function 'ups3264':
|
||||
%v0 = load %T232, %T232* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T232, %T232* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r3 = sub %T232 %v0, %v1
|
||||
; ASM: vsub.i32
|
||||
; COST: cost of 1 for instruction: {{.*}} sub <2 x i32>
|
||||
%st = sext %T232 %r3 to %T264
|
||||
; ASM: vmovl.s32
|
||||
; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
|
||||
store %T264 %st, %T264* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
|
||||
; COST: function 'upu3264':
|
||||
%v0 = load %T232, %T232* %loadaddr
|
||||
; ASM: vldr
|
||||
%v1 = load %T232, %T232* %loadaddr2
|
||||
; ASM: vldr
|
||||
%r3 = sub %T232 %v0, %v1
|
||||
; ASM: vsub.i32
|
||||
; COST: cost of 1 for instruction: {{.*}} sub <2 x i32>
|
||||
%st = zext %T232 %r3 to %T264
|
||||
; ASM: vmovl.u32
|
||||
; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
|
||||
store %T264 %st, %T264* %storeaddr
|
||||
; ASM: vst1.64
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
|
||||
; COST: function 'dn3216':
|
||||
%v0 = load %T432, %T432* %loadaddr
|
||||
; ASM: vld1.64
|
||||
%v1 = load %T432, %T432* %loadaddr2
|
||||
; ASM: vld1.64
|
||||
%r3 = sub %T432 %v0, %v1
|
||||
; ASM: vsub.i32
|
||||
; COST: cost of 1 for instruction: {{.*}} sub <4 x i32>
|
||||
%st = trunc %T432 %r3 to %T416
|
||||
; ASM: vmovn.i32
|
||||
; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
|
||||
store %T416 %st, %T416* %storeaddr
|
||||
; ASM: vstr
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user