mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[TargetTransformInfo] Handle intrinsic call in getInstructionLatency()
Usually an intrinsic is lowered to a simple target instruction, so it should have a small latency, whereas a real function call has a much larger latency. Handle intrinsic calls separately in getInstructionLatency() so that they are not costed like real calls. Differential Revision: https://reviews.llvm.org/D38104 llvm-svn: 314003
This commit is contained in:
parent
76c70d6c2c
commit
af71947aaf
@ -785,16 +785,27 @@ public:
|
||||
if (getUserCost(I, Operands) == TTI::TCC_Free)
|
||||
return 0;
|
||||
|
||||
if (isa<CallInst>(I))
|
||||
return 40;
|
||||
|
||||
if (isa<LoadInst>(I))
|
||||
return 4;
|
||||
|
||||
Type *dstTy = I->getType();
|
||||
if (VectorType *VectorTy = dyn_cast<VectorType>(dstTy))
|
||||
dstTy = VectorTy->getElementType();
|
||||
if (dstTy->isFloatingPointTy())
|
||||
Type *DstTy = I->getType();
|
||||
|
||||
// Usually an intrinsic is a simple instruction.
|
||||
// A real function call is much slower.
|
||||
if (auto *CI = dyn_cast<CallInst>(I)) {
|
||||
const Function *F = CI->getCalledFunction();
|
||||
if (static_cast<T *>(this)->isLoweredToCall(F))
|
||||
return 40;
|
||||
// Some intrinsics return a value and a flag, we use the value type
|
||||
// to decide its latency.
|
||||
if (StructType* StructTy = dyn_cast<StructType>(DstTy))
|
||||
DstTy = StructTy->getElementType(0);
|
||||
// Fall through to simple instructions.
|
||||
}
|
||||
|
||||
if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
|
||||
DstTy = VectorTy->getElementType();
|
||||
if (DstTy->isFloatingPointTy())
|
||||
return 3;
|
||||
|
||||
return 1;
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
|
||||
|
||||
define i64 @foo(i64 %arg) {
|
||||
|
||||
; LATENCY: cost of 0 {{.*}} alloca i32
|
||||
@ -39,6 +41,10 @@ define i64 @foo(i64 %arg) {
|
||||
; CODESIZE: cost of 0 {{.*}} trunc
|
||||
%TC = trunc i64 undef to i32
|
||||
|
||||
; LATENCY: cost of 1 {{.*}} call
|
||||
; CODESIZE: cost of 1 {{.*}} call
|
||||
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
|
||||
|
||||
; LATENCY: cost of 1 {{.*}} ret
|
||||
; CODESIZE: cost of 1 {{.*}} ret
|
||||
ret i64 undef
|
||||
|
Loading…
Reference in New Issue
Block a user