mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 13:11:39 +01:00
Cost Model: Improve the accuracy of the zext/sext/trunc vector cost estimation.
llvm-svn: 167412
This commit is contained in:
parent
0befa1c4e0
commit
8ddfd47801
@ -101,7 +101,7 @@ int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const {
|
||||
case AtomicRMW: return 0;
|
||||
case Trunc: return ISD::TRUNCATE;
|
||||
case ZExt: return ISD::ZERO_EXTEND;
|
||||
case SExt: return ISD::SEXTLOAD;
|
||||
case SExt: return ISD::SIGN_EXTEND;
|
||||
case FPToUI: return ISD::FP_TO_UINT;
|
||||
case FPToSI: return ISD::FP_TO_SINT;
|
||||
case UIToFP: return ISD::UINT_TO_FP;
|
||||
@ -235,9 +235,17 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
|
||||
|
||||
// Bitcast between types that are legalized to the same type are free.
|
||||
if (Opcode == Instruction::BitCast)
|
||||
if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
|
||||
return 0;
|
||||
|
||||
// Assume that Zext is done using AND.
|
||||
if (Opcode == Instruction::ZExt)
|
||||
return 1;
|
||||
|
||||
// Assume that sext is done using SHL and SRA.
|
||||
if (Opcode == Instruction::SExt)
|
||||
return 2;
|
||||
|
||||
// Just check the op cost. If the operation is legal then assume it costs
|
||||
// 1 and multiply by the type-legalization overhead.
|
||||
if (!TLI->isOperationExpand(ISD, DstLT.second))
|
||||
@ -310,7 +318,6 @@ unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// Returns the expected cost of Vector Insert and Extract.
|
||||
unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode,
|
||||
Type *Val,
|
||||
unsigned Index) const {
|
||||
|
34
test/Analysis/CostModel/X86/cast.ll
Normal file
34
test/Analysis/CostModel/X86/cast.ll
Normal file
@ -0,0 +1,34 @@
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
define i32 @add(i32 %arg) {
|
||||
|
||||
; -- Same size registers --
|
||||
;CHECK: cost of 1 {{.*}} zext
|
||||
%A = zext <4 x i1> undef to <4 x i32>
|
||||
;CHECK: cost of 2 {{.*}} sext
|
||||
%B = sext <4 x i1> undef to <4 x i32>
|
||||
;CHECK: cost of 0 {{.*}} trunc
|
||||
%C = trunc <4 x i32> undef to <4 x i1>
|
||||
|
||||
; -- Different size registers --
|
||||
;CHECK-NOT: cost of 1 {{.*}} zext
|
||||
%D = zext <8 x i1> undef to <8 x i32>
|
||||
;CHECK-NOT: cost of 2 {{.*}} sext
|
||||
%E = sext <8 x i1> undef to <8 x i32>
|
||||
;CHECK-NOT: cost of 2 {{.*}} trunc
|
||||
%F = trunc <8 x i32> undef to <8 x i1>
|
||||
|
||||
; -- scalars --
|
||||
|
||||
;CHECK: cost of 1 {{.*}} zext
|
||||
%G = zext i1 undef to i32
|
||||
;CHECK: cost of 0 {{.*}} trunc
|
||||
%H = trunc i32 undef to i1
|
||||
|
||||
;CHECK: cost of 1 {{.*}} ret
|
||||
ret i32 undef
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
;CHECK: @conversion_cost1
|
||||
;CHECK: store <8 x i8>
|
||||
;CHECK: store <2 x i8>
|
||||
;CHECK: ret
|
||||
define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
|
||||
%1 = icmp sgt i32 %n, 3
|
||||
@ -25,7 +25,7 @@ define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) noun
|
||||
}
|
||||
|
||||
;CHECK: @conversion_cost2
|
||||
;CHECK-NOT: <8 x float>
|
||||
;CHECK: <2 x float>
|
||||
;CHECK: ret
|
||||
define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
|
||||
%1 = icmp sgt i32 %n, 9
|
||||
|
Loading…
x
Reference in New Issue
Block a user