1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

AVX-512: added cost for some AVX-512 instructions

llvm-svn: 217863
This commit is contained in:
Elena Demikhovsky 2014-09-16 07:57:37 +00:00
parent b42946011d
commit 17fcb19667
3 changed files with 112 additions and 0 deletions

View File

@ -225,6 +225,15 @@ unsigned X86TTI::getArithmeticInstrCost(
return LT.first * AVX2UniformConstCostTable[Idx].Cost;
}
// Arithmetic cost entries for AVX-512, keyed on (ISD opcode, vector type).
// Only the three shift opcodes (SHL, SRL, SRA) on the 512-bit integer
// vector types v16i32 and v8i64 are listed, each with a cost of 1.
// The hasAVX512() lookup further down in this function multiplies the
// matched entry by LT.first.
static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
{ ISD::SHL, MVT::v16i32, 1 },
{ ISD::SRL, MVT::v16i32, 1 },
{ ISD::SRA, MVT::v16i32, 1 },
{ ISD::SHL, MVT::v8i64, 1 },
{ ISD::SRL, MVT::v8i64, 1 },
{ ISD::SRA, MVT::v8i64, 1 },
};
static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
// customize them to detect the cases where shift amount is a scalar one.
@ -260,6 +269,11 @@ unsigned X86TTI::getArithmeticInstrCost(
{ ISD::UDIV, MVT::v4i64, 4*20 },
};
// Consult the AVX-512 table first, ahead of the AVX2 handling that follows.
// A hit returns immediately with the table cost scaled by LT.first; a miss
// (lookup yields -1) falls through to the AVX2 and later checks.
if (ST->hasAVX512()) {
int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
if (Idx != -1)
return LT.first * AVX512CostTable[Idx].Cost;
}
// Look for AVX2 lowering tricks.
if (ST->hasAVX2()) {
if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
@ -580,6 +594,38 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
return LTSrc.first * SSE2ConvTbl[Idx].Cost;
}
// Conversion cost entries for AVX-512. Each row is
// { ISD opcode, destination type, source type, cost }.
// Rows that pair a 16 x 32-bit type with an 8 x 64-bit type (for example
// v8f64 <- v16f32 or v16i32 <- v8i64) carry a higher cost.
// NOTE(review): these appear to price a conversion whose 64-bit side is
// split in two by legalization, so the split is already included in the
// entry - confirm against the legalizer.
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
AVX512ConversionTbl[] = {
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
{ ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 },
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
{ ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 },
// v16i1 -> v16i32 - load + broadcast
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
};
// The table is searched with the legalized destination and source types.
// Unlike the SSE2 lookup just above, which scales by LTSrc.first, a hit
// here returns the entry's cost as-is, with no legalization multiplier.
if (ST->hasAVX512()) {
int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
LTSrc.second);
if (Idx != -1)
return AVX512ConversionTbl[Idx].Cost;
}
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
@ -612,6 +658,9 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
};
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
@ -738,6 +787,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v32i8, 1 },
};
// Compare cost entries for AVX-512: SETCC on the 512-bit vector types
// v8i64, v16i32, v8f64 and v16f32, each with a cost of 1.
static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
{ ISD::SETCC, MVT::v8i64, 1 },
{ ISD::SETCC, MVT::v16i32, 1 },
{ ISD::SETCC, MVT::v8f64, 1 },
{ ISD::SETCC, MVT::v16f32, 1 },
};
// Checked before the AVX2 table. A hit returns the entry's cost scaled by
// LT.first; a miss (lookup yields -1) falls through to the AVX2 lookup.
if (ST->hasAVX512()) {
int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
if (Idx != -1)
return LT.first * AVX512CostTbl[Idx].Cost;
}
if (ST->hasAVX2()) {
int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
if (Idx != -1)

View File

@ -1,3 +1,4 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
@ -83,6 +84,19 @@ define i32 @zext_sext(<8 x i1> %in) {
;CHECK-AVX: cost of 4 {{.*}} zext
%D = zext <4 x i32> undef to <4 x i64>
;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext
%D1 = zext <16 x i32> undef to <16 x i64>
;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext
%D2 = sext <16 x i32> undef to <16 x i64>
;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
%D3 = zext <16 x i16> undef to <16 x i32>
;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
%D4 = zext <16 x i8> undef to <16 x i32>
;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
%D5 = zext <16 x i1> undef to <16 x i32>
;CHECK-AVX2: cost of 2 {{.*}} trunc
;CHECK-AVX: cost of 4 {{.*}} trunc
%E = trunc <4 x i64> undef to <4 x i32>
@ -101,8 +115,12 @@ define i32 @zext_sext(<8 x i1> %in) {
;CHECK-AVX2: cost of 4 {{.*}} trunc
;CHECK-AVX: cost of 9 {{.*}} trunc
;CHECK-AVX512: cost of 1 {{.*}} %G = trunc
%G = trunc <8 x i64> undef to <8 x i32>
;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc
%G1 = trunc <16 x i64> undef to <16 x i32>
ret i32 undef
}
@ -211,3 +229,24 @@ define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
%D1 = uitofp <8 x i32> %d to <8 x float>
ret void
}
; Cost-model expectations for vector floating-point extension (fpext) and
; truncation (fptrunc) between float and double element types.
; Every pattern names the value it is meant to match: %A1 and %A3 are the
; same instruction, so an unnamed "fpext" pattern could be satisfied by the
; wrong line and hide a wrong cost.
define void @fp_conv(<8 x float> %a, <16 x float>%b) {
;CHECK-LABEL: for function 'fp_conv'
; CHECK-AVX512: cost of 1 {{.*}} %A1 = fpext
%A1 = fpext <8 x float> %a to <8 x double>
; CHECK-AVX512: cost of 3 {{.*}} %A2 = fpext
%A2 = fpext <16 x float> %b to <16 x double>
; CHECK-AVX2: cost of 3 {{.*}} %A3 = fpext
; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
%A3 = fpext <8 x float> %a to <8 x double>
; CHECK-AVX2: cost of 3 {{.*}} %A4 = fptrunc
; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
%A4 = fptrunc <8 x double> undef to <8 x float>
; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc
%A5 = fptrunc <16 x double> undef to <16 x float>
ret void
}

View File

@ -1,5 +1,6 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck -check-prefix=CHECK -check-prefix=AVX1 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck -check-prefix=CHECK -check-prefix=AVX2 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck -check-prefix=CHECK -check-prefix=AVX512 %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@ -22,6 +23,11 @@ define i32 @cmp(i32 %arg) {
;AVX2: cost of 1 {{.*}} fcmp
%E = fcmp olt <4 x double> undef, undef
; AVX512: cost of 1 {{.*}} %E1 = fcmp
%E1 = fcmp olt <16 x float> undef, undef
; AVX512: cost of 2 {{.*}} %E2 = fcmp
%E2 = fcmp olt <16 x double> undef, undef
; -- integers --
;AVX1: cost of 1 {{.*}} icmp
@ -49,6 +55,11 @@ define i32 @cmp(i32 %arg) {
;AVX2: cost of 1 {{.*}} icmp
%M = icmp eq <32 x i8> undef, undef
; AVX512: cost of 1 {{.*}} %M1 = icmp
%M1 = icmp eq <16 x i32> undef, undef
; AVX512: cost of 2 {{.*}} %M2 = icmp
%M2 = icmp eq <16 x i64> undef, undef
;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}