1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

remove 'FeatureSlowUAMem' from AMD CPUs based on 10H micro-arch or later

See discussion in D12154 ( http://reviews.llvm.org/D12154 ), AMD Software
Optimization Guides for 10H/12H/15H/16H, and Agner Fog's experimental data.

llvm-svn: 245733
This commit is contained in:
Sanjay Patel 2015-08-21 20:39:17 +00:00
parent 256ba29add
commit 256ad9fa9f
2 changed files with 14 additions and 18 deletions

View File

@ -433,21 +433,19 @@ def : Proc<"opteron-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
def : Proc<"athlon64-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"amdfam10", [FeatureSlowUAMem, FeatureSSE4A,
def : Proc<"amdfam10", [FeatureSSE4A,
Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"barcelona", [FeatureSlowUAMem, FeatureSSE4A,
def : Proc<"barcelona", [FeatureSSE4A,
Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowBTMem,
FeatureSlowSHLD]>;
// FIXME: We should remove 'FeatureSlowUAMem' from AMD chips under here.
// Bobcat
def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
FeatureSlowSHLD, FeatureSlowUAMem]>;
FeatureSlowSHLD]>;
// Jaguar
def : ProcessorModel<"btver2", BtVer2Model,
@ -461,15 +459,13 @@ def : ProcessorModel<"btver2", BtVer2Model,
def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureAVX, FeatureSSE4A, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowSHLD,
FeatureSlowUAMem]>;
FeaturePOPCNT, FeatureSlowSHLD]>;
// Piledriver
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureAVX, FeatureSSE4A, FeatureF16C,
FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
FeatureTBM, FeatureFMA, FeatureSlowSHLD,
FeatureSlowUAMem]>;
FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
// Steamroller
def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
@ -477,7 +473,7 @@ def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAVX, FeatureSSE4A, FeatureF16C,
FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
FeatureTBM, FeatureFMA, FeatureSlowSHLD,
FeatureFSGSBase, FeatureSlowUAMem]>;
FeatureFSGSBase]>;
// Excavator
def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
@ -485,7 +481,7 @@ def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
FeaturePOPCNT, FeatureBMI, FeatureBMI2,
FeatureTBM, FeatureFMA, FeatureSSE4A,
FeatureFSGSBase, FeatureSlowUAMem]>;
FeatureFSGSBase]>;
def : Proc<"geode", [FeatureSlowUAMem, Feature3DNowA]>;

View File

@ -39,14 +39,14 @@
; AMD chips with fast unaligned memory accesses
; FIXME: These are wrong except for btver2.
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST
; Other chips with slow unaligned memory accesses