mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[X86] AMD znver2 enablement
This patch enables the following:
1) AMD family 17h "znver2" tune flag (-march, -mcpu).
2) ISAs that are enabled for the "znver2" architecture.
3) For the time being, it uses the znver1 scheduler model.
4) Tests are updated.
5) Scheduler descriptions are yet to be put in place.
Reviewers: craig.topper
Differential Revision: https://reviews.llvm.org/D58343
llvm-svn: 354897
This commit is contained in:
parent
53e0093564
commit
9a9159179f
@ -98,6 +98,7 @@ X86_CPU_SUBTYPE_COMPAT("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512, "skylake-a
|
||||
X86_CPU_SUBTYPE_COMPAT("cannonlake", INTEL_COREI7_CANNONLAKE, "cannonlake")
|
||||
X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client")
|
||||
X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server")
|
||||
X86_CPU_SUBTYPE_COMPAT("znver2", AMDFAM17H_ZNVER2, "znver2")
|
||||
// Entries below this are not in libgcc/compiler-rt.
|
||||
X86_CPU_SUBTYPE ("core2", INTEL_CORE2_65)
|
||||
X86_CPU_SUBTYPE ("penryn", INTEL_CORE2_45)
|
||||
|
@ -916,7 +916,14 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
|
||||
break; // "btver2"
|
||||
case 23:
|
||||
*Type = X86::AMDFAM17H;
|
||||
*Subtype = X86::AMDFAM17H_ZNVER1;
|
||||
if (Model >= 0x30 && Model <= 0x3f) {
|
||||
*Subtype = X86::AMDFAM17H_ZNVER2;
|
||||
break; // "znver2"; 30h-3fh: Zen2
|
||||
}
|
||||
if (Model <= 0x0f) {
|
||||
*Subtype = X86::AMDFAM17H_ZNVER1;
|
||||
break; // "znver1"; 00h-0Fh: Zen1
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break; // "generic"
|
||||
|
@ -1143,8 +1143,8 @@ def : Proc<"bdver4", [
|
||||
FeatureMacroFusion
|
||||
]>;
|
||||
|
||||
// Znver1
|
||||
def: ProcessorModel<"znver1", Znver1Model, [
|
||||
// AMD Zen Processors common ISAs
|
||||
def ZNFeatures : ProcessorFeatures<[], [
|
||||
FeatureADX,
|
||||
FeatureAES,
|
||||
FeatureAVX2,
|
||||
@ -1183,6 +1183,19 @@ def: ProcessorModel<"znver1", Znver1Model, [
|
||||
FeatureXSAVEOPT,
|
||||
FeatureXSAVES]>;
|
||||
|
||||
class Znver1Proc<string Name> : ProcModel<Name, Znver1Model,
|
||||
ZNFeatures.Value, [
|
||||
]>;
|
||||
def : Znver1Proc<"znver1">;
|
||||
|
||||
class Znver2Proc<string Name> : ProcModel<Name, Znver1Model,
|
||||
ZNFeatures.Value, [
|
||||
FeatureCLWB,
|
||||
FeatureRDPID,
|
||||
FeatureWBNOINVD
|
||||
]>;
|
||||
def : Znver2Proc<"znver2">;
|
||||
|
||||
def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
|
||||
|
||||
def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
|
||||
|
@ -26,6 +26,7 @@
|
||||
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
|
||||
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
|
||||
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
|
||||
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
|
||||
|
||||
define void @foo() {
|
||||
ret void
|
||||
|
@ -5,6 +5,8 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
|
||||
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
|
||||
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
|
||||
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
|
||||
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
|
||||
|
||||
; Test one 32-bit input, output is 32-bit, no transformations expected.
|
||||
define i32 @test_zext_cmp0(i32 %a) {
|
||||
|
@ -47,6 +47,7 @@
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=FAST
|
||||
|
||||
; Other chips with slow unaligned memory accesses
|
||||
|
||||
|
@ -13,8 +13,9 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
|
||||
|
||||
; Verify that for the X86_64 processors that are known to have poor latency
|
||||
; Verify that for the X86_64 processors that are known to have poor latency
|
||||
; double precision shift instructions we do not generate 'shld' or 'shrd'
|
||||
; instructions.
|
||||
|
||||
@ -25,7 +26,7 @@
|
||||
|
||||
define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone {
|
||||
entry:
|
||||
; CHECK-NOT: shld
|
||||
; CHECK-NOT: shld
|
||||
%sh_prom = zext i32 %c to i64
|
||||
%shl = shl i64 %a, %sh_prom
|
||||
%sub = sub nsw i32 64, %c
|
||||
|
Loading…
Reference in New Issue
Block a user