1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[AARch64] Add Marvell ThunderX3T110 support

This is the first checkin to support Marvell ThunderX3T110.

Initial definition of the micro-ops of the instructions in ThunderX3T110
is included.

Differential Revision: https://reviews.llvm.org/D78129
This commit is contained in:
Wei Zhao 2020-05-13 16:38:42 -07:00 committed by Joel Jones
parent 3a9bcbe9e6
commit f6a8e3ef0e
21 changed files with 2073 additions and 16 deletions

View File

@ -165,6 +165,10 @@ AARCH64_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
AARCH64_CPU_NAME("thunderx2t99", ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_NONE))
AARCH64_CPU_NAME("thunderx3t110", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC | AEK_CRYPTO | AEK_FP | AEK_SIMD |
AEK_LSE | AEK_RAND | AArch64::AEK_PROFILE |
AArch64::AEK_RAS))
AARCH64_CPU_NAME("thunderx", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("thunderxt88", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,

View File

@ -472,6 +472,10 @@ def SVEUnsupported : AArch64Unsupported {
HasSVE2BitPerm];
}
def PAUnsupported : AArch64Unsupported {
let F = [HasPA];
}
include "AArch64SchedA53.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
@ -482,6 +486,7 @@ include "AArch64SchedExynosM4.td"
include "AArch64SchedExynosM5.td"
include "AArch64SchedThunderX.td"
include "AArch64SchedThunderX2T99.td"
include "AArch64SchedThunderX3T110.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors", [
@ -830,6 +835,25 @@ def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
FeatureLSE,
HasV8_1aOps]>;
def ProcThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
"ThunderX3T110",
"Marvell ThunderX3 processors", [
FeatureAggressiveFMA,
FeatureCRC,
FeatureCrypto,
FeatureFPARMv8,
FeatureArithmeticBccFusion,
FeatureNEON,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureLSE,
FeaturePA,
FeatureUseAA,
FeatureBalanceFPOps,
FeaturePerfMon,
FeatureStrictAlign,
HasV8_3aOps]>;
def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
"Cavium ThunderX processors", [
FeatureCRC,
@ -929,6 +953,8 @@ def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
// Marvell ThunderX3T110 Processors.
def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, [ProcThunderX3T110]>;
// FIXME: HiSilicon TSV110 is currently modeled as a Cortex-A57.
def : ProcessorModel<"tsv110", CortexA57Model, [ProcTSV110]>;

View File

@ -26,7 +26,8 @@ def CortexA53Model : SchedMachineModel {
// v 1.0 Spreadsheet
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F);
}

View File

@ -31,7 +31,8 @@ def CortexA57Model : SchedMachineModel {
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F);
}
//===----------------------------------------------------------------------===//

View File

@ -18,7 +18,8 @@ def CycloneModel : SchedMachineModel {
let MispredictPenalty = 16; // 14-19 cycles are typical.
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F);
}
//===----------------------------------------------------------------------===//

View File

@ -24,7 +24,8 @@ def ExynosM3Model : SchedMachineModel {
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F);
}
//===----------------------------------------------------------------------===//

View File

@ -24,7 +24,8 @@ def ExynosM4Model : SchedMachineModel {
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F);
}
//===----------------------------------------------------------------------===//

View File

@ -24,7 +24,8 @@ def ExynosM5Model : SchedMachineModel {
let MispredictPenalty = 15; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F);
}
//===----------------------------------------------------------------------===//

View File

@ -23,8 +23,8 @@ def FalkorModel : SchedMachineModel {
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}

View File

@ -27,8 +27,8 @@ def KryoModel : SchedMachineModel {
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}

View File

@ -25,8 +25,8 @@ def ThunderXT8XModel : SchedMachineModel {
let PostRAScheduler = 1; // Use PostRA scheduler.
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}

View File

@ -25,8 +25,8 @@ def ThunderX2T99Model : SchedMachineModel {
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -168,6 +168,17 @@ void AArch64Subtarget::initializeProperties() {
PrefFunctionLogAlignment = 4;
PrefLoopLogAlignment = 2;
break;
case ThunderX3T110:
CacheLineSize = 64;
PrefFunctionLogAlignment = 4;
PrefLoopLogAlignment = 2;
MaxInterleaveFactor = 4;
PrefetchDistance = 128;
MinPrefetchStride = 1024;
MaxPrefetchIterationsAhead = 4;
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
}
}

View File

@ -66,7 +66,8 @@ public:
ThunderXT81,
ThunderXT83,
ThunderXT88,
TSV110
TSV110,
ThunderX3T110
};
protected:

View File

@ -2,6 +2,7 @@
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
#
name: f1_2s
registers:

View File

@ -24,6 +24,7 @@
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=saphira 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=kryo 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx2t99 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx3t110 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=tsv110 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=apple-latest 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=a64fx 2>&1 | FileCheck %s

View File

@ -6,6 +6,7 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=kryo < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=thunderx2t99 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=thunderx3t110 < %s | FileCheck %s
; Make sure that inst-combine fuses the multiply add in the addressing mode of
; the load.

View File

@ -19,6 +19,7 @@
; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=thunderxt83 < %s | FileCheck --check-prefixes=ALIGN3,CHECK %s
; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=thunderxt88 < %s | FileCheck --check-prefixes=ALIGN3,CHECK %s
; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 < %s | FileCheck --check-prefixes=ALIGN3,CHECK %s
; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 < %s | FileCheck --check-prefixes=ALIGN4,CHECK %s
; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 < %s | FileCheck --check-prefixes=ALIGN5,CHECK %s
define void @test() {

View File

@ -19,6 +19,7 @@
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx2t99 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=tsv110 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx3t110 -o - %s | FileCheck %s
%X = type { i64, i64, i64 }
declare void @f(%X*)

View File

@ -942,6 +942,13 @@ TEST(TargetParserTest, testAArch64CPU) {
"thunderx2t99", "armv8.1-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_LSE |
AArch64::AEK_RDM | AArch64::AEK_FP | AArch64::AEK_SIMD, "8.1-A"));
EXPECT_TRUE(testAArch64CPU(
"thunderx3t110", "armv8.3-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_LSE |
AArch64::AEK_RDM | AArch64::AEK_FP | AArch64::AEK_SIMD |
AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_RAND |
AArch64::AEK_RCPC,
"8.3-A"));
EXPECT_TRUE(testAArch64CPU(
"thunderx", "armv8-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_SIMD |
@ -983,7 +990,7 @@ TEST(TargetParserTest, testAArch64CPU) {
"8.2-A"));
}
static constexpr unsigned NumAArch64CPUArchs = 38;
static constexpr unsigned NumAArch64CPUArchs = 39;
TEST(TargetParserTest, testAArch64CPUArchList) {
SmallVector<StringRef, NumAArch64CPUArchs> List;