From e25a1a8f419078895b4f3028791de4eb3825c438 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Mon, 12 Jul 2021 10:58:36 +0000 Subject: [PATCH] [AArch64] Add target features for Armv9-A Scalable Matrix Extension (SME) First patch in a series adding MC layer support for the Arm Scalable Matrix Extension. This patch adds the following features: sme, sme-i64, sme-f64 The sme-i64 and sme-f64 flags are for the optional I16I64 and F64F64 features. If a target supports I16I64 then the following instructions are implemented: * 64-bit integer ADDHA and ADDVA variants (D105570). * SMOPA, SMOPS, SUMOPA, SUMOPS, UMOPA, UMOPS, USMOPA, and USMOPS instructions that accumulate 16-bit integer outer products into 64-bit integer tiles. If a target supports F64F64 then the FMOPA and FMOPS instructions that accumulate double-precision floating-point outer products into double-precision tiles are implemented. Outer products are implemented in D105571. The reference can be found here: https://developer.arm.com/documentation/ddi0602/2021-06 Reviewed By: CarolineConcatto Differential Revision: https://reviews.llvm.org/D105569 --- include/llvm/Support/AArch64TargetParser.def | 3 +++ include/llvm/Support/AArch64TargetParser.h | 3 +++ lib/Support/AArch64TargetParser.cpp | 6 ++++++ lib/Target/AArch64/AArch64.td | 15 +++++++++++++++ lib/Target/AArch64/AArch64InstrInfo.td | 6 ++++++ lib/Target/AArch64/AArch64SchedA53.td | 3 ++- lib/Target/AArch64/AArch64SchedA57.td | 3 ++- lib/Target/AArch64/AArch64SchedCyclone.td | 3 ++- lib/Target/AArch64/AArch64SchedExynosM3.td | 3 ++- lib/Target/AArch64/AArch64SchedExynosM4.td | 3 ++- lib/Target/AArch64/AArch64SchedExynosM5.td | 3 ++- lib/Target/AArch64/AArch64SchedFalkor.td | 3 ++- lib/Target/AArch64/AArch64SchedKryo.td | 3 ++- lib/Target/AArch64/AArch64SchedThunderX.td | 3 ++- lib/Target/AArch64/AArch64SchedThunderX2T99.td | 3 ++- lib/Target/AArch64/AArch64Subtarget.h | 10 ++++++++++ test/MC/AArch64/SME/feature.s | 11 +++++++++++ 
unittests/Support/TargetParserTest.cpp | 10 +++++++++- 18 files changed, 83 insertions(+), 11 deletions(-) create mode 100644 test/MC/AArch64/SME/feature.s diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def index eb333f299d4..ae2fc673c54 100644 --- a/include/llvm/Support/AArch64TargetParser.def +++ b/include/llvm/Support/AArch64TargetParser.def @@ -110,6 +110,9 @@ AARCH64_ARCH_EXT_NAME("ls64", AArch64::AEK_LS64, "+ls64", "-ls64 AARCH64_ARCH_EXT_NAME("brbe", AArch64::AEK_BRBE, "+brbe", "-brbe") AARCH64_ARCH_EXT_NAME("pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth") AARCH64_ARCH_EXT_NAME("flagm", AArch64::AEK_FLAGM, "+flagm", "-flagm") +AARCH64_ARCH_EXT_NAME("sme", AArch64::AEK_SME, "+sme", "-sme") +AARCH64_ARCH_EXT_NAME("sme-f64", AArch64::AEK_SMEF64, "+sme-f64", "-sme-f64") +AARCH64_ARCH_EXT_NAME("sme-i64", AArch64::AEK_SMEI64, "+sme-i64", "-sme-i64") #undef AARCH64_ARCH_EXT_NAME #ifndef AARCH64_CPU_NAME diff --git a/include/llvm/Support/AArch64TargetParser.h b/include/llvm/Support/AArch64TargetParser.h index 5772c03b095..131a58412db 100644 --- a/include/llvm/Support/AArch64TargetParser.h +++ b/include/llvm/Support/AArch64TargetParser.h @@ -66,6 +66,9 @@ enum ArchExtKind : uint64_t { AEK_BRBE = 1ULL << 34, AEK_PAUTH = 1ULL << 35, AEK_FLAGM = 1ULL << 36, + AEK_SME = 1ULL << 37, + AEK_SMEF64 = 1ULL << 38, + AEK_SMEI64 = 1ULL << 39, }; enum class ArchKind { diff --git a/lib/Support/AArch64TargetParser.cpp b/lib/Support/AArch64TargetParser.cpp index 503a7bd49d1..2993892097e 100644 --- a/lib/Support/AArch64TargetParser.cpp +++ b/lib/Support/AArch64TargetParser.cpp @@ -106,6 +106,12 @@ bool AArch64::getExtensionFeatures(uint64_t Extensions, Features.push_back("+pauth"); if (Extensions & AEK_FLAGM) Features.push_back("+flagm"); + if (Extensions & AArch64::AEK_SME) + Features.push_back("+sme"); + if (Extensions & AArch64::AEK_SMEF64) + Features.push_back("+sme-f64"); + if (Extensions & AArch64::AEK_SMEI64) + 
Features.push_back("+sme-i64"); return true; } diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index 4e1dc81a8aa..d8dd9d1b2f9 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -429,6 +429,17 @@ def FeatureEnhancedCounterVirtualization : def FeatureRME : SubtargetFeature<"rme", "HasRME", "true", "Enable Realm Management Extension">; +// FIXME: SME should only imply the subset of SVE(2) instructions that are +// legal in streaming mode. +def FeatureSME : SubtargetFeature<"sme", "HasSME", "true", + "Enable Scalable Matrix Extension (SME)", [FeatureSVE2, FeatureBF16]>; + +def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true", + "Enable Scalable Matrix Extension (SME) F64F64 instructions", [FeatureSME]>; + +def FeatureSMEI64 : SubtargetFeature<"sme-i64", "HasSMEI64", "true", + "Enable Scalable Matrix Extension (SME) I16I64 instructions", [FeatureSME]>; + //===----------------------------------------------------------------------===// // Architectures. 
// @@ -549,6 +560,10 @@ def PAUnsupported : AArch64Unsupported { let F = [HasPAuth]; } +def SMEUnsupported : AArch64Unsupported { + let F = [HasSME, HasSMEF64, HasSMEI64]; +} + include "AArch64SchedA53.td" include "AArch64SchedA55.td" include "AArch64SchedA57.td" diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index b921a6c4b88..6b659d55d3e 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -122,6 +122,12 @@ def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, AssemblerPredicate<(all_of FeatureSVE2SHA3), "sve2-sha3">; def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, AssemblerPredicate<(all_of FeatureSVE2BitPerm), "sve2-bitperm">; +def HasSME : Predicate<"Subtarget->hasSME()">, + AssemblerPredicate<(all_of FeatureSME), "sme">; +def HasSMEF64 : Predicate<"Subtarget->hasSMEF64()">, + AssemblerPredicate<(all_of FeatureSMEF64), "sme-f64">; +def HasSMEI64 : Predicate<"Subtarget->hasSMEI64()">, + AssemblerPredicate<(all_of FeatureSMEI64), "sme-i64">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicate<(all_of FeatureRCPC), "rcpc">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td index c5ff1fcb274..65c84b1f39c 100644 --- a/lib/Target/AArch64/AArch64SchedA53.td +++ b/lib/Target/AArch64/AArch64SchedA53.td @@ -27,7 +27,8 @@ def CortexA53Model : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); + PAUnsupported.F, + SMEUnsupported.F); } diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td index aa5bec8088e..c1eacca8cc1 100644 --- a/lib/Target/AArch64/AArch64SchedA57.td +++ b/lib/Target/AArch64/AArch64SchedA57.td @@ -32,7 +32,8 @@ def CortexA57Model : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = 
!listconcat(SVEUnsupported.F, - PAUnsupported.F); + PAUnsupported.F, + SMEUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64SchedCyclone.td b/lib/Target/AArch64/AArch64SchedCyclone.td index 8abcb804d5c..11df304a974 100644 --- a/lib/Target/AArch64/AArch64SchedCyclone.td +++ b/lib/Target/AArch64/AArch64SchedCyclone.td @@ -19,7 +19,8 @@ def CycloneModel : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); + PAUnsupported.F, + SMEUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64SchedExynosM3.td b/lib/Target/AArch64/AArch64SchedExynosM3.td index 8413a06ed39..6a33258be02 100644 --- a/lib/Target/AArch64/AArch64SchedExynosM3.td +++ b/lib/Target/AArch64/AArch64SchedExynosM3.td @@ -25,7 +25,8 @@ def ExynosM3Model : SchedMachineModel { let CompleteModel = 1; // Use the default model otherwise. list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); + PAUnsupported.F, + SMEUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64SchedExynosM4.td b/lib/Target/AArch64/AArch64SchedExynosM4.td index 34e8beb423c..db066a19b0b 100644 --- a/lib/Target/AArch64/AArch64SchedExynosM4.td +++ b/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -25,7 +25,8 @@ def ExynosM4Model : SchedMachineModel { let CompleteModel = 1; // Use the default model otherwise. 
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); + PAUnsupported.F, + SMEUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64SchedExynosM5.td b/lib/Target/AArch64/AArch64SchedExynosM5.td index 403aac80e47..0429b6ab2ee 100644 --- a/lib/Target/AArch64/AArch64SchedExynosM5.td +++ b/lib/Target/AArch64/AArch64SchedExynosM5.td @@ -25,7 +25,8 @@ def ExynosM5Model : SchedMachineModel { let CompleteModel = 1; // Use the default model otherwise. list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); + PAUnsupported.F, + SMEUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64SchedFalkor.td b/lib/Target/AArch64/AArch64SchedFalkor.td index a17ab36d7f9..8bb95e44224 100644 --- a/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/lib/Target/AArch64/AArch64SchedFalkor.td @@ -24,7 +24,8 @@ def FalkorModel : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); + PAUnsupported.F, + SMEUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/lib/Target/AArch64/AArch64SchedKryo.td b/lib/Target/AArch64/AArch64SchedKryo.td index ba14bf1f50d..45964e1ed6d 100644 --- a/lib/Target/AArch64/AArch64SchedKryo.td +++ b/lib/Target/AArch64/AArch64SchedKryo.td @@ -28,7 +28,8 @@ def KryoModel : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); + PAUnsupported.F, + SMEUnsupported.F); // FIXME: Remove when all errors have been fixed. 
let FullInstRWOverlapCheck = 0; } diff --git a/lib/Target/AArch64/AArch64SchedThunderX.td b/lib/Target/AArch64/AArch64SchedThunderX.td index 9c50f970858..125eb284cfd 100644 --- a/lib/Target/AArch64/AArch64SchedThunderX.td +++ b/lib/Target/AArch64/AArch64SchedThunderX.td @@ -26,7 +26,8 @@ def ThunderXT8XModel : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); + PAUnsupported.F, + SMEUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td index 95c29dd2a56..8d8675b7ac6 100644 --- a/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -26,7 +26,8 @@ def ThunderX2T99Model : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); + PAUnsupported.F, + SMEUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 491e7bdaa8d..e0ef8df6fca 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -186,6 +186,11 @@ protected: // Armv9-A Extensions bool HasRME = false; + // Arm Scalable Matrix Extension (SME) + bool HasSME = false; + bool HasSMEF64 = false; + bool HasSMEI64 = false; + // Future architecture extensions. 
bool HasETE = false; bool HasTRBE = false; @@ -485,6 +490,11 @@ public: return HasEnhancedCounterVirtualization; } + // Arm Scalable Matrix Extension (SME) + bool hasSME() const { return HasSME; } + bool hasSMEF64() const { return HasSMEF64; } + bool hasSMEI64() const { return HasSMEI64; } + bool isLittleEndian() const { return IsLittle; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } diff --git a/test/MC/AArch64/SME/feature.s b/test/MC/AArch64/SME/feature.s new file mode 100644 index 00000000000..374a5140d8a --- /dev/null +++ b/test/MC/AArch64/SME/feature.s @@ -0,0 +1,11 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+sme < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+sme-f64 < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+sme-i64 < %s | FileCheck %s + +// Verify +sme flags imply +sve2 +tbx z0.b, z1.b, z2.b +// CHECK: tbx z0.b, z1.b, z2.b + +// Verify +sme flags imply +bf16 +bfdot z0.s, z1.h, z2.h +// CHECK: bfdot z0.s, z1.h, z2.h diff --git a/unittests/Support/TargetParserTest.cpp b/unittests/Support/TargetParserTest.cpp index ab864b86aaf..a465eb0c57c 100644 --- a/unittests/Support/TargetParserTest.cpp +++ b/unittests/Support/TargetParserTest.cpp @@ -1360,7 +1360,9 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { AArch64::AEK_SVE, AArch64::AEK_SVE2, AArch64::AEK_SVE2AES, AArch64::AEK_SVE2SM4, AArch64::AEK_SVE2SHA3, AArch64::AEK_SVE2BITPERM, - AArch64::AEK_RCPC, AArch64::AEK_FP16FML }; + AArch64::AEK_RCPC, AArch64::AEK_FP16FML, + AArch64::AEK_SME, AArch64::AEK_SMEF64, + AArch64::AEK_SMEI64 }; std::vector<StringRef> Features; @@ -1392,6 +1394,9 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sm4")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sha3")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2-bitperm")); + EXPECT_TRUE(llvm::is_contained(Features, "+sme")); + 
EXPECT_TRUE(llvm::is_contained(Features, "+sme-f64")); + EXPECT_TRUE(llvm::is_contained(Features, "+sme-i64")); } TEST(TargetParserTest, AArch64ArchFeatures) { @@ -1437,6 +1442,9 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"i8mm", "noi8mm", "+i8mm", "-i8mm"}, {"f32mm", "nof32mm", "+f32mm", "-f32mm"}, {"f64mm", "nof64mm", "+f64mm", "-f64mm"}, + {"sme", "nosme", "+sme", "-sme"}, + {"sme-f64", "nosme-f64", "+sme-f64", "-sme-f64"}, + {"sme-i64", "nosme-i64", "+sme-i64", "-sme-i64"}, }; for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {