From fcd9253fa0f1e4b2fa29ced9be9a3e983a91bbfb Mon Sep 17 00:00:00 2001
From: Cullen Rhodes
Date: Wed, 14 Jul 2021 08:01:19 +0000
Subject: [PATCH] [AArch64][SME] Add matrix register definitions and parsing support

SME introduces the ZA array, a new piece of architectural register state
consisting of a matrix of [SVLb x SVLb] bytes, where SVL is the
implementation-defined Streaming SVE vector length and SVLb is the number of
8-bit elements in a vector of SVL bits.

SME instructions take three types of matrix operands:

  * Tiles: a ZA tile is a square, two-dimensional sub-array of elements
    within the ZA array. These tiles make up the larger accumulator array and
    the granularity varies based on the element size, i.e.
      - ZAQ0..ZAQ15 (smallest tile granule)
      - ZAD0..ZAD7
      - ZAS0..ZAS3
      - ZAH0..ZAH1
      - or ZAB0     (largest tile granule, single tile)

  * Tile vectors: similar to regular tiles, but have an extra 'h' or 'v' to
    tell how the vector at [reg+offset] is laid out in the tile, horizontally
    or vertically. E.g. za1h.h or za15v.q, which correspond to vectors in
    registers ZAH1 and ZAQ15, respectively.

  * Accumulator matrix: this is the entire accumulator array ZA.

This patch adds the register classes and the related operands and parsing
support for SME instructions operating on the accumulator array. The ADDHA
and ADDVA instructions, which operate on tiles, are also added in this patch
to make some use of the code added; later patches will make use of the other
operands introduced here.

The reference can be found here:
https://developer.arm.com/documentation/ddi0602/2021-06

Co-authored by: Sander de Smalen (@sdesmalen)

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D105570
---
 lib/Target/AArch64/AArch64InstrInfo.td        |   3 +-
 lib/Target/AArch64/AArch64RegisterInfo.td     | 195 ++++++++++++++
 lib/Target/AArch64/AArch64SMEInstrInfo.td     |  25 ++
 .../AArch64/AsmParser/AArch64AsmParser.cpp    | 253 +++++++++++++++++-
 .../Disassembler/AArch64Disassembler.cpp      |  26 ++
 .../MCTargetDesc/AArch64InstPrinter.cpp       |  53 ++++
 .../AArch64/MCTargetDesc/AArch64InstPrinter.h |   9 +
 lib/Target/AArch64/SMEInstrFormats.td         |  47 ++++
 test/MC/AArch64/SME/addha-diagnostics.s       |  52 ++++
 test/MC/AArch64/SME/addha-u32.s               |  85 ++++++
 test/MC/AArch64/SME/addha-u64.s               |  85 ++++++
 test/MC/AArch64/SME/addva-diagnostics.s       |  37 +++
 test/MC/AArch64/SME/addva-u32.s               |  85 ++++++
 test/MC/AArch64/SME/addva-u64.s               |  85 ++++++
 unittests/Target/AArch64/CMakeLists.txt       |   1 +
 .../Target/AArch64/MatrixRegisterAliasing.cpp | 135 ++++++++++
 16 files changed, 1174 insertions(+), 2 deletions(-)
 create mode 100644 lib/Target/AArch64/AArch64SMEInstrInfo.td
 create mode 100644 lib/Target/AArch64/SMEInstrFormats.td
 create mode 100644 test/MC/AArch64/SME/addha-diagnostics.s
 create mode 100644 test/MC/AArch64/SME/addha-u32.s
 create mode 100644 test/MC/AArch64/SME/addha-u64.s
 create mode 100644 test/MC/AArch64/SME/addva-diagnostics.s
 create mode 100644 test/MC/AArch64/SME/addva-u32.s
 create mode 100644 test/MC/AArch64/SME/addva-u64.s
 create mode 100644 unittests/Target/AArch64/MatrixRegisterAliasing.cpp

diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 6b659d55d3e..682cec36172 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -648,6 +648,7 @@ let RecomputePerFunction = 1 in {
 
 include "AArch64InstrFormats.td"
 include "SVEInstrFormats.td"
+include "SMEInstrFormats.td"
 
 //===----------------------------------------------------------------------===//
 
@@ -8115,5 +8116,5 @@ def
StoreSwiftAsyncContext include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" - +include "AArch64SMEInstrInfo.td" include "AArch64InstrGISel.td" diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index a57192da462..6fc0af6ef46 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -45,6 +45,16 @@ let Namespace = "AArch64" in { def qsub1 : SubRegIndex<128>; def qsub2 : SubRegIndex<128>; def qsub3 : SubRegIndex<128>; + // Note: Code depends on these having consecutive numbers + def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits + def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits + def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits + def zasubs0 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits + def zasubs1 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits + def zasubd0 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits + def zasubd1 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits + def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits + def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits } let Namespace = "AArch64" in { @@ -1156,3 +1166,188 @@ foreach Scale = [8, 16, 32, 64] in { def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">; def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>; } + +// Accumulator array tiles. +def ZAQ0 : AArch64Reg<0, "za0.q">; +def ZAQ1 : AArch64Reg<1, "za1.q">; +def ZAQ2 : AArch64Reg<2, "za2.q">; +def ZAQ3 : AArch64Reg<3, "za3.q">; +def ZAQ4 : AArch64Reg<4, "za4.q">; +def ZAQ5 : AArch64Reg<5, "za5.q">; +def ZAQ6 : AArch64Reg<6, "za6.q">; +def ZAQ7 : AArch64Reg<7, "za7.q">; +def ZAQ8 : AArch64Reg<8, "za8.q">; +def ZAQ9 : AArch64Reg<9, "za9.q">; +def ZAQ10 : AArch64Reg<10, "za10.q">; +def ZAQ11 : AArch64Reg<11, "za11.q">; +def ZAQ12 : AArch64Reg<12, "za12.q">; +def ZAQ13 : AArch64Reg<13, "za13.q">; +def ZAQ14 : AArch64Reg<14, "za14.q">; +def ZAQ15 : AArch64Reg<15, "za15.q">; + +let SubRegIndices = [zasubq0, zasubq1] in { + def ZAD0 : AArch64Reg<0, "za0.d", [ZAQ0, ZAQ8]>; + def ZAD1 : AArch64Reg<1, "za1.d", [ZAQ1, ZAQ9]>; + def ZAD2 : AArch64Reg<2, "za2.d", [ZAQ2, ZAQ10]>; + def ZAD3 : AArch64Reg<3, "za3.d", [ZAQ3, ZAQ11]>; + def ZAD4 : AArch64Reg<4, "za4.d", [ZAQ4, ZAQ12]>; + def ZAD5 : AArch64Reg<5, "za5.d", [ZAQ5, ZAQ13]>; + def ZAD6 : AArch64Reg<6, "za6.d", [ZAQ6, ZAQ14]>; + def ZAD7 : AArch64Reg<7, "za7.d", [ZAQ7, ZAQ15]>; +} + +let SubRegIndices = [zasubd0, zasubd1] in { + def ZAS0 : AArch64Reg<0, "za0.s", [ZAD0, ZAD4]>; + def ZAS1 : AArch64Reg<1, "za1.s", [ZAD1, ZAD5]>; + def ZAS2 : AArch64Reg<2, "za2.s", [ZAD2, ZAD6]>; + def ZAS3 : AArch64Reg<3, "za3.s", [ZAD3, ZAD7]>; +} + +let SubRegIndices = [zasubs0, zasubs1] in { + def ZAH0 : AArch64Reg<0, "za0.h", [ZAS0, ZAS2]>; + def ZAH1 : AArch64Reg<1, "za1.h", [ZAS1, ZAS3]>; +} + +let SubRegIndices = [zasubh0, zasubh1] in { + def ZAB0 : AArch64Reg<0, "za0.b", [ZAH0, ZAH1]>; +} + +let SubRegIndices = [zasubb] in { + def ZA : AArch64Reg<0, "za", [ZAB0]>; +} + +// SME Register Classes + +// Accumulator array +def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> { + let Size = 2048; +} + +// Accumulator array as single tiles +def MPR8 : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> { + let Size = 2048; +} +def MPR16 : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 
0, 1))> {
+  let Size = 1024;
+}
+def MPR32 : RegisterClass<"AArch64", [untyped], 512, (add (sequence "ZAS%u", 0, 3))> {
+  let Size = 512;
+}
+def MPR64 : RegisterClass<"AArch64", [untyped], 256, (add (sequence "ZAD%u", 0, 7))> {
+  let Size = 256;
+}
+def MPR128 : RegisterClass<"AArch64", [untyped], 128, (add (sequence "ZAQ%u", 0, 15))> {
+  let Size = 128;
+}
+
+// SME Register Operands
+// There are three types of SME matrix register operands:
+// * Tiles:
+//
+// These tiles make up the larger accumulator matrix. The tile representation
+// has an element type suffix, e.g. za0.b or za15.q, and can be any of the
+// registers:
+//          ZAQ0..ZAQ15
+//          ZAD0..ZAD7
+//          ZAS0..ZAS3
+//          ZAH0..ZAH1
+//       or ZAB0
+//
+// * Tile vectors:
+//
+// Their representation is similar to regular tiles, but they have an extra
+// 'h' or 'v' to tell how the vector at [reg+offset] is laid out in the tile,
+// horizontally or vertically.
+//
+// e.g. za1h.h or za15v.q, which correspond to vectors in registers ZAH1 and
+// ZAQ15, respectively. The horizontal/vertical is more a property of the
+// instruction than of the asm-operand itself, or its register. The
+// distinction is required for the parsing/printing of the operand, as from
+// a compiler's perspective the whole tile is read/written.
+//
+// * Accumulator matrix:
+//
+// This is the entire matrix accumulator register ZA (<=> ZAB0), printed as
+// 'za'.
+
+//
+// Tiles
+//
+
+class MatrixTileAsmOperand<string RC, int EltSize> : AsmOperandClass {
+  let Name = "MatrixTile" # EltSize;
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::Tile" # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixTileOperand<int EltSize, int NumBitsForTile, RegisterClass RC>
+    : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixTileAsmOperand<!cast<string>(RC), EltSize>;
+  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+  let PrintMethod = "printMatrixTile";
+}
+
+def TileOp32 : MatrixTileOperand<32, 2, MPR32>;
+def TileOp64 : MatrixTileOperand<64, 3, MPR64>;
+
+//
+// Tile vectors (horizontal and vertical)
+//
+
+class MatrixTileVectorAsmOperand<string RC, int EltSize, int IsVertical>
+    : AsmOperandClass {
+  let Name = "MatrixTileVector" # !if(IsVertical, "V", "H") # EltSize;
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::"
+                          # !if(IsVertical, "Col", "Row") # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixTileVectorOperand<int EltSize, int NumBitsForTile,
+                              RegisterClass RC, int IsVertical>
+    : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixTileVectorAsmOperand<!cast<string>(RC), EltSize,
+                                                    IsVertical>;
+  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+  let PrintMethod = "printMatrixTileVector<" # IsVertical # ">";
+}
+
+def TileVectorOpH8   : MatrixTileVectorOperand<  8, 0, MPR8,   0>;
+def TileVectorOpH16  : MatrixTileVectorOperand< 16, 1, MPR16,  0>;
+def TileVectorOpH32  : MatrixTileVectorOperand< 32, 2, MPR32,  0>;
+def TileVectorOpH64  : MatrixTileVectorOperand< 64, 3, MPR64,  0>;
+def TileVectorOpH128 : MatrixTileVectorOperand<128, 4, MPR128, 0>;
+
+def TileVectorOpV8   : MatrixTileVectorOperand<  8, 0, MPR8,   1>;
+def TileVectorOpV16  : MatrixTileVectorOperand< 16, 1, MPR16,  1>;
+def TileVectorOpV32  : MatrixTileVectorOperand< 32, 2, MPR32,  1>;
+def TileVectorOpV64  : MatrixTileVectorOperand< 64, 3, MPR64,  1>;
+def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>;
+
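+// As an example of how these operand definitions fit together: the 32-bit
+// tile operand of ADDHA/ADDVA (TileOp32, written e.g. "za3.s") is parsed by
+// tryParseMatrixRegister, matched by
+// isMatrixRegOperand<MatrixKind::Tile, 32, AArch64::MPR32RegClassID>,
+// rendered by addMatrixOperands, decoded by DecodeMatrixTile<2> and printed
+// by printMatrixTile.
+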
+//
+// Accumulator matrix
+//
+
+class MatrixAsmOperand<string RC, int EltSize> : AsmOperandClass {
+  let Name = "Matrix";
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::Array" # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixOperand<RegisterClass RC, int EltSize> : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixAsmOperand<!cast<string>(RC), EltSize>;
+  let PrintMethod = "printMatrix<" # EltSize # ">";
+}
+
+def MatrixOp : MatrixOperand<MPR, 0>;
diff --git a/lib/Target/AArch64/AArch64SMEInstrInfo.td b/lib/Target/AArch64/AArch64SMEInstrInfo.td
new file mode 100644
index 00000000000..bc636a1276c
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -0,0 +1,25 @@
+//=- AArch64SMEInstrInfo.td - AArch64 SME Instructions -*- tablegen -*-----=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Add vector elements horizontally or vertically to ZA tile.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasSME] in {
+def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">;
+def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">;
+}
+
+let Predicates = [HasSMEI64] in {
+def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">;
+def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">;
+}
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index f6487814d9c..c8c6d7947ed 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -66,9 +66,12 @@ enum class RegKind {
   Scalar,
   NeonVector,
   SVEDataVector,
-  SVEPredicateVector
+  SVEPredicateVector,
+  Matrix
 };
 
+enum class MatrixKind { Array, Tile, Row, Col };
+
 enum RegConstraintEqualityTy {
   EqualsReg,
   EqualsSuperReg,
@@ -229,6 +232,7 @@ private:
   OperandMatchResultTy tryParseScalarRegister(unsigned &Reg);
   OperandMatchResultTy tryParseVectorRegister(unsigned &Reg, StringRef &Kind,
                                               RegKind MatchKind);
+  OperandMatchResultTy tryParseMatrixRegister(OperandVector &Operands);
   OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands);
   OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
   OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands);
@@ -316,6 +320,7 @@ private:
     k_ShiftedImm,
     k_CondCode,
     k_Register,
+    k_MatrixRegister,
     k_VectorList,
     k_VectorIndex,
     k_Token,
@@ -370,6 +375,12 @@ private:
     ShiftExtendOp ShiftExtend;
   };
 
+  struct MatrixRegOp {
+    unsigned RegNum;
+    unsigned ElementWidth;
+    MatrixKind Kind;
+  };
+
   struct VectorListOp {
     unsigned RegNum;
     unsigned Count;
@@ -440,6 +451,7 @@ private:
   union {
     struct TokOp Tok;
     struct RegOp Reg;
+    struct MatrixRegOp MatrixReg;
     struct VectorListOp VectorList;
     struct VectorIndexOp VectorIndex;
     struct ImmOp Imm;
@@ -488,6 +500,9 @@ public:
     case k_Register:
       Reg = o.Reg;
       break;
+    case k_MatrixRegister:
+      MatrixReg = o.MatrixReg;
+      break;
     case k_VectorList:
       VectorList = o.VectorList;
       break;
@@ -580,6 +595,21 @@ public:
     return Reg.RegNum;
} + unsigned getMatrixReg() const { + assert(Kind == k_MatrixRegister && "Invalid access!"); + return MatrixReg.RegNum; + } + + unsigned getMatrixElementWidth() const { + assert(Kind == k_MatrixRegister && "Invalid access!"); + return MatrixReg.ElementWidth; + } + + MatrixKind getMatrixKind() const { + assert(Kind == k_MatrixRegister && "Invalid access!"); + return MatrixReg.Kind; + } + RegConstraintEqualityTy getRegEqualityTy() const { assert(Kind == k_Register && "Invalid access!"); return Reg.EqualityTy; @@ -1089,6 +1119,8 @@ public: Reg.RegNum)); } + bool isMatrix() const { return Kind == k_MatrixRegister; } + template bool isSVEVectorReg() const { RegKind RK; switch (Class) { @@ -1470,6 +1502,15 @@ public: return true; } + template + DiagnosticPredicate isMatrixRegOperand() const { + if (isMatrix() && getMatrixKind() == Kind && + AArch64MCRegisterClasses[RegClass].contains(getMatrixReg()) && + EltSize == getMatrixElementWidth()) + return DiagnosticPredicateTy::Match; + return DiagnosticPredicateTy::NoMatch; + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. if (!Expr) @@ -1485,6 +1526,11 @@ public: Inst.addOperand(MCOperand::createReg(getReg())); } + void addMatrixOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getMatrixReg())); + } + void addGPR32as64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); assert( @@ -2054,6 +2100,18 @@ public: return Op; } + static std::unique_ptr + CreateMatrixRegister(unsigned RegNum, unsigned ElementWidth, MatrixKind Kind, + SMLoc S, SMLoc E, MCContext &Ctx) { + auto Op = std::make_unique(k_MatrixRegister, Ctx); + Op->MatrixReg.RegNum = RegNum; + Op->MatrixReg.ElementWidth = ElementWidth; + Op->MatrixReg.Kind = Kind; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static std::unique_ptr CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val, bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) { @@ -2132,6 +2190,9 @@ void AArch64Operand::print(raw_ostream &OS) const { case k_BTIHint: OS << getBTIHintName(); break; + case k_MatrixRegister: + OS << ""; + break; case k_Register: OS << ""; if (!getShiftExtendAmount() && !hasShiftExtendAmount()) @@ -2229,6 +2290,7 @@ static Optional> parseVectorKind(StringRef Suffix, break; case RegKind::SVEPredicateVector: case RegKind::SVEDataVector: + case RegKind::Matrix: Res = StringSwitch>(Suffix.lower()) .Case("", {0, 0}) .Case(".b", {0, 8}) @@ -2310,6 +2372,105 @@ static unsigned matchSVEPredicateVectorRegName(StringRef Name) { .Default(0); } +static unsigned matchMatrixRegName(StringRef Name) { + return StringSwitch(Name.lower()) + .Case("za", AArch64::ZA) + .Case("za0.q", AArch64::ZAQ0) + .Case("za1.q", AArch64::ZAQ1) + .Case("za2.q", AArch64::ZAQ2) + .Case("za3.q", AArch64::ZAQ3) + .Case("za4.q", AArch64::ZAQ4) + .Case("za5.q", AArch64::ZAQ5) + .Case("za6.q", AArch64::ZAQ6) + .Case("za7.q", AArch64::ZAQ7) + .Case("za8.q", AArch64::ZAQ8) + .Case("za9.q", AArch64::ZAQ9) + .Case("za10.q", AArch64::ZAQ10) + .Case("za11.q", AArch64::ZAQ11) + .Case("za12.q", AArch64::ZAQ12) + .Case("za13.q", AArch64::ZAQ13) + .Case("za14.q", AArch64::ZAQ14) + .Case("za15.q", AArch64::ZAQ15) + .Case("za0.d", AArch64::ZAD0) + .Case("za1.d", AArch64::ZAD1) + .Case("za2.d", AArch64::ZAD2) + .Case("za3.d", AArch64::ZAD3) + .Case("za4.d", AArch64::ZAD4) + .Case("za5.d", AArch64::ZAD5) + .Case("za6.d", AArch64::ZAD6) + .Case("za7.d", 
AArch64::ZAD7) + .Case("za0.s", AArch64::ZAS0) + .Case("za1.s", AArch64::ZAS1) + .Case("za2.s", AArch64::ZAS2) + .Case("za3.s", AArch64::ZAS3) + .Case("za0.h", AArch64::ZAH0) + .Case("za1.h", AArch64::ZAH1) + .Case("za0.b", AArch64::ZAB0) + .Case("za0h.q", AArch64::ZAQ0) + .Case("za1h.q", AArch64::ZAQ1) + .Case("za2h.q", AArch64::ZAQ2) + .Case("za3h.q", AArch64::ZAQ3) + .Case("za4h.q", AArch64::ZAQ4) + .Case("za5h.q", AArch64::ZAQ5) + .Case("za6h.q", AArch64::ZAQ6) + .Case("za7h.q", AArch64::ZAQ7) + .Case("za8h.q", AArch64::ZAQ8) + .Case("za9h.q", AArch64::ZAQ9) + .Case("za10h.q", AArch64::ZAQ10) + .Case("za11h.q", AArch64::ZAQ11) + .Case("za12h.q", AArch64::ZAQ12) + .Case("za13h.q", AArch64::ZAQ13) + .Case("za14h.q", AArch64::ZAQ14) + .Case("za15h.q", AArch64::ZAQ15) + .Case("za0h.d", AArch64::ZAD0) + .Case("za1h.d", AArch64::ZAD1) + .Case("za2h.d", AArch64::ZAD2) + .Case("za3h.d", AArch64::ZAD3) + .Case("za4h.d", AArch64::ZAD4) + .Case("za5h.d", AArch64::ZAD5) + .Case("za6h.d", AArch64::ZAD6) + .Case("za7h.d", AArch64::ZAD7) + .Case("za0h.s", AArch64::ZAS0) + .Case("za1h.s", AArch64::ZAS1) + .Case("za2h.s", AArch64::ZAS2) + .Case("za3h.s", AArch64::ZAS3) + .Case("za0h.h", AArch64::ZAH0) + .Case("za1h.h", AArch64::ZAH1) + .Case("za0h.b", AArch64::ZAB0) + .Case("za0v.q", AArch64::ZAQ0) + .Case("za1v.q", AArch64::ZAQ1) + .Case("za2v.q", AArch64::ZAQ2) + .Case("za3v.q", AArch64::ZAQ3) + .Case("za4v.q", AArch64::ZAQ4) + .Case("za5v.q", AArch64::ZAQ5) + .Case("za6v.q", AArch64::ZAQ6) + .Case("za7v.q", AArch64::ZAQ7) + .Case("za8v.q", AArch64::ZAQ8) + .Case("za9v.q", AArch64::ZAQ9) + .Case("za10v.q", AArch64::ZAQ10) + .Case("za11v.q", AArch64::ZAQ11) + .Case("za12v.q", AArch64::ZAQ12) + .Case("za13v.q", AArch64::ZAQ13) + .Case("za14v.q", AArch64::ZAQ14) + .Case("za15v.q", AArch64::ZAQ15) + .Case("za0v.d", AArch64::ZAD0) + .Case("za1v.d", AArch64::ZAD1) + .Case("za2v.d", AArch64::ZAD2) + .Case("za3v.d", AArch64::ZAD3) + .Case("za4v.d", AArch64::ZAD4) + .Case("za5v.d", AArch64::ZAD5) + .Case("za6v.d", AArch64::ZAD6) + .Case("za7v.d", AArch64::ZAD7) + .Case("za0v.s", AArch64::ZAS0) + .Case("za1v.s", AArch64::ZAS1) + .Case("za2v.s", AArch64::ZAS2) + .Case("za3v.s", AArch64::ZAS3) + .Case("za0v.h", AArch64::ZAH0) + .Case("za1v.h", AArch64::ZAH1) + .Case("za0v.b", AArch64::ZAB0) + .Default(0); +} + bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success; @@ -2337,6 +2498,9 @@ unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name, if ((RegNum = MatchNeonVectorRegName(Name))) return Kind == RegKind::NeonVector ? RegNum : 0; + if ((RegNum = matchMatrixRegName(Name))) + return Kind == RegKind::Matrix ? RegNum : 0; + // The parsed register must be of RegKind Scalar if ((RegNum = MatchRegisterName(Name))) return Kind == RegKind::Scalar ? RegNum : 0; @@ -2809,6 +2973,54 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands, return false; } +OperandMatchResultTy +AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + const AsmToken &Tok = Parser.getTok(); + SMLoc S = getLoc(); + + StringRef Name = Tok.getString(); + + if (Name.equals_insensitive("za")) { + Parser.Lex(); // eat "za" + Operands.push_back(AArch64Operand::CreateMatrixRegister( + AArch64::ZA, /*ElementWidth=*/0, MatrixKind::Array, S, getLoc(), + getContext())); + return MatchOperand_Success; + } + + // Try to parse matrix register. 
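+  // The remaining names have the form "za<idx>.<size>" for tiles (e.g.
+  // za3.s), or "za<idx>h.<size>"/"za<idx>v.<size>" for horizontal/vertical
+  // tile vectors (e.g. za15v.q); matchRegisterNameAlias resolves all of
+  // them to the underlying tile register.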
+ unsigned Reg = matchRegisterNameAlias(Name, RegKind::Matrix); + if (!Reg) + return MatchOperand_NoMatch; + + size_t DotPosition = Name.find('.'); + assert(DotPosition != StringRef::npos && "Unexpected register"); + + StringRef Head = Name.take_front(DotPosition); + StringRef Tail = Name.drop_front(DotPosition); + StringRef RowOrColumn = Head.take_back(); + + MatrixKind Kind = StringSwitch(RowOrColumn) + .Case("h", MatrixKind::Row) + .Case("v", MatrixKind::Col) + .Default(MatrixKind::Tile); + + // Next up, parsing the suffix + const auto &KindRes = parseVectorKind(Tail, RegKind::Matrix); + if (!KindRes) { + TokError("Expected the register to be followed by element width suffix"); + return MatchOperand_ParseFail; + } + unsigned ElementWidth = KindRes->second; + + Parser.Lex(); + + Operands.push_back(AArch64Operand::CreateMatrixRegister( + Reg, ElementWidth, Kind, S, getLoc(), getContext())); + return MatchOperand_Success; +} + /// tryParseOptionalShift - Some operands take an optional shift argument. Parse /// them if present. OperandMatchResultTy @@ -4733,6 +4945,32 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, return Error(Loc, "Invalid floating point constant, expected 0.5 or 2.0."); case Match_InvalidSVEExactFPImmOperandZeroOne: return Error(Loc, "Invalid floating point constant, expected 0.0 or 1.0."); + case Match_InvalidMatrixTileVectorH8: + return Error(Loc, "invalid matrix operand, expected za0h.b"); + case Match_InvalidMatrixTileVectorH16: + return Error(Loc, "invalid matrix operand, expected za[0-1]h.h"); + case Match_InvalidMatrixTileVectorH32: + return Error(Loc, "invalid matrix operand, expected za[0-3]h.s"); + case Match_InvalidMatrixTileVectorH64: + return Error(Loc, "invalid matrix operand, expected za[0-7]h.d"); + case Match_InvalidMatrixTileVectorH128: + return Error(Loc, "invalid matrix operand, expected za[0-15]h.q"); + case Match_InvalidMatrixTileVectorV8: + return Error(Loc, "invalid matrix operand, expected za0v.b"); + case Match_InvalidMatrixTileVectorV16: + return Error(Loc, "invalid matrix operand, expected za[0-1]v.h"); + case Match_InvalidMatrixTileVectorV32: + return Error(Loc, "invalid matrix operand, expected za[0-3]v.s"); + case Match_InvalidMatrixTileVectorV64: + return Error(Loc, "invalid matrix operand, expected za[0-7]v.d"); + case Match_InvalidMatrixTileVectorV128: + return Error(Loc, "invalid matrix operand, expected za[0-15]v.q"); + case Match_InvalidMatrixTile32: + return Error(Loc, "invalid matrix operand, expected za[0-3].s"); + case Match_InvalidMatrixTile64: + return Error(Loc, "invalid matrix operand, expected za[0-7].d"); + case Match_InvalidMatrix: + return Error(Loc, "invalid matrix operand, expected za"); default: llvm_unreachable("unexpected error code!"); } @@ -5251,6 +5489,19 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidSVEExactFPImmOperandHalfOne: case Match_InvalidSVEExactFPImmOperandHalfTwo: case Match_InvalidSVEExactFPImmOperandZeroOne: + case Match_InvalidMatrixTile32: + case Match_InvalidMatrixTile64: + case Match_InvalidMatrix: + case Match_InvalidMatrixTileVectorH8: + case Match_InvalidMatrixTileVectorH16: + case Match_InvalidMatrixTileVectorH32: + case Match_InvalidMatrixTileVectorH64: + case Match_InvalidMatrixTileVectorH128: + case Match_InvalidMatrixTileVectorV8: + case Match_InvalidMatrixTileVectorV16: + case Match_InvalidMatrixTileVectorV32: + case Match_InvalidMatrixTileVectorV64: + case Match_InvalidMatrixTileVectorV128: case Match_MSR: case 
Match_MRS: { if (ErrorInfo >= Operands.size()) diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index dca76f8457f..e1554b2dbd6 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -111,6 +111,9 @@ static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +template +static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); @@ -642,6 +645,29 @@ static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo, return Success; } +static const SmallVector, 5> + MatrixZATileDecoderTable = { + {AArch64::ZAB0}, + {AArch64::ZAH0, AArch64::ZAH1}, + {AArch64::ZAS0, AArch64::ZAS1, AArch64::ZAS2, AArch64::ZAS3}, + {AArch64::ZAD0, AArch64::ZAD1, AArch64::ZAD2, AArch64::ZAD3, + AArch64::ZAD4, AArch64::ZAD5, AArch64::ZAD6, AArch64::ZAD7}, + {AArch64::ZAQ0, AArch64::ZAQ1, AArch64::ZAQ2, AArch64::ZAQ3, + AArch64::ZAQ4, AArch64::ZAQ5, AArch64::ZAQ6, AArch64::ZAQ7, + AArch64::ZAQ8, AArch64::ZAQ9, AArch64::ZAQ10, AArch64::ZAQ11, + AArch64::ZAQ12, AArch64::ZAQ13, AArch64::ZAQ14, AArch64::ZAQ15}}; + +template +static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + unsigned LastReg = (1 << NumBitsForTile) - 1; + if (RegNo > LastReg) + return Fail; + Inst.addOperand( + MCOperand::createReg(MatrixZATileDecoderTable[NumBitsForTile][RegNo])); + return Success; +} + static const unsigned PPRDecoderTable[] = { AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4, AArch64::P5, AArch64::P6, AArch64::P7, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 8378ebce73e..9ef35c5b047 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -880,6 +880,59 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, return true; } +template +void AArch64InstPrinter::printMatrix(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &RegOp = MI->getOperand(OpNum); + assert(RegOp.isReg() && "Unexpected operand type!"); + + O << getRegisterName(RegOp.getReg()); + switch (EltSize) { + case 0: + break; + case 8: + O << ".b"; + break; + case 16: + O << ".h"; + break; + case 32: + O << ".s"; + break; + case 64: + O << ".d"; + break; + case 128: + O << ".q"; + break; + default: + llvm_unreachable("Unsupported element size"); + } +} + +template +void AArch64InstPrinter::printMatrixTileVector(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &RegOp = MI->getOperand(OpNum); + assert(RegOp.isReg() && "Unexpected operand type!"); + StringRef RegName = getRegisterName(RegOp.getReg()); + + // Insert the horizontal/vertical flag before the suffix. + StringRef Base, Suffix; + std::tie(Base, Suffix) = RegName.split('.'); + O << Base << (IsVertical ? "v" : "h") << '.' 
<< Suffix; +} + +void AArch64InstPrinter::printMatrixTile(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &RegOp = MI->getOperand(OpNum); + assert(RegOp.isReg() && "Unexpected operand type!"); + O << getRegisterName(RegOp.getReg()); +} + void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h index 4dbc659f652..109d25507e6 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -187,6 +187,15 @@ protected: const MCSubtargetInfo &STI, raw_ostream &O); void printSVEPattern(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + + template + void printMatrixTileVector(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMatrixTile(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + template + void printMatrix(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O); template void printSVERegOp(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/lib/Target/AArch64/SMEInstrFormats.td b/lib/Target/AArch64/SMEInstrFormats.td new file mode 100644 index 00000000000..8c36a17d70e --- /dev/null +++ b/lib/Target/AArch64/SMEInstrFormats.td @@ -0,0 +1,47 @@ +//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions. 
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SME Add Vector to Tile
+//===----------------------------------------------------------------------===//
+
+class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
+                                  ZPRRegOp zpr_ty, string mnemonic>
+    : I<(outs tile_ty:$ZAda),
+        (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
+        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
+        "", []>, Sched<[]> {
+  bits<3> Pm;
+  bits<3> Pn;
+  bits<5> Zn;
+  let Inst{31-23} = 0b110000001;
+  let Inst{22}    = op;
+  let Inst{21-17} = 0b01000;
+  let Inst{16}    = V;
+  let Inst{15-13} = Pm;
+  let Inst{12-10} = Pn;
+  let Inst{9-5}   = Zn;
+  let Inst{4-3}   = 0b00;
+}
+
+class sme_add_vector_to_tile_u32<bit V, string mnemonic>
+    : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
+  bits<2> ZAda;
+  let Inst{2}   = 0b0;
+  let Inst{1-0} = ZAda;
+}
+
+class sme_add_vector_to_tile_u64<bit V, string mnemonic>
+    : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
+  bits<3> ZAda;
+  let Inst{2-0} = ZAda;
+}
diff --git a/test/MC/AArch64/SME/addha-diagnostics.s b/test/MC/AArch64/SME/addha-diagnostics.s
new file mode 100644
index 00000000000..5338622e3e9
--- /dev/null
+++ b/test/MC/AArch64/SME/addha-diagnostics.s
@@ -0,0 +1,52 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid tile
+
+addha za4.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za4.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za8.d, p0/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za8.d, p0/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0h.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0h.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0v.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0v.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0p.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0p.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+addha za0.s, p8/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.s, p8/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0.s, p0/m, p8/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.s, p0/m, p8/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0.d, p8/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.d, p8/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0.d, p0/m, p8/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.d, p0/m, p8/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/test/MC/AArch64/SME/addha-u32.s b/test/MC/AArch64/SME/addha-u32.s
new
file mode 100644 index 00000000000..c65c0286f14 --- /dev/null +++ b/test/MC/AArch64/SME/addha-u32.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +addha za0.s, p0/m, p0/m, z0.s +// CHECK-INST: addha za0.s, p0/m, p0/m, z0.s +// CHECK-ENCODING: [0x00,0x00,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 00 00 90 c0 + +addha za1.s, p5/m, p2/m, z10.s +// CHECK-INST: addha za1.s, p5/m, p2/m, z10.s +// CHECK-ENCODING: [0x41,0x55,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 41 55 90 c0 + +addha za3.s, p3/m, p7/m, z13.s +// CHECK-INST: addha za3.s, p3/m, p7/m, z13.s +// CHECK-ENCODING: [0xa3,0xed,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: a3 ed 90 c0 + +addha za3.s, p7/m, p7/m, z31.s +// CHECK-INST: addha za3.s, p7/m, p7/m, z31.s +// CHECK-ENCODING: [0xe3,0xff,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: e3 ff 90 c0 + +addha za1.s, p3/m, p0/m, z17.s +// CHECK-INST: addha za1.s, p3/m, p0/m, z17.s +// CHECK-ENCODING: [0x21,0x0e,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 0e 90 c0 + +addha za1.s, p1/m, p4/m, z1.s +// CHECK-INST: addha za1.s, p1/m, p4/m, z1.s +// CHECK-ENCODING: [0x21,0x84,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 84 90 c0 + +addha za0.s, p5/m, p2/m, z19.s +// CHECK-INST: addha za0.s, p5/m, p2/m, z19.s +// CHECK-ENCODING: [0x60,0x56,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 60 56 90 c0 + +addha za0.s, p6/m, p0/m, z12.s +// CHECK-INST: addha za0.s, p6/m, p0/m, z12.s +// CHECK-ENCODING: [0x80,0x19,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 80 19 90 c0 + +addha za1.s, p2/m, p6/m, z1.s +// CHECK-INST: addha za1.s, p2/m, p6/m, z1.s +// CHECK-ENCODING: [0x21,0xc8,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 c8 90 c0 + +addha za1.s, p2/m, p0/m, z22.s +// CHECK-INST: addha za1.s, p2/m, p0/m, z22.s +// CHECK-ENCODING: [0xc1,0x0a,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: c1 0a 90 c0 + +addha za2.s, p5/m, p7/m, z9.s +// CHECK-INST: addha za2.s, p5/m, p7/m, z9.s +// CHECK-ENCODING: [0x22,0xf5,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 22 f5 90 c0 + +addha za3.s, p2/m, p5/m, z12.s +// CHECK-INST: addha za3.s, p2/m, p5/m, z12.s +// CHECK-ENCODING: [0x83,0xa9,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 83 a9 90 c0 diff --git a/test/MC/AArch64/SME/addha-u64.s b/test/MC/AArch64/SME/addha-u64.s new file mode 100644 index 00000000000..32d50ebe56e --- /dev/null +++ b/test/MC/AArch64/SME/addha-u64.s @@ -0,0 +1,85 @@ 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +addha za0.d, p0/m, p0/m, z0.d +// CHECK-INST: addha za0.d, p0/m, p0/m, z0.d +// CHECK-ENCODING: [0x00,0x00,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 00 00 d0 c0 + +addha za5.d, p5/m, p2/m, z10.d +// CHECK-INST: addha za5.d, p5/m, p2/m, z10.d +// CHECK-ENCODING: [0x45,0x55,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 45 55 d0 c0 + +addha za7.d, p3/m, p7/m, z13.d +// CHECK-INST: addha za7.d, p3/m, p7/m, z13.d +// CHECK-ENCODING: [0xa7,0xed,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: a7 ed d0 c0 + +addha za7.d, p7/m, p7/m, z31.d +// CHECK-INST: addha za7.d, p7/m, p7/m, z31.d +// CHECK-ENCODING: [0xe7,0xff,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: e7 ff d0 c0 + +addha za5.d, p3/m, p0/m, z17.d +// CHECK-INST: addha za5.d, p3/m, p0/m, z17.d +// CHECK-ENCODING: [0x25,0x0e,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 25 0e d0 c0 + +addha za1.d, p1/m, p4/m, z1.d +// CHECK-INST: addha za1.d, p1/m, p4/m, z1.d +// CHECK-ENCODING: [0x21,0x84,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 21 84 d0 c0 + +addha za0.d, p5/m, p2/m, z19.d +// CHECK-INST: addha za0.d, p5/m, p2/m, z19.d +// CHECK-ENCODING: [0x60,0x56,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 60 56 d0 c0 + +addha za0.d, p6/m, p0/m, z12.d +// CHECK-INST: addha za0.d, p6/m, p0/m, z12.d +// CHECK-ENCODING: [0x80,0x19,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 80 19 d0 c0 + +addha za1.d, p2/m, p6/m, z1.d +// CHECK-INST: addha za1.d, p2/m, p6/m, z1.d +// CHECK-ENCODING: [0x21,0xc8,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 21 c8 d0 c0 + +addha za5.d, p2/m, p0/m, z22.d +// CHECK-INST: addha za5.d, p2/m, p0/m, z22.d +// CHECK-ENCODING: [0xc5,0x0a,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: c5 0a d0 c0 + +addha za2.d, p5/m, p7/m, z9.d +// CHECK-INST: addha za2.d, p5/m, p7/m, z9.d +// CHECK-ENCODING: [0x22,0xf5,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 22 f5 d0 c0 + +addha za7.d, p2/m, p5/m, z12.d +// CHECK-INST: addha za7.d, p2/m, p5/m, z12.d +// CHECK-ENCODING: [0x87,0xa9,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 87 a9 d0 c0 diff --git a/test/MC/AArch64/SME/addva-diagnostics.s b/test/MC/AArch64/SME/addva-diagnostics.s new file mode 100644 index 00000000000..e68217afdcb --- /dev/null +++ b/test/MC/AArch64/SME/addva-diagnostics.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid tile + +addva za4.s, p0/m, p0/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: addva za4.s, p0/m, p0/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za8.d, p0/m, p0/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: addva za8.d, p0/m, p0/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +addva za0.s, p8/m, p0/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.s, p8/m, p0/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za0.s, p0/m, p8/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.s, p0/m, p8/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za0.d, p8/m, p0/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.d, p8/m, p0/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za0.d, p0/m, p8/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.d, p0/m, p8/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/test/MC/AArch64/SME/addva-u32.s b/test/MC/AArch64/SME/addva-u32.s new file mode 100644 index 00000000000..7ae2dc83192 --- /dev/null +++ b/test/MC/AArch64/SME/addva-u32.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +addva za0.s, p0/m, p0/m, z0.s +// CHECK-INST: addva za0.s, p0/m, p0/m, z0.s +// CHECK-ENCODING: [0x00,0x00,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 00 00 91 c0 + +addva za1.s, p5/m, p2/m, z10.s +// CHECK-INST: addva za1.s, p5/m, p2/m, z10.s +// CHECK-ENCODING: [0x41,0x55,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 41 55 91 c0 + +addva za3.s, p3/m, p7/m, z13.s +// CHECK-INST: addva za3.s, p3/m, p7/m, z13.s +// CHECK-ENCODING: [0xa3,0xed,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: a3 ed 91 c0 + +addva za3.s, p7/m, p7/m, z31.s +// CHECK-INST: addva za3.s, p7/m, p7/m, z31.s +// CHECK-ENCODING: [0xe3,0xff,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: e3 ff 91 c0 + +addva za1.s, p3/m, p0/m, z17.s +// CHECK-INST: addva za1.s, p3/m, p0/m, z17.s +// CHECK-ENCODING: [0x21,0x0e,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 0e 91 c0 + +addva za1.s, p1/m, p4/m, z1.s +// CHECK-INST: addva za1.s, p1/m, p4/m, z1.s +// CHECK-ENCODING: [0x21,0x84,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 84 91 c0 + +addva za0.s, p5/m, p2/m, z19.s +// CHECK-INST: addva za0.s, p5/m, p2/m, z19.s +// CHECK-ENCODING: [0x60,0x56,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 60 56 91 c0 + +addva za0.s, p6/m, p0/m, z12.s +// CHECK-INST: addva za0.s, p6/m, p0/m, z12.s +// CHECK-ENCODING: [0x80,0x19,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 80 19 91 c0 + +addva za1.s, p2/m, p6/m, z1.s +// CHECK-INST: addva za1.s, p2/m, p6/m, z1.s +// CHECK-ENCODING: [0x21,0xc8,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 c8 91 c0 + +addva za1.s, p2/m, p0/m, z22.s +// CHECK-INST: addva za1.s, p2/m, p0/m, z22.s +// CHECK-ENCODING: [0xc1,0x0a,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: c1 0a 91 c0 + +addva za2.s, p5/m, p7/m, z9.s +// CHECK-INST: addva za2.s, p5/m, p7/m, z9.s +// CHECK-ENCODING: [0x22,0xf5,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 22 f5 91 c0 + +addva za3.s, p2/m, p5/m, z12.s +// CHECK-INST: addva za3.s, p2/m, p5/m, z12.s +// CHECK-ENCODING: [0x83,0xa9,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 83 a9 91 c0 diff --git a/test/MC/AArch64/SME/addva-u64.s b/test/MC/AArch64/SME/addva-u64.s new file mode 100644 index 00000000000..e425c4ac512 --- /dev/null +++ b/test/MC/AArch64/SME/addva-u64.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +addva za0.d, p0/m, p0/m, z0.d +// CHECK-INST: addva za0.d, p0/m, p0/m, z0.d +// CHECK-ENCODING: [0x00,0x00,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 00 00 d1 c0 + +addva za5.d, p5/m, p2/m, z10.d +// CHECK-INST: addva za5.d, p5/m, p2/m, z10.d +// CHECK-ENCODING: [0x45,0x55,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 45 55 d1 c0 + +addva za7.d, p3/m, p7/m, z13.d +// CHECK-INST: addva za7.d, p3/m, p7/m, z13.d +// CHECK-ENCODING: [0xa7,0xed,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: a7 ed d1 c0 + +addva za7.d, p7/m, p7/m, z31.d +// CHECK-INST: addva za7.d, p7/m, p7/m, z31.d +// CHECK-ENCODING: [0xe7,0xff,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: e7 ff d1 c0 + +addva za5.d, p3/m, p0/m, z17.d +// CHECK-INST: addva za5.d, p3/m, p0/m, z17.d +// CHECK-ENCODING: [0x25,0x0e,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 25 0e d1 c0 + +addva za1.d, p1/m, p4/m, z1.d +// CHECK-INST: addva za1.d, p1/m, p4/m, z1.d +// CHECK-ENCODING: [0x21,0x84,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 21 84 d1 c0 + +addva za0.d, p5/m, p2/m, z19.d +// CHECK-INST: addva za0.d, p5/m, p2/m, z19.d +// CHECK-ENCODING: [0x60,0x56,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 60 56 d1 c0 + +addva za0.d, p6/m, p0/m, z12.d +// CHECK-INST: addva za0.d, p6/m, p0/m, z12.d +// CHECK-ENCODING: [0x80,0x19,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 80 19 d1 c0 + +addva za1.d, p2/m, p6/m, z1.d +// CHECK-INST: addva za1.d, p2/m, p6/m, z1.d +// CHECK-ENCODING: [0x21,0xc8,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 21 c8 d1 c0 + +addva za5.d, p2/m, p0/m, z22.d +// CHECK-INST: addva za5.d, p2/m, p0/m, z22.d +// CHECK-ENCODING: [0xc5,0x0a,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: c5 0a d1 c0 + +addva za2.d, p5/m, p7/m, z9.d +// CHECK-INST: addva za2.d, p5/m, p7/m, z9.d +// CHECK-ENCODING: [0x22,0xf5,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 22 f5 d1 c0 + +addva za7.d, p2/m, p5/m, z12.d +// CHECK-INST: addva za7.d, p2/m, p5/m, z12.d +// CHECK-ENCODING: [0x87,0xa9,0xd1,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 87 a9 d1 c0 diff --git a/unittests/Target/AArch64/CMakeLists.txt b/unittests/Target/AArch64/CMakeLists.txt index d95793f3f3a..2a400a90ddc 100644 --- a/unittests/Target/AArch64/CMakeLists.txt +++ b/unittests/Target/AArch64/CMakeLists.txt @@ -20,4 +20,5 @@ set(LLVM_LINK_COMPONENTS add_llvm_target_unittest(AArch64Tests InstSizes.cpp DecomposeStackOffsetTest.cpp + MatrixRegisterAliasing.cpp ) diff --git a/unittests/Target/AArch64/MatrixRegisterAliasing.cpp b/unittests/Target/AArch64/MatrixRegisterAliasing.cpp new file mode 100644 index 00000000000..2f0d3717d03 --- /dev/null +++ b/unittests/Target/AArch64/MatrixRegisterAliasing.cpp @@ -0,0 +1,135 @@ +#include "AArch64Subtarget.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" + +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { +std::unique_ptr createTargetMachine() { + auto 
TT(Triple::normalize("aarch64--")); + std::string CPU("generic"); + std::string FS("+sme"); + + LLVMInitializeAArch64TargetInfo(); + LLVMInitializeAArch64Target(); + LLVMInitializeAArch64TargetMC(); + + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error); + + return std::unique_ptr( + static_cast(TheTarget->createTargetMachine( + TT, CPU, FS, TargetOptions(), None, None, CodeGenOpt::Default))); +} + +std::unique_ptr createInstrInfo(TargetMachine *TM) { + AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), + std::string(TM->getTargetFeatureString()), *TM, + /* isLittle */ false); + return std::make_unique(ST); +} + +TEST(MatrixRegisterAliasing, Aliasing) { + std::unique_ptr TM = createTargetMachine(); + ASSERT_TRUE(TM); + std::unique_ptr II = createInstrInfo(TM.get()); + + const AArch64RegisterInfo &TRI = II->getRegisterInfo(); + + // za overlaps with za.b + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZA, AArch64::ZAB0)); + + // za0.b overlaps with all tiles + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAQ0)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAQ15)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAD0)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAD7)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAS0)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAS3)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAH0)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAH1)); + + // za0.h aliases with za0.q, za2.q, .. + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ0)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ2)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ4)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ6)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ8)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ10)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ12)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ14)); + + // za1.h aliases with za1.q, za3.q, ... + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ1)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ3)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ5)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ7)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ9)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ11)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ13)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ15)); + + // za1.h doesn't alias with za0.q, za2.q, .. + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ0)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ2)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ4)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ6)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ8)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ10)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ12)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ14)); + + // za0.h doesn't alias with za1.q, za3.q, .. 
+ ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ1)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ3)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ5)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ7)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ9)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ11)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ13)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ15)); + + // za0.s aliases with za0.q, za4.q, za8.q, za12.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ0)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ4)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ8)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ12)); + + // za1.s aliases with za1.q, za5.q, za9.q, za13.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ1)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ5)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ9)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ13)); + + // za0.s doesn't alias with za1.q, za5.q, za9.q, za13.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ1)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ5)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ9)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ13)); + + // za1.s doesn't alias with za0.q, za4.q, za8.q, za12.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ0)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ4)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ8)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ12)); + + // za0.d aliases za0.q and za8.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ0)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ8)); + + // za1.d aliases za1.q and za9.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ1)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ9)); + + // za0.d doesn't alias with za1.q and za9.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ1)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ9)); + + // za1.d doesn't alias with za0.q and za8.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ0)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ8)); +} + +} // end anonymous namespace
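As a complementary illustration (a minimal sketch, not part of the patch), the same tile
hierarchy can also be walked through the zasubh*/zasubs*/zasubd*/zasubq* sub-register
indices defined in AArch64RegisterInfo.td, instead of regsOverlap(). The sketch assumes
it is appended to MatrixRegisterAliasing.cpp, so it can reuse the createTargetMachine and
createInstrInfo helpers above, and it assumes the TableGen-generated sub-register index
enumerators carry the def names (e.g. AArch64::zasubs0).

// A minimal sketch: walk the ZA tile hierarchy via sub-register indices.
// Relies on the helpers and includes already present in MatrixRegisterAliasing.cpp.
TEST(MatrixRegisterAliasing, SubRegIndices) {
  auto TM = createTargetMachine();
  ASSERT_TRUE(TM);
  auto II = createInstrInfo(TM.get());
  const AArch64RegisterInfo &TRI = II->getRegisterInfo();

  // za0.b is the pair of halves za0.h and za1.h.
  EXPECT_EQ(TRI.getSubReg(AArch64::ZAB0, AArch64::zasubh0), AArch64::ZAH0);
  EXPECT_EQ(TRI.getSubReg(AArch64::ZAB0, AArch64::zasubh1), AArch64::ZAH1);

  // za0.h splits into za0.s and za2.s, matching the interleaved tile layout.
  EXPECT_EQ(TRI.getSubReg(AArch64::ZAH0, AArch64::zasubs0), AArch64::ZAS0);
  EXPECT_EQ(TRI.getSubReg(AArch64::ZAH0, AArch64::zasubs1), AArch64::ZAS2);

  // za0.s splits into za0.d and za4.d.
  EXPECT_EQ(TRI.getSubReg(AArch64::ZAS0, AArch64::zasubd0), AArch64::ZAD0);
  EXPECT_EQ(TRI.getSubReg(AArch64::ZAS0, AArch64::zasubd1), AArch64::ZAD4);

  // za0.d splits into za0.q and za8.q.
  EXPECT_EQ(TRI.getSubReg(AArch64::ZAD0, AArch64::zasubq0), AArch64::ZAQ0);
  EXPECT_EQ(TRI.getSubReg(AArch64::ZAD0, AArch64::zasubq1), AArch64::ZAQ8);
}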