[AArch64][SME] Add matrix register definitions and parsing support

SME introduces the ZA array, a new piece of architectural register state consisting of a matrix of [SVLb x SVLb] bytes, where SVL is the implementation defined Streaming SVE vector length and SVLb is the number of 8-bit elements in a vector of SVL bits. SME instructions consist of three types of matrix operands: * Tiles: a ZA tile is a square, two-dimensional sub-array of elements within the ZA array. These tiles make up the larger accumulator array and the granularity varies based on the element size, i.e. - ZAQ0..ZAQ15 (smallest tile granule) - ZAD0..ZAD7 - ZAS0..ZAS3 - ZAH0..ZAH1 or ZAB0 (largest tile granule, single tile) * Tile vectors: similar to regular tiles, but have an extra 'h' or 'v' to tell how the vector at [reg+offset] is layed out in the tile, horizontally or vertically. E.g. za1h.h or za15v.q, which corresponds to vectors in registers ZAH1 and ZAQ15, respectively. * Accumulator matrix: this is the entire accumulator array ZA. This patch adds the register classes and related operands and parsing for SME instructions operating on the accumulator array. The ADDHA and ADDVA instructions which operate on tiles are also added in this patch to make some use of the code added, later patches will make use of the other operands introduced here. The reference can be found here: https://developer.arm.com/documentation/ddi0602/2021-06 Co-authored by: Sander de Smalen (@sdesmalen) Reviewed By: david-arm Differential Revision: https://reviews.llvm.org/D105570
2024-11-21 18:22:53 +01:00 · 2021-07-14 08:01:19 +00:00 · 2021-07-14 08:01:19 +00:00 · fcd9253fa0
commit fcd9253fa0
parent 09d83ebb79
16 changed files with 1174 additions and 2 deletions
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@ -648,6 +648,7 @@ let RecomputePerFunction = 1 in {

 include "AArch64InstrFormats.td"
 include "SVEInstrFormats.td"
+include "SMEInstrFormats.td"

 //===----------------------------------------------------------------------===//

@ -8115,5 +8116,5 @@ def StoreSwiftAsyncContext

 include "AArch64InstrAtomics.td"
 include "AArch64SVEInstrInfo.td"
-
+include "AArch64SMEInstrInfo.td"
 include "AArch64InstrGISel.td"
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@ -45,6 +45,16 @@ let Namespace = "AArch64" in {
  def qsub1 : SubRegIndex<128>;
  def qsub2 : SubRegIndex<128>;
  def qsub3 : SubRegIndex<128>;
+  // Note: Code depends on these having consecutive numbers
+  def zasubb  : SubRegIndex<2048>; // (16 x 16)/1 bytes  = 2048 bits
+  def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes  = 1024 bits
+  def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes  = 1024 bits
+  def zasubs0 : SubRegIndex<512>;  // (16 x 16)/4 bytes  = 512 bits
+  def zasubs1 : SubRegIndex<512>;  // (16 x 16)/4 bytes  = 512 bits
+  def zasubd0 : SubRegIndex<256>;  // (16 x 16)/8 bytes  = 256 bits
+  def zasubd1 : SubRegIndex<256>;  // (16 x 16)/8 bytes  = 256 bits
+  def zasubq0 : SubRegIndex<128>;  // (16 x 16)/16 bytes = 128 bits
+  def zasubq1 : SubRegIndex<128>;  // (16 x 16)/16 bytes = 128 bits
 }

 let Namespace = "AArch64" in {
@ -1156,3 +1166,188 @@ foreach Scale = [8, 16, 32, 64] in {
  def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">;
  def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>;
 }
+
+// Accumulator array tiles.
+def ZAQ0  : AArch64Reg<0,  "za0.q">;
+def ZAQ1  : AArch64Reg<1,  "za1.q">;
+def ZAQ2  : AArch64Reg<2,  "za2.q">;
+def ZAQ3  : AArch64Reg<3,  "za3.q">;
+def ZAQ4  : AArch64Reg<4,  "za4.q">;
+def ZAQ5  : AArch64Reg<5,  "za5.q">;
+def ZAQ6  : AArch64Reg<6,  "za6.q">;
+def ZAQ7  : AArch64Reg<7,  "za7.q">;
+def ZAQ8  : AArch64Reg<8,  "za8.q">;
+def ZAQ9  : AArch64Reg<9,  "za9.q">;
+def ZAQ10 : AArch64Reg<10, "za10.q">;
+def ZAQ11 : AArch64Reg<11, "za11.q">;
+def ZAQ12 : AArch64Reg<12, "za12.q">;
+def ZAQ13 : AArch64Reg<13, "za13.q">;
+def ZAQ14 : AArch64Reg<14, "za14.q">;
+def ZAQ15 : AArch64Reg<15, "za15.q">;
+
+let SubRegIndices = [zasubq0, zasubq1] in {
+  def ZAD0 : AArch64Reg<0, "za0.d", [ZAQ0, ZAQ8]>;
+  def ZAD1 : AArch64Reg<1, "za1.d", [ZAQ1, ZAQ9]>;
+  def ZAD2 : AArch64Reg<2, "za2.d", [ZAQ2, ZAQ10]>;
+  def ZAD3 : AArch64Reg<3, "za3.d", [ZAQ3, ZAQ11]>;
+  def ZAD4 : AArch64Reg<4, "za4.d", [ZAQ4, ZAQ12]>;
+  def ZAD5 : AArch64Reg<5, "za5.d", [ZAQ5, ZAQ13]>;
+  def ZAD6 : AArch64Reg<6, "za6.d", [ZAQ6, ZAQ14]>;
+  def ZAD7 : AArch64Reg<7, "za7.d", [ZAQ7, ZAQ15]>;
+}
+
+let SubRegIndices = [zasubd0, zasubd1] in {
+  def ZAS0 : AArch64Reg<0, "za0.s", [ZAD0, ZAD4]>;
+  def ZAS1 : AArch64Reg<1, "za1.s", [ZAD1, ZAD5]>;
+  def ZAS2 : AArch64Reg<2, "za2.s", [ZAD2, ZAD6]>;
+  def ZAS3 : AArch64Reg<3, "za3.s", [ZAD3, ZAD7]>;
+}
+
+let SubRegIndices = [zasubs0, zasubs1] in {
+  def ZAH0 : AArch64Reg<0, "za0.h", [ZAS0, ZAS2]>;
+  def ZAH1 : AArch64Reg<1, "za1.h", [ZAS1, ZAS3]>;
+}
+
+let SubRegIndices = [zasubh0, zasubh1] in {
+  def ZAB0 : AArch64Reg<0, "za0.b", [ZAH0, ZAH1]>;
+}
+
+let SubRegIndices = [zasubb] in {
+  def ZA : AArch64Reg<0, "za", [ZAB0]>;
+}
+
+// SME Register Classes
+
+// Accumulator array
+def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> {
+  let Size = 2048;
+}
+
+// Accumulator array as single tiles
+def MPR8    : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> {
+  let Size = 2048;
+}
+def MPR16   : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> {
+  let Size = 1024;
+}
+def MPR32   : RegisterClass<"AArch64", [untyped],  512, (add (sequence "ZAS%u", 0, 3))> {
+  let Size = 512;
+}
+def MPR64   : RegisterClass<"AArch64", [untyped],  256, (add (sequence "ZAD%u", 0, 7))> {
+  let Size = 256;
+}
+def MPR128  : RegisterClass<"AArch64", [untyped],  128, (add (sequence "ZAQ%u", 0, 15))> {
+  let Size = 128;
+}
+
+// SME Register Operands
+// There are three types of SME matrix register operands:
+// * Tiles:
+//
+//   These tiles make up the larger accumulator matrix. The tile representation
+//   has an element type suffix, e.g. za0.b or za15.q and can be any of the
+//   registers:
+//          ZAQ0..ZAQ15
+//          ZAD0..ZAD7
+//          ZAS0..ZAS3
+//          ZAH0..ZAH1
+//       or ZAB0
+//
+// * Tile vectors:
+//
+//   Their representation is similar to regular tiles, but they have an extra
+//   'h' or 'v' to tell how the vector at [reg+offset] is layed out in the tile,
+//   horizontally or vertically.
+//
+//   e.g. za1h.h or za15v.q, which corresponds to vectors in registers ZAH1 and
+//   ZAQ15, respectively. The horizontal/vertical is more a property of the
+//   instruction, than a property of the asm-operand itself, or its register.
+//   The distinction is required for the parsing/printing of the operand,
+//   as from a compiler's perspective, the whole tile is read/written.
+//
+// * Accumulator matrix:
+//
+//   This is the entire matrix accumulator register ZA (<=> ZAB0), printed as
+//   'za'.
+
+//
+// Tiles
+//
+
+class MatrixTileAsmOperand<string RC, int EltSize> : AsmOperandClass {
+  let Name = "MatrixTile" # EltSize;
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::Tile" # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixTileOperand<int EltSize, int NumBitsForTile, RegisterClass RC>
+    : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixTileAsmOperand<!cast<string>(RC), EltSize>;
+  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+  let PrintMethod = "printMatrixTile";
+}
+
+def TileOp32  : MatrixTileOperand<32, 2, MPR32>;
+def TileOp64  : MatrixTileOperand<64, 3, MPR64>;
+
+//
+// Tile vectors (horizontal and vertical)
+//
+
+class MatrixTileVectorAsmOperand<string RC, int EltSize, int IsVertical>
+    : AsmOperandClass {
+  let Name = "MatrixTileVector" # !if(IsVertical, "V", "H") # EltSize;
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::"
+                          # !if(IsVertical, "Col", "Row") # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixTileVectorOperand<int EltSize, int NumBitsForTile,
+                              RegisterClass RC, int IsVertical>
+    : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixTileVectorAsmOperand<!cast<string>(RC), EltSize,
+                                                    IsVertical>;
+  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+  let PrintMethod = "printMatrixTileVector<" # IsVertical # ">";
+}
+
+def TileVectorOpH8   : MatrixTileVectorOperand<  8, 0, MPR8,   0>;
+def TileVectorOpH16  : MatrixTileVectorOperand< 16, 1, MPR16,  0>;
+def TileVectorOpH32  : MatrixTileVectorOperand< 32, 2, MPR32,  0>;
+def TileVectorOpH64  : MatrixTileVectorOperand< 64, 3, MPR64,  0>;
+def TileVectorOpH128 : MatrixTileVectorOperand<128, 4, MPR128, 0>;
+
+def TileVectorOpV8   : MatrixTileVectorOperand<  8, 0, MPR8,   1>;
+def TileVectorOpV16  : MatrixTileVectorOperand< 16, 1, MPR16,  1>;
+def TileVectorOpV32  : MatrixTileVectorOperand< 32, 2, MPR32,  1>;
+def TileVectorOpV64  : MatrixTileVectorOperand< 64, 3, MPR64,  1>;
+def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>;
+
+//
+// Accumulator matrix
+//
+
+class MatrixAsmOperand<string RC, int EltSize> : AsmOperandClass {
+  let Name = "Matrix";
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::Array" # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixOperand<RegisterClass RC, int EltSize> : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixAsmOperand<!cast<string>(RC), EltSize>;
+  let PrintMethod = "printMatrix<" # EltSize # ">";
+}
+
+def MatrixOp : MatrixOperand<MPR, 0>;
--- a/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/lib/Target/AArch64/AArch64SMEInstrInfo.td
@ -0,0 +1,25 @@
+//=- AArch64SMEInstrInfo.td -  AArch64 SME Instructions -*- tablegen -*-----=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Add vector elements horizontally or vertically to ZA tile.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasSME] in {
+def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">;
+def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">;
+}
+
+let Predicates = [HasSMEI64] in {
+def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">;
+def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">;
+}
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@ -66,9 +66,12 @@ enum class RegKind {
  Scalar,
  NeonVector,
  SVEDataVector,
-  SVEPredicateVector
+  SVEPredicateVector,
+  Matrix
 };

+enum class MatrixKind { Array, Tile, Row, Col };
+
 enum RegConstraintEqualityTy {
  EqualsReg,
  EqualsSuperReg,
@ -229,6 +232,7 @@ private:
  OperandMatchResultTy tryParseScalarRegister(unsigned &Reg);
  OperandMatchResultTy tryParseVectorRegister(unsigned &Reg, StringRef &Kind,
                                              RegKind MatchKind);
+  OperandMatchResultTy tryParseMatrixRegister(OperandVector &Operands);
  OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands);
  OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
  OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands);
@ -316,6 +320,7 @@ private:
    k_ShiftedImm,
    k_CondCode,
    k_Register,
+    k_MatrixRegister,
    k_VectorList,
    k_VectorIndex,
    k_Token,
@ -370,6 +375,12 @@ private:
    ShiftExtendOp ShiftExtend;
  };

+  struct MatrixRegOp {
+    unsigned RegNum;
+    unsigned ElementWidth;
+    MatrixKind Kind;
+  };
+
  struct VectorListOp {
    unsigned RegNum;
    unsigned Count;
@ -440,6 +451,7 @@ private:
  union {
    struct TokOp Tok;
    struct RegOp Reg;
+    struct MatrixRegOp MatrixReg;
    struct VectorListOp VectorList;
    struct VectorIndexOp VectorIndex;
    struct ImmOp Imm;
@ -488,6 +500,9 @@ public:
    case k_Register:
      Reg = o.Reg;
      break;
+    case k_MatrixRegister:
+      MatrixReg = o.MatrixReg;
+      break;
    case k_VectorList:
      VectorList = o.VectorList;
      break;
@ -580,6 +595,21 @@ public:
    return Reg.RegNum;
  }

+  unsigned getMatrixReg() const {
+    assert(Kind == k_MatrixRegister && "Invalid access!");
+    return MatrixReg.RegNum;
+  }
+
+  unsigned getMatrixElementWidth() const {
+    assert(Kind == k_MatrixRegister && "Invalid access!");
+    return MatrixReg.ElementWidth;
+  }
+
+  MatrixKind getMatrixKind() const {
+    assert(Kind == k_MatrixRegister && "Invalid access!");
+    return MatrixReg.Kind;
+  }
+
  RegConstraintEqualityTy getRegEqualityTy() const {
    assert(Kind == k_Register && "Invalid access!");
    return Reg.EqualityTy;
@ -1089,6 +1119,8 @@ public:
                Reg.RegNum));
  }

+  bool isMatrix() const { return Kind == k_MatrixRegister; }
+
  template <unsigned Class> bool isSVEVectorReg() const {
    RegKind RK;
    switch (Class) {
@ -1470,6 +1502,15 @@ public:
    return true;
  }

+  template <MatrixKind Kind, unsigned EltSize, unsigned RegClass>
+  DiagnosticPredicate isMatrixRegOperand() const {
+    if (isMatrix() && getMatrixKind() == Kind &&
+        AArch64MCRegisterClasses[RegClass].contains(getMatrixReg()) &&
+        EltSize == getMatrixElementWidth())
+      return DiagnosticPredicateTy::Match;
+    return DiagnosticPredicateTy::NoMatch;
+  }
+
  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
    // Add as immediates when possible.  Null MCExpr = 0.
    if (!Expr)
@ -1485,6 +1526,11 @@ public:
    Inst.addOperand(MCOperand::createReg(getReg()));
  }

+  void addMatrixOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getMatrixReg()));
+  }
+
  void addGPR32as64Operands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    assert(
@ -2054,6 +2100,18 @@ public:
    return Op;
  }

+  static std::unique_ptr<AArch64Operand>
+  CreateMatrixRegister(unsigned RegNum, unsigned ElementWidth, MatrixKind Kind,
+                       SMLoc S, SMLoc E, MCContext &Ctx) {
+    auto Op = std::make_unique<AArch64Operand>(k_MatrixRegister, Ctx);
+    Op->MatrixReg.RegNum = RegNum;
+    Op->MatrixReg.ElementWidth = ElementWidth;
+    Op->MatrixReg.Kind = Kind;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
  static std::unique_ptr<AArch64Operand>
  CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
                    bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
@ -2132,6 +2190,9 @@ void AArch64Operand::print(raw_ostream &OS) const {
  case k_BTIHint:
    OS << getBTIHintName();
    break;
+  case k_MatrixRegister:
+    OS << "<matrix " << getMatrixReg() << ">";
+    break;
  case k_Register:
    OS << "<register " << getReg() << ">";
    if (!getShiftExtendAmount() && !hasShiftExtendAmount())
@ -2229,6 +2290,7 @@ static Optional<std::pair<int, int>> parseVectorKind(StringRef Suffix,
    break;
  case RegKind::SVEPredicateVector:
  case RegKind::SVEDataVector:
+  case RegKind::Matrix:
    Res = StringSwitch<std::pair<int, int>>(Suffix.lower())
              .Case("", {0, 0})
              .Case(".b", {0, 8})
@ -2310,6 +2372,105 @@ static unsigned matchSVEPredicateVectorRegName(StringRef Name) {
      .Default(0);
 }

+static unsigned matchMatrixRegName(StringRef Name) {
+  return StringSwitch<unsigned>(Name.lower())
+      .Case("za", AArch64::ZA)
+      .Case("za0.q", AArch64::ZAQ0)
+      .Case("za1.q", AArch64::ZAQ1)
+      .Case("za2.q", AArch64::ZAQ2)
+      .Case("za3.q", AArch64::ZAQ3)
+      .Case("za4.q", AArch64::ZAQ4)
+      .Case("za5.q", AArch64::ZAQ5)
+      .Case("za6.q", AArch64::ZAQ6)
+      .Case("za7.q", AArch64::ZAQ7)
+      .Case("za8.q", AArch64::ZAQ8)
+      .Case("za9.q", AArch64::ZAQ9)
+      .Case("za10.q", AArch64::ZAQ10)
+      .Case("za11.q", AArch64::ZAQ11)
+      .Case("za12.q", AArch64::ZAQ12)
+      .Case("za13.q", AArch64::ZAQ13)
+      .Case("za14.q", AArch64::ZAQ14)
+      .Case("za15.q", AArch64::ZAQ15)
+      .Case("za0.d", AArch64::ZAD0)
+      .Case("za1.d", AArch64::ZAD1)
+      .Case("za2.d", AArch64::ZAD2)
+      .Case("za3.d", AArch64::ZAD3)
+      .Case("za4.d", AArch64::ZAD4)
+      .Case("za5.d", AArch64::ZAD5)
+      .Case("za6.d", AArch64::ZAD6)
+      .Case("za7.d", AArch64::ZAD7)
+      .Case("za0.s", AArch64::ZAS0)
+      .Case("za1.s", AArch64::ZAS1)
+      .Case("za2.s", AArch64::ZAS2)
+      .Case("za3.s", AArch64::ZAS3)
+      .Case("za0.h", AArch64::ZAH0)
+      .Case("za1.h", AArch64::ZAH1)
+      .Case("za0.b", AArch64::ZAB0)
+      .Case("za0h.q", AArch64::ZAQ0)
+      .Case("za1h.q", AArch64::ZAQ1)
+      .Case("za2h.q", AArch64::ZAQ2)
+      .Case("za3h.q", AArch64::ZAQ3)
+      .Case("za4h.q", AArch64::ZAQ4)
+      .Case("za5h.q", AArch64::ZAQ5)
+      .Case("za6h.q", AArch64::ZAQ6)
+      .Case("za7h.q", AArch64::ZAQ7)
+      .Case("za8h.q", AArch64::ZAQ8)
+      .Case("za9h.q", AArch64::ZAQ9)
+      .Case("za10h.q", AArch64::ZAQ10)
+      .Case("za11h.q", AArch64::ZAQ11)
+      .Case("za12h.q", AArch64::ZAQ12)
+      .Case("za13h.q", AArch64::ZAQ13)
+      .Case("za14h.q", AArch64::ZAQ14)
+      .Case("za15h.q", AArch64::ZAQ15)
+      .Case("za0h.d", AArch64::ZAD0)
+      .Case("za1h.d", AArch64::ZAD1)
+      .Case("za2h.d", AArch64::ZAD2)
+      .Case("za3h.d", AArch64::ZAD3)
+      .Case("za4h.d", AArch64::ZAD4)
+      .Case("za5h.d", AArch64::ZAD5)
+      .Case("za6h.d", AArch64::ZAD6)
+      .Case("za7h.d", AArch64::ZAD7)
+      .Case("za0h.s", AArch64::ZAS0)
+      .Case("za1h.s", AArch64::ZAS1)
+      .Case("za2h.s", AArch64::ZAS2)
+      .Case("za3h.s", AArch64::ZAS3)
+      .Case("za0h.h", AArch64::ZAH0)
+      .Case("za1h.h", AArch64::ZAH1)
+      .Case("za0h.b", AArch64::ZAB0)
+      .Case("za0v.q", AArch64::ZAQ0)
+      .Case("za1v.q", AArch64::ZAQ1)
+      .Case("za2v.q", AArch64::ZAQ2)
+      .Case("za3v.q", AArch64::ZAQ3)
+      .Case("za4v.q", AArch64::ZAQ4)
+      .Case("za5v.q", AArch64::ZAQ5)
+      .Case("za6v.q", AArch64::ZAQ6)
+      .Case("za7v.q", AArch64::ZAQ7)
+      .Case("za8v.q", AArch64::ZAQ8)
+      .Case("za9v.q", AArch64::ZAQ9)
+      .Case("za10v.q", AArch64::ZAQ10)
+      .Case("za11v.q", AArch64::ZAQ11)
+      .Case("za12v.q", AArch64::ZAQ12)
+      .Case("za13v.q", AArch64::ZAQ13)
+      .Case("za14v.q", AArch64::ZAQ14)
+      .Case("za15v.q", AArch64::ZAQ15)
+      .Case("za0v.d", AArch64::ZAD0)
+      .Case("za1v.d", AArch64::ZAD1)
+      .Case("za2v.d", AArch64::ZAD2)
+      .Case("za3v.d", AArch64::ZAD3)
+      .Case("za4v.d", AArch64::ZAD4)
+      .Case("za5v.d", AArch64::ZAD5)
+      .Case("za6v.d", AArch64::ZAD6)
+      .Case("za7v.d", AArch64::ZAD7)
+      .Case("za0v.s", AArch64::ZAS0)
+      .Case("za1v.s", AArch64::ZAS1)
+      .Case("za2v.s", AArch64::ZAS2)
+      .Case("za3v.s", AArch64::ZAS3)
+      .Case("za0v.h", AArch64::ZAH0)
+      .Case("za1v.h", AArch64::ZAH1)
+      .Case("za0v.b", AArch64::ZAB0)
+      .Default(0);
+}
+
 bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                     SMLoc &EndLoc) {
  return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
@ -2337,6 +2498,9 @@ unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
  if ((RegNum = MatchNeonVectorRegName(Name)))
    return Kind == RegKind::NeonVector ? RegNum : 0;

+  if ((RegNum = matchMatrixRegName(Name)))
+    return Kind == RegKind::Matrix ? RegNum : 0;
+
  // The parsed register must be of RegKind Scalar
  if ((RegNum = MatchRegisterName(Name)))
    return Kind == RegKind::Scalar ? RegNum : 0;
@ -2809,6 +2973,54 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
  return false;
 }

+OperandMatchResultTy
+AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
+  MCAsmParser &Parser = getParser();
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc S = getLoc();
+
+  StringRef Name = Tok.getString();
+
+  if (Name.equals_insensitive("za")) {
+    Parser.Lex(); // eat "za"
+    Operands.push_back(AArch64Operand::CreateMatrixRegister(
+        AArch64::ZA, /*ElementWidth=*/0, MatrixKind::Array, S, getLoc(),
+        getContext()));
+    return MatchOperand_Success;
+  }
+
+  // Try to parse matrix register.
+  unsigned Reg = matchRegisterNameAlias(Name, RegKind::Matrix);
+  if (!Reg)
+    return MatchOperand_NoMatch;
+
+  size_t DotPosition = Name.find('.');
+  assert(DotPosition != StringRef::npos && "Unexpected register");
+
+  StringRef Head = Name.take_front(DotPosition);
+  StringRef Tail = Name.drop_front(DotPosition);
+  StringRef RowOrColumn = Head.take_back();
+
+  MatrixKind Kind = StringSwitch<MatrixKind>(RowOrColumn)
+                        .Case("h", MatrixKind::Row)
+                        .Case("v", MatrixKind::Col)
+                        .Default(MatrixKind::Tile);
+
+  // Next up, parsing the suffix
+  const auto &KindRes = parseVectorKind(Tail, RegKind::Matrix);
+  if (!KindRes) {
+    TokError("Expected the register to be followed by element width suffix");
+    return MatchOperand_ParseFail;
+  }
+  unsigned ElementWidth = KindRes->second;
+
+  Parser.Lex();
+
+  Operands.push_back(AArch64Operand::CreateMatrixRegister(
+      Reg, ElementWidth, Kind, S, getLoc(), getContext()));
+  return MatchOperand_Success;
+}
+
 /// tryParseOptionalShift - Some operands take an optional shift argument. Parse
 /// them if present.
 OperandMatchResultTy
@ -4733,6 +4945,32 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
    return Error(Loc, "Invalid floating point constant, expected 0.5 or 2.0.");
  case Match_InvalidSVEExactFPImmOperandZeroOne:
    return Error(Loc, "Invalid floating point constant, expected 0.0 or 1.0.");
+  case Match_InvalidMatrixTileVectorH8:
+    return Error(Loc, "invalid matrix operand, expected za0h.b");
+  case Match_InvalidMatrixTileVectorH16:
+    return Error(Loc, "invalid matrix operand, expected za[0-1]h.h");
+  case Match_InvalidMatrixTileVectorH32:
+    return Error(Loc, "invalid matrix operand, expected za[0-3]h.s");
+  case Match_InvalidMatrixTileVectorH64:
+    return Error(Loc, "invalid matrix operand, expected za[0-7]h.d");
+  case Match_InvalidMatrixTileVectorH128:
+    return Error(Loc, "invalid matrix operand, expected za[0-15]h.q");
+  case Match_InvalidMatrixTileVectorV8:
+    return Error(Loc, "invalid matrix operand, expected za0v.b");
+  case Match_InvalidMatrixTileVectorV16:
+    return Error(Loc, "invalid matrix operand, expected za[0-1]v.h");
+  case Match_InvalidMatrixTileVectorV32:
+    return Error(Loc, "invalid matrix operand, expected za[0-3]v.s");
+  case Match_InvalidMatrixTileVectorV64:
+    return Error(Loc, "invalid matrix operand, expected za[0-7]v.d");
+  case Match_InvalidMatrixTileVectorV128:
+    return Error(Loc, "invalid matrix operand, expected za[0-15]v.q");
+  case Match_InvalidMatrixTile32:
+    return Error(Loc, "invalid matrix operand, expected za[0-3].s");
+  case Match_InvalidMatrixTile64:
+    return Error(Loc, "invalid matrix operand, expected za[0-7].d");
+  case Match_InvalidMatrix:
+    return Error(Loc, "invalid matrix operand, expected za");
  default:
    llvm_unreachable("unexpected error code!");
  }
@ -5251,6 +5489,19 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
  case Match_InvalidSVEExactFPImmOperandHalfOne:
  case Match_InvalidSVEExactFPImmOperandHalfTwo:
  case Match_InvalidSVEExactFPImmOperandZeroOne:
+  case Match_InvalidMatrixTile32:
+  case Match_InvalidMatrixTile64:
+  case Match_InvalidMatrix:
+  case Match_InvalidMatrixTileVectorH8:
+  case Match_InvalidMatrixTileVectorH16:
+  case Match_InvalidMatrixTileVectorH32:
+  case Match_InvalidMatrixTileVectorH64:
+  case Match_InvalidMatrixTileVectorH128:
+  case Match_InvalidMatrixTileVectorV8:
+  case Match_InvalidMatrixTileVectorV16:
+  case Match_InvalidMatrixTileVectorV32:
+  case Match_InvalidMatrixTileVectorV64:
+  case Match_InvalidMatrixTileVectorV128:
  case Match_MSR:
  case Match_MRS: {
    if (ErrorInfo >= Operands.size())
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@ -111,6 +111,9 @@ static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo,
 static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
                                            uint64_t Address,
                                            const void *Decoder);
+template <unsigned NumBitsForTile>
+static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
+                                     uint64_t Address, const void *Decoder);
 static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                           uint64_t Address,
                                           const void *Decoder);
@ -642,6 +645,29 @@ static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
  return Success;
 }

+static const SmallVector<SmallVector<unsigned, 16>, 5>
+    MatrixZATileDecoderTable = {
+        {AArch64::ZAB0},
+        {AArch64::ZAH0, AArch64::ZAH1},
+        {AArch64::ZAS0, AArch64::ZAS1, AArch64::ZAS2, AArch64::ZAS3},
+        {AArch64::ZAD0, AArch64::ZAD1, AArch64::ZAD2, AArch64::ZAD3,
+         AArch64::ZAD4, AArch64::ZAD5, AArch64::ZAD6, AArch64::ZAD7},
+        {AArch64::ZAQ0, AArch64::ZAQ1, AArch64::ZAQ2, AArch64::ZAQ3,
+         AArch64::ZAQ4, AArch64::ZAQ5, AArch64::ZAQ6, AArch64::ZAQ7,
+         AArch64::ZAQ8, AArch64::ZAQ9, AArch64::ZAQ10, AArch64::ZAQ11,
+         AArch64::ZAQ12, AArch64::ZAQ13, AArch64::ZAQ14, AArch64::ZAQ15}};
+
+template <unsigned NumBitsForTile>
+static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
+                                     uint64_t Address, const void *Decoder) {
+  unsigned LastReg = (1 << NumBitsForTile) - 1;
+  if (RegNo > LastReg)
+    return Fail;
+  Inst.addOperand(
+      MCOperand::createReg(MatrixZATileDecoderTable[NumBitsForTile][RegNo]));
+  return Success;
+}
+
 static const unsigned PPRDecoderTable[] = {
  AArch64::P0,  AArch64::P1,  AArch64::P2,  AArch64::P3,
  AArch64::P4,  AArch64::P5,  AArch64::P6,  AArch64::P7,
--- a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@ -880,6 +880,59 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
  return true;
 }

+template <int EltSize>
+void AArch64InstPrinter::printMatrix(const MCInst *MI, unsigned OpNum,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  assert(RegOp.isReg() && "Unexpected operand type!");
+
+  O << getRegisterName(RegOp.getReg());
+  switch (EltSize) {
+  case 0:
+    break;
+  case 8:
+    O << ".b";
+    break;
+  case 16:
+    O << ".h";
+    break;
+  case 32:
+    O << ".s";
+    break;
+  case 64:
+    O << ".d";
+    break;
+  case 128:
+    O << ".q";
+    break;
+  default:
+    llvm_unreachable("Unsupported element size");
+  }
+}
+
+template <bool IsVertical>
+void AArch64InstPrinter::printMatrixTileVector(const MCInst *MI, unsigned OpNum,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  assert(RegOp.isReg() && "Unexpected operand type!");
+  StringRef RegName = getRegisterName(RegOp.getReg());
+
+  // Insert the horizontal/vertical flag before the suffix.
+  StringRef Base, Suffix;
+  std::tie(Base, Suffix) = RegName.split('.');
+  O << Base << (IsVertical ? "v" : "h") << '.' << Suffix;
+}
+
+void AArch64InstPrinter::printMatrixTile(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  assert(RegOp.isReg() && "Unexpected operand type!");
+  O << getRegisterName(RegOp.getReg());
+}
+
 void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
                                      const MCSubtargetInfo &STI,
                                      raw_ostream &O) {
--- a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@ -187,6 +187,15 @@ protected:
                          const MCSubtargetInfo &STI, raw_ostream &O);
  void printSVEPattern(const MCInst *MI, unsigned OpNum,
                       const MCSubtargetInfo &STI, raw_ostream &O);
+
+  template <bool IsVertical>
+  void printMatrixTileVector(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  void printMatrixTile(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  template <int EltSize>
+  void printMatrix(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+                   raw_ostream &O);
  template <char = 0>
  void printSVERegOp(const MCInst *MI, unsigned OpNum,
                    const MCSubtargetInfo &STI, raw_ostream &O);
--- a/lib/Target/AArch64/SMEInstrFormats.td
+++ b/lib/Target/AArch64/SMEInstrFormats.td
@ -0,0 +1,47 @@
+//=-- SMEInstrFormats.td -  AArch64 SME Instruction classes -*- tablegen -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SME Add Vector to Tile
+//===----------------------------------------------------------------------===//
+
+class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
+                                  ZPRRegOp zpr_ty, string mnemonic>
+    : I<(outs tile_ty:$ZAda),
+        (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
+        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
+        "", []>, Sched<[]> {
+  bits<3> Pm;
+  bits<3> Pn;
+  bits<5> Zn;
+  let Inst{31-23} = 0b110000001;
+  let Inst{22}    = op;
+  let Inst{21-17} = 0b01000;
+  let Inst{16}    = V;
+  let Inst{15-13} = Pm;
+  let Inst{12-10} = Pn;
+  let Inst{9-5}   = Zn;
+  let Inst{4-3}   = 0b00;
+}
+
+class sme_add_vector_to_tile_u32<bit V, string mnemonic>
+    : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
+  bits<2> ZAda;
+  let Inst{2}   = 0b0;
+  let Inst{1-0} = ZAda;
+}
+
+class sme_add_vector_to_tile_u64<bit V, string mnemonic>
+    : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
+  bits<3> ZAda;
+  let Inst{2-0} = ZAda;
+}
--- a/test/MC/AArch64/SME/addha-diagnostics.s
+++ b/test/MC/AArch64/SME/addha-diagnostics.s
@ -0,0 +1,52 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid tile
+
+addha za4.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za4.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za8.d, p0/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za8.d, p0/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0h.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0h.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0v.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0v.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0p.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0p.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+addha za0.s, p8/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.s, p8/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0.s, p0/m, p8/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.s, p0/m, p8/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0.d, p8/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.d, p8/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0.d, p0/m, p8/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addha za0.d, p0/m, p8/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- a/test/MC/AArch64/SME/addha-u32.s
+++ b/test/MC/AArch64/SME/addha-u32.s
@ -0,0 +1,85 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+addha   za0.s, p0/m, p0/m, z0.s
+// CHECK-INST: addha   za0.s, p0/m, p0/m, z0.s
+// CHECK-ENCODING: [0x00,0x00,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 00 00 90 c0 <unknown>
+
+addha   za1.s, p5/m, p2/m, z10.s
+// CHECK-INST: addha   za1.s, p5/m, p2/m, z10.s
+// CHECK-ENCODING: [0x41,0x55,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 41 55 90 c0 <unknown>
+
+addha   za3.s, p3/m, p7/m, z13.s
+// CHECK-INST: addha   za3.s, p3/m, p7/m, z13.s
+// CHECK-ENCODING: [0xa3,0xed,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: a3 ed 90 c0 <unknown>
+
+addha   za3.s, p7/m, p7/m, z31.s
+// CHECK-INST: addha   za3.s, p7/m, p7/m, z31.s
+// CHECK-ENCODING: [0xe3,0xff,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: e3 ff 90 c0 <unknown>
+
+addha   za1.s, p3/m, p0/m, z17.s
+// CHECK-INST: addha   za1.s, p3/m, p0/m, z17.s
+// CHECK-ENCODING: [0x21,0x0e,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 0e 90 c0 <unknown>
+
+addha   za1.s, p1/m, p4/m, z1.s
+// CHECK-INST: addha   za1.s, p1/m, p4/m, z1.s
+// CHECK-ENCODING: [0x21,0x84,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 84 90 c0 <unknown>
+
+addha   za0.s, p5/m, p2/m, z19.s
+// CHECK-INST: addha   za0.s, p5/m, p2/m, z19.s
+// CHECK-ENCODING: [0x60,0x56,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 60 56 90 c0 <unknown>
+
+addha   za0.s, p6/m, p0/m, z12.s
+// CHECK-INST: addha   za0.s, p6/m, p0/m, z12.s
+// CHECK-ENCODING: [0x80,0x19,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 80 19 90 c0 <unknown>
+
+addha   za1.s, p2/m, p6/m, z1.s
+// CHECK-INST: addha   za1.s, p2/m, p6/m, z1.s
+// CHECK-ENCODING: [0x21,0xc8,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 c8 90 c0 <unknown>
+
+addha   za1.s, p2/m, p0/m, z22.s
+// CHECK-INST: addha   za1.s, p2/m, p0/m, z22.s
+// CHECK-ENCODING: [0xc1,0x0a,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: c1 0a 90 c0 <unknown>
+
+addha   za2.s, p5/m, p7/m, z9.s
+// CHECK-INST: addha   za2.s, p5/m, p7/m, z9.s
+// CHECK-ENCODING: [0x22,0xf5,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 22 f5 90 c0 <unknown>
+
+addha   za3.s, p2/m, p5/m, z12.s
+// CHECK-INST: addha   za3.s, p2/m, p5/m, z12.s
+// CHECK-ENCODING: [0x83,0xa9,0x90,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 83 a9 90 c0 <unknown>
--- a/test/MC/AArch64/SME/addha-u64.s
+++ b/test/MC/AArch64/SME/addha-u64.s
@ -0,0 +1,85 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+addha   za0.d, p0/m, p0/m, z0.d
+// CHECK-INST: addha   za0.d, p0/m, p0/m, z0.d
+// CHECK-ENCODING: [0x00,0x00,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 00 00 d0 c0 <unknown>
+
+addha   za5.d, p5/m, p2/m, z10.d
+// CHECK-INST: addha   za5.d, p5/m, p2/m, z10.d
+// CHECK-ENCODING: [0x45,0x55,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 45 55 d0 c0 <unknown>
+
+addha   za7.d, p3/m, p7/m, z13.d
+// CHECK-INST: addha   za7.d, p3/m, p7/m, z13.d
+// CHECK-ENCODING: [0xa7,0xed,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: a7 ed d0 c0 <unknown>
+
+addha   za7.d, p7/m, p7/m, z31.d
+// CHECK-INST: addha   za7.d, p7/m, p7/m, z31.d
+// CHECK-ENCODING: [0xe7,0xff,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: e7 ff d0 c0 <unknown>
+
+addha   za5.d, p3/m, p0/m, z17.d
+// CHECK-INST: addha   za5.d, p3/m, p0/m, z17.d
+// CHECK-ENCODING: [0x25,0x0e,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 25 0e d0 c0 <unknown>
+
+addha   za1.d, p1/m, p4/m, z1.d
+// CHECK-INST: addha   za1.d, p1/m, p4/m, z1.d
+// CHECK-ENCODING: [0x21,0x84,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 84 d0 c0 <unknown>
+
+addha   za0.d, p5/m, p2/m, z19.d
+// CHECK-INST: addha   za0.d, p5/m, p2/m, z19.d
+// CHECK-ENCODING: [0x60,0x56,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 60 56 d0 c0 <unknown>
+
+addha   za0.d, p6/m, p0/m, z12.d
+// CHECK-INST: addha   za0.d, p6/m, p0/m, z12.d
+// CHECK-ENCODING: [0x80,0x19,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 80 19 d0 c0 <unknown>
+
+addha   za1.d, p2/m, p6/m, z1.d
+// CHECK-INST: addha   za1.d, p2/m, p6/m, z1.d
+// CHECK-ENCODING: [0x21,0xc8,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 c8 d0 c0 <unknown>
+
+addha   za5.d, p2/m, p0/m, z22.d
+// CHECK-INST: addha   za5.d, p2/m, p0/m, z22.d
+// CHECK-ENCODING: [0xc5,0x0a,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: c5 0a d0 c0 <unknown>
+
+addha   za2.d, p5/m, p7/m, z9.d
+// CHECK-INST: addha   za2.d, p5/m, p7/m, z9.d
+// CHECK-ENCODING: [0x22,0xf5,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 22 f5 d0 c0 <unknown>
+
+addha   za7.d, p2/m, p5/m, z12.d
+// CHECK-INST: addha   za7.d, p2/m, p5/m, z12.d
+// CHECK-ENCODING: [0x87,0xa9,0xd0,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 87 a9 d0 c0 <unknown>
--- a/test/MC/AArch64/SME/addva-diagnostics.s
+++ b/test/MC/AArch64/SME/addva-diagnostics.s
@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid tile
+
+addva za4.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addva za4.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addva za8.d, p0/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addva za8.d, p0/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+addva za0.s, p8/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addva za0.s, p8/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addva za0.s, p0/m, p8/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addva za0.s, p0/m, p8/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addva za0.d, p8/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addva za0.d, p8/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addva za0.d, p0/m, p8/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: addva za0.d, p0/m, p8/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- a/test/MC/AArch64/SME/addva-u32.s
+++ b/test/MC/AArch64/SME/addva-u32.s
@ -0,0 +1,85 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+addva   za0.s, p0/m, p0/m, z0.s
+// CHECK-INST: addva   za0.s, p0/m, p0/m, z0.s
+// CHECK-ENCODING: [0x00,0x00,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 00 00 91 c0 <unknown>
+
+addva   za1.s, p5/m, p2/m, z10.s
+// CHECK-INST: addva   za1.s, p5/m, p2/m, z10.s
+// CHECK-ENCODING: [0x41,0x55,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 41 55 91 c0 <unknown>
+
+addva   za3.s, p3/m, p7/m, z13.s
+// CHECK-INST: addva   za3.s, p3/m, p7/m, z13.s
+// CHECK-ENCODING: [0xa3,0xed,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: a3 ed 91 c0 <unknown>
+
+addva   za3.s, p7/m, p7/m, z31.s
+// CHECK-INST: addva   za3.s, p7/m, p7/m, z31.s
+// CHECK-ENCODING: [0xe3,0xff,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: e3 ff 91 c0 <unknown>
+
+addva   za1.s, p3/m, p0/m, z17.s
+// CHECK-INST: addva   za1.s, p3/m, p0/m, z17.s
+// CHECK-ENCODING: [0x21,0x0e,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 0e 91 c0 <unknown>
+
+addva   za1.s, p1/m, p4/m, z1.s
+// CHECK-INST: addva   za1.s, p1/m, p4/m, z1.s
+// CHECK-ENCODING: [0x21,0x84,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 84 91 c0 <unknown>
+
+addva   za0.s, p5/m, p2/m, z19.s
+// CHECK-INST: addva   za0.s, p5/m, p2/m, z19.s
+// CHECK-ENCODING: [0x60,0x56,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 60 56 91 c0 <unknown>
+
+addva   za0.s, p6/m, p0/m, z12.s
+// CHECK-INST: addva   za0.s, p6/m, p0/m, z12.s
+// CHECK-ENCODING: [0x80,0x19,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 80 19 91 c0 <unknown>
+
+addva   za1.s, p2/m, p6/m, z1.s
+// CHECK-INST: addva   za1.s, p2/m, p6/m, z1.s
+// CHECK-ENCODING: [0x21,0xc8,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 21 c8 91 c0 <unknown>
+
+addva   za1.s, p2/m, p0/m, z22.s
+// CHECK-INST: addva   za1.s, p2/m, p0/m, z22.s
+// CHECK-ENCODING: [0xc1,0x0a,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: c1 0a 91 c0 <unknown>
+
+addva   za2.s, p5/m, p7/m, z9.s
+// CHECK-INST: addva   za2.s, p5/m, p7/m, z9.s
+// CHECK-ENCODING: [0x22,0xf5,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 22 f5 91 c0 <unknown>
+
+addva   za3.s, p2/m, p5/m, z12.s
+// CHECK-INST: addva   za3.s, p2/m, p5/m, z12.s
+// CHECK-ENCODING: [0x83,0xa9,0x91,0xc0]
+// CHECK-ERROR: instruction requires: sme
+// CHECK-UNKNOWN: 83 a9 91 c0 <unknown>
--- a/test/MC/AArch64/SME/addva-u64.s
+++ b/test/MC/AArch64/SME/addva-u64.s
@ -0,0 +1,85 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+addva   za0.d, p0/m, p0/m, z0.d
+// CHECK-INST: addva   za0.d, p0/m, p0/m, z0.d
+// CHECK-ENCODING: [0x00,0x00,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 00 00 d1 c0 <unknown>
+
+addva   za5.d, p5/m, p2/m, z10.d
+// CHECK-INST: addva   za5.d, p5/m, p2/m, z10.d
+// CHECK-ENCODING: [0x45,0x55,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 45 55 d1 c0 <unknown>
+
+addva   za7.d, p3/m, p7/m, z13.d
+// CHECK-INST: addva   za7.d, p3/m, p7/m, z13.d
+// CHECK-ENCODING: [0xa7,0xed,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: a7 ed d1 c0 <unknown>
+
+addva   za7.d, p7/m, p7/m, z31.d
+// CHECK-INST: addva   za7.d, p7/m, p7/m, z31.d
+// CHECK-ENCODING: [0xe7,0xff,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: e7 ff d1 c0 <unknown>
+
+addva   za5.d, p3/m, p0/m, z17.d
+// CHECK-INST: addva   za5.d, p3/m, p0/m, z17.d
+// CHECK-ENCODING: [0x25,0x0e,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 25 0e d1 c0 <unknown>
+
+addva   za1.d, p1/m, p4/m, z1.d
+// CHECK-INST: addva   za1.d, p1/m, p4/m, z1.d
+// CHECK-ENCODING: [0x21,0x84,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 84 d1 c0 <unknown>
+
+addva   za0.d, p5/m, p2/m, z19.d
+// CHECK-INST: addva   za0.d, p5/m, p2/m, z19.d
+// CHECK-ENCODING: [0x60,0x56,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 60 56 d1 c0 <unknown>
+
+addva   za0.d, p6/m, p0/m, z12.d
+// CHECK-INST: addva   za0.d, p6/m, p0/m, z12.d
+// CHECK-ENCODING: [0x80,0x19,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 80 19 d1 c0 <unknown>
+
+addva   za1.d, p2/m, p6/m, z1.d
+// CHECK-INST: addva   za1.d, p2/m, p6/m, z1.d
+// CHECK-ENCODING: [0x21,0xc8,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 c8 d1 c0 <unknown>
+
+addva   za5.d, p2/m, p0/m, z22.d
+// CHECK-INST: addva   za5.d, p2/m, p0/m, z22.d
+// CHECK-ENCODING: [0xc5,0x0a,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: c5 0a d1 c0 <unknown>
+
+addva   za2.d, p5/m, p7/m, z9.d
+// CHECK-INST: addva   za2.d, p5/m, p7/m, z9.d
+// CHECK-ENCODING: [0x22,0xf5,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 22 f5 d1 c0 <unknown>
+
+addva   za7.d, p2/m, p5/m, z12.d
+// CHECK-INST: addva   za7.d, p2/m, p5/m, z12.d
+// CHECK-ENCODING: [0x87,0xa9,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 87 a9 d1 c0 <unknown>
--- a/unittests/Target/AArch64/CMakeLists.txt
+++ b/unittests/Target/AArch64/CMakeLists.txt
@ -20,4 +20,5 @@ set(LLVM_LINK_COMPONENTS
 add_llvm_target_unittest(AArch64Tests
  InstSizes.cpp
  DecomposeStackOffsetTest.cpp
+  MatrixRegisterAliasing.cpp
  )
--- a/unittests/Target/AArch64/MatrixRegisterAliasing.cpp
+++ b/unittests/Target/AArch64/MatrixRegisterAliasing.cpp
@ -0,0 +1,135 @@
+#include "AArch64Subtarget.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+std::unique_ptr<LLVMTargetMachine> createTargetMachine() {
+  auto TT(Triple::normalize("aarch64--"));
+  std::string CPU("generic");
+  std::string FS("+sme");
+
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64Target();
+  LLVMInitializeAArch64TargetMC();
+
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
+
+  return std::unique_ptr<LLVMTargetMachine>(
+      static_cast<LLVMTargetMachine *>(TheTarget->createTargetMachine(
+          TT, CPU, FS, TargetOptions(), None, None, CodeGenOpt::Default)));
+}
+
+std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
+  AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
+                      std::string(TM->getTargetFeatureString()), *TM,
+                      /* isLittle */ false);
+  return std::make_unique<AArch64InstrInfo>(ST);
+}
+
+TEST(MatrixRegisterAliasing, Aliasing) {
+  std::unique_ptr<LLVMTargetMachine> TM = createTargetMachine();
+  ASSERT_TRUE(TM);
+  std::unique_ptr<AArch64InstrInfo> II = createInstrInfo(TM.get());
+
+  const AArch64RegisterInfo &TRI = II->getRegisterInfo();
+
+  // za overlaps with za.b
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZA, AArch64::ZAB0));
+
+  // za0.b overlaps with all tiles
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAQ0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAQ15));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAD0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAD7));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAS0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAS3));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAH0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAH1));
+
+  // za0.h aliases with za0.q, za2.q, ..
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ2));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ4));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ6));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ8));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ10));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ12));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ14));
+
+  // za1.h aliases with za1.q, za3.q, ...
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ1));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ3));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ5));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ7));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ9));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ11));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ13));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ15));
+
+  // za1.h doesn't alias with za0.q, za2.q, ..
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ0));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ2));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ4));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ6));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ8));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ10));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ12));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ14));
+
+  // za0.h doesn't alias with za1.q, za3.q, ..
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ1));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ3));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ5));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ7));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ9));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ11));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ13));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ15));
+
+  // za0.s aliases with za0.q, za4.q, za8.q, za12.q
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ4));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ8));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ12));
+
+  // za1.s aliases with za1.q, za5.q, za9.q, za13.q
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ1));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ5));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ9));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ13));
+
+  // za0.s doesn't alias with za1.q, za5.q, za9.q, za13.q
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ1));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ5));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ9));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ13));
+
+  // za1.s doesn't alias with za0.q, za4.q, za8.q, za12.q
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ0));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ4));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ8));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ12));
+
+  // za0.d aliases za0.q and za8.q
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ8));
+
+  // za1.d aliases za1.q and za9.q
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ1));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ9));
+
+  // za0.d doesn't alias with za1.q and za9.q
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ1));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ9));
+
+  // za1.d doesn't alias with za0.q and za8.q
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ0));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ8));
+}
+
+} // end anonymous namespace