1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-21 18:22:53 +01:00

[AArch64][SME] Add matrix register definitions and parsing support

SME introduces the ZA array, a new piece of architectural register state
consisting of a matrix of [SVLb x SVLb] bytes, where SVL is the
implementation defined Streaming SVE vector length and SVLb is the
number of 8-bit elements in a vector of SVL bits.

SME instructions consist of three types of matrix operands:

  * Tiles: a ZA tile is a square, two-dimensional sub-array of elements
  within the ZA array. These tiles make up the larger accumulator array
  and the granularity varies based on the element size, i.e.
    - ZAQ0..ZAQ15 (smallest tile granule)
    - ZAD0..ZAD7
    - ZAS0..ZAS3
    - ZAH0..ZAH1
    or ZAB0       (largest tile granule, single tile)
  * Tile vectors: similar to regular tiles, but have an extra 'h' or 'v'
  to tell how the vector at [reg+offset] is laid out in the tile,
  horizontally or vertically. E.g. za1h.h or za15v.q, which corresponds
  to vectors in registers ZAH1 and ZAQ15, respectively.
  * Accumulator matrix: this is the entire accumulator array ZA.

This patch adds the register classes and related operands and parsing
for SME instructions operating on the accumulator array.

The ADDHA and ADDVA instructions which operate on tiles are also added
in this patch to make some use of the code added, later patches will
make use of the other operands introduced here.

The reference can be found here:
https://developer.arm.com/documentation/ddi0602/2021-06

Co-authored by: Sander de Smalen (@sdesmalen)

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D105570
This commit is contained in:
Cullen Rhodes 2021-07-14 08:01:19 +00:00
parent 09d83ebb79
commit fcd9253fa0
16 changed files with 1174 additions and 2 deletions

View File

@ -648,6 +648,7 @@ let RecomputePerFunction = 1 in {
include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
include "SMEInstrFormats.td"
//===----------------------------------------------------------------------===//
@ -8115,5 +8116,5 @@ def StoreSwiftAsyncContext
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"

View File

@ -45,6 +45,16 @@ let Namespace = "AArch64" in {
def qsub1 : SubRegIndex<128>;
def qsub2 : SubRegIndex<128>;
def qsub3 : SubRegIndex<128>;
// Note: Code depends on these having consecutive numbers
// Sub-register indices for the SME ZA tile registers. Apart from zasubb, the
// indices come in pairs (…0/…1); each index in a pair is half the size of the
// index one line group above it, per the per-line size comments.
// NOTE(review): the "(16 x 16)" in the size comments presumably refers to the
// minimum streaming vector length of 16 bytes — confirm.
def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits
def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits
def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits
def zasubs0 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits
def zasubs1 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits
def zasubd0 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits
def zasubd1 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits
def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits
def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits
}
let Namespace = "AArch64" in {
@ -1156,3 +1166,188 @@ foreach Scale = [8, 16, 32, 64] in {
def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">;
def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>;
}
// Accumulator array tiles.
// 128-bit element tiles za0.q..za15.q. These are the leaf registers; every
// larger tile below is built from them through its sub-register list.
def ZAQ0 : AArch64Reg<0, "za0.q">;
def ZAQ1 : AArch64Reg<1, "za1.q">;
def ZAQ2 : AArch64Reg<2, "za2.q">;
def ZAQ3 : AArch64Reg<3, "za3.q">;
def ZAQ4 : AArch64Reg<4, "za4.q">;
def ZAQ5 : AArch64Reg<5, "za5.q">;
def ZAQ6 : AArch64Reg<6, "za6.q">;
def ZAQ7 : AArch64Reg<7, "za7.q">;
def ZAQ8 : AArch64Reg<8, "za8.q">;
def ZAQ9 : AArch64Reg<9, "za9.q">;
def ZAQ10 : AArch64Reg<10, "za10.q">;
def ZAQ11 : AArch64Reg<11, "za11.q">;
def ZAQ12 : AArch64Reg<12, "za12.q">;
def ZAQ13 : AArch64Reg<13, "za13.q">;
def ZAQ14 : AArch64Reg<14, "za14.q">;
def ZAQ15 : AArch64Reg<15, "za15.q">;
// 64-bit element tiles: ZAD<n> is composed of ZAQ<n> and ZAQ<n+8>.
let SubRegIndices = [zasubq0, zasubq1] in {
def ZAD0 : AArch64Reg<0, "za0.d", [ZAQ0, ZAQ8]>;
def ZAD1 : AArch64Reg<1, "za1.d", [ZAQ1, ZAQ9]>;
def ZAD2 : AArch64Reg<2, "za2.d", [ZAQ2, ZAQ10]>;
def ZAD3 : AArch64Reg<3, "za3.d", [ZAQ3, ZAQ11]>;
def ZAD4 : AArch64Reg<4, "za4.d", [ZAQ4, ZAQ12]>;
def ZAD5 : AArch64Reg<5, "za5.d", [ZAQ5, ZAQ13]>;
def ZAD6 : AArch64Reg<6, "za6.d", [ZAQ6, ZAQ14]>;
def ZAD7 : AArch64Reg<7, "za7.d", [ZAQ7, ZAQ15]>;
}
// 32-bit element tiles: ZAS<n> is composed of ZAD<n> and ZAD<n+4>.
let SubRegIndices = [zasubd0, zasubd1] in {
def ZAS0 : AArch64Reg<0, "za0.s", [ZAD0, ZAD4]>;
def ZAS1 : AArch64Reg<1, "za1.s", [ZAD1, ZAD5]>;
def ZAS2 : AArch64Reg<2, "za2.s", [ZAD2, ZAD6]>;
def ZAS3 : AArch64Reg<3, "za3.s", [ZAD3, ZAD7]>;
}
// 16-bit element tiles: ZAH<n> is composed of ZAS<n> and ZAS<n+2>.
let SubRegIndices = [zasubs0, zasubs1] in {
def ZAH0 : AArch64Reg<0, "za0.h", [ZAS0, ZAS2]>;
def ZAH1 : AArch64Reg<1, "za1.h", [ZAS1, ZAS3]>;
}
// The single 8-bit element tile is composed of both 16-bit tiles.
let SubRegIndices = [zasubh0, zasubh1] in {
def ZAB0 : AArch64Reg<0, "za0.b", [ZAH0, ZAH1]>;
}
// The whole accumulator array "za" has ZAB0 as its only sub-register.
let SubRegIndices = [zasubb] in {
def ZA : AArch64Reg<0, "za", [ZAB0]>;
}
// SME Register Classes
// Accumulator array
// The whole accumulator array (just ZA).
let Size = 2048 in
def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)>;

// Accumulator array as single tiles: one register class per element size,
// from the single 8-bit tile down to the sixteen 128-bit tiles.
let Size = 2048 in
def MPR8 : RegisterClass<"AArch64", [untyped], 2048,
                         (add (sequence "ZAB%u", 0, 0))>;

let Size = 1024 in
def MPR16 : RegisterClass<"AArch64", [untyped], 1024,
                          (add (sequence "ZAH%u", 0, 1))>;

let Size = 512 in
def MPR32 : RegisterClass<"AArch64", [untyped], 512,
                          (add (sequence "ZAS%u", 0, 3))>;

let Size = 256 in
def MPR64 : RegisterClass<"AArch64", [untyped], 256,
                          (add (sequence "ZAD%u", 0, 7))>;

let Size = 128 in
def MPR128 : RegisterClass<"AArch64", [untyped], 128,
                           (add (sequence "ZAQ%u", 0, 15))>;
// SME Register Operands
// There are three types of SME matrix register operands:
// * Tiles:
//
// These tiles make up the larger accumulator matrix. The tile representation
// has an element type suffix, e.g. za0.b or za15.q and can be any of the
// registers:
// ZAQ0..ZAQ15
// ZAD0..ZAD7
// ZAS0..ZAS3
// ZAH0..ZAH1
// or ZAB0
//
// * Tile vectors:
//
// Their representation is similar to regular tiles, but they have an extra
// 'h' or 'v' to tell how the vector at [reg+offset] is laid out in the tile,
// horizontally or vertically.
//
// e.g. za1h.h or za15v.q, which corresponds to vectors in registers ZAH1 and
// ZAQ15, respectively. The horizontal/vertical is more a property of the
// instruction, than a property of the asm-operand itself, or its register.
// The distinction is required for the parsing/printing of the operand,
// as from a compiler's perspective, the whole tile is read/written.
//
// * Accumulator matrix:
//
// This is the entire matrix accumulator register ZA (<=> ZAB0), printed as
// 'za'.
//
// Tiles
//
// Assembler operand class for a whole ZA tile with EltSize-bit elements.
// RC is the name of the register class the parsed register must belong to;
// the predicate additionally requires MatrixKind::Tile and a matching
// element width.
class MatrixTileAsmOperand<string RC, int EltSize> : AsmOperandClass {
  let Name = "MatrixTile" # EltSize;
  let DiagnosticType = "Invalid" # Name;
  let ParserMethod = "tryParseMatrixRegister";
  let RenderMethod = "addMatrixOperands";
  let PredicateMethod = "isMatrixRegOperand<MatrixKind::Tile, " # EltSize
                        # ", AArch64::" # RC # "RegClassID>";
}
// Register operand for a whole ZA tile.
//   EltSize        - element width in bits, forwarded to the asm operand class.
//   NumBitsForTile - template argument passed to the DecodeMatrixTile decoder.
//   RC             - register class holding the tiles of this element size.
class MatrixTileOperand<int EltSize, int NumBitsForTile, RegisterClass RC>
: RegisterOperand<RC> {
let ParserMatchClass = MatrixTileAsmOperand<!cast<string>(RC), EltSize>;
let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
let PrintMethod = "printMatrixTile";
}
// Tile operands for 32-bit (za0.s..za3.s) and 64-bit (za0.d..za7.d) elements.
def TileOp32 : MatrixTileOperand<32, 2, MPR32>;
def TileOp64 : MatrixTileOperand<64, 3, MPR64>;
//
// Tile vectors (horizontal and vertical)
//
// Assembler operand class for a horizontal or vertical tile vector with
// EltSize-bit elements. A non-zero IsVertical selects the "V" name and
// MatrixKind::Col; zero selects the "H" name and MatrixKind::Row.
class MatrixTileVectorAsmOperand<string RC, int EltSize, int IsVertical>
    : AsmOperandClass {
  let Name = "MatrixTileVector" # !if(IsVertical, "V", "H") # EltSize;
  let DiagnosticType = "Invalid" # Name;
  let ParserMethod = "tryParseMatrixRegister";
  let RenderMethod = "addMatrixOperands";
  let PredicateMethod = "isMatrixRegOperand<MatrixKind::"
                        # !if(IsVertical, "Col", "Row")
                        # ", " # EltSize
                        # ", AArch64::" # RC # "RegClassID>";
}
// Register operand for a horizontal (IsVertical = 0) or vertical
// (IsVertical = 1) tile vector.
//   EltSize        - element width in bits, forwarded to the asm operand class.
//   NumBitsForTile - template argument passed to the DecodeMatrixTile decoder.
//   RC             - register class holding the tiles of this element size.
//   IsVertical     - forwarded to the asm operand class and to the
//                    printMatrixTileVector print method.
class MatrixTileVectorOperand<int EltSize, int NumBitsForTile,
RegisterClass RC, int IsVertical>
: RegisterOperand<RC> {
let ParserMatchClass = MatrixTileVectorAsmOperand<!cast<string>(RC), EltSize,
IsVertical>;
let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
let PrintMethod = "printMatrixTileVector<" # IsVertical # ">";
}
// Horizontal tile vectors, one per element size.
def TileVectorOpH8 : MatrixTileVectorOperand< 8, 0, MPR8, 0>;
def TileVectorOpH16 : MatrixTileVectorOperand< 16, 1, MPR16, 0>;
def TileVectorOpH32 : MatrixTileVectorOperand< 32, 2, MPR32, 0>;
def TileVectorOpH64 : MatrixTileVectorOperand< 64, 3, MPR64, 0>;
def TileVectorOpH128 : MatrixTileVectorOperand<128, 4, MPR128, 0>;
// Vertical tile vectors, one per element size.
def TileVectorOpV8 : MatrixTileVectorOperand< 8, 0, MPR8, 1>;
def TileVectorOpV16 : MatrixTileVectorOperand< 16, 1, MPR16, 1>;
def TileVectorOpV32 : MatrixTileVectorOperand< 32, 2, MPR32, 1>;
def TileVectorOpV64 : MatrixTileVectorOperand< 64, 3, MPR64, 1>;
def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>;
//
// Accumulator matrix
//
// Assembler operand class for the whole accumulator array.
//   RC      - name of the register class the parsed register must belong to.
//   EltSize - element width in bits required by the predicate; 0 means the
//             operand carries no element suffix.
//
// The operand class name includes EltSize when it is non-zero so that
// instantiations with different element sizes get distinct names (and hence
// distinct diagnostic types). For EltSize = 0 the name remains "Matrix".
class MatrixAsmOperand<string RC, int EltSize> : AsmOperandClass {
  let Name = "Matrix" # !if(EltSize, !cast<string>(EltSize), "");
  let DiagnosticType = "Invalid" # Name;
  let ParserMethod = "tryParseMatrixRegister";
  let RenderMethod = "addMatrixOperands";
  let PredicateMethod = "isMatrixRegOperand<"
                        # "MatrixKind::Array" # ", "
                        # EltSize # ", AArch64::" # RC # "RegClassID>";
}
// Register operand for the whole accumulator array. EltSize is forwarded both
// to the asm operand class and to the printMatrix print method.
class MatrixOperand<RegisterClass RC, int EltSize> : RegisterOperand<RC> {
let ParserMatchClass = MatrixAsmOperand<!cast<string>(RC), EltSize>;
let PrintMethod = "printMatrix<" # EltSize # ">";
}
// The accumulator array without an element size (EltSize = 0).
def MatrixOp : MatrixOperand<MPR, 0>;

View File

@ -0,0 +1,25 @@
//=- AArch64SMEInstrInfo.td - AArch64 SME Instructions -*- tablegen -*-----=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Add vector elements horizontally or vertically to ZA tile.
//===----------------------------------------------------------------------===//
// 32-bit element forms; the first template argument distinguishes
// addha (0b0) from addva (0b1).
let Predicates = [HasSME] in {
def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">;
def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">;
}
// 64-bit element forms, gated on the separate HasSMEI64 predicate.
let Predicates = [HasSMEI64] in {
def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">;
def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">;
}

View File

@ -66,9 +66,12 @@ enum class RegKind {
Scalar,
NeonVector,
SVEDataVector,
SVEPredicateVector
SVEPredicateVector,
Matrix
};
/// How a parsed matrix register operand is addressed: the whole accumulator
/// array, a whole tile, or a single row or column of a tile.
enum class MatrixKind { Array, Tile, Row, Col };
enum RegConstraintEqualityTy {
EqualsReg,
EqualsSuperReg,
@ -229,6 +232,7 @@ private:
OperandMatchResultTy tryParseScalarRegister(unsigned &Reg);
OperandMatchResultTy tryParseVectorRegister(unsigned &Reg, StringRef &Kind,
RegKind MatchKind);
OperandMatchResultTy tryParseMatrixRegister(OperandVector &Operands);
OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands);
OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands);
@ -316,6 +320,7 @@ private:
k_ShiftedImm,
k_CondCode,
k_Register,
k_MatrixRegister,
k_VectorList,
k_VectorIndex,
k_Token,
@ -370,6 +375,12 @@ private:
ShiftExtendOp ShiftExtend;
};
/// Payload stored in the operand union for k_MatrixRegister operands.
struct MatrixRegOp {
unsigned RegNum;       // Register number of the matrix register.
unsigned ElementWidth; // Element width recorded at parse time.
MatrixKind Kind;       // Array, tile, row or column.
};
struct VectorListOp {
unsigned RegNum;
unsigned Count;
@ -440,6 +451,7 @@ private:
union {
struct TokOp Tok;
struct RegOp Reg;
struct MatrixRegOp MatrixReg;
struct VectorListOp VectorList;
struct VectorIndexOp VectorIndex;
struct ImmOp Imm;
@ -488,6 +500,9 @@ public:
case k_Register:
Reg = o.Reg;
break;
case k_MatrixRegister:
MatrixReg = o.MatrixReg;
break;
case k_VectorList:
VectorList = o.VectorList;
break;
@ -580,6 +595,21 @@ public:
return Reg.RegNum;
}
unsigned getMatrixReg() const {
assert(Kind == k_MatrixRegister && "Invalid access!");
return MatrixReg.RegNum;
}
unsigned getMatrixElementWidth() const {
assert(Kind == k_MatrixRegister && "Invalid access!");
return MatrixReg.ElementWidth;
}
/// Returns the MatrixKind stored for a matrix register operand. Only valid
/// when this operand is a matrix register.
MatrixKind getMatrixKind() const {
  assert(isMatrix() && "Invalid access!");
  return MatrixReg.Kind;
}
RegConstraintEqualityTy getRegEqualityTy() const {
assert(Kind == k_Register && "Invalid access!");
return Reg.EqualityTy;
@ -1089,6 +1119,8 @@ public:
Reg.RegNum));
}
/// True if this operand holds a matrix register (k_MatrixRegister).
bool isMatrix() const { return Kind == k_MatrixRegister; }
template <unsigned Class> bool isSVEVectorReg() const {
RegKind RK;
switch (Class) {
@ -1470,6 +1502,15 @@ public:
return true;
}
/// Predicate for matrix register operands.
///
/// Matches only if this operand is a matrix register whose kind equals
/// \p Kind, whose register is a member of register class \p RegClass, and
/// whose element width equals \p EltSize. Every other case is a NoMatch.
template <MatrixKind Kind, unsigned EltSize, unsigned RegClass>
DiagnosticPredicate isMatrixRegOperand() const {
  if (!isMatrix())
    return DiagnosticPredicateTy::NoMatch;
  if (getMatrixKind() != Kind)
    return DiagnosticPredicateTy::NoMatch;
  if (!AArch64MCRegisterClasses[RegClass].contains(getMatrixReg()))
    return DiagnosticPredicateTy::NoMatch;
  if (getMatrixElementWidth() != EltSize)
    return DiagnosticPredicateTy::NoMatch;
  return DiagnosticPredicateTy::Match;
}
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
if (!Expr)
@ -1485,6 +1526,11 @@ public:
Inst.addOperand(MCOperand::createReg(getReg()));
}
/// Renders a matrix register operand into \p Inst as a single register
/// operand. \p N must be 1.
void addMatrixOperands(MCInst &Inst, unsigned N) const {
  assert(N == 1 && "Invalid number of operands!");
  const unsigned MatrixRegNo = getMatrixReg();
  Inst.addOperand(MCOperand::createReg(MatrixRegNo));
}
void addGPR32as64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
assert(
@ -2054,6 +2100,18 @@ public:
return Op;
}
static std::unique_ptr<AArch64Operand>
CreateMatrixRegister(unsigned RegNum, unsigned ElementWidth, MatrixKind Kind,
SMLoc S, SMLoc E, MCContext &Ctx) {
auto Op = std::make_unique<AArch64Operand>(k_MatrixRegister, Ctx);
Op->MatrixReg.RegNum = RegNum;
Op->MatrixReg.ElementWidth = ElementWidth;
Op->MatrixReg.Kind = Kind;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
static std::unique_ptr<AArch64Operand>
CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
@ -2132,6 +2190,9 @@ void AArch64Operand::print(raw_ostream &OS) const {
case k_BTIHint:
OS << getBTIHintName();
break;
case k_MatrixRegister:
OS << "<matrix " << getMatrixReg() << ">";
break;
case k_Register:
OS << "<register " << getReg() << ">";
if (!getShiftExtendAmount() && !hasShiftExtendAmount())
@ -2229,6 +2290,7 @@ static Optional<std::pair<int, int>> parseVectorKind(StringRef Suffix,
break;
case RegKind::SVEPredicateVector:
case RegKind::SVEDataVector:
case RegKind::Matrix:
Res = StringSwitch<std::pair<int, int>>(Suffix.lower())
.Case("", {0, 0})
.Case(".b", {0, 8})
@ -2310,6 +2372,105 @@ static unsigned matchSVEPredicateVectorRegName(StringRef Name) {
.Default(0);
}
/// Maps a matrix register name to its AArch64 register, or returns 0 if the
/// name is not recognised. The name is lower-cased before matching. Tile
/// vector spellings (with an 'h' or 'v' before the element suffix) map to the
/// same register as the corresponding plain tile spelling.
static unsigned matchMatrixRegName(StringRef Name) {
return StringSwitch<unsigned>(Name.lower())
// Whole accumulator array.
.Case("za", AArch64::ZA)
// Plain tiles: za<n>.<q|d|s|h|b>.
.Case("za0.q", AArch64::ZAQ0)
.Case("za1.q", AArch64::ZAQ1)
.Case("za2.q", AArch64::ZAQ2)
.Case("za3.q", AArch64::ZAQ3)
.Case("za4.q", AArch64::ZAQ4)
.Case("za5.q", AArch64::ZAQ5)
.Case("za6.q", AArch64::ZAQ6)
.Case("za7.q", AArch64::ZAQ7)
.Case("za8.q", AArch64::ZAQ8)
.Case("za9.q", AArch64::ZAQ9)
.Case("za10.q", AArch64::ZAQ10)
.Case("za11.q", AArch64::ZAQ11)
.Case("za12.q", AArch64::ZAQ12)
.Case("za13.q", AArch64::ZAQ13)
.Case("za14.q", AArch64::ZAQ14)
.Case("za15.q", AArch64::ZAQ15)
.Case("za0.d", AArch64::ZAD0)
.Case("za1.d", AArch64::ZAD1)
.Case("za2.d", AArch64::ZAD2)
.Case("za3.d", AArch64::ZAD3)
.Case("za4.d", AArch64::ZAD4)
.Case("za5.d", AArch64::ZAD5)
.Case("za6.d", AArch64::ZAD6)
.Case("za7.d", AArch64::ZAD7)
.Case("za0.s", AArch64::ZAS0)
.Case("za1.s", AArch64::ZAS1)
.Case("za2.s", AArch64::ZAS2)
.Case("za3.s", AArch64::ZAS3)
.Case("za0.h", AArch64::ZAH0)
.Case("za1.h", AArch64::ZAH1)
.Case("za0.b", AArch64::ZAB0)
// Horizontal tile vectors: za<n>h.<q|d|s|h|b>.
.Case("za0h.q", AArch64::ZAQ0)
.Case("za1h.q", AArch64::ZAQ1)
.Case("za2h.q", AArch64::ZAQ2)
.Case("za3h.q", AArch64::ZAQ3)
.Case("za4h.q", AArch64::ZAQ4)
.Case("za5h.q", AArch64::ZAQ5)
.Case("za6h.q", AArch64::ZAQ6)
.Case("za7h.q", AArch64::ZAQ7)
.Case("za8h.q", AArch64::ZAQ8)
.Case("za9h.q", AArch64::ZAQ9)
.Case("za10h.q", AArch64::ZAQ10)
.Case("za11h.q", AArch64::ZAQ11)
.Case("za12h.q", AArch64::ZAQ12)
.Case("za13h.q", AArch64::ZAQ13)
.Case("za14h.q", AArch64::ZAQ14)
.Case("za15h.q", AArch64::ZAQ15)
.Case("za0h.d", AArch64::ZAD0)
.Case("za1h.d", AArch64::ZAD1)
.Case("za2h.d", AArch64::ZAD2)
.Case("za3h.d", AArch64::ZAD3)
.Case("za4h.d", AArch64::ZAD4)
.Case("za5h.d", AArch64::ZAD5)
.Case("za6h.d", AArch64::ZAD6)
.Case("za7h.d", AArch64::ZAD7)
.Case("za0h.s", AArch64::ZAS0)
.Case("za1h.s", AArch64::ZAS1)
.Case("za2h.s", AArch64::ZAS2)
.Case("za3h.s", AArch64::ZAS3)
.Case("za0h.h", AArch64::ZAH0)
.Case("za1h.h", AArch64::ZAH1)
.Case("za0h.b", AArch64::ZAB0)
// Vertical tile vectors: za<n>v.<q|d|s|h|b>.
.Case("za0v.q", AArch64::ZAQ0)
.Case("za1v.q", AArch64::ZAQ1)
.Case("za2v.q", AArch64::ZAQ2)
.Case("za3v.q", AArch64::ZAQ3)
.Case("za4v.q", AArch64::ZAQ4)
.Case("za5v.q", AArch64::ZAQ5)
.Case("za6v.q", AArch64::ZAQ6)
.Case("za7v.q", AArch64::ZAQ7)
.Case("za8v.q", AArch64::ZAQ8)
.Case("za9v.q", AArch64::ZAQ9)
.Case("za10v.q", AArch64::ZAQ10)
.Case("za11v.q", AArch64::ZAQ11)
.Case("za12v.q", AArch64::ZAQ12)
.Case("za13v.q", AArch64::ZAQ13)
.Case("za14v.q", AArch64::ZAQ14)
.Case("za15v.q", AArch64::ZAQ15)
.Case("za0v.d", AArch64::ZAD0)
.Case("za1v.d", AArch64::ZAD1)
.Case("za2v.d", AArch64::ZAD2)
.Case("za3v.d", AArch64::ZAD3)
.Case("za4v.d", AArch64::ZAD4)
.Case("za5v.d", AArch64::ZAD5)
.Case("za6v.d", AArch64::ZAD6)
.Case("za7v.d", AArch64::ZAD7)
.Case("za0v.s", AArch64::ZAS0)
.Case("za1v.s", AArch64::ZAS1)
.Case("za2v.s", AArch64::ZAS2)
.Case("za3v.s", AArch64::ZAS3)
.Case("za0v.h", AArch64::ZAH0)
.Case("za1v.h", AArch64::ZAH1)
.Case("za0v.b", AArch64::ZAB0)
// Anything else is not a matrix register.
.Default(0);
}
bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
@ -2337,6 +2498,9 @@ unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
if ((RegNum = MatchNeonVectorRegName(Name)))
return Kind == RegKind::NeonVector ? RegNum : 0;
if ((RegNum = matchMatrixRegName(Name)))
return Kind == RegKind::Matrix ? RegNum : 0;
// The parsed register must be of RegKind Scalar
if ((RegNum = MatchRegisterName(Name)))
return Kind == RegKind::Scalar ? RegNum : 0;
@ -2809,6 +2973,54 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
return false;
}
/// Try to parse an SME matrix register operand from the current token.
///
/// Recognises the whole accumulator array ("za"), a tile ("za<n>.<T>") and
/// horizontal/vertical tile vectors ("za<n>h.<T>" / "za<n>v.<T>"). The
/// register name is matched case-insensitively, and so is the 'h'/'v' slice
/// qualifier: without that, an upper-case spelling such as "ZA0H.S" would be
/// accepted as a register but silently classified as a plain tile.
///
/// \returns MatchOperand_Success after consuming the token and appending a
/// matrix register operand to \p Operands; MatchOperand_NoMatch (nothing
/// consumed) if the token does not name a matrix register;
/// MatchOperand_ParseFail if the element width suffix is not recognised.
OperandMatchResultTy
AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc S = getLoc();

  StringRef Name = Tok.getString();

  // The whole array has no element suffix and no row/column qualifier.
  if (Name.equals_insensitive("za")) {
    Parser.Lex(); // eat "za"
    Operands.push_back(AArch64Operand::CreateMatrixRegister(
        AArch64::ZA, /*ElementWidth=*/0, MatrixKind::Array, S, getLoc(),
        getContext()));
    return MatchOperand_Success;
  }

  // Try to parse matrix register.
  unsigned Reg = matchRegisterNameAlias(Name, RegKind::Matrix);
  if (!Reg)
    return MatchOperand_NoMatch;

  // Every remaining matrix register name has the form <head>.<suffix>.
  size_t DotPosition = Name.find('.');
  assert(DotPosition != StringRef::npos && "Unexpected register");

  StringRef Head = Name.take_front(DotPosition);
  StringRef Tail = Name.drop_front(DotPosition);

  // The last character before the dot selects row/column addressing; a digit
  // there means a plain tile. Compare in lower case to stay consistent with
  // the case-insensitive register name match above.
  StringRef RowOrColumn = Head.take_back();
  MatrixKind Kind = StringSwitch<MatrixKind>(RowOrColumn.lower())
                        .Case("h", MatrixKind::Row)
                        .Case("v", MatrixKind::Col)
                        .Default(MatrixKind::Tile);

  // Next up, parsing the suffix
  const auto &KindRes = parseVectorKind(Tail, RegKind::Matrix);
  if (!KindRes) {
    TokError("Expected the register to be followed by element width suffix");
    return MatchOperand_ParseFail;
  }
  unsigned ElementWidth = KindRes->second;

  Parser.Lex(); // eat the register token

  Operands.push_back(AArch64Operand::CreateMatrixRegister(
      Reg, ElementWidth, Kind, S, getLoc(), getContext()));
  return MatchOperand_Success;
}
/// tryParseOptionalShift - Some operands take an optional shift argument. Parse
/// them if present.
OperandMatchResultTy
@ -4733,6 +4945,32 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "Invalid floating point constant, expected 0.5 or 2.0.");
case Match_InvalidSVEExactFPImmOperandZeroOne:
return Error(Loc, "Invalid floating point constant, expected 0.0 or 1.0.");
case Match_InvalidMatrixTileVectorH8:
return Error(Loc, "invalid matrix operand, expected za0h.b");
case Match_InvalidMatrixTileVectorH16:
return Error(Loc, "invalid matrix operand, expected za[0-1]h.h");
case Match_InvalidMatrixTileVectorH32:
return Error(Loc, "invalid matrix operand, expected za[0-3]h.s");
case Match_InvalidMatrixTileVectorH64:
return Error(Loc, "invalid matrix operand, expected za[0-7]h.d");
case Match_InvalidMatrixTileVectorH128:
return Error(Loc, "invalid matrix operand, expected za[0-15]h.q");
case Match_InvalidMatrixTileVectorV8:
return Error(Loc, "invalid matrix operand, expected za0v.b");
case Match_InvalidMatrixTileVectorV16:
return Error(Loc, "invalid matrix operand, expected za[0-1]v.h");
case Match_InvalidMatrixTileVectorV32:
return Error(Loc, "invalid matrix operand, expected za[0-3]v.s");
case Match_InvalidMatrixTileVectorV64:
return Error(Loc, "invalid matrix operand, expected za[0-7]v.d");
case Match_InvalidMatrixTileVectorV128:
return Error(Loc, "invalid matrix operand, expected za[0-15]v.q");
case Match_InvalidMatrixTile32:
return Error(Loc, "invalid matrix operand, expected za[0-3].s");
case Match_InvalidMatrixTile64:
return Error(Loc, "invalid matrix operand, expected za[0-7].d");
case Match_InvalidMatrix:
return Error(Loc, "invalid matrix operand, expected za");
default:
llvm_unreachable("unexpected error code!");
}
@ -5251,6 +5489,19 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidSVEExactFPImmOperandHalfOne:
case Match_InvalidSVEExactFPImmOperandHalfTwo:
case Match_InvalidSVEExactFPImmOperandZeroOne:
case Match_InvalidMatrixTile32:
case Match_InvalidMatrixTile64:
case Match_InvalidMatrix:
case Match_InvalidMatrixTileVectorH8:
case Match_InvalidMatrixTileVectorH16:
case Match_InvalidMatrixTileVectorH32:
case Match_InvalidMatrixTileVectorH64:
case Match_InvalidMatrixTileVectorH128:
case Match_InvalidMatrixTileVectorV8:
case Match_InvalidMatrixTileVectorV16:
case Match_InvalidMatrixTileVectorV32:
case Match_InvalidMatrixTileVectorV64:
case Match_InvalidMatrixTileVectorV128:
case Match_MSR:
case Match_MRS: {
if (ErrorInfo >= Operands.size())

View File

@ -111,6 +111,9 @@ static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
template <unsigned NumBitsForTile>
static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
@ -642,6 +645,29 @@ static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
// ZA tile registers grouped by element size. The outer index is the
// NumBitsForTile value used by DecodeMatrixTile (row i holds 2^i registers:
// B, H, S, D, Q); the inner index is the encoded tile number.
static const SmallVector<SmallVector<unsigned, 16>, 5>
MatrixZATileDecoderTable = {
{AArch64::ZAB0},
{AArch64::ZAH0, AArch64::ZAH1},
{AArch64::ZAS0, AArch64::ZAS1, AArch64::ZAS2, AArch64::ZAS3},
{AArch64::ZAD0, AArch64::ZAD1, AArch64::ZAD2, AArch64::ZAD3,
AArch64::ZAD4, AArch64::ZAD5, AArch64::ZAD6, AArch64::ZAD7},
{AArch64::ZAQ0, AArch64::ZAQ1, AArch64::ZAQ2, AArch64::ZAQ3,
AArch64::ZAQ4, AArch64::ZAQ5, AArch64::ZAQ6, AArch64::ZAQ7,
AArch64::ZAQ8, AArch64::ZAQ9, AArch64::ZAQ10, AArch64::ZAQ11,
AArch64::ZAQ12, AArch64::ZAQ13, AArch64::ZAQ14, AArch64::ZAQ15}};
/// Decodes a ZA tile number into a register operand.
///
/// \tparam NumBitsForTile selects the row of MatrixZATileDecoderTable and
///         bounds the valid tile numbers to [0, 2^NumBitsForTile).
/// \returns Fail if \p RegNo is out of range; otherwise appends the register
///          to \p Inst and returns Success.
template <unsigned NumBitsForTile>
static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
                                     uint64_t Address, const void *Decoder) {
  const unsigned TileCount = 1u << NumBitsForTile;
  if (RegNo >= TileCount)
    return Fail;

  const unsigned TileReg = MatrixZATileDecoderTable[NumBitsForTile][RegNo];
  Inst.addOperand(MCOperand::createReg(TileReg));
  return Success;
}
static const unsigned PPRDecoderTable[] = {
AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3,
AArch64::P4, AArch64::P5, AArch64::P6, AArch64::P7,

View File

@ -880,6 +880,59 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
return true;
}
template <int EltSize>
void AArch64InstPrinter::printMatrix(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &RegOp = MI->getOperand(OpNum);
assert(RegOp.isReg() && "Unexpected operand type!");
O << getRegisterName(RegOp.getReg());
switch (EltSize) {
case 0:
break;
case 8:
O << ".b";
break;
case 16:
O << ".h";
break;
case 32:
O << ".s";
break;
case 64:
O << ".d";
break;
case 128:
O << ".q";
break;
default:
llvm_unreachable("Unsupported element size");
}
}
template <bool IsVertical>
void AArch64InstPrinter::printMatrixTileVector(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &RegOp = MI->getOperand(OpNum);
assert(RegOp.isReg() && "Unexpected operand type!");
StringRef RegName = getRegisterName(RegOp.getReg());
// Insert the horizontal/vertical flag before the suffix.
StringRef Base, Suffix;
std::tie(Base, Suffix) = RegName.split('.');
O << Base << (IsVertical ? "v" : "h") << '.' << Suffix;
}
void AArch64InstPrinter::printMatrixTile(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &RegOp = MI->getOperand(OpNum);
assert(RegOp.isReg() && "Unexpected operand type!");
O << getRegisterName(RegOp.getReg());
}
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {

View File

@ -187,6 +187,15 @@ protected:
const MCSubtargetInfo &STI, raw_ostream &O);
void printSVEPattern(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
/// Print a ZA tile vector operand; IsVertical selects the 'v' or 'h'
/// qualifier.
template <bool IsVertical>
void printMatrixTileVector(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
/// Print a whole ZA tile operand.
void printMatrixTile(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
/// Print the accumulator array operand with the element suffix for EltSize
/// (none when EltSize is 0).
template <int EltSize>
void printMatrix(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
raw_ostream &O);
template <char = 0>
void printSVERegOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);

View File

@ -0,0 +1,47 @@
//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SME Add Vector to Tile
//===----------------------------------------------------------------------===//
// Common encoding for the "add vector to tile" instructions.
//   op       - encoded in bit 22 (subclasses pass 0 for the 32-bit form and
//              1 for the 64-bit form).
//   V        - encoded in bit 16.
//   tile_ty  - destination tile operand ($ZAda).
//   zpr_ty   - source vector operand ($Zn).
//   mnemonic - assembly mnemonic.
// Bits 2-0 are not set here; each subclass fills them with the tile number.
class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
ZPRRegOp zpr_ty, string mnemonic>
: I<(outs tile_ty:$ZAda),
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
"", []>, Sched<[]> {
bits<3> Pm;
bits<3> Pn;
bits<5> Zn;
let Inst{31-23} = 0b110000001;
let Inst{22} = op;
let Inst{21-17} = 0b01000;
let Inst{16} = V;
// Note the encoding order: Pm sits above Pn, although Pn comes first in the
// assembly string.
let Inst{15-13} = Pm;
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4-3} = 0b00;
}
// 32-bit element form: op = 0, a 2-bit tile number in bits 1-0, and bit 2
// fixed to zero.
class sme_add_vector_to_tile_u32<bit V, string mnemonic>
: sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
bits<2> ZAda;
let Inst{2} = 0b0;
let Inst{1-0} = ZAda;
}
// 64-bit element form: op = 1 and a 3-bit tile number in bits 2-0.
class sme_add_vector_to_tile_u64<bit V, string mnemonic>
: sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
bits<3> ZAda;
let Inst{2-0} = ZAda;
}

View File

@ -0,0 +1,52 @@
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s
// ------------------------------------------------------------------------- //
// Invalid tile
addha za4.s, p0/m, p0/m, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: addha za4.s, p0/m, p0/m, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
addha za8.d, p0/m, p0/m, z0.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: addha za8.d, p0/m, p0/m, z0.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
addha za0h.s, p0/m, p0/m, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: addha za0h.s, p0/m, p0/m, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
addha za0v.s, p0/m, p0/m, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: addha za0v.s, p0/m, p0/m, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
addha za0p.s, p0/m, p0/m, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: addha za0p.s, p0/m, p0/m, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
// ------------------------------------------------------------------------- //
// Invalid predicate
addha za0.s, p8/m, p0/m, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: addha za0.s, p8/m, p0/m, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
addha za0.s, p0/m, p8/m, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: addha za0.s, p0/m, p8/m, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
addha za0.d, p8/m, p0/m, z0.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: addha za0.d, p8/m, p0/m, z0.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
addha za0.d, p0/m, p8/m, z0.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: addha za0.d, p0/m, p8/m, z0.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

View File

@ -0,0 +1,85 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
// RUN: | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
// RUN: | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
addha za0.s, p0/m, p0/m, z0.s
// CHECK-INST: addha za0.s, p0/m, p0/m, z0.s
// CHECK-ENCODING: [0x00,0x00,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 00 00 90 c0 <unknown>
addha za1.s, p5/m, p2/m, z10.s
// CHECK-INST: addha za1.s, p5/m, p2/m, z10.s
// CHECK-ENCODING: [0x41,0x55,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 41 55 90 c0 <unknown>
addha za3.s, p3/m, p7/m, z13.s
// CHECK-INST: addha za3.s, p3/m, p7/m, z13.s
// CHECK-ENCODING: [0xa3,0xed,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: a3 ed 90 c0 <unknown>
addha za3.s, p7/m, p7/m, z31.s
// CHECK-INST: addha za3.s, p7/m, p7/m, z31.s
// CHECK-ENCODING: [0xe3,0xff,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: e3 ff 90 c0 <unknown>
addha za1.s, p3/m, p0/m, z17.s
// CHECK-INST: addha za1.s, p3/m, p0/m, z17.s
// CHECK-ENCODING: [0x21,0x0e,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 21 0e 90 c0 <unknown>
addha za1.s, p1/m, p4/m, z1.s
// CHECK-INST: addha za1.s, p1/m, p4/m, z1.s
// CHECK-ENCODING: [0x21,0x84,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 21 84 90 c0 <unknown>
addha za0.s, p5/m, p2/m, z19.s
// CHECK-INST: addha za0.s, p5/m, p2/m, z19.s
// CHECK-ENCODING: [0x60,0x56,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 60 56 90 c0 <unknown>
addha za0.s, p6/m, p0/m, z12.s
// CHECK-INST: addha za0.s, p6/m, p0/m, z12.s
// CHECK-ENCODING: [0x80,0x19,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 80 19 90 c0 <unknown>
addha za1.s, p2/m, p6/m, z1.s
// CHECK-INST: addha za1.s, p2/m, p6/m, z1.s
// CHECK-ENCODING: [0x21,0xc8,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 21 c8 90 c0 <unknown>
addha za1.s, p2/m, p0/m, z22.s
// CHECK-INST: addha za1.s, p2/m, p0/m, z22.s
// CHECK-ENCODING: [0xc1,0x0a,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: c1 0a 90 c0 <unknown>
addha za2.s, p5/m, p7/m, z9.s
// CHECK-INST: addha za2.s, p5/m, p7/m, z9.s
// CHECK-ENCODING: [0x22,0xf5,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 22 f5 90 c0 <unknown>
addha za3.s, p2/m, p5/m, z12.s
// CHECK-INST: addha za3.s, p2/m, p5/m, z12.s
// CHECK-ENCODING: [0x83,0xa9,0x90,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 83 a9 90 c0 <unknown>

View File

@ -0,0 +1,85 @@
// Encoding tests for ADDHA on the 64-bit element ZA tiles (za0.d-za7.d),
// which are gated on the sme-i64 feature rather than plain sme.
// Each instruction below is followed by the expected printed form, the
// expected encoding bytes, the diagnostic expected when the feature is
// missing, and the raw bytes expected from a feature-less disassembly.
//
// Assemble with +sme-i64 and verify both the printed instruction and its
// encoding.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// Assemble without the feature: llvm-mc is expected to fail ("not") and the
// "instruction requires" diagnostic on stderr is verified.
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// Assemble to an object file and disassemble it with +sme-i64 enabled.
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
// RUN: | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST
// Disassemble the same object without the feature: every word must print as
// <unknown>.
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
addha za0.d, p0/m, p0/m, z0.d
// CHECK-INST: addha za0.d, p0/m, p0/m, z0.d
// CHECK-ENCODING: [0x00,0x00,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 00 00 d0 c0 <unknown>
addha za5.d, p5/m, p2/m, z10.d
// CHECK-INST: addha za5.d, p5/m, p2/m, z10.d
// CHECK-ENCODING: [0x45,0x55,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 45 55 d0 c0 <unknown>
addha za7.d, p3/m, p7/m, z13.d
// CHECK-INST: addha za7.d, p3/m, p7/m, z13.d
// CHECK-ENCODING: [0xa7,0xed,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: a7 ed d0 c0 <unknown>
addha za7.d, p7/m, p7/m, z31.d
// CHECK-INST: addha za7.d, p7/m, p7/m, z31.d
// CHECK-ENCODING: [0xe7,0xff,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: e7 ff d0 c0 <unknown>
addha za5.d, p3/m, p0/m, z17.d
// CHECK-INST: addha za5.d, p3/m, p0/m, z17.d
// CHECK-ENCODING: [0x25,0x0e,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 25 0e d0 c0 <unknown>
addha za1.d, p1/m, p4/m, z1.d
// CHECK-INST: addha za1.d, p1/m, p4/m, z1.d
// CHECK-ENCODING: [0x21,0x84,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 21 84 d0 c0 <unknown>
addha za0.d, p5/m, p2/m, z19.d
// CHECK-INST: addha za0.d, p5/m, p2/m, z19.d
// CHECK-ENCODING: [0x60,0x56,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 60 56 d0 c0 <unknown>
addha za0.d, p6/m, p0/m, z12.d
// CHECK-INST: addha za0.d, p6/m, p0/m, z12.d
// CHECK-ENCODING: [0x80,0x19,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 80 19 d0 c0 <unknown>
addha za1.d, p2/m, p6/m, z1.d
// CHECK-INST: addha za1.d, p2/m, p6/m, z1.d
// CHECK-ENCODING: [0x21,0xc8,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 21 c8 d0 c0 <unknown>
addha za5.d, p2/m, p0/m, z22.d
// CHECK-INST: addha za5.d, p2/m, p0/m, z22.d
// CHECK-ENCODING: [0xc5,0x0a,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: c5 0a d0 c0 <unknown>
addha za2.d, p5/m, p7/m, z9.d
// CHECK-INST: addha za2.d, p5/m, p7/m, z9.d
// CHECK-ENCODING: [0x22,0xf5,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 22 f5 d0 c0 <unknown>
addha za7.d, p2/m, p5/m, z12.d
// CHECK-INST: addha za7.d, p2/m, p5/m, z12.d
// CHECK-ENCODING: [0x87,0xa9,0xd0,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 87 a9 d0 c0 <unknown>

View File

@ -0,0 +1,37 @@
// Negative tests for ADDVA operand parsing. Assembly is expected to fail
// ("not llvm-mc"); stderr is merged into stdout so the diagnostics can be
// matched. Every case has the same shape: the offending instruction, the
// expected error reported against that line ([[@LINE-1]]), the echoed source
// line, and a guard that no further diagnostic is reported for that line.
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s
// ------------------------------------------------------------------------- //
// Invalid tile: the tile index is out of range for the element size
// (32-bit tiles are za0.s-za3.s, 64-bit tiles are za0.d-za7.d).
addva za4.s, p0/m, p0/m, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: addva za4.s, p0/m, p0/m, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
addva za8.d, p0/m, p0/m, z0.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: addva za8.d, p0/m, p0/m, z0.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
// ------------------------------------------------------------------------- //
// Invalid predicate: p8 is rejected in either predicate position, for both
// the .s and the .d form (only p0..p7 are accepted).
addva za0.s, p8/m, p0/m, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: addva za0.s, p8/m, p0/m, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
addva za0.s, p0/m, p8/m, z0.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: addva za0.s, p0/m, p8/m, z0.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
addva za0.d, p8/m, p0/m, z0.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: addva za0.d, p8/m, p0/m, z0.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
addva za0.d, p0/m, p8/m, z0.d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: addva za0.d, p0/m, p8/m, z0.d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

View File

@ -0,0 +1,85 @@
// Encoding tests for ADDVA on the 32-bit element ZA tiles (za0.s-za3.s).
// Each instruction below is followed by the expected printed form, the
// expected encoding bytes, the diagnostic expected when the feature is
// missing, and the raw bytes expected from a feature-less disassembly.
//
// Assemble with +sme and verify both the printed instruction and its encoding.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// Assemble without the feature: llvm-mc is expected to fail ("not") and the
// "instruction requires" diagnostic on stderr is verified.
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// Assemble to an object file and disassemble it with +sme enabled.
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
// RUN: | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST
// Disassemble the same object without +sme: every word must print as <unknown>.
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
// RUN: | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
addva za0.s, p0/m, p0/m, z0.s
// CHECK-INST: addva za0.s, p0/m, p0/m, z0.s
// CHECK-ENCODING: [0x00,0x00,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 00 00 91 c0 <unknown>
addva za1.s, p5/m, p2/m, z10.s
// CHECK-INST: addva za1.s, p5/m, p2/m, z10.s
// CHECK-ENCODING: [0x41,0x55,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 41 55 91 c0 <unknown>
addva za3.s, p3/m, p7/m, z13.s
// CHECK-INST: addva za3.s, p3/m, p7/m, z13.s
// CHECK-ENCODING: [0xa3,0xed,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: a3 ed 91 c0 <unknown>
addva za3.s, p7/m, p7/m, z31.s
// CHECK-INST: addva za3.s, p7/m, p7/m, z31.s
// CHECK-ENCODING: [0xe3,0xff,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: e3 ff 91 c0 <unknown>
addva za1.s, p3/m, p0/m, z17.s
// CHECK-INST: addva za1.s, p3/m, p0/m, z17.s
// CHECK-ENCODING: [0x21,0x0e,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 21 0e 91 c0 <unknown>
addva za1.s, p1/m, p4/m, z1.s
// CHECK-INST: addva za1.s, p1/m, p4/m, z1.s
// CHECK-ENCODING: [0x21,0x84,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 21 84 91 c0 <unknown>
addva za0.s, p5/m, p2/m, z19.s
// CHECK-INST: addva za0.s, p5/m, p2/m, z19.s
// CHECK-ENCODING: [0x60,0x56,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 60 56 91 c0 <unknown>
addva za0.s, p6/m, p0/m, z12.s
// CHECK-INST: addva za0.s, p6/m, p0/m, z12.s
// CHECK-ENCODING: [0x80,0x19,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 80 19 91 c0 <unknown>
addva za1.s, p2/m, p6/m, z1.s
// CHECK-INST: addva za1.s, p2/m, p6/m, z1.s
// CHECK-ENCODING: [0x21,0xc8,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 21 c8 91 c0 <unknown>
addva za1.s, p2/m, p0/m, z22.s
// CHECK-INST: addva za1.s, p2/m, p0/m, z22.s
// CHECK-ENCODING: [0xc1,0x0a,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: c1 0a 91 c0 <unknown>
addva za2.s, p5/m, p7/m, z9.s
// CHECK-INST: addva za2.s, p5/m, p7/m, z9.s
// CHECK-ENCODING: [0x22,0xf5,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 22 f5 91 c0 <unknown>
addva za3.s, p2/m, p5/m, z12.s
// CHECK-INST: addva za3.s, p2/m, p5/m, z12.s
// CHECK-ENCODING: [0x83,0xa9,0x91,0xc0]
// CHECK-ERROR: instruction requires: sme
// CHECK-UNKNOWN: 83 a9 91 c0 <unknown>

View File

@ -0,0 +1,85 @@
// Encoding tests for ADDVA on the 64-bit element ZA tiles (za0.d-za7.d),
// which are gated on the sme-i64 feature rather than plain sme.
// Each instruction below is followed by the expected printed form, the
// expected encoding bytes, the diagnostic expected when the feature is
// missing, and the raw bytes expected from a feature-less disassembly.
//
// Assemble with +sme-i64 and verify both the printed instruction and its
// encoding.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// Assemble without the feature: llvm-mc is expected to fail ("not") and the
// "instruction requires" diagnostic on stderr is verified.
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// Assemble to an object file and disassemble it with +sme-i64 enabled.
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
// RUN: | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST
// Disassemble the same object without the feature: every word must print as
// <unknown>.
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
addva za0.d, p0/m, p0/m, z0.d
// CHECK-INST: addva za0.d, p0/m, p0/m, z0.d
// CHECK-ENCODING: [0x00,0x00,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 00 00 d1 c0 <unknown>
addva za5.d, p5/m, p2/m, z10.d
// CHECK-INST: addva za5.d, p5/m, p2/m, z10.d
// CHECK-ENCODING: [0x45,0x55,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 45 55 d1 c0 <unknown>
addva za7.d, p3/m, p7/m, z13.d
// CHECK-INST: addva za7.d, p3/m, p7/m, z13.d
// CHECK-ENCODING: [0xa7,0xed,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: a7 ed d1 c0 <unknown>
addva za7.d, p7/m, p7/m, z31.d
// CHECK-INST: addva za7.d, p7/m, p7/m, z31.d
// CHECK-ENCODING: [0xe7,0xff,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: e7 ff d1 c0 <unknown>
addva za5.d, p3/m, p0/m, z17.d
// CHECK-INST: addva za5.d, p3/m, p0/m, z17.d
// CHECK-ENCODING: [0x25,0x0e,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 25 0e d1 c0 <unknown>
addva za1.d, p1/m, p4/m, z1.d
// CHECK-INST: addva za1.d, p1/m, p4/m, z1.d
// CHECK-ENCODING: [0x21,0x84,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 21 84 d1 c0 <unknown>
addva za0.d, p5/m, p2/m, z19.d
// CHECK-INST: addva za0.d, p5/m, p2/m, z19.d
// CHECK-ENCODING: [0x60,0x56,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 60 56 d1 c0 <unknown>
addva za0.d, p6/m, p0/m, z12.d
// CHECK-INST: addva za0.d, p6/m, p0/m, z12.d
// CHECK-ENCODING: [0x80,0x19,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 80 19 d1 c0 <unknown>
addva za1.d, p2/m, p6/m, z1.d
// CHECK-INST: addva za1.d, p2/m, p6/m, z1.d
// CHECK-ENCODING: [0x21,0xc8,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 21 c8 d1 c0 <unknown>
addva za5.d, p2/m, p0/m, z22.d
// CHECK-INST: addva za5.d, p2/m, p0/m, z22.d
// CHECK-ENCODING: [0xc5,0x0a,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: c5 0a d1 c0 <unknown>
addva za2.d, p5/m, p7/m, z9.d
// CHECK-INST: addva za2.d, p5/m, p7/m, z9.d
// CHECK-ENCODING: [0x22,0xf5,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 22 f5 d1 c0 <unknown>
addva za7.d, p2/m, p5/m, z12.d
// CHECK-INST: addva za7.d, p2/m, p5/m, z12.d
// CHECK-ENCODING: [0x87,0xa9,0xd1,0xc0]
// CHECK-ERROR: instruction requires: sme-i64
// CHECK-UNKNOWN: 87 a9 d1 c0 <unknown>

View File

@ -20,4 +20,5 @@ set(LLVM_LINK_COMPONENTS
# Source files of the AArch64Tests unit-test target; a new test file must be
# listed here to be built (presumably into the single AArch64Tests binary --
# confirm against the add_llvm_target_unittest definition).
add_llvm_target_unittest(AArch64Tests
InstSizes.cpp
DecomposeStackOffsetTest.cpp
MatrixRegisterAliasing.cpp
)

View File

@ -0,0 +1,135 @@
#include "AArch64Subtarget.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "gtest/gtest.h"
using namespace llvm;
namespace {
std::unique_ptr<LLVMTargetMachine> createTargetMachine() {
auto TT(Triple::normalize("aarch64--"));
std::string CPU("generic");
std::string FS("+sme");
LLVMInitializeAArch64TargetInfo();
LLVMInitializeAArch64Target();
LLVMInitializeAArch64TargetMC();
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
return std::unique_ptr<LLVMTargetMachine>(
static_cast<LLVMTargetMachine *>(TheTarget->createTargetMachine(
TT, CPU, FS, TargetOptions(), None, None, CodeGenOpt::Default)));
}
/// Builds an AArch64InstrInfo from a temporary subtarget that mirrors \p TM's
/// triple, CPU name and feature string.
///
/// \param TM target machine to take the configuration from; it is
/// dereferenced unconditionally and therefore must not be null.
/// \returns a newly allocated AArch64InstrInfo.
///
/// NOTE(review): the subtarget is a local of this function. If
/// AArch64InstrInfo keeps a reference to it, the returned object must not
/// touch the subtarget after this function returns -- confirm.
std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
  const std::string CPUName(TM->getTargetCPU());
  const std::string FeatureString(TM->getTargetFeatureString());

  AArch64Subtarget Subtarget(TM->getTargetTriple(), CPUName, FeatureString,
                             *TM, /* isLittle */ false);
  return std::make_unique<AArch64InstrInfo>(Subtarget);
}
/// Checks which SME matrix registers the register info reports as
/// overlapping: the full array ZA, the single byte tile ZAB0, and the
/// halfword (ZAH), word (ZAS), doubleword (ZAD) and quadword (ZAQ) tiles.
///
/// Setup failures are fatal (ASSERT_*) because nothing else can run without
/// a target machine. The individual overlap checks are independent of each
/// other, so they use non-fatal EXPECT_* and a single run reports every
/// mismatch instead of stopping at the first one.
TEST(MatrixRegisterAliasing, Aliasing) {
  std::unique_ptr<LLVMTargetMachine> TM = createTargetMachine();
  ASSERT_TRUE(TM);
  std::unique_ptr<AArch64InstrInfo> II = createInstrInfo(TM.get());
  const AArch64RegisterInfo &TRI = II->getRegisterInfo();

  // za overlaps with za0.b
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZA, AArch64::ZAB0));

  // za0.b overlaps with all tiles
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAQ0));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAQ15));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAD0));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAD7));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAS0));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAS3));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAH0));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAH1));

  // za0.h aliases with za0.q, za2.q, ..
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ0));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ2));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ4));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ6));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ8));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ10));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ12));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ14));

  // za1.h aliases with za1.q, za3.q, ...
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ1));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ3));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ5));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ7));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ9));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ11));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ13));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ15));

  // za1.h doesn't alias with za0.q, za2.q, ..
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ0));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ2));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ4));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ6));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ8));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ10));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ12));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ14));

  // za0.h doesn't alias with za1.q, za3.q, ..
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ1));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ3));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ5));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ7));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ9));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ11));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ13));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ15));

  // za0.s aliases with za0.q, za4.q, za8.q, za12.q
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ0));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ4));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ8));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ12));

  // za1.s aliases with za1.q, za5.q, za9.q, za13.q
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ1));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ5));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ9));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ13));

  // za0.s doesn't alias with za1.q, za5.q, za9.q, za13.q
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ1));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ5));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ9));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ13));

  // za1.s doesn't alias with za0.q, za4.q, za8.q, za12.q
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ0));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ4));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ8));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ12));

  // za0.d aliases za0.q and za8.q
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ0));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ8));

  // za1.d aliases za1.q and za9.q
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ1));
  EXPECT_TRUE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ9));

  // za0.d doesn't alias with za1.q and za9.q
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ1));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ9));

  // za1.d doesn't alias with za0.q and za8.q
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ0));
  EXPECT_FALSE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ8));
}
} // end anonymous namespace