[AArch64][SME] Add matrix register definitions and parsing support
SME introduces the ZA array, a new piece of architectural register state
consisting of a matrix of [SVLb x SVLb] bytes, where SVL is the
implementation defined Streaming SVE vector length and SVLb is the
number of 8-bit elements in a vector of SVL bits.
SME instructions consist of three types of matrix operands:
* Tiles: a ZA tile is a square, two-dimensional sub-array of elements
within the ZA array. These tiles make up the larger accumulator array
and the granularity varies based on the element size, i.e.
- ZAQ0..ZAQ15 (smallest tile granule)
- ZAD0..ZAD7
- ZAS0..ZAS3
- ZAH0..ZAH1
or ZAB0 (largest tile granule, single tile)
* Tile vectors: similar to regular tiles, but have an extra 'h' or 'v'
to tell how the vector at [reg+offset] is laid out in the tile,
horizontally or vertically. E.g. za1h.h or za15v.q, which corresponds
to vectors in registers ZAH1 and ZAQ15, respectively.
* Accumulator matrix: this is the entire accumulator array ZA.
This patch adds the register classes and related operands and parsing
for SME instructions operating on the accumulator array.
The ADDHA and ADDVA instructions which operate on tiles are also added
in this patch to make some use of the code added; later patches will
make use of the other operands introduced here.
The reference can be found here:
https://developer.arm.com/documentation/ddi0602/2021-06
Co-authored by: Sander de Smalen (@sdesmalen)
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D105570
2021-07-14 10:01:19 +02:00
|
|
|
//=- AArch64SMEInstrInfo.td - AArch64 SME Instructions -*- tablegen -*-----=//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Add vector elements horizontally or vertically to ZA tile.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Both records below are gated on the HasSME predicate and instantiate the
// sme_add_vector_to_tile_u32 class. The single-bit template argument selects
// the instruction: 0b0 for "addha", 0b1 for "addva".
let Predicates = [HasSME] in {
|
|
|
|
def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">;
|
|
|
|
def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Same "addha"/"addva" mnemonics as the _S records, but instantiating
// sme_add_vector_to_tile_u64 and gated on HasSMEI64 rather than HasSME.
// The single-bit template argument is again 0b0 for "addha", 0b1 for "addva".
let Predicates = [HasSMEI64] in {
|
|
|
|
def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">;
|
|
|
|
def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">;
|
|
|
|
}
|
[AArch64][SME] Add outer product instructions
This patch adds support for the following outer product instructions:
* BFMOPA, BFMOPS, FMOPA, FMOPS, SMOPA, SMOPS, SUMOPA, SUMOPS, UMOPA,
UMOPS, USMOPA, USMOPS.
Depends on D105570.
The reference can be found here:
https://developer.arm.com/documentation/ddi0602/2021-06
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D105571
2021-07-15 10:41:08 +02:00
|
|
|
|
|
|
|
let Predicates = [HasSME] in {
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Outer products
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// "bfmopa"/"bfmops" are expanded from the sme_bf16_outer_product multiclass
// (defm); the single-bit argument is 0b0 for the *mopa form and 0b1 for the
// *mops form.
defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa">;
|
|
|
|
defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops">;
|
|
|
|
|
|
|
|
// "fmopa"/"fmops" records built from the sme_outer_product_fp32 class, using
// the same 0b0 (fmopa) / 0b1 (fmops) selector bit.
def FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa">;
|
|
|
|
def FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops">;
|
|
|
|
}
|
|
|
|
|
|
|
|
// "fmopa"/"fmops" records built from sme_outer_product_fp64. These are gated
// on HasSMEF64 (not plain HasSME); the selector bit is 0b0 for "fmopa" and
// 0b1 for "fmops".
let Predicates = [HasSMEF64] in {
|
|
|
|
def FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa">;
|
|
|
|
def FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops">;
|
|
|
|
}
|
|
|
|
|
|
|
|
let Predicates = [HasSME] in {
|
|
|
|
// Expanded from the sme_f16_outer_product multiclass. Note the mnemonic
// strings are plain "fmopa"/"fmops" even though the record names are
// FMOPAL/FMOPSL.
defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa">;
|
|
|
|
defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops">;
|
|
|
|
|
|
|
|
// Integer outer products built from sme_int_outer_product_i32. Reading the
// 3-bit opcode arguments in the table below:
//   bit 0      : 0 for every *mopa record, 1 for every *mops record
//   bits [2:1] : 00 = s*, 01 = su*, 10 = us*, 11 = u*
def SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa">;
|
|
|
|
def SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops">;
|
|
|
|
def UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa">;
|
|
|
|
def UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops">;
|
|
|
|
def SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa">;
|
|
|
|
def SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops">;
|
|
|
|
def USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa">;
|
|
|
|
def USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops">;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Integer outer products built from sme_int_outer_product_i64, gated on
// HasSMEI64. The 3-bit opcode arguments are identical, mnemonic for mnemonic,
// to those of the _S records:
//   bit 0      : 0 for every *mopa record, 1 for every *mops record
//   bits [2:1] : 00 = s*, 01 = su*, 10 = us*, 11 = u*
let Predicates = [HasSMEI64] in {
|
|
|
|
def SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa">;
|
|
|
|
def SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops">;
|
|
|
|
def UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa">;
|
|
|
|
def UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops">;
|
|
|
|
def SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa">;
|
|
|
|
def SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops">;
|
|
|
|
def USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa">;
|
|
|
|
def USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops">;
|
|
|
|
}
|
2021-07-16 11:14:08 +02:00
|
|
|
|
|
|
|
let Predicates = [HasSME] in {
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Loads and stores
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// "ld1" is expanded from the sme_mem_ld_ss multiclass and "st1" from
// sme_mem_st_ss; the mnemonic string is the only template argument.
defm LD1_MXIPXX : sme_mem_ld_ss<"ld1">;
|
|
|
|
defm ST1_MXIPXX : sme_mem_st_ss<"st1">;
|
|
|
|
|
2021-07-21 09:51:22 +02:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Spill + fill
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// "ldr" is expanded from the sme_fill multiclass and "str" from sme_spill;
// the mnemonic string is the only template argument.
defm LDR_ZA : sme_fill<"ldr">;
|
|
|
|
defm STR_ZA : sme_spill<"str">;
|
|
|
|
|
2021-07-21 10:20:01 +02:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Move instructions
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Both directions share the "mova" mnemonic; they differ only in the
// multiclass used: sme_vector_to_tile for the INSERT records and
// sme_tile_to_vector for the EXTRACT records.
defm INSERT_MXIPZ : sme_vector_to_tile<"mova">;
|
|
|
|
defm EXTRACT_ZPMXI : sme_tile_to_vector<"mova">;
|
|
|
|
|
[AArch64][SME] Add zero instruction
This patch adds the zero instruction for zeroing a list of 64-bit
element ZA tiles. The instruction takes a list of up to eight tiles
ZA0.D-ZA7.D, which must be in order, e.g.
zero {za0.d,za1.d,za2.d,za3.d,za4.d,za5.d,za6.d,za7.d}
zero {za1.d,za3.d,za5.d,za7.d}
The assembler also accepts 32-bit, 16-bit and 8-bit element tiles which
are mapped to corresponding 64-bit element tiles in accordance with the
architecturally defined mapping between different element size tiles,
e.g.
* Zeroing ZA0.B, or the entire array name ZA, is equivalent to zeroing
all eight 64-bit element tiles ZA0.D to ZA7.D.
* Zeroing ZA0.S is equivalent to zeroing ZA0.D and ZA4.D.
The preferred disassembly of this instruction uses the shortest list of
tile names that represent the encoded immediate mask, e.g.
* An immediate which encodes 64-bit element tiles ZA0.D, ZA1.D, ZA4.D and
ZA5.D is disassembled as {ZA0.S, ZA1.S}.
* An immediate which encodes 64-bit element tiles ZA0.D, ZA2.D, ZA4.D and
ZA6.D is disassembled as {ZA0.H}.
* An all-ones immediate is disassembled as {ZA}.
* An all-zeros immediate is disassembled as an empty list {}.
This patch adds the MatrixTileList asm operand and related parsing to support
this.
Depends on D105570.
The reference can be found here:
https://developer.arm.com/documentation/ddi0602/2021-06
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D105575
2021-07-27 10:00:49 +02:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Zero instruction
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// "zero" is expanded from the sme_zero multiclass; the mnemonic string is the
// only template argument.
defm ZERO_M : sme_zero<"zero">;
|
|
|
|
|
[AArch64][SME] Add system registers and related instructions
This patch adds the new system registers introduced in SME:
- ID_AA64SMFR0_EL1 (ro) SME feature identifier.
- SMCR_ELx (r/w) streaming mode control register for configuring
the effective Streaming SVE vector length when the PE is in
Streaming SVE mode.
- SVCR (r/w) streaming vector control register, visible at all
exception levels. Provides access to PSTATE.SM and PSTATE.ZA
using MSR and MRS instructions.
- SMPRI_EL1 (r/w) streaming mode execution priority register.
- SMPRIMAP_EL2 (r/w) streaming mode priority mapping register.
- SMIDR_EL1 (ro) streaming mode identification register.
- TPIDR2_EL0 (r/w) for use by SME software to manage per-thread
SME context.
- MPAMSM_EL1 (r/w) MPAM (v8.4) streaming mode register, for
labelling memory accesses performed in streaming mode.
Also added in this patch are the SME mode change instructions.
Three MSR immediate instructions are implemented to set or clear
PSTATE.SM, PSTATE.ZA, or both respectively:
- MSR SVCRSM, #<imm1>
- MSR SVCRZA, #<imm1>
- MSR SVCRSMZA, #<imm1>
The following smstart/smstop aliases are also implemented for
convenience:
smstart -> MSR SVCRSMZA, #1
smstart sm -> MSR SVCRSM, #1
smstart za -> MSR SVCRZA, #1
smstop -> MSR SVCRSMZA, #0
smstop sm -> MSR SVCRSM, #0
smstop za -> MSR SVCRZA, #0
The reference can be found here:
https://developer.arm.com/documentation/ddi0602/2021-06
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D105576
2021-07-20 09:19:10 +02:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Mode selection and state access instructions
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or
|
|
|
|
// both fields:
|
|
|
|
//
|
|
|
|
// MSR SVCRSM, #<imm1>
|
|
|
|
// MSR SVCRZA, #<imm1>
|
|
|
|
// MSR SVCRSMZA, #<imm1>
|
|
|
|
//
|
|
|
|
// It's tricky to use the existing pstate operand defined in
|
|
|
|
// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2,
|
|
|
|
// when these fields are also encoded in CRm[3:1].
|
|
|
|
// MSR (immediate) form taking an svcr_op pstate field and an imm0_1
// immediate, printed as "msr\t$pstatefield, $imm".
//
// Bit placement set by this class:
//   Inst{18-16} = 0b011        (op1)
//   Inst{11-9}  = pstatefield  (3 bits)
//   Inst{8}     = imm          (1 bit)
//   Inst{7-5}   = 0b011        (op2)
// All other bits come from the PstateWriteSimple base class.
class MSRpstatesvcrImm0_1
|
|
|
|
: PstateWriteSimple<(ins svcr_op:$pstatefield, imm0_1:$imm), "msr",
|
|
|
|
"\t$pstatefield, $imm">,
|
|
|
|
Sched<[WriteSys]> {
|
|
|
|
// 3-bit selector supplied by the svcr_op operand.
bits<3> pstatefield;
|
|
|
|
// Single-bit immediate supplied by the imm0_1 operand.
bit imm;
|
|
|
|
let Inst{18-16} = 0b011; // op1
|
|
|
|
// NOTE(review): Inst{11-9} is presumably the CRm[3:1] field mentioned in the
// comment preceding this class, with imm in CRm[0] -- confirm against the
// MSR (immediate) encoding.
let Inst{11-9} = pstatefield;
|
|
|
|
let Inst{8} = imm;
|
|
|
|
let Inst{7-5} = 0b011; // op2
|
|
|
|
}
|
|
|
|
|
|
|
|
// Concrete record for the MSRpstatesvcrImm0_1 class; the aliases below all
// map onto it.
def MSRpstatesvcrImm1 : MSRpstatesvcrImm0_1;
|
|
|
|
// Alias operands follow the instruction's (pstatefield, imm) operand order.
//   pstatefield: 0b001 for the "sm" forms, 0b010 for the "za" forms,
//                0b011 for the forms with no suffix
//   imm:         0b1 for smstart, 0b0 for smstop
def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>;
|
|
|
|
def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
|
|
|
|
def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;
|
|
|
|
|
|
|
|
// smstop: same pstatefield values as smstart, with imm = 0b0.
def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>;
|
|
|
|
def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>;
|
|
|
|
def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>;
|
|
|
|
|
2021-07-19 09:40:04 +02:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// SVE2 instructions
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// NOTE: these records sit inside the enclosing `let Predicates = [HasSME]`
// block (closed by the "End let Predicates = [HasSME]" brace), so they are
// gated on HasSME like the rest of that block.
def REVD_ZPmZ : sve2_int_perm_revd<"revd">;
|
|
|
|
|
|
|
|
// "sclamp"/"uclamp" share the sve2_clamp multiclass; the trailing bit is 0b0
// for "sclamp" and 0b1 for "uclamp".
defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0>;
|
|
|
|
defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1>;
|
|
|
|
|
|
|
|
defm DUP_PPzPRI : sve2_int_perm_dup_p<"dup">;
|
|
|
|
|
2021-07-16 11:14:08 +02:00
|
|
|
} // End let Predicates = [HasSME]
|