mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[AArch64] Add v8.1a "Rounding Double Multiply Add/Subtract" extension
Reviewers: t.p.northover, jmolloy Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D8502 llvm-svn: 233693
This commit is contained in:
parent
272d4887f8
commit
22589e7b79
@ -5300,6 +5300,27 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
|
||||
let Inst{4-0} = Rd;
|
||||
}
|
||||
|
||||
// Encoding format for a three-register scalar instruction with a tied
// destination: the "$Rd = $dst" constraint passed to I makes the $dst output
// share its register with the $Rd operand.
// U, size, R and opcode are copied into fixed encoding fields below;
// oops, iops, asm and pattern are forwarded unchanged to the base class I.
// The enclosing 'let' marks every instruction of this class as neither
// loading nor storing and as free of side effects.
let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
|
||||
class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
|
||||
dag oops, dag iops, string asm,
|
||||
list<dag> pattern>
|
||||
: I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
// 5-bit register numbers of the three operands named in the asm string.
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
// Fixed bits and operand fields of the 32-bit encoding, from bit 31 down.
let Inst{31-30} = 0b01;
|
||||
let Inst{29} = U;
|
||||
let Inst{28-24} = 0b11110;
|
||||
let Inst{23-22} = size;
|
||||
// Bit 21 is supplied by the instantiating definition through R.
let Inst{21} = R;
|
||||
let Inst{20-16} = Rm;
|
||||
let Inst{15-11} = opcode;
|
||||
let Inst{10} = 1;
|
||||
let Inst{9-5} = Rn;
|
||||
let Inst{4-0} = Rd;
|
||||
}
|
||||
|
||||
multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
|
||||
SDPatternOperator OpNode> {
|
||||
def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
|
||||
@ -5327,6 +5348,16 @@ multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
|
||||
def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
|
||||
}
|
||||
|
||||
// Instantiates the tied three-operand scalar format for two element sizes:
//   v1i32 - size 0b10, operands in FPR32
//   v1i16 - size 0b01, operands in FPR16
// U, R, opc and asm are passed straight to BaseSIMDThreeScalarTied.
// Both definitions are created with an empty pattern list; OpNode is accepted
// (default null_frag) but is not referenced in the body.
multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
|
||||
SDPatternOperator OpNode = null_frag> {
|
||||
def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst),
|
||||
(ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm),
|
||||
asm, []>;
|
||||
def v1i16: BaseSIMDThreeScalarTied<U, 0b01, R, opc, (outs FPR16:$dst),
|
||||
(ins FPR16:$Rd, FPR16:$Rn, FPR16:$Rm),
|
||||
asm, []>;
|
||||
}
|
||||
|
||||
multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm,
|
||||
SDPatternOperator OpNode = null_frag> {
|
||||
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
|
||||
@ -8518,6 +8549,174 @@ multiclass SIMDLdSt4SingleAliases<string asm> {
|
||||
}
|
||||
} // end of 'let Predicates = [HasNEON]'
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
let Predicates = [HasNEON, HasV8_1a] in {
|
||||
|
||||
// Thin wrapper around BaseSIMDThreeSameVectorTied: every template argument is
// forwarded unchanged and the only addition is that encoding bit 21 is forced
// to 0.
class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
|
||||
RegisterOperand regtype, string asm,
|
||||
string kind, list<dag> pattern>
|
||||
: BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind,
|
||||
pattern> {
|
||||
// Overrides whatever the base class placed in bit 21.
let Inst{21}=0;
|
||||
}
|
||||
// Defines the four vector forms (.4h, .8h, .2s, .4s) of a tied three-operand
// instruction built on BaseSIMDThreeSameVectorTiedR0.
// Each form's selection pattern is
//   $dst = Accum($Rd, int_aarch64_neon_sqrdmulh($Rn, $Rm))
// so the operator passed as Accum decides how the tied $Rd input is combined
// with the sqrdmulh product.
// The first argument (Q) is 0 for the V64 forms and 1 for the V128 forms;
// size is 0b01 for the halfword forms and 0b10 for the word forms.
multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
|
||||
SDPatternOperator Accum> {
|
||||
// 4 x i16 in a 64-bit register.
def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h",
|
||||
[(set (v4i16 V64:$dst),
|
||||
(Accum (v4i16 V64:$Rd),
|
||||
(v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn),
|
||||
(v4i16 V64:$Rm)))))]>;
|
||||
// 8 x i16 in a 128-bit register.
def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h",
|
||||
[(set (v8i16 V128:$dst),
|
||||
(Accum (v8i16 V128:$Rd),
|
||||
(v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn),
|
||||
(v8i16 V128:$Rm)))))]>;
|
||||
// 2 x i32 in a 64-bit register.
def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s",
|
||||
[(set (v2i32 V64:$dst),
|
||||
(Accum (v2i32 V64:$Rd),
|
||||
(v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn),
|
||||
(v2i32 V64:$Rm)))))]>;
|
||||
// 4 x i32 in a 128-bit register.
def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s",
|
||||
[(set (v4i32 V128:$dst),
|
||||
(Accum (v4i32 V128:$Rd),
|
||||
(v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn),
|
||||
(v4i32 V128:$Rm)))))]>;
|
||||
}
|
||||
|
||||
// Defines the indexed (lane-selecting) forms of a tied instruction on top of
// BaseSIMDIndexedTied: four vector forms (v4i16, v8i16, v2i32, v4i32), two
// scalar forms (i16, i32), and two extra patterns that reuse the vector forms
// for an i32 accumulate of lane 0.
// In every pattern Accum combines the tied $Rd input with an
// int_aarch64_neon_sqrdmulh product whose second factor is one lane of $Rm.
// Lane index encoding, as written in each definition body:
//   halfword forms (bits<3> idx): Inst{11} = idx{2}, Inst{21} = idx{1},
//                                 Inst{20} = idx{0}
//   word forms     (bits<2> idx): Inst{11} = idx{1}, Inst{21} = idx{0}
// NOTE(review): the meaning of the leading positional arguments of
// BaseSIMDIndexedTied is not visible here - confirm against its definition.
multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
|
||||
SDPatternOperator Accum> {
|
||||
// 4 x i16 form; the lane operand comes from V128_lo and is broadcast with
// AArch64duplane16.
def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
|
||||
V64, V64, V128_lo, VectorIndexH,
|
||||
asm, ".4h", ".4h", ".4h", ".h",
|
||||
[(set (v4i16 V64:$dst),
|
||||
(Accum (v4i16 V64:$Rd),
|
||||
(v4i16 (int_aarch64_neon_sqrdmulh
|
||||
(v4i16 V64:$Rn),
|
||||
(v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
|
||||
VectorIndexH:$idx))))))]> {
|
||||
bits<3> idx;
|
||||
let Inst{11} = idx{2};
|
||||
let Inst{21} = idx{1};
|
||||
let Inst{20} = idx{0};
|
||||
}
|
||||
|
||||
// 8 x i16 form; same lane handling as the 4 x i16 form.
def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
|
||||
V128, V128, V128_lo, VectorIndexH,
|
||||
asm, ".8h", ".8h", ".8h", ".h",
|
||||
[(set (v8i16 V128:$dst),
|
||||
(Accum (v8i16 V128:$Rd),
|
||||
(v8i16 (int_aarch64_neon_sqrdmulh
|
||||
(v8i16 V128:$Rn),
|
||||
(v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
|
||||
VectorIndexH:$idx))))))]> {
|
||||
bits<3> idx;
|
||||
let Inst{11} = idx{2};
|
||||
let Inst{21} = idx{1};
|
||||
let Inst{20} = idx{0};
|
||||
}
|
||||
|
||||
// 2 x i32 form; the lane operand comes from V128 and is broadcast with
// AArch64duplane32.
def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
|
||||
V64, V64, V128, VectorIndexS,
|
||||
asm, ".2s", ".2s", ".2s", ".s",
|
||||
[(set (v2i32 V64:$dst),
|
||||
(Accum (v2i32 V64:$Rd),
|
||||
(v2i32 (int_aarch64_neon_sqrdmulh
|
||||
(v2i32 V64:$Rn),
|
||||
(v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
|
||||
VectorIndexS:$idx))))))]> {
|
||||
bits<2> idx;
|
||||
let Inst{11} = idx{1};
|
||||
let Inst{21} = idx{0};
|
||||
}
|
||||
|
||||
// Extra pattern: an i32 Accum of $Rd with lane 0 of a v2i32 by-lane sqrdmulh
// is selected as the v2i32_indexed instruction. $Rd is placed in the ssub
// subregister of an IMPLICIT_DEF vector and the result is read back from
// ssub.
// FIXME: it would be nice to use the scalar (v1i32) instruction here, but
|
||||
// an intermediate EXTRACT_SUBREG would be untyped.
|
||||
// FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we
|
||||
// got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..)))
|
||||
def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
|
||||
(i32 (vector_extract
|
||||
(v4i32 (insert_subvector
|
||||
(undef),
|
||||
(v2i32 (int_aarch64_neon_sqrdmulh
|
||||
(v2i32 V64:$Rn),
|
||||
(v2i32 (AArch64duplane32
|
||||
(v4i32 V128:$Rm),
|
||||
VectorIndexS:$idx)))),
|
||||
(i32 0))),
|
||||
(i64 0))))),
|
||||
(EXTRACT_SUBREG
|
||||
(v2i32 (!cast<Instruction>(NAME # v2i32_indexed)
|
||||
(v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
|
||||
FPR32Op:$Rd,
|
||||
ssub)),
|
||||
V64:$Rn,
|
||||
V128:$Rm,
|
||||
VectorIndexS:$idx)),
|
||||
ssub)>;
|
||||
|
||||
// 4 x i32 form.
def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
|
||||
V128, V128, V128, VectorIndexS,
|
||||
asm, ".4s", ".4s", ".4s", ".s",
|
||||
[(set (v4i32 V128:$dst),
|
||||
(Accum (v4i32 V128:$Rd),
|
||||
(v4i32 (int_aarch64_neon_sqrdmulh
|
||||
(v4i32 V128:$Rn),
|
||||
(v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
|
||||
VectorIndexS:$idx))))))]> {
|
||||
bits<2> idx;
|
||||
let Inst{11} = idx{1};
|
||||
let Inst{21} = idx{0};
|
||||
}
|
||||
|
||||
// Extra pattern: same idea as the v2i32 case above, but lane 0 is extracted
// directly from the v4i32 product and the v4i32_indexed instruction is used.
// FIXME: it would be nice to use the scalar (v1i32) instruction here, but
|
||||
// an intermediate EXTRACT_SUBREG would be untyped.
|
||||
def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
|
||||
(i32 (vector_extract
|
||||
(v4i32 (int_aarch64_neon_sqrdmulh
|
||||
(v4i32 V128:$Rn),
|
||||
(v4i32 (AArch64duplane32
|
||||
(v4i32 V128:$Rm),
|
||||
VectorIndexS:$idx)))),
|
||||
(i64 0))))),
|
||||
(EXTRACT_SUBREG
|
||||
(v4i32 (!cast<Instruction>(NAME # v4i32_indexed)
|
||||
(v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
|
||||
FPR32Op:$Rd,
|
||||
ssub)),
|
||||
V128:$Rn,
|
||||
V128:$Rm,
|
||||
VectorIndexS:$idx)),
|
||||
ssub)>;
|
||||
|
||||
// Scalar halfword form on FPR16Op; defined with an empty pattern list.
def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
|
||||
FPR16Op, FPR16Op, V128_lo,
|
||||
VectorIndexH, asm, ".h", "", "", ".h",
|
||||
[]> {
|
||||
bits<3> idx;
|
||||
let Inst{11} = idx{2};
|
||||
let Inst{21} = idx{1};
|
||||
let Inst{20} = idx{0};
|
||||
}
|
||||
|
||||
// Scalar word form on FPR32Op; matches Accum of $Rd with a scalar i32
// sqrdmulh whose second factor is vector_extract($Rm, idx).
def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
|
||||
FPR32Op, FPR32Op, V128, VectorIndexS,
|
||||
asm, ".s", "", "", ".s",
|
||||
[(set (i32 FPR32Op:$dst),
|
||||
(Accum (i32 FPR32Op:$Rd),
|
||||
(i32 (int_aarch64_neon_sqrdmulh
|
||||
(i32 FPR32Op:$Rn),
|
||||
(i32 (vector_extract (v4i32 V128:$Rm),
|
||||
VectorIndexS:$idx))))))]> {
|
||||
bits<2> idx;
|
||||
let Inst{11} = idx{1};
|
||||
let Inst{21} = idx{0};
|
||||
}
|
||||
}
|
||||
} // let Predicates = [HasNEON, HasV8_1a]
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Crypto extensions
|
||||
//----------------------------------------------------------------------------
|
||||
|
@ -2778,6 +2778,10 @@ defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
|
||||
defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
|
||||
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
|
||||
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
|
||||
// Vector SQRDMLAH / SQRDMLSH. Both use U = 1 and differ in opcode
// (0b10000 vs 0b10001) and in the operator passed as Accum:
// int_aarch64_neon_sqadd for SQRDMLAH, int_aarch64_neon_sqsub for SQRDMLSH.
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
|
||||
int_aarch64_neon_sqadd>;
|
||||
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
|
||||
int_aarch64_neon_sqsub>;
|
||||
|
||||
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
|
||||
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
|
||||
@ -2994,6 +2998,20 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>
|
||||
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
|
||||
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
|
||||
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
|
||||
// Scalar SQRDMLAH / SQRDMLSH, guarded by HasV8_1a.
// The defms are instantiated with U = 1 and R = 0 and no OpNode, so selection
// is provided by the two explicit patterns below: an i32 sqadd (resp. sqsub)
// of $Rd with an i32 sqrdmulh of $Rn and $Rm is selected as SQRDMLAHv1i32
// (resp. SQRDMLSHv1i32). No pattern is given here for the 16-bit variant.
// NOTE(review): this guard lists only HasV8_1a and not HasNEON - confirm
// that omitting HasNEON is intentional.
let Predicates = [HasV8_1a] in {
|
||||
defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
|
||||
defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
|
||||
// sqadd($Rd, sqrdmulh($Rn, $Rm)) -> SQRDMLAHv1i32
def : Pat<(i32 (int_aarch64_neon_sqadd
|
||||
(i32 FPR32:$Rd),
|
||||
(i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
|
||||
(i32 FPR32:$Rm))))),
|
||||
(SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
|
||||
// sqsub($Rd, sqrdmulh($Rn, $Rm)) -> SQRDMLSHv1i32
def : Pat<(i32 (int_aarch64_neon_sqsub
|
||||
(i32 FPR32:$Rd),
|
||||
(i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
|
||||
(i32 FPR32:$Rm))))),
|
||||
(SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
|
||||
}
|
||||
|
||||
def : InstAlias<"cmls $dst, $src1, $src2",
|
||||
(CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
|
||||
@ -4324,6 +4342,10 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
|
||||
int_aarch64_neon_sqadd>;
|
||||
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
|
||||
int_aarch64_neon_sqsub>;
|
||||
// Indexed SQRDMLAH / SQRDMLSH. Both use U = 1 and differ in opc
// (0b1101 vs 0b1111) and in the operator passed as Accum:
// int_aarch64_neon_sqadd for SQRDMLAH, int_aarch64_neon_sqsub for SQRDMLSH.
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
|
||||
int_aarch64_neon_sqadd>;
|
||||
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
|
||||
int_aarch64_neon_sqsub>;
|
||||
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
|
||||
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
|
||||
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
|
||||
|
456
test/CodeGen/AArch64/arm64-neon-v8.1a.ll
Normal file
456
test/CodeGen/AArch64/arm64-neon-v8.1a.ll
Normal file
@ -0,0 +1,456 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -march=arm64 | FileCheck %s --check-prefix=CHECK-V8a
|
||||
; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a | FileCheck %s --check-prefix=CHECK-V81a
|
||||
; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK-V81a-apple
|
||||
|
||||
declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32)
|
||||
declare i16 @llvm.aarch64.neon.sqrdmulh.i16(i16, i16)
|
||||
|
||||
declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)
|
||||
declare i16 @llvm.aarch64.neon.sqadd.i16(i16, i16)
|
||||
|
||||
declare <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32)
|
||||
declare i16 @llvm.aarch64.neon.sqsub.i16(i16, i16)
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; RDMA Vector
|
||||
; test for SIMDThreeSameVectorSQRDMLxHTiedHS
|
||||
|
||||
define <4 x i16> @test_sqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlah_v4i16:
|
||||
%prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
|
||||
%retval = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.4h
|
||||
; CHECK-V81a: sqrdmlah v0.4h, v1.4h, v2.4h
|
||||
; CHECK-V81a-apple: sqrdmlah.4h v0, v1, v2
|
||||
ret <4 x i16> %retval
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlah_v8i16:
|
||||
%prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
|
||||
%retval = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.8h
|
||||
; CHECK-V81a: sqrdmlah v0.8h, v1.8h, v2.8h
|
||||
; CHECK-V81a-apple: sqrdmlah.8h v0, v1, v2
|
||||
ret <8 x i16> %retval
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlah_v2i32:
|
||||
%prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
|
||||
%retval = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.2s
|
||||
; CHECK-V81a: sqrdmlah v0.2s, v1.2s, v2.2s
|
||||
; CHECK-V81a-apple: sqrdmlah.2s v0, v1, v2
|
||||
ret <2 x i32> %retval
|
||||
}
|
||||
|
||||
; sqadd(%acc, sqrdmulh(%mhs, %rhs)) on <4 x i32>: expected to stay a separate
; sqrdmulh without +v8.1a and to be selected as a single sqrdmlah with it.
; The baseline expectation previously used a prefix that no RUN line enables,
; so it was silently skipped; it now uses the prefix of the plain arm64 run.
define <4 x i32> @test_sqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlah_v4i32:
|
||||
%prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
|
||||
%retval = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.4s
|
||||
; CHECK-V81a: sqrdmlah v0.4s, v1.4s, v2.4s
|
||||
; CHECK-V81a-apple: sqrdmlah.4s v0, v1, v2
|
||||
ret <4 x i32> %retval
|
||||
}
|
||||
|
||||
define <4 x i16> @test_sqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_v4i16:
|
||||
%prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
|
||||
%retval = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.4h
|
||||
; CHECK-V81a: sqrdmlsh v0.4h, v1.4h, v2.4h
|
||||
; CHECK-V81a-apple: sqrdmlsh.4h v0, v1, v2
|
||||
ret <4 x i16> %retval
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_v8i16:
|
||||
%prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
|
||||
%retval = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.8h
|
||||
; CHECK-V81a: sqrdmlsh v0.8h, v1.8h, v2.8h
|
||||
; CHECK-V81a-apple: sqrdmlsh.8h v0, v1, v2
|
||||
ret <8 x i16> %retval
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_v2i32:
|
||||
%prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
|
||||
%retval = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.2s
|
||||
; CHECK-V81a: sqrdmlsh v0.2s, v1.2s, v2.2s
|
||||
; CHECK-V81a-apple: sqrdmlsh.2s v0, v1, v2
|
||||
ret <2 x i32> %retval
|
||||
}
|
||||
|
||||
define <4 x i32> @test_sqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_v4i32:
|
||||
%prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
|
||||
%retval = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.4s
|
||||
; CHECK-V81a: sqrdmlsh v0.4s, v1.4s, v2.4s
|
||||
; CHECK-V81a-apple: sqrdmlsh.4s v0, v1, v2
|
||||
ret <4 x i32> %retval
|
||||
}
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; RDMA Vector, by element
|
||||
; tests for vXiYY_indexed in SIMDIndexedSQRDMLxHSDTied
|
||||
|
||||
; sqadd(%acc, sqrdmulh(%x, lane 3 of %v)) on <4 x i16>: expected to stay a
; by-lane sqrdmulh without +v8.1a and to become a by-lane sqrdmlah with it.
; The baseline expectation previously had a space between the prefix and the
; colon, which FileCheck does not recognise as a directive; the space is gone.
define <4 x i16> @test_sqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlah_lane_s16:
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
%prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
|
||||
%retval = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.h[3]
|
||||
; CHECK-V81a: sqrdmlah v0.4h, v1.4h, v2.h[3]
|
||||
; CHECK-V81a-apple: sqrdmlah.4h v0, v1, v2[3]
|
||||
ret <4 x i16> %retval
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlahq_lane_s16:
|
||||
entry:
|
||||
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
|
||||
%prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
|
||||
%retval = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.h[2]
|
||||
; CHECK-V81a: sqrdmlah v0.8h, v1.8h, v2.h[2]
|
||||
; CHECK-V81a-apple: sqrdmlah.8h v0, v1, v2[2]
|
||||
ret <8 x i16> %retval
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlah_lane_s32:
|
||||
entry:
|
||||
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
%prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
|
||||
%retval = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.s[1]
|
||||
; CHECK-V81a: sqrdmlah v0.2s, v1.2s, v2.s[1]
|
||||
; CHECK-V81a-apple: sqrdmlah.2s v0, v1, v2[1]
|
||||
ret <2 x i32> %retval
|
||||
}
|
||||
|
||||
define <4 x i32> @test_sqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <4 x i32> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlahq_lane_s32:
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
|
||||
%retval = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.s[0]
|
||||
; CHECK-V81a: sqrdmlah v0.4s, v1.4s, v2.s[0]
|
||||
; CHECK-V81a-apple: sqrdmlah.4s v0, v1, v2[0]
|
||||
ret <4 x i32> %retval
|
||||
}
|
||||
|
||||
define <4 x i16> @test_sqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_lane_s16:
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
%prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
|
||||
%retval = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.h[3]
|
||||
; CHECK-V81a: sqrdmlsh v0.4h, v1.4h, v2.h[3]
|
||||
; CHECK-V81a-apple: sqrdmlsh.4h v0, v1, v2[3]
|
||||
ret <4 x i16> %retval
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlshq_lane_s16:
|
||||
entry:
|
||||
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
|
||||
%prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
|
||||
%retval = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.h[2]
|
||||
; CHECK-V81a: sqrdmlsh v0.8h, v1.8h, v2.h[2]
|
||||
; CHECK-V81a-apple: sqrdmlsh.8h v0, v1, v2[2]
|
||||
ret <8 x i16> %retval
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_lane_s32:
|
||||
entry:
|
||||
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
%prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
|
||||
%retval = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.s[1]
|
||||
; CHECK-V81a: sqrdmlsh v0.2s, v1.2s, v2.s[1]
|
||||
; CHECK-V81a-apple: sqrdmlsh.2s v0, v1, v2[1]
|
||||
ret <2 x i32> %retval
|
||||
}
|
||||
|
||||
define <4 x i32> @test_sqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <4 x i32> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlshq_lane_s32:
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
|
||||
%retval = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %prod)
|
||||
; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.s[0]
|
||||
; CHECK-V81a: sqrdmlsh v0.4s, v1.4s, v2.s[0]
|
||||
; CHECK-V81a-apple: sqrdmlsh.4s v0, v1, v2[0]
|
||||
ret <4 x i32> %retval
|
||||
}
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; RDMA Vector, by element, extracted
|
||||
; i16 tests are for vXi16_indexed in SIMDIndexedSQRDMLxHSDTied, with IR in ACLE style
|
||||
; i32 tests are for "def : Pat" in SIMDIndexedSQRDMLxHSDTied
|
||||
|
||||
define i16 @test_sqrdmlah_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlah_extracted_lane_s16:
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1>
|
||||
%prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
|
||||
%acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
|
||||
%retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod)
|
||||
%retval = extractelement <4 x i16> %retval_vec, i64 0
|
||||
; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, v0.4h, v1.h[1]
|
||||
; CHECK-V81a: sqrdmlah {{v[2-9]+}}.4h, v0.4h, v1.h[1]
|
||||
; CHECK-V81a-apple: sqrdmlah.4h {{v[2-9]+}}, v0, v1[1]
|
||||
ret i16 %retval
|
||||
}
|
||||
|
||||
define i16 @test_sqrdmlahq_extracted_lane_s16(i16 %acc,<8 x i16> %x, <8 x i16> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlahq_extracted_lane_s16:
|
||||
entry:
|
||||
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1, i32 1,i32 1,i32 1,i32 1>
|
||||
%prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
|
||||
%acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0
|
||||
%retval_vec = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod)
|
||||
%retval = extractelement <8 x i16> %retval_vec, i64 0
|
||||
; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, v0.8h, v1.h[1]
|
||||
; CHECK-V81a: sqrdmlah {{v[2-9]+}}.8h, v0.8h, v1.h[1]
|
||||
; CHECK-V81a-apple: sqrdmlah.8h {{v[2-9]+}}, v0, v1[1]
|
||||
ret i16 %retval
|
||||
}
|
||||
|
||||
define i32 @test_sqrdmlah_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlah_extracted_lane_s32:
|
||||
entry:
|
||||
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
|
||||
%prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
|
||||
%extract = extractelement <2 x i32> %prod, i64 0
|
||||
%retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %extract)
|
||||
; CHECK-V8a: sqrdmulh v0.2s, v0.2s, v1.s[0]
|
||||
; CHECK-V81a: sqrdmlah v2.2s, v0.2s, v1.s[0]
|
||||
; CHECK-V81a-apple: sqrdmlah.2s v2, v0, v1[0]
|
||||
ret i32 %retval
|
||||
}
|
||||
|
||||
define i32 @test_sqrdmlahq_extracted_lane_s32(i32 %acc,<4 x i32> %x, <4 x i32> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlahq_extracted_lane_s32:
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
|
||||
%extract = extractelement <4 x i32> %prod, i64 0
|
||||
%retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %extract)
|
||||
; CHECK-V8a: sqrdmulh v0.4s, v0.4s, v1.s[0]
|
||||
; CHECK-V81a: sqrdmlah v2.4s, v0.4s, v1.s[0]
|
||||
; CHECK-V81a-apple: sqrdmlah.4s v2, v0, v1[0]
|
||||
ret i32 %retval
|
||||
}
|
||||
|
||||
define i16 @test_sqrdmlsh_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s16:
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1>
|
||||
%prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
|
||||
%acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
|
||||
%retval_vec = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod)
|
||||
%retval = extractelement <4 x i16> %retval_vec, i64 0
|
||||
; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, v0.4h, v1.h[1]
|
||||
; CHECK-V81a: sqrdmlsh {{v[2-9]+}}.4h, v0.4h, v1.h[1]
|
||||
; CHECK-V81a-apple: sqrdmlsh.4h {{v[2-9]+}}, v0, v1[1]
|
||||
ret i16 %retval
|
||||
}
|
||||
|
||||
define i16 @test_sqrdmlshq_extracted_lane_s16(i16 %acc,<8 x i16> %x, <8 x i16> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlshq_extracted_lane_s16:
|
||||
entry:
|
||||
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1, i32 1,i32 1,i32 1,i32 1>
|
||||
%prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
|
||||
%acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0
|
||||
%retval_vec = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod)
|
||||
%retval = extractelement <8 x i16> %retval_vec, i64 0
|
||||
; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, v0.8h, v1.h[1]
|
||||
; CHECK-V81a: sqrdmlsh {{v[2-9]+}}.8h, v0.8h, v1.h[1]
|
||||
; CHECK-V81a-apple: sqrdmlsh.8h {{v[2-9]+}}, v0, v1[1]
|
||||
ret i16 %retval
|
||||
}
|
||||
|
||||
define i32 @test_sqrdmlsh_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s32:
|
||||
entry:
|
||||
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
|
||||
%prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
|
||||
%extract = extractelement <2 x i32> %prod, i64 0
|
||||
%retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %extract)
|
||||
; CHECK-V8a: sqrdmulh v0.2s, v0.2s, v1.s[0]
|
||||
; CHECK-V81a: sqrdmlsh v2.2s, v0.2s, v1.s[0]
|
||||
; CHECK-V81a-apple: sqrdmlsh.2s v2, v0, v1[0]
|
||||
ret i32 %retval
|
||||
}
|
||||
|
||||
define i32 @test_sqrdmlshq_extracted_lane_s32(i32 %acc,<4 x i32> %x, <4 x i32> %v) {
|
||||
; CHECK-LABEL: test_sqrdmlshq_extracted_lane_s32:
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
|
||||
%extract = extractelement <4 x i32> %prod, i64 0
|
||||
%retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %extract)
|
||||
; CHECK-V8a: sqrdmulh v0.4s, v0.4s, v1.s[0]
|
||||
; CHECK-V81a: sqrdmlsh v2.4s, v0.4s, v1.s[0]
|
||||
; CHECK-V81a-apple: sqrdmlsh.4s v2, v0, v1[0]
|
||||
ret i32 %retval
|
||||
}
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; RDMA Scalar
|
||||
; test for "def : Pat" near SIMDThreeScalarHSTied in AArch64InstrInfo.td
|
||||
|
||||
define i16 @test_sqrdmlah_v1i16(i16 %acc, i16 %x, i16 %y) {
|
||||
; CHECK-LABEL: test_sqrdmlah_v1i16:
|
||||
%x_vec = insertelement <4 x i16> undef, i16 %x, i64 0
|
||||
%y_vec = insertelement <4 x i16> undef, i16 %y, i64 0
|
||||
%prod_vec = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %y_vec)
|
||||
%acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
|
||||
%retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod_vec)
|
||||
%retval = extractelement <4 x i16> %retval_vec, i64 0
|
||||
; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
|
||||
; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
|
||||
; CHECK-V81a-apple: sqrdmlah.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
|
||||
ret i16 %retval
|
||||
}
|
||||
|
||||
define i32 @test_sqrdmlah_v1i32(i32 %acc, i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: test_sqrdmlah_v1i32:
|
||||
%x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
|
||||
%y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
|
||||
%prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec, <4 x i32> %y_vec)
|
||||
%acc_vec = insertelement <4 x i32> undef, i32 %acc, i64 0
|
||||
%retval_vec = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc_vec, <4 x i32> %prod_vec)
|
||||
%retval = extractelement <4 x i32> %retval_vec, i64 0
|
||||
; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
; CHECK-V81a-apple: sqrdmlah.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
|
||||
ret i32 %retval
|
||||
}
|
||||
|
||||
|
||||
define i16 @test_sqrdmlsh_v1i16(i16 %acc, i16 %x, i16 %y) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_v1i16:
|
||||
%x_vec = insertelement <4 x i16> undef, i16 %x, i64 0
|
||||
%y_vec = insertelement <4 x i16> undef, i16 %y, i64 0
|
||||
%prod_vec = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %y_vec)
|
||||
%acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
|
||||
%retval_vec = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod_vec)
|
||||
%retval = extractelement <4 x i16> %retval_vec, i64 0
|
||||
; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
|
||||
; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
|
||||
; CHECK-V81a-apple: sqrdmlsh.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
|
||||
ret i16 %retval
|
||||
}
|
||||
|
||||
; Scalar i32 sqrdmulh + sqsub expressed on vectors: %x, %y and %acc are each
; inserted into lane 0 of an otherwise-undef <4 x i32>, the two intrinsics are
; applied to the whole vectors, and lane 0 of the result is returned.
; The V8a check line expects a .4s sqrdmulh; the V81a check lines expect a
; .4s sqrdmlsh, in generic and Apple assembly syntax respectively.
define i32 @test_sqrdmlsh_v1i32(i32 %acc, i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_v1i32:
|
||||
%x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
|
||||
%y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
|
||||
%prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec, <4 x i32> %y_vec)
|
||||
%acc_vec = insertelement <4 x i32> undef, i32 %acc, i64 0
|
||||
%retval_vec = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc_vec, <4 x i32> %prod_vec)
|
||||
%retval = extractelement <4 x i32> %retval_vec, i64 0
|
||||
; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
; CHECK-V81a-apple: sqrdmlsh.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
|
||||
ret i32 %retval
|
||||
}
|
||||
; Pure scalar form: the i32 sqrdmulh intrinsic on %mhs and %rhs, followed by
; the i32 sqadd intrinsic with %acc.
; The V8a check line expects a scalar (s-register) sqrdmulh; the V81a check
; lines expect a scalar sqrdmlah.
define i32 @test_sqrdmlah_i32(i32 %acc, i32 %mhs, i32 %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlah_i32:
|
||||
%prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %rhs)
|
||||
%retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %prod)
|
||||
; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; CHECK-V81a: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; CHECK-V81a-apple: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
ret i32 %retval
|
||||
}
|
||||
|
||||
; Pure scalar form: the i32 sqrdmulh intrinsic on %mhs and %rhs, followed by
; the i32 sqsub intrinsic with %acc.
; The V8a check line expects a scalar (s-register) sqrdmulh; the V81a check
; lines expect a scalar sqrdmlsh.
define i32 @test_sqrdmlsh_i32(i32 %acc, i32 %mhs, i32 %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_i32:
|
||||
%prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %rhs)
|
||||
%retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %prod)
|
||||
; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; CHECK-V81a: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; CHECK-V81a-apple: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
ret i32 %retval
|
||||
}
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; RDMA Scalar, by element
|
||||
; i16 tests are covered by the tests in the section above, with IR in ACLE style
|
||||
; i32 tests are for i32_indexed in SIMDIndexedSQRDMLxHSDTied
|
||||
|
||||
; By-element i16 form on <4 x i16>: %shuffle replicates lane 1 of %y_vec into
; every lane, %x and %acc are inserted into lane 0 of otherwise-undef vectors,
; then sqrdmulh and sqadd are applied and lane 0 of the result is returned.
; All check lines expect the indexed operand v0 lane 1 (v0.h[1], or v0[1] in
; Apple syntax): sqrdmulh for the V8a line, sqrdmlah for the V81a lines.
define i16 @test_sqrdmlah_extract_i16(i16 %acc, i16 %x, <4 x i16> %y_vec) {
|
||||
; CHECK-LABEL: test_sqrdmlah_extract_i16:
|
||||
%shuffle = shufflevector <4 x i16> %y_vec, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1>
|
||||
%x_vec = insertelement <4 x i16> undef, i16 %x, i64 0
|
||||
%prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %shuffle)
|
||||
%acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
|
||||
%retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod)
|
||||
%retval = extractelement <4 x i16> %retval_vec, i32 0
|
||||
; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v0.h[1]
|
||||
; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v0.h[1]
|
||||
; CHECK-V81a-apple: sqrdmlah.4h {{v[0-9]+}}, {{v[0-9]+}}, v0[1]
|
||||
ret i16 %retval
|
||||
}
|
||||
|
||||
; By-element scalar i32 form: lane 3 of %rhs is extracted and fed, with %mhs,
; to the i32 sqrdmulh intrinsic; the product is then combined with %acc by the
; i32 sqadd intrinsic.
; All check lines expect the indexed operand v0 lane 3 (v0.s[3], or v0[3] in
; Apple syntax): sqrdmulh for the V8a line, sqrdmlah for the V81a lines.
define i32 @test_sqrdmlah_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlah_extract_i32:
|
||||
%extract = extractelement <4 x i32> %rhs, i32 3
|
||||
%prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %extract)
|
||||
%retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %prod)
|
||||
; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
|
||||
; CHECK-V81a: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
|
||||
; CHECK-V81a-apple: sqrdmlah.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3]
|
||||
ret i32 %retval
|
||||
}
|
||||
|
||||
; By-element i16 form on <8 x i16>: %shuffle replicates lane 1 of %y_vec into
; every lane, %x and %acc are inserted into lane 0 of otherwise-undef vectors,
; then sqrdmulh and sqsub are applied and lane 0 of the result is returned.
; All check lines expect the indexed operand v0 lane 1 (v0.h[1], or v0[1] in
; Apple syntax): .8h sqrdmulh for the V8a line, .8h sqrdmlsh for the V81a lines.
define i16 @test_sqrdmlshq_extract_i16(i16 %acc, i16 %x, <8 x i16> %y_vec) {
|
||||
; CHECK-LABEL: test_sqrdmlshq_extract_i16:
|
||||
%shuffle = shufflevector <8 x i16> %y_vec, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1,i32 1,i32 1,i32 1,i32 1>
|
||||
%x_vec = insertelement <8 x i16> undef, i16 %x, i64 0
|
||||
%prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x_vec, <8 x i16> %shuffle)
|
||||
%acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0
|
||||
%retval_vec = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod)
|
||||
%retval = extractelement <8 x i16> %retval_vec, i32 0
|
||||
; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v0.h[1]
|
||||
; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v0.h[1]
|
||||
; CHECK-V81a-apple: sqrdmlsh.8h {{v[0-9]+}}, {{v[0-9]+}}, v0[1]
|
||||
ret i16 %retval
|
||||
}
|
||||
|
||||
; By-element scalar i32 form: lane 3 of %rhs is extracted and fed, with %mhs,
; to the i32 sqrdmulh intrinsic; the product is then combined with %acc by the
; i32 sqsub intrinsic.
; All check lines expect the indexed operand v0 lane 3 (v0.s[3], or v0[3] in
; Apple syntax): sqrdmulh for the V8a line, sqrdmlsh for the V81a lines.
define i32 @test_sqrdmlsh_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: test_sqrdmlsh_extract_i32:
|
||||
%extract = extractelement <4 x i32> %rhs, i32 3
|
||||
%prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %extract)
|
||||
%retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %prod)
|
||||
; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
|
||||
; CHECK-V81a: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
|
||||
; CHECK-V81a-apple: sqrdmlsh.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3]
|
||||
ret i32 %retval
|
||||
}
|
154
test/MC/AArch64/armv8.1a-rdma.s
Normal file
154
test/MC/AArch64/armv8.1a-rdma.s
Normal file
@ -0,0 +1,154 @@
|
||||
// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a -show-encoding < %s 2> %t | FileCheck %s
|
||||
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
|
||||
.text
|
||||
|
||||
// AdvSIMD RDMA vector: valid forms first, then invalid arrangements that must be rejected
|
||||
sqrdmlah v0.4h, v1.4h, v2.4h
|
||||
sqrdmlsh v0.4h, v1.4h, v2.4h
|
||||
sqrdmlah v0.2s, v1.2s, v2.2s
|
||||
sqrdmlsh v0.2s, v1.2s, v2.2s
|
||||
sqrdmlah v0.4s, v1.4s, v2.4s
|
||||
sqrdmlsh v0.4s, v1.4s, v2.4s
|
||||
sqrdmlah v0.8h, v1.8h, v2.8h
|
||||
sqrdmlsh v0.8h, v1.8h, v2.8h
|
||||
// CHECK: sqrdmlah v0.4h, v1.4h, v2.4h // encoding: [0x20,0x84,0x42,0x2e]
|
||||
// CHECK: sqrdmlsh v0.4h, v1.4h, v2.4h // encoding: [0x20,0x8c,0x42,0x2e]
|
||||
// CHECK: sqrdmlah v0.2s, v1.2s, v2.2s // encoding: [0x20,0x84,0x82,0x2e]
|
||||
// CHECK: sqrdmlsh v0.2s, v1.2s, v2.2s // encoding: [0x20,0x8c,0x82,0x2e]
|
||||
// CHECK: sqrdmlah v0.4s, v1.4s, v2.4s // encoding: [0x20,0x84,0x82,0x6e]
|
||||
// CHECK: sqrdmlsh v0.4s, v1.4s, v2.4s // encoding: [0x20,0x8c,0x82,0x6e]
|
||||
// CHECK: sqrdmlah v0.8h, v1.8h, v2.8h // encoding: [0x20,0x84,0x42,0x6e]
|
||||
// CHECK: sqrdmlsh v0.8h, v1.8h, v2.8h // encoding: [0x20,0x8c,0x42,0x6e]
|
||||
|
||||
sqrdmlah v0.2h, v1.2h, v2.2h
|
||||
sqrdmlsh v0.2h, v1.2h, v2.2h
|
||||
sqrdmlah v0.8s, v1.8s, v2.8s
|
||||
sqrdmlsh v0.8s, v1.8s, v2.8s
|
||||
sqrdmlah v0.2s, v1.4h, v2.8h
|
||||
sqrdmlsh v0.4s, v1.8h, v2.2s
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid vector kind qualifier
|
||||
// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlah v0.2s, v1.4h, v2.8h
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlsh v0.4s, v1.8h, v2.2s
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
// AdvSIMD RDMA scalar: valid h- and s-register forms
|
||||
sqrdmlah h0, h1, h2
|
||||
sqrdmlsh h0, h1, h2
|
||||
sqrdmlah s0, s1, s2
|
||||
sqrdmlsh s0, s1, s2
|
||||
// CHECK: sqrdmlah h0, h1, h2 // encoding: [0x20,0x84,0x42,0x7e]
|
||||
// CHECK: sqrdmlsh h0, h1, h2 // encoding: [0x20,0x8c,0x42,0x7e]
|
||||
// CHECK: sqrdmlah s0, s1, s2 // encoding: [0x20,0x84,0x82,0x7e]
|
||||
// CHECK: sqrdmlsh s0, s1, s2 // encoding: [0x20,0x8c,0x82,0x7e]
|
||||
|
||||
// AdvSIMD RDMA vector by-element: valid forms first, then invalid operands and an out-of-range lane
|
||||
sqrdmlah v0.4h, v1.4h, v2.h[3]
|
||||
sqrdmlsh v0.4h, v1.4h, v2.h[3]
|
||||
sqrdmlah v0.2s, v1.2s, v2.s[1]
|
||||
sqrdmlsh v0.2s, v1.2s, v2.s[1]
|
||||
sqrdmlah v0.8h, v1.8h, v2.h[3]
|
||||
sqrdmlsh v0.8h, v1.8h, v2.h[3]
|
||||
sqrdmlah v0.4s, v1.4s, v2.s[3]
|
||||
sqrdmlsh v0.4s, v1.4s, v2.s[3]
|
||||
// CHECK: sqrdmlah v0.4h, v1.4h, v2.h[3] // encoding: [0x20,0xd0,0x72,0x2f]
|
||||
// CHECK: sqrdmlsh v0.4h, v1.4h, v2.h[3] // encoding: [0x20,0xf0,0x72,0x2f]
|
||||
// CHECK: sqrdmlah v0.2s, v1.2s, v2.s[1] // encoding: [0x20,0xd0,0xa2,0x2f]
|
||||
// CHECK: sqrdmlsh v0.2s, v1.2s, v2.s[1] // encoding: [0x20,0xf0,0xa2,0x2f]
|
||||
// CHECK: sqrdmlah v0.8h, v1.8h, v2.h[3] // encoding: [0x20,0xd0,0x72,0x6f]
|
||||
// CHECK: sqrdmlsh v0.8h, v1.8h, v2.h[3] // encoding: [0x20,0xf0,0x72,0x6f]
|
||||
// CHECK: sqrdmlah v0.4s, v1.4s, v2.s[3] // encoding: [0x20,0xd8,0xa2,0x6f]
|
||||
// CHECK: sqrdmlsh v0.4s, v1.4s, v2.s[3] // encoding: [0x20,0xf8,0xa2,0x6f]
|
||||
|
||||
sqrdmlah v0.4s, v1.2s, v2.s[1]
|
||||
sqrdmlsh v0.2s, v1.2d, v2.s[1]
|
||||
sqrdmlah v0.8h, v1.8h, v2.s[3]
|
||||
sqrdmlsh v0.8h, v1.8h, v2.h[8]
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlah v0.4s, v1.2s, v2.s[1]
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlsh v0.2s, v1.2d, v2.s[1]
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlah v0.8h, v1.8h, v2.s[3]
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: vector lane must be an integer in range [0, 7].
|
||||
// CHECK-ERROR: sqrdmlsh v0.8h, v1.8h, v2.h[8]
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
// AdvSIMD RDMA scalar by-element: valid forms first, then invalid operands and an out-of-range lane
|
||||
sqrdmlah h0, h1, v2.h[3]
|
||||
sqrdmlsh h0, h1, v2.h[3]
|
||||
sqrdmlah s0, s1, v2.s[3]
|
||||
sqrdmlsh s0, s1, v2.s[3]
|
||||
// CHECK: sqrdmlah h0, h1, v2.h[3] // encoding: [0x20,0xd0,0x72,0x7f]
|
||||
// CHECK: sqrdmlsh h0, h1, v2.h[3] // encoding: [0x20,0xf0,0x72,0x7f]
|
||||
// CHECK: sqrdmlah s0, s1, v2.s[3] // encoding: [0x20,0xd8,0xa2,0x7f]
|
||||
// CHECK: sqrdmlsh s0, s1, v2.s[3] // encoding: [0x20,0xf8,0xa2,0x7f]
|
||||
|
||||
sqrdmlah b0, h1, v2.h[3]
|
||||
sqrdmlah s0, d1, v2.s[3]
|
||||
sqrdmlsh h0, h1, v2.s[3]
|
||||
sqrdmlsh s0, s1, v2.s[4]
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlah b0, h1, v2.h[3]
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlah s0, d1, v2.s[3]
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmlsh h0, h1, v2.s[3]
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: vector lane must be an integer in range [0, 3].
|
||||
// CHECK-ERROR: sqrdmlsh s0, s1, v2.s[4]
|
||||
// CHECK-ERROR: ^
|
129
test/MC/Disassembler/AArch64/armv8.1a-rdma.txt
Normal file
129
test/MC/Disassembler/AArch64/armv8.1a-rdma.txt
Normal file
@ -0,0 +1,129 @@
|
||||
# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s 2>&1 | FileCheck %s
|
||||
|
||||
[0x20,0x84,0x02,0x2e] # sqrdmlah v0.8b, v1.8b, v2.8b
|
||||
[0x20,0x8c,0x02,0x2e] # sqrdmlsh v0.8b, v1.8b, v2.8b
|
||||
[0x20,0x84,0xc2,0x2e] # sqrdmlah v0.1d, v1.1d, v2.1d
|
||||
[0x20,0x8c,0xc2,0x2e] # sqrdmlsh v0.1d, v1.1d, v2.1d
|
||||
[0x20,0x84,0x02,0x6e] # sqrdmlah v0.16b, v1.16b, v2.16b
|
||||
[0x20,0x8c,0x02,0x6e] # sqrdmlsh v0.16b, v1.16b, v2.16b
|
||||
[0x20,0x84,0xc2,0x6e] # sqrdmlah v0.2d, v1.2d, v2.2d
|
||||
[0x20,0x8c,0xc2,0x6e] # sqrdmlsh v0.2d, v1.2d, v2.2d
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x84,0x02,0x2e]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x8c,0x02,0x2e]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x84,0xc2,0x2e]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x8c,0xc2,0x2e]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x84,0x02,0x6e]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x8c,0x02,0x6e]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x84,0xc2,0x6e]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x8c,0xc2,0x6e]
|
||||
|
||||
[0x20,0x84,0x02,0x7e] # sqrdmlah b0, b1, b2
|
||||
[0x20,0x8c,0x02,0x7e] # sqrdmlsh b0, b1, b2
|
||||
[0x20,0x84,0xc2,0x7e] # sqrdmlah d0, d1, d2
|
||||
[0x20,0x8c,0xc2,0x7e] # sqrdmlsh d0, d1, d2
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x84,0x02,0x7e]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x8c,0x02,0x7e]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x84,0xc2,0x7e]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0x8c,0xc2,0x7e]
|
||||
|
||||
[0x20,0xd0,0x32,0x2f] # sqrdmlah v0.8b, v1.8b, v2.b[3]
|
||||
[0x20,0xf0,0x32,0x2f] # sqrdmlsh v0.8b, v1.8b, v2.b[3]
|
||||
[0x20,0xd0,0xe2,0x2f] # sqrdmlah v0.1d, v1.1d, v2.d[1]
|
||||
[0x20,0xf0,0xe2,0x2f] # sqrdmlsh v0.1d, v1.1d, v2.d[1]
|
||||
[0x20,0xd0,0x32,0x6f] # sqrdmlah v0.16b, v1.16b, v2.b[3]
|
||||
[0x20,0xf0,0x32,0x6f] # sqrdmlsh v0.16b, v1.16b, v2.b[3]
|
||||
[0x20,0xd8,0xe2,0x6f] # sqrdmlah v0.2d, v1.2d, v2.d[3]
|
||||
[0x20,0xf8,0xe2,0x6f] # sqrdmlsh v0.2d, v1.2d, v2.d[3]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xd0,0x32,0x2f]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xf0,0x32,0x2f]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xd0,0xe2,0x2f]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xf0,0xe2,0x2f]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xd0,0x32,0x6f]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xf0,0x32,0x6f]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xd8,0xe2,0x6f]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xf8,0xe2,0x6f]
|
||||
|
||||
[0x20,0xd0,0x32,0x7f] # sqrdmlah b0, b1, v2.b[3]
|
||||
[0x20,0xf0,0x32,0x7f] # sqrdmlsh b0, b1, v2.b[3]
|
||||
[0x20,0xd8,0xe2,0x7f] # sqrdmlah d0, d1, v2.d[3]
|
||||
[0x20,0xf8,0xe2,0x7f] # sqrdmlsh d0, d1, v2.d[3]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xd0,0x32,0x7f]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xf0,0x32,0x7f]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xd8,0xe2,0x7f]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK: [0x20,0xf8,0xe2,0x7f]
|
||||
|
||||
[0x20,0x84,0x42,0x2e]
|
||||
[0x20,0x8c,0x42,0x2e]
|
||||
[0x20,0x84,0x82,0x2e]
|
||||
[0x20,0x8c,0x82,0x2e]
|
||||
[0x20,0x84,0x42,0x6e]
|
||||
[0x20,0x8c,0x42,0x6e]
|
||||
[0x20,0x84,0x82,0x6e]
|
||||
[0x20,0x8c,0x82,0x6e]
|
||||
# CHECK: sqrdmlah v0.4h, v1.4h, v2.4h
|
||||
# CHECK: sqrdmlsh v0.4h, v1.4h, v2.4h
|
||||
# CHECK: sqrdmlah v0.2s, v1.2s, v2.2s
|
||||
# CHECK: sqrdmlsh v0.2s, v1.2s, v2.2s
|
||||
# CHECK: sqrdmlah v0.8h, v1.8h, v2.8h
|
||||
# CHECK: sqrdmlsh v0.8h, v1.8h, v2.8h
|
||||
# CHECK: sqrdmlah v0.4s, v1.4s, v2.4s
|
||||
# CHECK: sqrdmlsh v0.4s, v1.4s, v2.4s
|
||||
|
||||
[0x20,0x84,0x42,0x7e]
|
||||
[0x20,0x8c,0x42,0x7e]
|
||||
[0x20,0x84,0x82,0x7e]
|
||||
[0x20,0x8c,0x82,0x7e]
|
||||
# CHECK: sqrdmlah h0, h1, h2
|
||||
# CHECK: sqrdmlsh h0, h1, h2
|
||||
# CHECK: sqrdmlah s0, s1, s2
|
||||
# CHECK: sqrdmlsh s0, s1, s2
|
||||
|
||||
0x20,0xd0,0x72,0x2f
|
||||
0x20,0xf0,0x72,0x2f
|
||||
0x20,0xd0,0xa2,0x2f
|
||||
0x20,0xf0,0xa2,0x2f
|
||||
0x20,0xd0,0x72,0x6f
|
||||
0x20,0xf0,0x72,0x6f
|
||||
0x20,0xd8,0xa2,0x6f
|
||||
0x20,0xf8,0xa2,0x6f
|
||||
# CHECK: sqrdmlah v0.4h, v1.4h, v2.h[3]
|
||||
# CHECK: sqrdmlsh v0.4h, v1.4h, v2.h[3]
|
||||
# CHECK: sqrdmlah v0.2s, v1.2s, v2.s[1]
|
||||
# CHECK: sqrdmlsh v0.2s, v1.2s, v2.s[1]
|
||||
# CHECK: sqrdmlah v0.8h, v1.8h, v2.h[3]
|
||||
# CHECK: sqrdmlsh v0.8h, v1.8h, v2.h[3]
|
||||
# CHECK: sqrdmlah v0.4s, v1.4s, v2.s[3]
|
||||
# CHECK: sqrdmlsh v0.4s, v1.4s, v2.s[3]
|
||||
|
||||
0x20,0xd0,0x72,0x7f
|
||||
0x20,0xf0,0x72,0x7f
|
||||
0x20,0xd8,0xa2,0x7f
|
||||
0x20,0xf8,0xa2,0x7f
|
||||
# CHECK: sqrdmlah h0, h1, v2.h[3]
|
||||
# CHECK: sqrdmlsh h0, h1, v2.h[3]
|
||||
# CHECK: sqrdmlah s0, s1, v2.s[3]
|
||||
# CHECK: sqrdmlsh s0, s1, v2.s[3]
|
Loading…
Reference in New Issue
Block a user