1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[AArch64] Adding SHA3 Intrinsics support

This patch adds the following SHA3 Intrinsics:
        vsha512hq_u64
        vsha512h2q_u64
        vsha512su0q_u64
        vsha512su1q_u64
        veor3q_u8
        veor3q_u16
        veor3q_u32
        veor3q_u64
        veor3q_s8
        veor3q_s16
        veor3q_s32
        veor3q_s64
        vrax1q_u64
        vxarq_u64
        vbcaxq_u8
        vbcaxq_u16
        vbcaxq_u32
        vbcaxq_u64
        vbcaxq_s8
        vbcaxq_s16
        vbcaxq_s32
        vbcaxq_s64

    Note: both +sha3 and +crypto need to be enabled when building from the front-end.

Reviewed By: DavidSpickett

Differential Revision: https://reviews.llvm.org/D96381
This commit is contained in:
Ryan Santhiraraja 2021-02-22 11:23:31 +00:00 committed by David Spickett
parent d8fddd2027
commit 47e298740e
4 changed files with 330 additions and 0 deletions

View File

@ -715,6 +715,31 @@ let TargetPrefix = "aarch64" in {
: DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
// Two-operand SHA512 intrinsic: (v2i64, v2i64) -> v2i64, marked IntrNoMem.
class Crypto_SHA512_2Arg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_v2i64_ty],
                          [llvm_v2i64_ty, llvm_v2i64_ty],
                          [IntrNoMem]>;
// Three-operand SHA512 intrinsic: (v2i64, v2i64, v2i64) -> v2i64, marked
// IntrNoMem.
class Crypto_SHA512_3Arg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_v2i64_ty],
                          [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
                          [IntrNoMem]>;
// Three-operand SHA3 intrinsic, overloaded on the vector type: the result is
// llvm_anyvector_ty and every operand must match the result type. Marked
// IntrNoMem.
class Crypto_SHA3_3Arg_Intrinsic
  : DefaultAttrsIntrinsic<
        [llvm_anyvector_ty],
        [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
        [IntrNoMem]>;
// Two-operand SHA3 intrinsic fixed to v2i64: (v2i64, v2i64) -> v2i64, marked
// IntrNoMem.
class Crypto_SHA3_2Arg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_v2i64_ty],
                          [llvm_v2i64_ty, llvm_v2i64_ty],
                          [IntrNoMem]>;
// SHA3 intrinsic taking two v2i64 vector operands followed by one i64
// immediate (three arguments in total) and returning v2i64. ImmArg on
// argument index 2 requires that last operand to be an immediate.
class Crypto_SHA3_2ArgImm_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_v2i64_ty],
                          [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i64_ty],
                          [IntrNoMem, ImmArg<ArgIndex<2>>]>;
// SM3 intrinsic taking three v4i32 vectors and returning v4i32, marked
// IntrNoMem. Note that this class derives from Intrinsic, not
// DefaultAttrsIntrinsic.
class Crypto_SM3_3Vector_Intrinsic
: Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
@ -748,6 +773,20 @@ def int_aarch64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic;
def int_aarch64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic;
def int_aarch64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic;
//SHA3
// eor3 and bcax each come in an `s` and a `u` flavour (presumably for the
// signed and unsigned front-end intrinsics - confirm against the front-end).
// All four use the type-overloaded three-operand class. rax1 takes two v2i64
// operands; xar takes two v2i64 operands plus an immediate.
def int_aarch64_crypto_eor3s : Crypto_SHA3_3Arg_Intrinsic;
def int_aarch64_crypto_eor3u : Crypto_SHA3_3Arg_Intrinsic;
def int_aarch64_crypto_bcaxs : Crypto_SHA3_3Arg_Intrinsic;
def int_aarch64_crypto_bcaxu : Crypto_SHA3_3Arg_Intrinsic;
def int_aarch64_crypto_rax1 : Crypto_SHA3_2Arg_Intrinsic;
def int_aarch64_crypto_xar : Crypto_SHA3_2ArgImm_Intrinsic;
// SHA512
// All operands and results are v2i64. sha512su0 is the only two-operand
// intrinsic in this group; the others take three operands.
def int_aarch64_crypto_sha512h : Crypto_SHA512_3Arg_Intrinsic;
def int_aarch64_crypto_sha512h2 : Crypto_SHA512_3Arg_Intrinsic;
def int_aarch64_crypto_sha512su0 : Crypto_SHA512_2Arg_Intrinsic;
def int_aarch64_crypto_sha512su1 : Crypto_SHA512_3Arg_Intrinsic;
//SM3 & SM4
def int_aarch64_crypto_sm3partw1 : Crypto_SM3_3Vector_Intrinsic;
def int_aarch64_crypto_sm3partw2 : Crypto_SM3_3Vector_Intrinsic;

View File

@ -890,6 +890,12 @@ def imm0_63 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_63Operand;
}
// timm0_63 predicate - True if the target-constant immediate is in the range
// [0,63]. The value is compared as unsigned, so negative immediates are
// rejected as well.
def timm0_63 : Operand<i64>, TImmLeaf<i64, [{
  return static_cast<uint64_t>(Imm) < 64;
}]> {
  let ParserMatchClass = Imm0_63Operand;
}
// imm0_31 predicate - True if the immediate is in the range [0,31]
def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
return ((uint64_t)Imm) < 32;

View File

@ -924,6 +924,45 @@ def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">;
def EOR3 : CryptoRRRR_16B<0b00, "eor3">;
def BCAX : CryptoRRRR_16B<0b01, "bcax">;
def XAR : CryptoRRRi6<"xar">;
// Selection pattern for a three-operand crypto intrinsic: a call to OpNode
// whose result and three V128 operands all have type VecTy is selected to
// INST with the same three operands in the same order.
class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
  : Pat<(VecTy (OpNode (VecTy V128:$Vd),
                       (VecTy V128:$Vn),
                       (VecTy V128:$Vm))),
        (INST (VecTy V128:$Vd),
              (VecTy V128:$Vn),
              (VecTy V128:$Vm))>;
// SHA512 selection patterns. sha512su0 has only two operands, so it cannot
// use the three-operand SHA3_pattern helper and is spelled out as a plain Pat.
def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
(SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
// The three-operand SHA512 intrinsics reuse SHA3_pattern with v2i64.
def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;
// EOR3 and BCAX selection patterns. The same instruction is selected for
// every 128-bit integer vector type and for both the `u` and `s` intrinsic
// flavours, so each loop instantiates SHA3_pattern once per element type for
// one intrinsic.
foreach Ty = [v16i8, v8i16, v4i32, v2i64] in
  def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, Ty>;

foreach Ty = [v16i8, v8i16, v4i32, v2i64] in
  def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, Ty>;

foreach Ty = [v16i8, v8i16, v4i32, v2i64] in
  def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, Ty>;

foreach Ty = [v16i8, v8i16, v4i32, v2i64] in
  def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, Ty>;
// rax1: two v2i64 operands, selected directly to RAX1.
def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
(RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
// xar: two v2i64 operands plus an immediate. The immediate is matched with
// timm0_63, so only values in [0,63] select XAR, and it is forwarded to the
// instruction unchanged.
def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
(XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
} // HasSHA3
let Predicates = [HasSM4] in {

View File

@ -0,0 +1,246 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+sha3 -o - | FileCheck %s
; llvm.aarch64.crypto.sha512h: the call is expected to become one sha512h
; instruction followed by ret.
define <2 x i64> @test_vsha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_vsha512h:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sha512h q0, q1, v2.2d
; CHECK-NEXT: ret
entry:
%vsha512h.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
ret <2 x i64> %vsha512h.i
}
; llvm.aarch64.crypto.sha512h2: the call is expected to become one sha512h2
; instruction followed by ret.
define <2 x i64> @test_vsha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_vsha512h2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sha512h2 q0, q1, v2.2d
; CHECK-NEXT: ret
entry:
%vsha512h2.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
ret <2 x i64> %vsha512h2.i
}
; llvm.aarch64.crypto.sha512su0 (two operands): the call is expected to become
; one sha512su0 instruction followed by ret.
define <2 x i64> @test_vsha512su0(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsha512su0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sha512su0 v0.2d, v1.2d
; CHECK-NEXT: ret
entry:
%vsha512su0.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %vsha512su0.i
}
; llvm.aarch64.crypto.sha512su1: the call is expected to become one sha512su1
; instruction followed by ret.
define <2 x i64> @test_vsha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_vsha512su1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sha512su1 v0.2d, v1.2d, v2.2d
; CHECK-NEXT: ret
entry:
%vsha512su1.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
ret <2 x i64> %vsha512su1.i
}
; llvm.aarch64.crypto.rax1: the call is expected to become one rax1
; instruction followed by ret.
define <2 x i64> @test_vrax1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vrax1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rax1 v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
entry:
%vrax1.i = tail call <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %vrax1.i
}
; llvm.aarch64.crypto.xar with the constant immediate 1: the call is expected
; to become one xar instruction carrying #1, followed by ret.
define <2 x i64> @test_vxar(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vxar:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: xar v0.2d, v0.2d, v1.2d, #1
; CHECK-NEXT: ret
entry:
%vxar.i = tail call <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64> %a, <2 x i64> %b, i64 1)
ret <2 x i64> %vxar.i
}
; bcaxu on <16 x i8>: the call is expected to become one bcax instruction on
; the .16b arrangement, followed by ret.
define <16 x i8> @test_bcax_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_bcax_8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
ret <16 x i8> %vbcax_8.i
}
; eor3u on <16 x i8>: the call is expected to become one eor3 instruction on
; the .16b arrangement, followed by ret.
define <16 x i8> @test_eor3_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_eor3_8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
ret <16 x i8> %veor3_8.i
}
; bcaxs on <16 x i8>: the `s` flavour is expected to produce the same bcax
; .16b instruction as the `u` flavour, followed by ret.
define <16 x i8> @test_bcax_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_bcax_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
ret <16 x i8> %vbcax_8.i
}
; eor3s on <16 x i8>: the `s` flavour is expected to produce the same eor3
; .16b instruction as the `u` flavour, followed by ret.
define <16 x i8> @test_eor3_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_eor3_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3s.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
ret <16 x i8> %veor3_8.i
}
; bcaxu on <8 x i16>: the call is expected to become one bcax instruction on
; the .16b arrangement, followed by ret.
define <8 x i16> @test_bcax_16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_bcax_16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%vbcax_16.i = tail call <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
ret <8 x i16> %vbcax_16.i
}
; eor3u on <8 x i16>: the call is expected to become one eor3 instruction on
; the .16b arrangement, followed by ret.
define <8 x i16> @test_eor3_16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_eor3_16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%veor3_16.i = tail call <8 x i16> @llvm.aarch64.crypto.eor3u.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
ret <8 x i16> %veor3_16.i
}
; bcaxs on <8 x i16>: the call is expected to become one bcax instruction on
; the .16b arrangement, followed by ret.
define <8 x i16> @test_bcax_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_bcax_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%vbcax_16.i = tail call <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
ret <8 x i16> %vbcax_16.i
}
; eor3s on <8 x i16>: the call is expected to become one eor3 instruction on
; the .16b arrangement, followed by ret.
define <8 x i16> @test_eor3_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_eor3_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%veor3_16.i = tail call <8 x i16> @llvm.aarch64.crypto.eor3s.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
ret <8 x i16> %veor3_16.i
}
; bcaxu on <4 x i32>: the call is expected to become one bcax instruction on
; the .16b arrangement, followed by ret.
define <4 x i32> @test_bcax_32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_bcax_32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%vbcax_32.i = tail call <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
ret <4 x i32> %vbcax_32.i
}
; eor3u on <4 x i32>: the call is expected to become one eor3 instruction on
; the .16b arrangement, followed by ret.
define <4 x i32> @test_eor3_32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_eor3_32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%veor3_32.i = tail call <4 x i32> @llvm.aarch64.crypto.eor3u.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
ret <4 x i32> %veor3_32.i
}
; bcaxs on <4 x i32>: the call is expected to become one bcax instruction on
; the .16b arrangement, followed by ret.
define <4 x i32> @test_bcax_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_bcax_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%vbcax_32.i = tail call <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
ret <4 x i32> %vbcax_32.i
}
; eor3s on <4 x i32>: the call is expected to become one eor3 instruction on
; the .16b arrangement, followed by ret.
define <4 x i32> @test_eor3_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_eor3_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%veor3_32.i = tail call <4 x i32> @llvm.aarch64.crypto.eor3s.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
ret <4 x i32> %veor3_32.i
}
; bcaxu on <2 x i64>: the call is expected to become one bcax instruction on
; the .16b arrangement, followed by ret.
define <2 x i64> @test_bcax_64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_bcax_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%vbcax_64.i = tail call <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
ret <2 x i64> %vbcax_64.i
}
; eor3u on <2 x i64>: the call is expected to become one eor3 instruction on
; the .16b arrangement, followed by ret.
define <2 x i64> @test_eor3_64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_eor3_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%veor3_64.i = tail call <2 x i64> @llvm.aarch64.crypto.eor3u.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
ret <2 x i64> %veor3_64.i
}
; bcaxs on <2 x i64>: the call is expected to become one bcax instruction on
; the .16b arrangement, followed by ret.
define <2 x i64> @test_bcax_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_bcax_s64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%vbcax_64.i = tail call <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
ret <2 x i64> %vbcax_64.i
}
; eor3s on <2 x i64>: the call is expected to become one eor3 instruction on
; the .16b arrangement, followed by ret.
define <2 x i64> @test_eor3_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_eor3_s64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
%veor3_64.i = tail call <2 x i64> @llvm.aarch64.crypto.eor3s.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
ret <2 x i64> %veor3_64.i
}
; Declarations of the intrinsics called by the tests in this file.
; The sha512*, rax1 and xar intrinsics are fixed to <2 x i64>; the last
; operand of xar is declared immarg.
declare <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64>, <2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64>, <2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64>, <2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64>, <2 x i64>, i64 immarg)
; bcax/eor3 are overloaded on the vector type, so each declaration carries a
; type suffix (.v16i8, .v8i16, .v4i32, .v2i64) for both the `u` and `s`
; flavours.
declare <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.crypto.eor3u.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.crypto.eor3u.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.crypto.eor3u.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.aarch64.crypto.eor3s.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.crypto.eor3s.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.crypto.eor3s.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.crypto.eor3s.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)