1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[AArch64][SVE] Put zeroing pseudos and patterns under flag.

This patch puts the _ZERO pseudos and corresponding patterns
under the predicate 'UseExperimentalZeroingPseudos', so that they
can be enabled/disabled through compile flags.

This is done because the zeroing pseudos use MOVPRFX to do merging of
the inactive lanes, but it depends on the uarch whether this operation
is actually merged with the destructive operation. If not, it may be
more profitable to use a SELECT and to give the compiler the freedom to
schedule these instructions as normal, rather than keeping them bundled
together. Additionally, this feature is not yet fully implemented and
there are still known bugs (see D80410) that need to be resolved before
the 'experimental' can be dropped from the name.

Reviewers: paulwalker-arm, cameron.mcinally, efriedma

Reviewed By: paulwalker-arm

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82780
This commit is contained in:
Sander de Smalen 2020-07-02 14:14:24 +01:00
parent cee41879a1
commit 67ab949978
9 changed files with 81 additions and 43 deletions

View File

@ -103,6 +103,24 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
"Enable Scalable Vector Extension (SVE) instructions", [FeatureFullFP16]>;
// This flag is currently still labeled as Experimental, but when fully
// implemented this should tell the compiler to use the zeroing pseudos to
// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
// lanes are known to be zero. The pseudos will then be expanded using the
// MOVPRFX instruction to zero the inactive lanes. This feature should only be
// enabled if MOVPRFX instructions are known to merge with the destructive
// operations they prefix.
//
// This feature could similarly be extended to support cheap merging of _any_
// value into the inactive lanes using the MOVPRFX instruction that uses
// merging-predication.
def FeatureExperimentalZeroingPseudos
: SubtargetFeature<"use-experimental-zeroing-pseudos",
"UseExperimentalZeroingPseudos", "true",
"Hint to the compiler that the MOVPRFX instruction is "
"merged with destructive operations",
[]>;
def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
"Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;

View File

@ -155,6 +155,8 @@ def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
: Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
: Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

View File

@ -226,9 +226,11 @@ let Predicates = [HasSVE] in {
defm ADD_ZPZZ : sve_int_bin_pred_bhsd<AArch64add_p>;
defm ADD_ZPZZ : sve_int_bin_pred_zx<int_aarch64_sve_add>;
defm SUB_ZPZZ : sve_int_bin_pred_zx<int_aarch64_sve_sub>;
defm SUBR_ZPZZ : sve_int_bin_pred_zx<int_aarch64_sve_subr>;
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_sub>;
defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_subr>;
}
defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_orr>;
defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_eor>;
@ -354,18 +356,20 @@ let Predicates = [HasSVE] in {
defm FADD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fadd_p>;
defm FADD_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fadd>;
defm FSUB_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsub>;
defm FMUL_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmul>;
defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsubr>;
defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmaxnm>;
defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fminnm>;
defm FMAX_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmax>;
defm FMIN_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmin>;
defm FABD_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fabd>;
defm FMULX_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmulx>;
defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdivr>;
defm FDIV_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdiv>;
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
defm FMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmul>;
defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsubr>;
defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmaxnm>;
defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fminnm>;
defm FMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmax>;
defm FMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmin>;
defm FABD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fabd>;
defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmulx>;
defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdivr>;
defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
}
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
@ -1260,10 +1264,12 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
defm ASR_ZPZZ : sve_int_bin_pred_zx<AArch64asr_m1>;
defm LSR_ZPZZ : sve_int_bin_pred_zx<AArch64lsr_m1>;
defm LSL_ZPZZ : sve_int_bin_pred_zx<AArch64lsl_m1>;
defm ASRD_ZPZI : sve_int_bin_pred_shift_0_right_zx<int_aarch64_sve_asrd>;
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64asr_m1>;
defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64lsr_m1>;
defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64lsl_m1>;
defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
}
defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_m1, "ASRR_ZPmZ", 1>;
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_m1, "LSRR_ZPmZ", 1>;
@ -2289,6 +2295,14 @@ let Predicates = [HasSVE2] in {
defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>;
defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
let Predicates = [HasSVE2, UseExperimentalZeroingPseudos] in {
defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_srshr>;
defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_urshr>;
defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_sqshlu>;
}
// SVE2 integer add/subtract long
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt", int_aarch64_sve_saddlt>;

View File

@ -104,6 +104,10 @@ protected:
bool HasPAN_RWV = false;
bool HasCCPP = false;
// SVE extensions
bool HasSVE = false;
bool UseExperimentalZeroingPseudos = false;
// Armv8.2 Crypto extensions
bool HasSM4 = false;
bool HasSHA3 = false;
@ -130,8 +134,6 @@ protected:
bool HasRCPC_IMMO = false;
bool HasLSLFast = false;
bool HasSVE = false;
bool HasSVE2 = false;
bool HasRCPC = false;
bool HasAggressiveFMA = false;
@ -158,6 +160,7 @@ protected:
bool HasEnhancedCounterVirtualization = false;
// Arm SVE2 extensions
bool HasSVE2 = false;
bool HasSVE2AES = false;
bool HasSVE2SM4 = false;
bool HasSVE2SHA3 = false;
@ -398,6 +401,10 @@ public:
unsigned getWideningBaseCost() const { return WideningBaseCost; }
bool useExperimentalZeroingPseudos() const {
return UseExperimentalZeroingPseudos;
}
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;

View File

@ -1596,7 +1596,7 @@ multiclass sve_fp_2op_p_zds_fscale<bits<4> opc, string asm,
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_2op_p_zds_zx<SDPatternOperator op> {
multiclass sve_fp_2op_p_zds_zeroing_hsd<SDPatternOperator op> {
def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
def _ZERO_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
@ -4764,27 +4764,24 @@ multiclass sve2_int_bin_pred_shift_imm_left<bits<4> opc, string asm,
let Inst{9-8} = imm{4-3};
}
def _B_Z_UNDEF : PredTwoOpImmPseudo<psName # _B, ZPR8, tvecshiftL8, FalseLanesUndef>;
def _H_Z_UNDEF : PredTwoOpImmPseudo<psName # _H, ZPR16, tvecshiftL16, FalseLanesUndef>;
def _S_Z_UNDEF : PredTwoOpImmPseudo<psName # _S, ZPR32, tvecshiftL32, FalseLanesUndef>;
def _D_Z_UNDEF : PredTwoOpImmPseudo<psName # _D, ZPR64, tvecshiftL64, FalseLanesUndef>;
def _B_Z_ZERO : PredTwoOpImmPseudo<psName # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
def _H_Z_ZERO : PredTwoOpImmPseudo<psName # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
def _S_Z_ZERO : PredTwoOpImmPseudo<psName # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
def _D_Z_ZERO : PredTwoOpImmPseudo<psName # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _B_Z_ZERO)>;
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _H_Z_ZERO)>;
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _S_Z_ZERO)>;
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _D_Z_ZERO)>;
def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _ZERO_B)>;
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _ZERO_H)>;
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _ZERO_S)>;
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _ZERO_D)>;
}
multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
SDPatternOperator op = null_frag> {
def _B : SVEPseudo2Instr<Ps # _B, 1>,
@ -4809,7 +4806,7 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_bin_pred_shift_0_right_zx<SDPatternOperator op = null_frag> {
multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op = null_frag> {
def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftR8, FalseLanesZero>;
def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftR16, FalseLanesZero>;
def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>;
@ -4863,7 +4860,7 @@ multiclass sve_int_bin_pred_shift<bits<3> opc, string asm, string Ps,
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_bin_pred_zx<SDPatternOperator op> {
multiclass sve_int_bin_pred_zeroing_bhsd<SDPatternOperator op> {
def _ZERO_B : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesZero>;
def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=sve < %s 2>%t | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; WARN-NOT: warning

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; WARN-NOT: warning

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; WARN-NOT: warning

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
;
; SQSHLU