mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[AArch64][SVE] Put zeroing pseudos and patterns under flag.
This patch puts the _ZERO pseudos and corresponding patterns under the predicate 'UseExperimentalZeroingPseudos', so that they can be enabled/disabled through compile flags.

This is done because the zeroing pseudos use MOVPRFX to do merging of the inactive lanes, but it depends on the microarchitecture (uarch) whether this operation is actually merged with the destructive operation. If not, it may be more profitable to use a SELECT and to give the compiler the freedom to schedule these instructions as normal, rather than keeping them bundled together.

Additionally, this feature is not yet fully implemented and there are still known bugs (see D80410) that need to be resolved before the 'experimental' can be dropped from the name.

Reviewers: paulwalker-arm, cameron.mcinally, efriedma

Reviewed By: paulwalker-arm

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82780
This commit is contained in:
parent
cee41879a1
commit
67ab949978
@ -103,6 +103,24 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
|
||||
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
|
||||
"Enable Scalable Vector Extension (SVE) instructions", [FeatureFullFP16]>;
|
||||
|
||||
// This flag is currently still labeled as Experimental, but when fully
|
||||
// implemented this should tell the compiler to use the zeroing pseudos to
|
||||
// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
|
||||
// lanes are known to be zero. The pseudos will then be expanded using the
|
||||
// MOVPRFX instruction to zero the inactive lanes. This feature should only be
|
||||
// enabled if MOVPRFX instructions are known to merge with the destructive
|
||||
// operations they prefix.
|
||||
//
|
||||
// This feature could similarly be extended to support cheap merging of _any_
|
||||
// value into the inactive lanes using the MOVPRFX instruction that uses
|
||||
// merging-predication.
|
||||
def FeatureExperimentalZeroingPseudos
|
||||
: SubtargetFeature<"use-experimental-zeroing-pseudos",
|
||||
"UseExperimentalZeroingPseudos", "true",
|
||||
"Hint to the compiler that the MOVPRFX instruction is "
|
||||
"merged with destructive operations",
|
||||
[]>;
|
||||
|
||||
def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
|
||||
"Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;
|
||||
|
||||
|
@ -155,6 +155,8 @@ def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
|
||||
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
|
||||
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
|
||||
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
|
||||
def UseExperimentalZeroingPseudos
|
||||
: Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
|
||||
def UseAlternateSExtLoadCVTF32
|
||||
: Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
|
||||
|
||||
|
@ -226,9 +226,11 @@ let Predicates = [HasSVE] in {
|
||||
|
||||
defm ADD_ZPZZ : sve_int_bin_pred_bhsd<AArch64add_p>;
|
||||
|
||||
defm ADD_ZPZZ : sve_int_bin_pred_zx<int_aarch64_sve_add>;
|
||||
defm SUB_ZPZZ : sve_int_bin_pred_zx<int_aarch64_sve_sub>;
|
||||
defm SUBR_ZPZZ : sve_int_bin_pred_zx<int_aarch64_sve_subr>;
|
||||
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
|
||||
defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
|
||||
defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_sub>;
|
||||
defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_subr>;
|
||||
}
|
||||
|
||||
defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_orr>;
|
||||
defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_eor>;
|
||||
@ -354,18 +356,20 @@ let Predicates = [HasSVE] in {
|
||||
|
||||
defm FADD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fadd_p>;
|
||||
|
||||
defm FADD_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fadd>;
|
||||
defm FSUB_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsub>;
|
||||
defm FMUL_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmul>;
|
||||
defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsubr>;
|
||||
defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmaxnm>;
|
||||
defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fminnm>;
|
||||
defm FMAX_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmax>;
|
||||
defm FMIN_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmin>;
|
||||
defm FABD_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fabd>;
|
||||
defm FMULX_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmulx>;
|
||||
defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdivr>;
|
||||
defm FDIV_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdiv>;
|
||||
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
|
||||
defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
|
||||
defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
|
||||
defm FMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmul>;
|
||||
defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsubr>;
|
||||
defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmaxnm>;
|
||||
defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fminnm>;
|
||||
defm FMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmax>;
|
||||
defm FMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmin>;
|
||||
defm FABD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fabd>;
|
||||
defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmulx>;
|
||||
defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdivr>;
|
||||
defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
|
||||
}
|
||||
|
||||
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
|
||||
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
|
||||
@ -1260,10 +1264,12 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
|
||||
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
|
||||
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
|
||||
|
||||
defm ASR_ZPZZ : sve_int_bin_pred_zx<AArch64asr_m1>;
|
||||
defm LSR_ZPZZ : sve_int_bin_pred_zx<AArch64lsr_m1>;
|
||||
defm LSL_ZPZZ : sve_int_bin_pred_zx<AArch64lsl_m1>;
|
||||
defm ASRD_ZPZI : sve_int_bin_pred_shift_0_right_zx<int_aarch64_sve_asrd>;
|
||||
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
|
||||
defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64asr_m1>;
|
||||
defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64lsr_m1>;
|
||||
defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64lsl_m1>;
|
||||
defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
|
||||
}
|
||||
|
||||
defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_m1, "ASRR_ZPmZ", 1>;
|
||||
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_m1, "LSRR_ZPmZ", 1>;
|
||||
@ -2289,6 +2295,14 @@ let Predicates = [HasSVE2] in {
|
||||
defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>;
|
||||
defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
|
||||
|
||||
let Predicates = [HasSVE2, UseExperimentalZeroingPseudos] in {
|
||||
defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
|
||||
defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
|
||||
defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_srshr>;
|
||||
defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_urshr>;
|
||||
defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_sqshlu>;
|
||||
}
|
||||
|
||||
// SVE2 integer add/subtract long
|
||||
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
|
||||
defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt", int_aarch64_sve_saddlt>;
|
||||
|
@ -104,6 +104,10 @@ protected:
|
||||
bool HasPAN_RWV = false;
|
||||
bool HasCCPP = false;
|
||||
|
||||
// SVE extensions
|
||||
bool HasSVE = false;
|
||||
bool UseExperimentalZeroingPseudos = false;
|
||||
|
||||
// Armv8.2 Crypto extensions
|
||||
bool HasSM4 = false;
|
||||
bool HasSHA3 = false;
|
||||
@ -130,8 +134,6 @@ protected:
|
||||
bool HasRCPC_IMMO = false;
|
||||
|
||||
bool HasLSLFast = false;
|
||||
bool HasSVE = false;
|
||||
bool HasSVE2 = false;
|
||||
bool HasRCPC = false;
|
||||
bool HasAggressiveFMA = false;
|
||||
|
||||
@ -158,6 +160,7 @@ protected:
|
||||
bool HasEnhancedCounterVirtualization = false;
|
||||
|
||||
// Arm SVE2 extensions
|
||||
bool HasSVE2 = false;
|
||||
bool HasSVE2AES = false;
|
||||
bool HasSVE2SM4 = false;
|
||||
bool HasSVE2SHA3 = false;
|
||||
@ -398,6 +401,10 @@ public:
|
||||
|
||||
unsigned getWideningBaseCost() const { return WideningBaseCost; }
|
||||
|
||||
bool useExperimentalZeroingPseudos() const {
|
||||
return UseExperimentalZeroingPseudos;
|
||||
}
|
||||
|
||||
/// CPU has TBI (top byte of addresses is ignored during HW address
|
||||
/// translation) and OS enables it.
|
||||
bool supportsAddressTopByteIgnored() const;
|
||||
|
@ -1596,7 +1596,7 @@ multiclass sve_fp_2op_p_zds_fscale<bits<4> opc, string asm,
|
||||
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
multiclass sve_fp_2op_p_zds_zx<SDPatternOperator op> {
|
||||
multiclass sve_fp_2op_p_zds_zeroing_hsd<SDPatternOperator op> {
|
||||
def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
|
||||
def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
|
||||
def _ZERO_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
|
||||
@ -4764,27 +4764,24 @@ multiclass sve2_int_bin_pred_shift_imm_left<bits<4> opc, string asm,
|
||||
let Inst{9-8} = imm{4-3};
|
||||
}
|
||||
|
||||
def _B_Z_UNDEF : PredTwoOpImmPseudo<psName # _B, ZPR8, tvecshiftL8, FalseLanesUndef>;
|
||||
def _H_Z_UNDEF : PredTwoOpImmPseudo<psName # _H, ZPR16, tvecshiftL16, FalseLanesUndef>;
|
||||
def _S_Z_UNDEF : PredTwoOpImmPseudo<psName # _S, ZPR32, tvecshiftL32, FalseLanesUndef>;
|
||||
def _D_Z_UNDEF : PredTwoOpImmPseudo<psName # _D, ZPR64, tvecshiftL64, FalseLanesUndef>;
|
||||
|
||||
def _B_Z_ZERO : PredTwoOpImmPseudo<psName # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
|
||||
def _H_Z_ZERO : PredTwoOpImmPseudo<psName # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
|
||||
def _S_Z_ZERO : PredTwoOpImmPseudo<psName # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
|
||||
def _D_Z_ZERO : PredTwoOpImmPseudo<psName # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
|
||||
|
||||
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _B_Z_ZERO)>;
|
||||
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _H_Z_ZERO)>;
|
||||
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _S_Z_ZERO)>;
|
||||
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _D_Z_ZERO)>;
|
||||
|
||||
def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
|
||||
def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
|
||||
def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
|
||||
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
|
||||
def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
|
||||
def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
|
||||
def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
|
||||
def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
|
||||
|
||||
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _ZERO_B)>;
|
||||
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _ZERO_H)>;
|
||||
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _ZERO_S)>;
|
||||
def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _ZERO_D)>;
|
||||
}
|
||||
|
||||
multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
|
||||
SDPatternOperator op = null_frag> {
|
||||
def _B : SVEPseudo2Instr<Ps # _B, 1>,
|
||||
@ -4809,7 +4806,7 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
|
||||
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
multiclass sve_int_bin_pred_shift_0_right_zx<SDPatternOperator op = null_frag> {
|
||||
multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op = null_frag> {
|
||||
def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftR8, FalseLanesZero>;
|
||||
def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftR16, FalseLanesZero>;
|
||||
def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>;
|
||||
@ -4863,7 +4860,7 @@ multiclass sve_int_bin_pred_shift<bits<3> opc, string asm, string Ps,
|
||||
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
multiclass sve_int_bin_pred_zx<SDPatternOperator op> {
|
||||
multiclass sve_int_bin_pred_zeroing_bhsd<SDPatternOperator op> {
|
||||
def _ZERO_B : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesZero>;
|
||||
def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
|
||||
def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=sve < %s 2>%t | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; WARN-NOT: warning
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; WARN-NOT: warning
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; WARN-NOT: warning
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
|
||||
|
||||
;
|
||||
; SQSHLU
|
||||
|
Loading…
x
Reference in New Issue
Block a user