1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00

[AMDGPU] Mark relevant rematerializable VOP3 instructions

Differential Revision: https://reviews.llvm.org/D106110
This commit is contained in:
Stanislav Mekhanoshin 2021-07-15 16:17:02 -07:00
parent b044663832
commit 54565ba720
3 changed files with 1196 additions and 12 deletions

View File

@ -275,6 +275,7 @@ class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
let isCommutable = 1 in { let isCommutable = 1 in {
let isReMaterializable = 1 in {
let mayRaiseFPException = 0 in { let mayRaiseFPException = 0 in {
let SubtargetPredicate = HasMadMacF32Insts in { let SubtargetPredicate = HasMadMacF32Insts in {
defm V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; defm V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@ -308,6 +309,7 @@ defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile<VOP_I32_I32_I32>, mul
defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>; defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>; defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
} // End SchedRW = [WriteIntMul] } // End SchedRW = [WriteIntMul]
} // End isReMaterializable = 1
let Uses = [MODE, VCC, EXEC] in { let Uses = [MODE, VCC, EXEC] in {
// v_div_fmas_f32: // v_div_fmas_f32:
@ -328,6 +330,7 @@ defm V_DIV_FMAS_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f64", VOP_F64_F64_F6
} // End isCommutable = 1 } // End isCommutable = 1
let isReMaterializable = 1 in {
let mayRaiseFPException = 0 in { let mayRaiseFPException = 0 in {
defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>; defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>; defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
@ -370,6 +373,7 @@ let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>; defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>; defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1 } // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
} // End isReMaterializable = 1
let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does. let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does.
@ -381,6 +385,7 @@ let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it d
defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1>; defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1>;
} // End mayRaiseFPException = 0 } // End mayRaiseFPException = 0
let isReMaterializable = 1 in
defm V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>; defm V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
let Constraints = "@earlyclobber $vdst" in { let Constraints = "@earlyclobber $vdst" in {
@ -388,6 +393,7 @@ defm V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64
} // End Constraints = "@earlyclobber $vdst" } // End Constraints = "@earlyclobber $vdst"
let isReMaterializable = 1 in {
let SchedRW = [WriteDouble] in { let SchedRW = [WriteDouble] in {
defm V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, int_amdgcn_trig_preop>; defm V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, int_amdgcn_trig_preop>;
} // End SchedRW = [WriteDouble] } // End SchedRW = [WriteDouble]
@ -405,12 +411,14 @@ let SchedRW = [Write64Bit] in {
defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, ashr_rev>; defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, ashr_rev>;
} // End SubtargetPredicate = isGFX8Plus } // End SubtargetPredicate = isGFX8Plus
} // End SchedRW = [Write64Bit] } // End SchedRW = [Write64Bit]
} // End isReMaterializable = 1
def : GCNPat< def : GCNPat<
(i32 (getDivergentFrag<sext>.ret i16:$src)), (i32 (getDivergentFrag<sext>.ret i16:$src)),
(i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))) (i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
>; >;
let isReMaterializable = 1 in {
let SubtargetPredicate = isGFX6GFX7GFX10 in { let SubtargetPredicate = isGFX6GFX7GFX10 in {
defm V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; defm V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
} // End SubtargetPredicate = isGFX6GFX7GFX10 } // End SubtargetPredicate = isGFX6GFX7GFX10
@ -420,6 +428,7 @@ let SubtargetPredicate = isGFX8Plus in {
defm V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUperm>; defm V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUperm>;
} // End SubtargetPredicate = isGFX8Plus } // End SubtargetPredicate = isGFX8Plus
} // End SchedRW = [Write32Bit] } // End SchedRW = [Write32Bit]
} // End isReMaterializable = 1
let SubtargetPredicate = isGFX7Plus in { let SubtargetPredicate = isGFX7Plus in {
@ -601,14 +610,14 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
} }
let SubtargetPredicate = isGFX9Plus in { let SubtargetPredicate = isGFX9Plus in {
let isCommutable = 1 in { let isCommutable = 1, isReMaterializable = 1 in {
defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
defm V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
defm V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
defm V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
defm V_ADD_I32 : VOP3Inst <"v_add_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>; defm V_ADD_I32 : VOP3Inst <"v_add_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
defm V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
} // End isCommutable = 1 } // End isCommutable = 1, isReMaterializable = 1
// TODO src0 contains the opsel bit for dst, so if we commute, need to mask and swap this // TODO src0 contains the opsel bit for dst, so if we commute, need to mask and swap this
// to the new src0. // to the new src0.
defm V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmed3>; defm V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmed3>;
@ -632,11 +641,13 @@ defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32
defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>; defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>; defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
defm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
defm V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>; defm V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
let isReMaterializable = 1 in {
defm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
defm V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
} // End isReMaterializable = 1
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat < class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
@ -715,9 +726,9 @@ class PermlaneDiscardVDstIn<SDPatternOperator permlane,
let SubtargetPredicate = isGFX10Plus in { let SubtargetPredicate = isGFX10Plus in {
let isCommutable = 1 in { let isCommutable = 1, isReMaterializable = 1 in {
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
} // End isCommutable = 1 } // End isCommutable = 1, isReMaterializable = 1
def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32_e64>; def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32_e64>;
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {

File diff suppressed because it is too large Load Diff

View File

@ -106,11 +106,8 @@ body: |
bb.7: bb.7:
successors: %bb.13(0x80000000) successors: %bb.13(0x80000000)
; In reality we are checking that this code doesn't assert when splitting
; and inserting a spill. Here we just check that the point where the error
; occurs we see a correctly generated spill.
; GCN-LABEL: bb.7: ; GCN-LABEL: bb.7:
; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.0, $sgpr32, 0, implicit $exec ; GCN: undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
%15.sub1:vreg_128 = COPY %15.sub0 %15.sub1:vreg_128 = COPY %15.sub0
@ -126,7 +123,7 @@ body: |
successors: %bb.12(0x80000000) successors: %bb.12(0x80000000)
; GCN-LABEL: bb.9: ; GCN-LABEL: bb.9:
; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.0, $sgpr32, 0, implicit $exec ; GCN: undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
%15.sub1:vreg_128 = COPY %15.sub0 %15.sub1:vreg_128 = COPY %15.sub0
@ -137,7 +134,7 @@ body: |
successors: %bb.12(0x80000000) successors: %bb.12(0x80000000)
; GCN-LABEL: bb.10: ; GCN-LABEL: bb.10:
; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.0, $sgpr32, 0, implicit $exec ; GCN: undef %15.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec
undef %15.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec undef %15.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec
%15.sub1:vreg_128 = COPY %15.sub0 %15.sub1:vreg_128 = COPY %15.sub0