mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[AMDGPU] Split dot-insts feature
Differential Revision: https://reviews.llvm.org/D57971 llvm-svn: 353587
This commit is contained in:
parent
4ef25cfd76
commit
4063d7d54a
@ -269,10 +269,16 @@ def FeatureDLInsts : SubtargetFeature<"dl-insts",
|
||||
"Has v_fmac_f32 and v_xnor_b32 instructions"
|
||||
>;
|
||||
|
||||
def FeatureDotInsts : SubtargetFeature<"dot-insts",
|
||||
"HasDotInsts",
|
||||
def FeatureDot1Insts : SubtargetFeature<"dot1-insts",
|
||||
"HasDot1Insts",
|
||||
"true",
|
||||
"Has v_dot* instructions"
|
||||
"Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions"
|
||||
>;
|
||||
|
||||
def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
|
||||
"HasDot2Insts",
|
||||
"true",
|
||||
"Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
|
||||
>;
|
||||
|
||||
def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
|
||||
@ -570,7 +576,8 @@ def FeatureISAVersion9_0_6 : FeatureSet<
|
||||
FeatureFmaMixInsts,
|
||||
FeatureLDSBankCount32,
|
||||
FeatureDLInsts,
|
||||
FeatureDotInsts,
|
||||
FeatureDot1Insts,
|
||||
FeatureDot2Insts,
|
||||
FeatureSRAMECC,
|
||||
FeatureCodeObjectV3]>;
|
||||
|
||||
@ -769,8 +776,11 @@ def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
|
||||
def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
|
||||
AssemblerPredicate<"FeatureDLInsts">;
|
||||
|
||||
def HasDotInsts : Predicate<"Subtarget->hasDotInsts()">,
|
||||
AssemblerPredicate<"FeatureDotInsts">;
|
||||
def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
|
||||
AssemblerPredicate<"FeatureDot1Insts">;
|
||||
|
||||
def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
|
||||
AssemblerPredicate<"FeatureDot2Insts">;
|
||||
|
||||
|
||||
def EnableLateCFGStructurize : Predicate<
|
||||
|
@ -206,7 +206,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
HasDPP(false),
|
||||
HasR128A16(false),
|
||||
HasDLInsts(false),
|
||||
HasDotInsts(false),
|
||||
HasDot1Insts(false),
|
||||
HasDot2Insts(false),
|
||||
EnableSRAMECC(false),
|
||||
FlatAddressSpace(false),
|
||||
FlatInstOffsets(false),
|
||||
|
@ -332,7 +332,8 @@ protected:
|
||||
bool HasDPP;
|
||||
bool HasR128A16;
|
||||
bool HasDLInsts;
|
||||
bool HasDotInsts;
|
||||
bool HasDot1Insts;
|
||||
bool HasDot2Insts;
|
||||
bool EnableSRAMECC;
|
||||
bool FlatAddressSpace;
|
||||
bool FlatInstOffsets;
|
||||
@ -666,8 +667,12 @@ public:
|
||||
return HasDLInsts;
|
||||
}
|
||||
|
||||
bool hasDotInsts() const {
|
||||
return HasDotInsts;
|
||||
bool hasDot1Insts() const {
|
||||
return HasDot1Insts;
|
||||
}
|
||||
|
||||
bool hasDot2Insts() const {
|
||||
return HasDot2Insts;
|
||||
}
|
||||
|
||||
bool isSRAMECCEnabled() const {
|
||||
|
@ -8708,7 +8708,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
|
||||
EVT VT = N->getValueType(0);
|
||||
SDLoc SL(N);
|
||||
|
||||
if (!Subtarget->hasDotInsts() || VT != MVT::f32)
|
||||
if (!Subtarget->hasDot2Insts() || VT != MVT::f32)
|
||||
return SDValue();
|
||||
|
||||
// FMA((F32)S0.x, (F32)S1.x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
|
||||
|
@ -238,29 +238,39 @@ class UDot2Pat<Instruction Inst> : GCNPat <
|
||||
(AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
|
||||
(and i32:$src1, (i32 65535)))
|
||||
),
|
||||
(Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
|
||||
>;
|
||||
(Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
|
||||
let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
|
||||
}
|
||||
|
||||
class SDot2Pat<Instruction Inst> : GCNPat <
|
||||
(add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
|
||||
(sra i32:$src1, (i32 16))), i32:$src2),
|
||||
(AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
|
||||
(sext_inreg i32:$src1, i16))),
|
||||
(Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
|
||||
>;
|
||||
(Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
|
||||
let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = HasDotInsts in {
|
||||
let SubtargetPredicate = HasDot2Insts in {
|
||||
|
||||
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
|
||||
def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
|
||||
def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
|
||||
def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
|
||||
def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
|
||||
def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
|
||||
def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
|
||||
|
||||
} // End SubtargetPredicate = HasDot2Insts
|
||||
|
||||
let SubtargetPredicate = HasDot1Insts in {
|
||||
|
||||
def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
|
||||
def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
|
||||
|
||||
} // End SubtargetPredicate = HasDot1Insts
|
||||
|
||||
multiclass DotPats<SDPatternOperator dot_op,
|
||||
VOP3PInst dot_inst> {
|
||||
let SubtargetPredicate = dot_inst.SubtargetPredicate in
|
||||
def : GCNPat <
|
||||
(dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
|
||||
(dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
@ -280,12 +290,14 @@ def : UDot2Pat<V_DOT2_U32_U16>;
|
||||
def : SDot2Pat<V_DOT2_I32_I16>;
|
||||
|
||||
foreach Type = ["U", "I"] in
|
||||
let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).SubtargetPredicate in
|
||||
def : GCNPat <
|
||||
!cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
|
||||
(add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
|
||||
(!cast<VOP3PInst>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
|
||||
|
||||
foreach Type = ["U", "I"] in
|
||||
let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
|
||||
def : GCNPat <
|
||||
!cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
|
||||
[1, 2, 3, 4, 5, 6, 7], lhs, y,
|
||||
@ -295,14 +307,13 @@ foreach Type = ["U", "I"] in
|
||||
// The different variants of the dot8 code-gen DAG patterns are not generated through TableGen because that causes a huge increase
|
||||
// in compile time. Directly handle the pattern generated by the front end (FE) here.
|
||||
foreach Type = ["U", "I"] in
|
||||
let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
|
||||
def : GCNPat <
|
||||
!cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
|
||||
[7, 1, 2, 3, 4, 5, 6], lhs, y,
|
||||
(NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
|
||||
(!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
|
||||
|
||||
} // End SubtargetPredicate = HasDotInsts
|
||||
|
||||
multiclass VOP3P_Real_vi<bits<10> op> {
|
||||
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
|
||||
VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
|
||||
@ -351,14 +362,19 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
|
||||
}
|
||||
|
||||
|
||||
let SubtargetPredicate = HasDotInsts in {
|
||||
let SubtargetPredicate = HasDot2Insts in {
|
||||
|
||||
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
|
||||
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
|
||||
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x3a7>;
|
||||
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>;
|
||||
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>;
|
||||
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
|
||||
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>;
|
||||
|
||||
} // End SubtargetPredicate = HasDotInsts
|
||||
} // End SubtargetPredicate = HasDot2Insts
|
||||
|
||||
let SubtargetPredicate = HasDot1Insts in {
|
||||
|
||||
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>;
|
||||
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
|
||||
|
||||
} // End SubtargetPredicate = HasDot1Insts
|
||||
|
Loading…
Reference in New Issue
Block a user