mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[AMDGPU] Refactor VOP1 and VOP2 instruction TD definitions
Differential revision: https://reviews.llvm.org/D24738 llvm-svn: 282234
This commit is contained in:
parent
8e59adb433
commit
8bae56ae1d
@ -12,36 +12,4 @@
|
||||
// S_CBRANCH_CDBGUSER
|
||||
// S_CBRANCH_CDBGSYS
|
||||
// S_CBRANCH_CDBGSYS_OR_USER
|
||||
// S_CBRANCH_CDBGSYS_AND_USER
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP1 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let SubtargetPredicate = isCIVI in {
|
||||
|
||||
let SchedRW = [WriteDoubleAdd] in {
|
||||
defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
|
||||
VOP_F64_F64, ftrunc
|
||||
>;
|
||||
defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "v_ceil_f64",
|
||||
VOP_F64_F64, fceil
|
||||
>;
|
||||
defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
|
||||
VOP_F64_F64, ffloor
|
||||
>;
|
||||
defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
|
||||
VOP_F64_F64, frint
|
||||
>;
|
||||
} // End SchedRW = [WriteDoubleAdd]
|
||||
|
||||
let SchedRW = [WriteQuarterRate32] in {
|
||||
defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
|
||||
VOP_F32_F32
|
||||
>;
|
||||
defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
|
||||
VOP_F32_F32
|
||||
>;
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
} // End SubtargetPredicate = isCIVI
|
||||
// S_CBRANCH_CDBGSYS_AND_USER
|
@ -145,161 +145,6 @@ class Enc64 {
|
||||
|
||||
class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
|
||||
|
||||
let Uses = [EXEC] in {
|
||||
|
||||
class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
InstSI <outs, ins, asm, pattern> {
|
||||
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
let hasSideEffects = 0;
|
||||
let UseNamedOperandTable = 1;
|
||||
let VALU = 1;
|
||||
}
|
||||
|
||||
class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
VOPAnyCommon <outs, ins, asm, pattern> {
|
||||
|
||||
let VOP1 = 1;
|
||||
let Size = 4;
|
||||
}
|
||||
|
||||
class VOP2Common <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
VOPAnyCommon <outs, ins, asm, pattern> {
|
||||
|
||||
let VOP2 = 1;
|
||||
let Size = 4;
|
||||
}
|
||||
|
||||
class VOP3Common <dag outs, dag ins, string asm = "",
|
||||
list<dag> pattern = [], bit HasMods = 0,
|
||||
bit VOP3Only = 0> :
|
||||
VOPAnyCommon <outs, ins, asm, pattern> {
|
||||
|
||||
// Using complex patterns gives VOP3 patterns a very high complexity rating,
|
||||
// but standalone patterns are almost always prefered, so we need to adjust the
|
||||
// priority lower. The goal is to use a high number to reduce complexity to
|
||||
// zero (or less than zero).
|
||||
let AddedComplexity = -1000;
|
||||
|
||||
let VOP3 = 1;
|
||||
let VALU = 1;
|
||||
|
||||
let AsmMatchConverter =
|
||||
!if(!eq(VOP3Only,1),
|
||||
"cvtVOP3",
|
||||
!if(!eq(HasMods,1), "cvtVOP3_2_mod", ""));
|
||||
|
||||
let AsmVariantName = AMDGPUAsmVariants.VOP3;
|
||||
|
||||
let isCodeGenOnly = 0;
|
||||
|
||||
int Size = 8;
|
||||
|
||||
// Because SGPRs may be allowed if there are multiple operands, we
|
||||
// need a post-isel hook to insert copies in order to avoid
|
||||
// violating constant bus requirements.
|
||||
let hasPostISelHook = 1;
|
||||
}
|
||||
|
||||
} // End Uses = [EXEC]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Vector ALU operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class VOP1e <bits<8> op> : Enc32 {
|
||||
bits<8> vdst;
|
||||
bits<9> src0;
|
||||
|
||||
let Inst{8-0} = src0;
|
||||
let Inst{16-9} = op;
|
||||
let Inst{24-17} = vdst;
|
||||
let Inst{31-25} = 0x3f; //encoding
|
||||
}
|
||||
|
||||
class VOP2e <bits<6> op> : Enc32 {
|
||||
bits<8> vdst;
|
||||
bits<9> src0;
|
||||
bits<8> src1;
|
||||
|
||||
let Inst{8-0} = src0;
|
||||
let Inst{16-9} = src1;
|
||||
let Inst{24-17} = vdst;
|
||||
let Inst{30-25} = op;
|
||||
let Inst{31} = 0x0; //encoding
|
||||
}
|
||||
|
||||
class VOP2_MADKe <bits<6> op> : Enc64 {
|
||||
|
||||
bits<8> vdst;
|
||||
bits<9> src0;
|
||||
bits<8> src1;
|
||||
bits<32> imm;
|
||||
|
||||
let Inst{8-0} = src0;
|
||||
let Inst{16-9} = src1;
|
||||
let Inst{24-17} = vdst;
|
||||
let Inst{30-25} = op;
|
||||
let Inst{31} = 0x0; // encoding
|
||||
let Inst{63-32} = imm;
|
||||
}
|
||||
|
||||
class VOP3a <bits<9> op> : Enc64 {
|
||||
bits<2> src0_modifiers;
|
||||
bits<9> src0;
|
||||
bits<2> src1_modifiers;
|
||||
bits<9> src1;
|
||||
bits<2> src2_modifiers;
|
||||
bits<9> src2;
|
||||
bits<1> clamp;
|
||||
bits<2> omod;
|
||||
|
||||
let Inst{8} = src0_modifiers{1};
|
||||
let Inst{9} = src1_modifiers{1};
|
||||
let Inst{10} = src2_modifiers{1};
|
||||
let Inst{11} = clamp;
|
||||
let Inst{25-17} = op;
|
||||
let Inst{31-26} = 0x34; //encoding
|
||||
let Inst{40-32} = src0;
|
||||
let Inst{49-41} = src1;
|
||||
let Inst{58-50} = src2;
|
||||
let Inst{60-59} = omod;
|
||||
let Inst{61} = src0_modifiers{0};
|
||||
let Inst{62} = src1_modifiers{0};
|
||||
let Inst{63} = src2_modifiers{0};
|
||||
}
|
||||
|
||||
class VOP3e <bits<9> op> : VOP3a <op> {
|
||||
bits<8> vdst;
|
||||
|
||||
let Inst{7-0} = vdst;
|
||||
}
|
||||
|
||||
class VOP3be <bits<9> op> : Enc64 {
|
||||
bits<8> vdst;
|
||||
bits<2> src0_modifiers;
|
||||
bits<9> src0;
|
||||
bits<2> src1_modifiers;
|
||||
bits<9> src1;
|
||||
bits<2> src2_modifiers;
|
||||
bits<9> src2;
|
||||
bits<7> sdst;
|
||||
bits<2> omod;
|
||||
|
||||
let Inst{7-0} = vdst;
|
||||
let Inst{14-8} = sdst;
|
||||
let Inst{25-17} = op;
|
||||
let Inst{31-26} = 0x34; //encoding
|
||||
let Inst{40-32} = src0;
|
||||
let Inst{49-41} = src1;
|
||||
let Inst{58-50} = src2;
|
||||
let Inst{60-59} = omod;
|
||||
let Inst{61} = src0_modifiers{0};
|
||||
let Inst{62} = src1_modifiers{0};
|
||||
let Inst{63} = src2_modifiers{0};
|
||||
}
|
||||
|
||||
class VINTRPe <bits<2> op> : Enc32 {
|
||||
bits<8> vdst;
|
||||
bits<8> vsrc;
|
||||
@ -369,17 +214,6 @@ class EXPe : Enc64 {
|
||||
|
||||
let Uses = [EXEC] in {
|
||||
|
||||
class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
VOP1Common <outs, ins, asm, pattern>,
|
||||
VOP1e<op> {
|
||||
let isCodeGenOnly = 0;
|
||||
}
|
||||
|
||||
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
|
||||
let isCodeGenOnly = 0;
|
||||
}
|
||||
|
||||
class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
InstSI <outs, ins, asm, pattern> {
|
||||
let mayLoad = 1;
|
||||
|
@ -14,38 +14,6 @@ def isCIOnly : Predicate<"Subtarget->getGeneration() =="
|
||||
|
||||
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
|
||||
|
||||
class vop {
|
||||
field bits<9> SI3;
|
||||
field bits<10> VI3;
|
||||
}
|
||||
|
||||
class vop1 <bits<8> si, bits<8> vi = si> : vop {
|
||||
field bits<8> SI = si;
|
||||
field bits<8> VI = vi;
|
||||
|
||||
field bits<9> SI3 = {1, 1, si{6-0}};
|
||||
field bits<10> VI3 = !add(0x140, vi);
|
||||
}
|
||||
|
||||
class vop2 <bits<6> si, bits<6> vi = si> : vop {
|
||||
field bits<6> SI = si;
|
||||
field bits<6> VI = vi;
|
||||
|
||||
field bits<9> SI3 = {1, 0, 0, si{5-0}};
|
||||
field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};
|
||||
}
|
||||
|
||||
// Specify a VOP2 opcode for SI and VOP3 opcode for VI
|
||||
// that doesn't have VOP2 encoding on VI
|
||||
class vop23 <bits<6> si, bits<10> vi> : vop2 <si> {
|
||||
let VI3 = vi;
|
||||
}
|
||||
|
||||
class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop {
|
||||
let SI3 = si;
|
||||
let VI3 = vi;
|
||||
}
|
||||
|
||||
// Execpt for the NONE field, this must be kept in sync with the
|
||||
// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
|
||||
def SIEncodingFamily {
|
||||
@ -639,18 +607,20 @@ class getVOP3SrcForVT<ValueType VT> {
|
||||
|
||||
// Returns 1 if the source arguments have modifiers, 0 if they do not.
|
||||
// XXX - do f16 instructions?
|
||||
class hasModifiers<ValueType SrcVT> {
|
||||
class isFloatType<ValueType SrcVT> {
|
||||
bit ret =
|
||||
!if(!eq(SrcVT.Value, f16.Value), 1,
|
||||
!if(!eq(SrcVT.Value, f32.Value), 1,
|
||||
!if(!eq(SrcVT.Value, f64.Value), 1,
|
||||
0));
|
||||
0)));
|
||||
}
|
||||
|
||||
class hasIntModifiers<ValueType SrcVT> {
|
||||
class isIntType<ValueType SrcVT> {
|
||||
bit ret =
|
||||
!if(!eq(SrcVT.Value, i16.Value), 1,
|
||||
!if(!eq(SrcVT.Value, i32.Value), 1,
|
||||
!if(!eq(SrcVT.Value, i64.Value), 1,
|
||||
0));
|
||||
0)));
|
||||
}
|
||||
|
||||
|
||||
@ -756,39 +726,21 @@ class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
|
||||
// VOP1 without input operands (V_NOP)
|
||||
(ins),
|
||||
!if(!eq(NumSrcArgs, 1),
|
||||
!if(HasFloatModifiers,
|
||||
// VOP1_SDWA with float modifiers
|
||||
(ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
|
||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel),
|
||||
// VOP1_SDWA with int modifiers
|
||||
(ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
|
||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel))
|
||||
/* NumSrcArgs == 2 */,
|
||||
!if(HasFloatModifiers,
|
||||
!if(!eq(DstVT.Size, 1),
|
||||
// VOPC_SDWA with float modifiers
|
||||
(ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_fmodifiers, Src1RC:$src1,
|
||||
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
|
||||
// VOP2_SDWA or VOPC_SDWA with float modifiers
|
||||
(ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_fmodifiers, Src1RC:$src1,
|
||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel, src1_sel:$src1_sel)),
|
||||
|
||||
!if(!eq(DstVT.Size, 1),
|
||||
// VOPC_SDWA with int modifiers
|
||||
(ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_imodifiers, Src1RC:$src1,
|
||||
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
|
||||
// VOP2_SDWA or VOPC_SDWA with int modifiers
|
||||
(ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_imodifiers, Src1RC:$src1,
|
||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel, src1_sel:$src1_sel))
|
||||
/* endif */)));
|
||||
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel),
|
||||
!if(!eq(NumSrcArgs, 2),
|
||||
!if(!eq(DstVT.Size, 1),
|
||||
// VOPC_SDWA with float modifiers
|
||||
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
||||
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
|
||||
// VOP2_SDWA or VOPC_SDWA with float modifiers
|
||||
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel, src1_sel:$src1_sel)),
|
||||
(ins)/* endif */)));
|
||||
}
|
||||
|
||||
// Outs for DPP and SDWA
|
||||
@ -852,8 +804,8 @@ class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasFloatModifiers,
|
||||
" vcc", // use vcc token as dst for VOPC instructioins
|
||||
"$vdst"),
|
||||
"");
|
||||
string src0 = !if(HasFloatModifiers, "$src0_fmodifiers", "$src0_imodifiers");
|
||||
string src1 = !if(HasFloatModifiers, "$src1_fmodifiers", "$src1_imodifiers");
|
||||
string src0 = "$src0_modifiers";
|
||||
string src1 = "$src1_modifiers";
|
||||
string args = !if(!eq(NumSrcArgs, 0),
|
||||
"",
|
||||
!if(!eq(NumSrcArgs, 1),
|
||||
@ -892,6 +844,14 @@ class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
|
||||
);
|
||||
}
|
||||
|
||||
class BitOr<bit a, bit b> {
|
||||
bit ret = !if(a, 1, !if(b, 1, 0));
|
||||
}
|
||||
|
||||
class BitAnd<bit a, bit b> {
|
||||
bit ret = !if(a, !if(b, 1, 0), 0);
|
||||
}
|
||||
|
||||
class VOPProfile <list<ValueType> _ArgVT> {
|
||||
|
||||
field list<ValueType> ArgVT = _ArgVT;
|
||||
@ -918,19 +878,27 @@ class VOPProfile <list<ValueType> _ArgVT> {
|
||||
|
||||
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
|
||||
field bit HasDst32 = HasDst;
|
||||
field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
|
||||
field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
|
||||
field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1);
|
||||
field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1);
|
||||
field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
|
||||
field bit HasSrc0Mods = hasModifiers<Src0VT>.ret;
|
||||
field bit HasSrc1Mods = hasModifiers<Src1VT>.ret;
|
||||
field bit HasSrc2Mods = hasModifiers<Src2VT>.ret;
|
||||
|
||||
field bit HasSrc0IntMods = hasIntModifiers<Src0VT>.ret;
|
||||
field bit HasSrc1IntMods = hasIntModifiers<Src1VT>.ret;
|
||||
field bit HasSrc2IntMods = hasIntModifiers<Src2VT>.ret;
|
||||
// TODO: Modifiers logic is somewhat adhoc here, to be refined later
|
||||
field bit HasModifiers = isFloatType<Src0VT>.ret;
|
||||
|
||||
field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
|
||||
field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
|
||||
field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
|
||||
|
||||
field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
|
||||
field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
|
||||
field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
|
||||
|
||||
field bit HasSrc0Mods = HasModifiers;
|
||||
field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
|
||||
field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
|
||||
|
||||
field bit HasModifiers = HasSrc0Mods;
|
||||
field bit HasOMod = HasModifiers;
|
||||
field bit HasClamp = HasModifiers;
|
||||
field bit HasSDWAClamp = HasSrc0;
|
||||
@ -997,115 +965,11 @@ def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
|
||||
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
|
||||
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
|
||||
|
||||
// Restrict src0 to be VGPR
|
||||
def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
|
||||
let Src0RC32 = VRegSrc_32;
|
||||
let Src0RC64 = VRegSrc_32;
|
||||
|
||||
let HasExt = 0;
|
||||
}
|
||||
|
||||
// Special case because there are no true output operands. Hack vdst
|
||||
// to be a src operand. The custom inserter must add a tied implicit
|
||||
// def and use of the super register since there seems to be no way to
|
||||
// add an implicit def of a virtual register in tablegen.
|
||||
def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
|
||||
let Src0RC32 = VOPDstOperand<VGPR_32>;
|
||||
let Src0RC64 = VOPDstOperand<VGPR_32>;
|
||||
|
||||
let Outs = (outs);
|
||||
let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
|
||||
let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
|
||||
|
||||
let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
|
||||
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
|
||||
let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_imodifiers, VCSrc_b32:$src0,
|
||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel);
|
||||
|
||||
let Asm32 = getAsm32<1, 1>.ret;
|
||||
let Asm64 = getAsm64<1, 1, 0>.ret;
|
||||
let AsmDPP = getAsmDPP<1, 1, 0>.ret;
|
||||
let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;
|
||||
|
||||
let HasExt = 0;
|
||||
let HasDst = 0;
|
||||
}
|
||||
|
||||
// Write out to vcc or arbitrary SGPR.
|
||||
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
|
||||
let Asm32 = "$vdst, vcc, $src0, $src1";
|
||||
let Asm64 = "$vdst, $sdst, $src0, $src1";
|
||||
let Outs32 = (outs DstRC:$vdst);
|
||||
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
|
||||
}
|
||||
|
||||
// Write out to vcc or arbitrary SGPR and read in from vcc or
|
||||
// arbitrary SGPR.
|
||||
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
|
||||
// We use VCSrc_b32 to exclude literal constants, even though the
|
||||
// encoding normally allows them since the implicit VCC use means
|
||||
// using one would always violate the constant bus
|
||||
// restriction. SGPRs are still allowed because it should
|
||||
// technically be possible to use VCC again as src0.
|
||||
let Src0RC32 = VCSrc_b32;
|
||||
let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
|
||||
let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
|
||||
let Outs32 = (outs DstRC:$vdst);
|
||||
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
|
||||
|
||||
// Suppress src2 implied by type since the 32-bit encoding uses an
|
||||
// implicit VCC use.
|
||||
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
|
||||
}
|
||||
|
||||
// Read in from vcc or arbitrary SGPR
|
||||
def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
|
||||
let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
|
||||
let Asm32 = "$vdst, $src0, $src1, vcc";
|
||||
let Asm64 = "$vdst, $src0, $src1, $src2";
|
||||
let Outs32 = (outs DstRC:$vdst);
|
||||
let Outs64 = (outs DstRC:$vdst);
|
||||
|
||||
// Suppress src2 implied by type since the 32-bit encoding uses an
|
||||
// implicit VCC use.
|
||||
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
|
||||
}
|
||||
|
||||
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
|
||||
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
|
||||
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
|
||||
|
||||
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
|
||||
def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
|
||||
field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, f32kimm:$imm);
|
||||
field string Asm32 = "$vdst, $src0, $src1, $imm";
|
||||
field bit HasExt = 0;
|
||||
}
|
||||
def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
|
||||
field dag Ins32 = (ins VCSrc_f32:$src0, f32kimm:$imm, VGPR_32:$src1);
|
||||
field string Asm32 = "$vdst, $src0, $imm, $src1";
|
||||
field bit HasExt = 0;
|
||||
}
|
||||
def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
|
||||
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
|
||||
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
|
||||
HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
|
||||
let InsDPP = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0,
|
||||
FP32InputMods:$src1_modifiers, Src1RC32:$src1,
|
||||
VGPR_32:$src2, // stub argument
|
||||
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
|
||||
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
|
||||
let InsSDWA = (ins FP32InputMods:$src0_fmodifiers, Src0RC32:$src0,
|
||||
FP32InputMods:$src1_fmodifiers, Src1RC32:$src1,
|
||||
VGPR_32:$src2, // stub argument
|
||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel, src1_sel:$src1_sel);
|
||||
let Asm32 = getAsm32<1, 2, f32>.ret;
|
||||
let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
|
||||
let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret;
|
||||
let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
|
||||
}
|
||||
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
|
||||
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
|
||||
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
|
||||
@ -1113,10 +977,6 @@ def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
|
||||
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
|
||||
def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
|
||||
|
||||
class VOP <string opName> {
|
||||
string OpName = opName;
|
||||
}
|
||||
|
||||
class Commutable_REV <string revOp, bit isOrig> {
|
||||
string RevOp = revOp;
|
||||
bit IsOrig = isOrig;
|
||||
@ -1127,556 +987,6 @@ class AtomicNoRet <string noRetOp, bit isRet> {
|
||||
bit IsRet = isRet;
|
||||
}
|
||||
|
||||
class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
|
||||
VOP1Common <outs, ins, "", pattern>,
|
||||
VOP <opName>,
|
||||
SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
|
||||
MnemonicAlias<opName#"_e32", opName> {
|
||||
let isPseudo = 1;
|
||||
let isCodeGenOnly = 1;
|
||||
|
||||
field bits<8> vdst;
|
||||
field bits<9> src0;
|
||||
}
|
||||
|
||||
class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
|
||||
VOP1<op.SI, outs, ins, asm, []>,
|
||||
SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
|
||||
let AssemblerPredicate = SIAssemblerPredicate;
|
||||
let DecoderNamespace = "SICI";
|
||||
let DisableDecoder = DisableSIDecoder;
|
||||
}
|
||||
|
||||
class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
|
||||
VOP1<op.VI, outs, ins, asm, []>,
|
||||
SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
|
||||
let AssemblerPredicates = [isVI];
|
||||
let DecoderNamespace = "VI";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
}
|
||||
|
||||
multiclass VOP1_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
|
||||
string asm = opName#p.Asm32> {
|
||||
def "" : VOP1_Pseudo <p.Outs, p.Ins32, pattern, opName>;
|
||||
|
||||
def _si : VOP1_Real_si <opName, op, p.Outs, p.Ins32, asm>;
|
||||
|
||||
def _vi : VOP1_Real_vi <opName, op, p.Outs, p.Ins32, asm>;
|
||||
|
||||
}
|
||||
|
||||
class VOP1_DPP <vop1 op, string opName, VOPProfile p> :
|
||||
VOP1_DPPe <op.VI>,
|
||||
VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
|
||||
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
|
||||
let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.DPP,
|
||||
AMDGPUAsmVariants.Disable);
|
||||
let DecoderNamespace = "DPP";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
let src0_modifiers = !if(p.HasModifiers, ?, 0);
|
||||
let src1_modifiers = 0;
|
||||
}
|
||||
|
||||
class SDWADisableFields <VOPProfile p> {
|
||||
bits<8> src0 = !if(!eq(p.NumSrcArgs, 0), 0, ?);
|
||||
bits<3> src0_sel = !if(!eq(p.NumSrcArgs, 0), 6, ?);
|
||||
bits<2> src0_fmodifiers = !if(!eq(p.NumSrcArgs, 0),
|
||||
0,
|
||||
!if(p.HasModifiers, ?, 0));
|
||||
bits<1> src0_imodifiers = !if(!eq(p.NumSrcArgs, 0),
|
||||
0,
|
||||
!if(p.HasModifiers, 0, ?));
|
||||
bits<3> src1_sel = !if(!eq(p.NumSrcArgs, 0), 6,
|
||||
!if(!eq(p.NumSrcArgs, 1), 6,
|
||||
?));
|
||||
bits<2> src1_fmodifiers = !if(!eq(p.NumSrcArgs, 0), 0,
|
||||
!if(!eq(p.NumSrcArgs, 1), 0,
|
||||
!if(p.HasModifiers, ?, 0)));
|
||||
bits<1> src1_imodifiers = !if(!eq(p.NumSrcArgs, 0), 0,
|
||||
!if(!eq(p.NumSrcArgs, 1), 0,
|
||||
!if(p.HasModifiers, 0, ?)));
|
||||
bits<3> dst_sel = !if(p.HasDst, ?, 6);
|
||||
bits<2> dst_unused = !if(p.HasDst, ?, 2);
|
||||
bits<1> clamp = !if(!eq(p.NumSrcArgs, 0), 0, ?);
|
||||
}
|
||||
|
||||
class VOP1_SDWA <vop1 op, string opName, VOPProfile p> :
|
||||
VOP1_SDWAe <op.VI>,
|
||||
VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
|
||||
SDWADisableFields <p> {
|
||||
let AsmMatchConverter = "cvtSdwaVOP1";
|
||||
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
|
||||
let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA,
|
||||
AMDGPUAsmVariants.Disable);
|
||||
let DecoderNamespace = "SDWA";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
}
|
||||
|
||||
multiclass VOP1SI_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
|
||||
string asm = opName#p.Asm32> {
|
||||
|
||||
def "" : VOP1_Pseudo <p.Outs, p.Ins32, pattern, opName>;
|
||||
|
||||
def _si : VOP1_Real_si <opName, op, p.Outs, p.Ins32, asm>;
|
||||
}
|
||||
|
||||
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
|
||||
VOP2Common <outs, ins, "", pattern>,
|
||||
VOP <opName>,
|
||||
SIMCInstr<opName#"_e32", SIEncodingFamily.NONE>,
|
||||
MnemonicAlias<opName#"_e32", opName> {
|
||||
let isPseudo = 1;
|
||||
let isCodeGenOnly = 1;
|
||||
}
|
||||
|
||||
class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
|
||||
VOP2 <op.SI, outs, ins, opName#asm, []>,
|
||||
SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
|
||||
let AssemblerPredicates = [isSICI];
|
||||
let DecoderNamespace = "SICI";
|
||||
let DisableDecoder = DisableSIDecoder;
|
||||
}
|
||||
|
||||
class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
|
||||
VOP2 <op.VI, outs, ins, opName#asm, []>,
|
||||
SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
|
||||
let AssemblerPredicates = [isVI];
|
||||
let DecoderNamespace = "VI";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
}
|
||||
|
||||
multiclass VOP2SI_m <vop2 op, string opName, VOPProfile p, list<dag> pattern,
|
||||
string revOp> {
|
||||
|
||||
def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
|
||||
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||
|
||||
def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
|
||||
}
|
||||
|
||||
multiclass VOP2_m <vop2 op, string opName, VOPProfile p, list <dag> pattern,
|
||||
string revOp> {
|
||||
|
||||
def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
|
||||
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||
|
||||
def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
|
||||
|
||||
def _vi : VOP2_Real_vi <opName, op, p.Outs32, p.Ins32, p.Asm32>;
|
||||
|
||||
}
|
||||
|
||||
class VOP2_DPP <vop2 op, string opName, VOPProfile p> :
|
||||
VOP2_DPPe <op.VI>,
|
||||
VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
|
||||
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
|
||||
let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.DPP,
|
||||
AMDGPUAsmVariants.Disable);
|
||||
let DecoderNamespace = "DPP";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
let src0_modifiers = !if(p.HasModifiers, ?, 0);
|
||||
let src1_modifiers = !if(p.HasModifiers, ?, 0);
|
||||
}
|
||||
|
||||
class VOP2_SDWA <vop2 op, string opName, VOPProfile p> :
|
||||
VOP2_SDWAe <op.VI>,
|
||||
VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
|
||||
SDWADisableFields <p> {
|
||||
let AsmMatchConverter = "cvtSdwaVOP2";
|
||||
let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
|
||||
let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA,
|
||||
AMDGPUAsmVariants.Disable);
|
||||
let DecoderNamespace = "SDWA";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
}
|
||||
|
||||
class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
|
||||
|
||||
bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
|
||||
bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
|
||||
bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ?, 0), 0);
|
||||
bits<2> omod = !if(HasModifiers, ?, 0);
|
||||
bits<1> clamp = !if(HasModifiers, ?, 0);
|
||||
bits<9> src1 = !if(HasSrc1, ?, 0);
|
||||
bits<9> src2 = !if(HasSrc2, ?, 0);
|
||||
}
|
||||
|
||||
class VOP3DisableModFields <bit HasSrc0Mods,
|
||||
bit HasSrc1Mods = 0,
|
||||
bit HasSrc2Mods = 0,
|
||||
bit HasOutputMods = 0> {
|
||||
bits<2> src0_modifiers = !if(HasSrc0Mods, ?, 0);
|
||||
bits<2> src1_modifiers = !if(HasSrc1Mods, ?, 0);
|
||||
bits<2> src2_modifiers = !if(HasSrc2Mods, ?, 0);
|
||||
bits<2> omod = !if(HasOutputMods, ?, 0);
|
||||
bits<1> clamp = !if(HasOutputMods, ?, 0);
|
||||
}
|
||||
|
||||
class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName,
|
||||
bit HasMods = 0, bit VOP3Only = 0> :
|
||||
VOP3Common <outs, ins, "", pattern, HasMods, VOP3Only>,
|
||||
VOP <opName>,
|
||||
SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
|
||||
MnemonicAlias<opName#"_e64", opName> {
|
||||
let isPseudo = 1;
|
||||
let isCodeGenOnly = 1;
|
||||
|
||||
field bit vdst;
|
||||
field bit src0;
|
||||
}
|
||||
|
||||
class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
|
||||
bit HasMods = 0, bit VOP3Only = 0> :
|
||||
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
|
||||
VOP3e <op>,
|
||||
SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
|
||||
let AssemblerPredicates = [isSICI];
|
||||
let DecoderNamespace = "SICI";
|
||||
let DisableDecoder = DisableSIDecoder;
|
||||
}
|
||||
|
||||
class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
|
||||
bit HasMods = 0, bit VOP3Only = 0> :
|
||||
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
|
||||
VOP3e_vi <op>,
|
||||
SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
|
||||
let AssemblerPredicates = [isVI];
|
||||
let DecoderNamespace = "VI";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
}
|
||||
|
||||
class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
|
||||
bit HasMods = 0, bit VOP3Only = 0> :
|
||||
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
|
||||
VOP3be <op>,
|
||||
SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
|
||||
let AssemblerPredicates = [isSICI];
|
||||
let DecoderNamespace = "SICI";
|
||||
let DisableDecoder = DisableSIDecoder;
|
||||
}
|
||||
|
||||
class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
|
||||
bit HasMods = 0, bit VOP3Only = 0> :
|
||||
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
|
||||
VOP3be_vi <op>,
|
||||
SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
|
||||
let AssemblerPredicates = [isVI];
|
||||
let DecoderNamespace = "VI";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
}
|
||||
|
||||
class VOP3e_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
|
||||
bit HasMods = 0, bit VOP3Only = 0> :
|
||||
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
|
||||
VOP3e <op>,
|
||||
SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
|
||||
let AssemblerPredicates = [isSICI];
|
||||
let DecoderNamespace = "SICI";
|
||||
let DisableDecoder = DisableSIDecoder;
|
||||
}
|
||||
|
||||
class VOP3e_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
|
||||
bit HasMods = 0, bit VOP3Only = 0> :
|
||||
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
|
||||
VOP3e_vi <op>,
|
||||
SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
|
||||
let AssemblerPredicates = [isVI];
|
||||
let DecoderNamespace = "VI";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
}
|
||||
|
||||
multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName, bit HasMods = 1> {
|
||||
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>;
|
||||
|
||||
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
|
||||
VOP3DisableFields<0, 0, HasMods>;
|
||||
|
||||
def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName, HasMods>,
|
||||
VOP3DisableFields<0, 0, HasMods>;
|
||||
}
|
||||
|
||||
multiclass VOP3SI_1_m <vop op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName, bit HasMods = 1> {
|
||||
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>;
|
||||
|
||||
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
|
||||
VOP3DisableFields<0, 0, HasMods>;
|
||||
// No VI instruction. This class is for SI only.
|
||||
}
|
||||
|
||||
multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName, string revOp,
|
||||
bit HasMods = 1> {
|
||||
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
|
||||
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
|
||||
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
|
||||
VOP3DisableFields<1, 0, HasMods>;
|
||||
|
||||
def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName, HasMods>,
|
||||
VOP3DisableFields<1, 0, HasMods>;
|
||||
}
|
||||
|
||||
multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName, string revOp,
|
||||
bit HasMods = 1> {
|
||||
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
|
||||
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
|
||||
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
|
||||
VOP3DisableFields<1, 0, HasMods>;
|
||||
|
||||
// No VI instruction. This class is for SI only.
|
||||
}
|
||||
|
||||
// Two operand VOP3b instruction that may have a 3rd SGPR bool operand
|
||||
// instead of an implicit VCC as in the VOP2b format.
|
||||
multiclass VOP3b_2_3_m <vop op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName, string revOp,
|
||||
bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> {
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods, VOP3Only>;
|
||||
|
||||
def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName, HasMods, VOP3Only>,
|
||||
VOP3DisableFields<1, useSrc2Input, HasMods>;
|
||||
|
||||
def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName, HasMods, VOP3Only>,
|
||||
VOP3DisableFields<1, useSrc2Input, HasMods>;
|
||||
}
|
||||
|
||||
// Same as VOP3b_2_3_m but no 2nd destination (sdst), e.g. v_cndmask_b32.
|
||||
multiclass VOP3e_2_3_m <vop op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName, string revOp,
|
||||
bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> {
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods, VOP3Only>;
|
||||
|
||||
def _si : VOP3e_Real_si <op.SI3, outs, ins, asm, opName, HasMods, VOP3Only>,
|
||||
VOP3DisableFields<1, useSrc2Input, HasMods>;
|
||||
|
||||
def _vi : VOP3e_Real_vi <op.VI3, outs, ins, asm, opName, HasMods, VOP3Only>,
|
||||
VOP3DisableFields<1, useSrc2Input, HasMods>;
|
||||
}
|
||||
|
||||
|
||||
// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers.
|
||||
multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
|
||||
string asm, list<dag> pattern = []> {
|
||||
let isPseudo = 1, isCodeGenOnly = 1 in {
|
||||
def "" : VOPAnyCommon <outs, ins, "", pattern>,
|
||||
SIMCInstr<opName, SIEncodingFamily.NONE>;
|
||||
}
|
||||
|
||||
def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
|
||||
SIMCInstr <opName, SIEncodingFamily.SI> {
|
||||
let AssemblerPredicates = [isSICI];
|
||||
let DecoderNamespace = "SICI";
|
||||
let DisableDecoder = DisableSIDecoder;
|
||||
}
|
||||
|
||||
def _vi : VOP3Common <outs, ins, asm, []>,
|
||||
VOP3e_vi <op.VI3>,
|
||||
VOP3DisableFields <1, 0, 0>,
|
||||
SIMCInstr <opName, SIEncodingFamily.VI> {
|
||||
let AssemblerPredicates = [isVI];
|
||||
let DecoderNamespace = "VI";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass VOP1_Helper <vop1 op, string opName, VOPProfile p, list<dag> pat32,
|
||||
list<dag> pat64> {
|
||||
|
||||
defm _e32 : VOP1_m <op, opName, p, pat32>;
|
||||
|
||||
defm _e64 : VOP3_1_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
|
||||
p.HasModifiers>;
|
||||
|
||||
def _dpp : VOP1_DPP <op, opName, p>;
|
||||
|
||||
def _sdwa : VOP1_SDWA <op, opName, p>;
|
||||
}
|
||||
|
||||
multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag> : VOP1_Helper <
|
||||
op, opName, P, [],
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
|
||||
i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
|
||||
[(set P.DstVT:$vdst, (node P.Src0VT:$src0))])
|
||||
>;
|
||||
|
||||
multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag> {
|
||||
|
||||
defm _e32 : VOP1SI_m <op, opName, P, []>;
|
||||
|
||||
defm _e64 : VOP3SI_1_m <op, P.Outs, P.Ins64, opName#P.Asm64,
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
|
||||
i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
|
||||
[(set P.DstVT:$vdst, (node P.Src0VT:$src0))]),
|
||||
opName, P.HasModifiers>;
|
||||
}
|
||||
|
||||
multiclass VOP2_Helper <vop2 op, string opName, VOPProfile p, list<dag> pat32,
|
||||
list<dag> pat64, string revOp> {
|
||||
|
||||
defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
|
||||
|
||||
defm _e64 : VOP3_2_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
|
||||
revOp, p.HasModifiers>;
|
||||
|
||||
def _dpp : VOP2_DPP <op, opName, p>;
|
||||
|
||||
def _sdwa : VOP2_SDWA <op, opName, p>;
|
||||
}
|
||||
|
||||
multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOp = opName> : VOP2_Helper <
|
||||
op, opName, P, [],
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
|
||||
revOp
|
||||
>;
|
||||
|
||||
multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOp = opName> {
|
||||
|
||||
defm _e32 : VOP2SI_m <op, opName, P, [], revOp>;
|
||||
|
||||
defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
|
||||
opName, revOp, P.HasModifiers>;
|
||||
}
|
||||
|
||||
multiclass VOP2e_Helper <vop2 op, string opName, VOPProfile p,
|
||||
list<dag> pat32, list<dag> pat64,
|
||||
string revOp, bit useSGPRInput> {
|
||||
|
||||
let SchedRW = [Write32Bit] in {
|
||||
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
|
||||
defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
|
||||
}
|
||||
|
||||
defm _e64 : VOP3e_2_3_m <op, p.Outs64, p.Ins64, opName#p.Asm64, pat64,
|
||||
opName, revOp, p.HasModifiers, useSGPRInput>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass VOP2eInst <vop2 op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOp = opName> : VOP2e_Helper <
|
||||
op, opName, P, [],
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
|
||||
revOp, !eq(P.NumSrcArgs, 3)
|
||||
>;
|
||||
|
||||
multiclass VOP2b_Helper <vop2 op, string opName, VOPProfile p,
|
||||
list<dag> pat32, list<dag> pat64,
|
||||
string revOp, bit useSGPRInput> {
|
||||
|
||||
let SchedRW = [Write32Bit, WriteSALU] in {
|
||||
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
|
||||
defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
|
||||
}
|
||||
|
||||
defm _e64 : VOP3b_2_3_m <op, p.Outs64, p.Ins64, opName#p.Asm64, pat64,
|
||||
opName, revOp, p.HasModifiers, useSGPRInput>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOp = opName> : VOP2b_Helper <
|
||||
op, opName, P, [],
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
|
||||
revOp, !eq(P.NumSrcArgs, 3)
|
||||
>;
|
||||
|
||||
// A VOP2 instruction that is VOP3-only on VI.
|
||||
multiclass VOP2_VI3_Helper <vop23 op, string opName, VOPProfile p,
|
||||
list<dag> pat32, list<dag> pat64, string revOp> {
|
||||
|
||||
defm _e32 : VOP2SI_m <op, opName, p, pat32, revOp>;
|
||||
|
||||
defm _e64 : VOP3_2_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
|
||||
revOp, p.HasModifiers>;
|
||||
}
|
||||
|
||||
multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOp = opName>
|
||||
: VOP2_VI3_Helper <
|
||||
op, opName, P, [],
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
|
||||
revOp
|
||||
>;
|
||||
|
||||
multiclass VOP2MADK <vop2 op, string opName, VOPProfile P, list<dag> pattern = []> {
|
||||
|
||||
def "" : VOP2_Pseudo <P.Outs, P.Ins32, pattern, opName>;
|
||||
|
||||
let isCodeGenOnly = 0 in {
|
||||
def _si : VOP2Common <P.Outs, P.Ins32,
|
||||
!strconcat(opName, P.Asm32), []>,
|
||||
SIMCInstr <opName#"_e32", SIEncodingFamily.SI>,
|
||||
VOP2_MADKe <op.SI> {
|
||||
let AssemblerPredicates = [isSICI];
|
||||
let DecoderNamespace = "SICI";
|
||||
let DisableDecoder = DisableSIDecoder;
|
||||
}
|
||||
|
||||
def _vi : VOP2Common <P.Outs, P.Ins32,
|
||||
!strconcat(opName, P.Asm32), []>,
|
||||
SIMCInstr <opName#"_e32", SIEncodingFamily.VI>,
|
||||
VOP2_MADKe <op.VI> {
|
||||
let AssemblerPredicates = [isVI];
|
||||
let DecoderNamespace = "VI";
|
||||
let DisableDecoder = DisableVIDecoder;
|
||||
}
|
||||
} // End isCodeGenOnly = 0
|
||||
}
|
||||
|
||||
class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))),
|
||||
(Inst i32:$src0_modifiers, P.Src0VT:$src0,
|
||||
i32:$src1_modifiers, P.Src1VT:$src1,
|
||||
i32:$src2_modifiers, P.Src2VT:$src2,
|
||||
i1:$clamp,
|
||||
i32:$omod)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Interpolation opcodes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -35,236 +35,6 @@ let SubtargetPredicate = isGCN in {
|
||||
|
||||
defm EXP : EXP_m;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP1 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
|
||||
defm V_NOP : VOP1Inst <vop1<0x0>, "v_nop", VOP_NONE>;
|
||||
}
|
||||
|
||||
let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
||||
defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
|
||||
} // End isMoveImm = 1
|
||||
|
||||
let Uses = [EXEC] in {
|
||||
|
||||
// FIXME: Specify SchedRW for READFIRSTLANE_B32
|
||||
|
||||
def V_READFIRSTLANE_B32 : VOP1 <
|
||||
0x00000002,
|
||||
(outs SReg_32:$vdst),
|
||||
(ins VGPR_32:$src0),
|
||||
"v_readfirstlane_b32 $vdst, $src0",
|
||||
[(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]
|
||||
> {
|
||||
let isConvergent = 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
let SchedRW = [WriteQuarterRate32] in {
|
||||
|
||||
defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "v_cvt_i32_f64",
|
||||
VOP_I32_F64, fp_to_sint
|
||||
>;
|
||||
defm V_CVT_F64_I32 : VOP1Inst <vop1<0x4>, "v_cvt_f64_i32",
|
||||
VOP_F64_I32, sint_to_fp
|
||||
>;
|
||||
defm V_CVT_F32_I32 : VOP1Inst <vop1<0x5>, "v_cvt_f32_i32",
|
||||
VOP_F32_I32, sint_to_fp
|
||||
>;
|
||||
defm V_CVT_F32_U32 : VOP1Inst <vop1<0x6>, "v_cvt_f32_u32",
|
||||
VOP_F32_I32, uint_to_fp
|
||||
>;
|
||||
defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "v_cvt_u32_f32",
|
||||
VOP_I32_F32, fp_to_uint
|
||||
>;
|
||||
defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "v_cvt_i32_f32",
|
||||
VOP_I32_F32, fp_to_sint
|
||||
>;
|
||||
defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32",
|
||||
VOP_I32_F32, fp_to_f16
|
||||
>;
|
||||
defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
|
||||
VOP_F32_I32, f16_to_fp
|
||||
>;
|
||||
defm V_CVT_RPI_I32_F32 : VOP1Inst <vop1<0xc>, "v_cvt_rpi_i32_f32",
|
||||
VOP_I32_F32, cvt_rpi_i32_f32>;
|
||||
defm V_CVT_FLR_I32_F32 : VOP1Inst <vop1<0xd>, "v_cvt_flr_i32_f32",
|
||||
VOP_I32_F32, cvt_flr_i32_f32>;
|
||||
defm V_CVT_OFF_F32_I4 : VOP1Inst <vop1<0x0e>, "v_cvt_off_f32_i4", VOP_F32_I32>;
|
||||
defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
|
||||
VOP_F32_F64, fpround
|
||||
>;
|
||||
defm V_CVT_F64_F32 : VOP1Inst <vop1<0x10>, "v_cvt_f64_f32",
|
||||
VOP_F64_F32, fpextend
|
||||
>;
|
||||
defm V_CVT_F32_UBYTE0 : VOP1Inst <vop1<0x11>, "v_cvt_f32_ubyte0",
|
||||
VOP_F32_I32, AMDGPUcvt_f32_ubyte0
|
||||
>;
|
||||
defm V_CVT_F32_UBYTE1 : VOP1Inst <vop1<0x12>, "v_cvt_f32_ubyte1",
|
||||
VOP_F32_I32, AMDGPUcvt_f32_ubyte1
|
||||
>;
|
||||
defm V_CVT_F32_UBYTE2 : VOP1Inst <vop1<0x13>, "v_cvt_f32_ubyte2",
|
||||
VOP_F32_I32, AMDGPUcvt_f32_ubyte2
|
||||
>;
|
||||
defm V_CVT_F32_UBYTE3 : VOP1Inst <vop1<0x14>, "v_cvt_f32_ubyte3",
|
||||
VOP_F32_I32, AMDGPUcvt_f32_ubyte3
|
||||
>;
|
||||
defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "v_cvt_u32_f64",
|
||||
VOP_I32_F64, fp_to_uint
|
||||
>;
|
||||
defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32",
|
||||
VOP_F64_I32, uint_to_fp
|
||||
>;
|
||||
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32",
|
||||
VOP_F32_F32, AMDGPUfract
|
||||
>;
|
||||
defm V_TRUNC_F32 : VOP1Inst <vop1<0x21, 0x1c>, "v_trunc_f32",
|
||||
VOP_F32_F32, ftrunc
|
||||
>;
|
||||
defm V_CEIL_F32 : VOP1Inst <vop1<0x22, 0x1d>, "v_ceil_f32",
|
||||
VOP_F32_F32, fceil
|
||||
>;
|
||||
defm V_RNDNE_F32 : VOP1Inst <vop1<0x23, 0x1e>, "v_rndne_f32",
|
||||
VOP_F32_F32, frint
|
||||
>;
|
||||
defm V_FLOOR_F32 : VOP1Inst <vop1<0x24, 0x1f>, "v_floor_f32",
|
||||
VOP_F32_F32, ffloor
|
||||
>;
|
||||
defm V_EXP_F32 : VOP1Inst <vop1<0x25, 0x20>, "v_exp_f32",
|
||||
VOP_F32_F32, fexp2
|
||||
>;
|
||||
|
||||
let SchedRW = [WriteQuarterRate32] in {
|
||||
|
||||
defm V_LOG_F32 : VOP1Inst <vop1<0x27, 0x21>, "v_log_f32",
|
||||
VOP_F32_F32, flog2
|
||||
>;
|
||||
defm V_RCP_F32 : VOP1Inst <vop1<0x2a, 0x22>, "v_rcp_f32",
|
||||
VOP_F32_F32, AMDGPUrcp
|
||||
>;
|
||||
defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b, 0x23>, "v_rcp_iflag_f32",
|
||||
VOP_F32_F32
|
||||
>;
|
||||
defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x24>, "v_rsq_f32",
|
||||
VOP_F32_F32, AMDGPUrsq
|
||||
>;
|
||||
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
let SchedRW = [WriteDouble] in {
|
||||
|
||||
defm V_RCP_F64 : VOP1Inst <vop1<0x2f, 0x25>, "v_rcp_f64",
|
||||
VOP_F64_F64, AMDGPUrcp
|
||||
>;
|
||||
defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x26>, "v_rsq_f64",
|
||||
VOP_F64_F64, AMDGPUrsq
|
||||
>;
|
||||
|
||||
} // End SchedRW = [WriteDouble];
|
||||
|
||||
defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32",
|
||||
VOP_F32_F32, fsqrt
|
||||
>;
|
||||
|
||||
let SchedRW = [WriteDouble] in {
|
||||
|
||||
defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64",
|
||||
VOP_F64_F64, fsqrt
|
||||
>;
|
||||
|
||||
} // End SchedRW = [WriteDouble]
|
||||
|
||||
let SchedRW = [WriteQuarterRate32] in {
|
||||
|
||||
defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32",
|
||||
VOP_F32_F32, AMDGPUsin
|
||||
>;
|
||||
defm V_COS_F32 : VOP1Inst <vop1<0x36, 0x2a>, "v_cos_f32",
|
||||
VOP_F32_F32, AMDGPUcos
|
||||
>;
|
||||
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
defm V_NOT_B32 : VOP1Inst <vop1<0x37, 0x2b>, "v_not_b32", VOP_I32_I32>;
|
||||
defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>;
|
||||
defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>;
|
||||
defm V_FFBL_B32 : VOP1Inst <vop1<0x3a, 0x2e>, "v_ffbl_b32", VOP_I32_I32>;
|
||||
defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>;
|
||||
defm V_FREXP_EXP_I32_F64 : VOP1Inst <vop1<0x3c,0x30>, "v_frexp_exp_i32_f64",
|
||||
VOP_I32_F64, int_amdgcn_frexp_exp
|
||||
>;
|
||||
|
||||
let SchedRW = [WriteDoubleAdd] in {
|
||||
defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
|
||||
VOP_F64_F64, int_amdgcn_frexp_mant
|
||||
>;
|
||||
|
||||
defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64",
|
||||
VOP_F64_F64, AMDGPUfract
|
||||
>;
|
||||
} // End SchedRW = [WriteDoubleAdd]
|
||||
|
||||
|
||||
defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
|
||||
VOP_I32_F32, int_amdgcn_frexp_exp
|
||||
>;
|
||||
defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
|
||||
VOP_F32_F32, int_amdgcn_frexp_mant
|
||||
>;
|
||||
let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
|
||||
defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
|
||||
}
|
||||
|
||||
let Uses = [M0, EXEC] in {
|
||||
// v_movreld_b32 is a special case because the destination output
|
||||
// register is really a source. It isn't actually read (but may be
|
||||
// written), and is only to provide the base register to start
|
||||
// indexing from. Tablegen seems to not let you define an implicit
|
||||
// virtual register output for the super register being written into,
|
||||
// so this must have an implicit def of the register added to it.
|
||||
defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_MOVRELD>;
|
||||
defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_VI32_NO_EXT>;
|
||||
defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
|
||||
|
||||
} // End Uses = [M0, EXEC]
|
||||
|
||||
// These instruction only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
|
||||
let SchedRW = [WriteQuarterRate32] in {
|
||||
|
||||
defm V_MOV_FED_B32 : VOP1InstSI <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
|
||||
defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32",
|
||||
VOP_F32_F32, int_amdgcn_log_clamp>;
|
||||
defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
|
||||
defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32",
|
||||
VOP_F32_F32, AMDGPUrcp_legacy>;
|
||||
defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32",
|
||||
VOP_F32_F32, AMDGPUrsq_clamp
|
||||
>;
|
||||
defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32",
|
||||
VOP_F32_F32, AMDGPUrsq_legacy
|
||||
>;
|
||||
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
let SchedRW = [WriteDouble] in {
|
||||
|
||||
defm V_RCP_CLAMP_F64 : VOP1InstSI <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>;
|
||||
defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
|
||||
VOP_F64_F64, AMDGPUrsq_clamp
|
||||
>;
|
||||
|
||||
} // End SchedRW = [WriteDouble]
|
||||
|
||||
} // End SubtargetPredicate = isSICI
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VINTRP Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -316,198 +86,6 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
|
||||
|
||||
} // End Uses = [M0, EXEC]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP2 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm V_CNDMASK_B32 : VOP2eInst <vop2<0x0, 0x0>, "v_cndmask_b32",
|
||||
VOP2e_I32_I32_I32_I1
|
||||
>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
|
||||
VOP_F32_F32_F32, fadd
|
||||
>;
|
||||
|
||||
defm V_SUB_F32 : VOP2Inst <vop2<0x4, 0x2>, "v_sub_f32", VOP_F32_F32_F32, fsub>;
|
||||
defm V_SUBREV_F32 : VOP2Inst <vop2<0x5, 0x3>, "v_subrev_f32",
|
||||
VOP_F32_F32_F32, null_frag, "v_sub_f32"
|
||||
>;
|
||||
} // End isCommutable = 1
|
||||
|
||||
let isCommutable = 1 in {
|
||||
|
||||
defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7, 0x4>, "v_mul_legacy_f32",
|
||||
VOP_F32_F32_F32, AMDGPUfmul_legacy
|
||||
>;
|
||||
|
||||
defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
|
||||
VOP_F32_F32_F32, fmul
|
||||
>;
|
||||
|
||||
defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24",
|
||||
VOP_I32_I32_I32, AMDGPUmul_i24
|
||||
>;
|
||||
|
||||
defm V_MUL_HI_I32_I24 : VOP2Inst <vop2<0xa,0x7>, "v_mul_hi_i32_i24",
|
||||
VOP_I32_I32_I32, AMDGPUmulhi_i24
|
||||
>;
|
||||
|
||||
defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24",
|
||||
VOP_I32_I32_I32, AMDGPUmul_u24
|
||||
>;
|
||||
|
||||
defm V_MUL_HI_U32_U24 : VOP2Inst <vop2<0xc,0x9>, "v_mul_hi_u32_u24",
|
||||
VOP_I32_I32_I32, AMDGPUmulhi_u24
|
||||
>;
|
||||
|
||||
defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
|
||||
fminnum>;
|
||||
defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32,
|
||||
fmaxnum>;
|
||||
defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32>;
|
||||
defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32>;
|
||||
defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32>;
|
||||
defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32>;
|
||||
|
||||
defm V_LSHRREV_B32 : VOP2Inst <
|
||||
vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
|
||||
"v_lshr_b32"
|
||||
>;
|
||||
|
||||
defm V_ASHRREV_I32 : VOP2Inst <
|
||||
vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag,
|
||||
"v_ashr_i32"
|
||||
>;
|
||||
|
||||
defm V_LSHLREV_B32 : VOP2Inst <
|
||||
vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag,
|
||||
"v_lshl_b32"
|
||||
>;
|
||||
|
||||
defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>;
|
||||
defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
|
||||
defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;
|
||||
|
||||
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
|
||||
isConvertibleToThreeAddress = 1 in {
|
||||
defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_MAC>;
|
||||
}
|
||||
} // End isCommutable = 1
|
||||
|
||||
defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32", VOP_MADMK>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32", VOP_MADAK>;
|
||||
} // End isCommutable = 1
|
||||
|
||||
let isCommutable = 1 in {
|
||||
// No patterns so that the scalar instructions are always selected.
|
||||
// The scalar versions will be replaced with vector when needed later.
|
||||
|
||||
// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI,
|
||||
// but the VI instructions behave the same as the SI versions.
|
||||
defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
|
||||
VOP2b_I32_I1_I32_I32
|
||||
>;
|
||||
defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP2b_I32_I1_I32_I32>;
|
||||
|
||||
defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
|
||||
VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
|
||||
>;
|
||||
|
||||
defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
|
||||
VOP2b_I32_I1_I32_I32_I1
|
||||
>;
|
||||
defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
|
||||
VOP2b_I32_I1_I32_I32_I1
|
||||
>;
|
||||
defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
|
||||
VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32"
|
||||
>;
|
||||
|
||||
} // End isCommutable = 1
|
||||
|
||||
// These are special and do not read the exec mask.
|
||||
let isConvergent = 1, Uses = []<Register> in {
|
||||
|
||||
defm V_READLANE_B32 : VOP2SI_3VI_m <
|
||||
vop3 <0x001, 0x289>,
|
||||
"v_readlane_b32",
|
||||
(outs SReg_32:$vdst),
|
||||
(ins VGPR_32:$src0, SCSrc_b32:$src1),
|
||||
"v_readlane_b32 $vdst, $src0, $src1",
|
||||
[(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]
|
||||
>;
|
||||
|
||||
defm V_WRITELANE_B32 : VOP2SI_3VI_m <
|
||||
vop3 <0x002, 0x28a>,
|
||||
"v_writelane_b32",
|
||||
(outs VGPR_32:$vdst),
|
||||
(ins SReg_32:$src0, SCSrc_b32:$src1),
|
||||
"v_writelane_b32 $vdst, $src0, $src1"
|
||||
>;
|
||||
|
||||
} // End isConvergent = 1
|
||||
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_MAC_LEGACY_F32 : VOP2InstSI <vop2<0x6>, "v_mac_legacy_f32",
|
||||
VOP_F32_F32_F32
|
||||
>;
|
||||
} // End isCommutable = 1
|
||||
|
||||
defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
|
||||
VOP_F32_F32_F32, AMDGPUfmin_legacy
|
||||
>;
|
||||
defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
|
||||
VOP_F32_F32_F32, AMDGPUfmax_legacy
|
||||
>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32>;
|
||||
defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32>;
|
||||
defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
|
||||
} // End isCommutable = 1
|
||||
} // End let SubtargetPredicate = SICI
|
||||
|
||||
defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
|
||||
VOP_I32_I32_I32
|
||||
>;
|
||||
defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
|
||||
VOP_I32_I32_I32
|
||||
>;
|
||||
defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
|
||||
VOP_I32_I32_I32, int_amdgcn_mbcnt_lo
|
||||
>;
|
||||
defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
|
||||
VOP_I32_I32_I32, int_amdgcn_mbcnt_hi
|
||||
>;
|
||||
defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
|
||||
VOP_F32_F32_I32, AMDGPUldexp
|
||||
>;
|
||||
|
||||
defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
|
||||
VOP_I32_F32_I32>; // TODO: set "Uses = dst"
|
||||
|
||||
defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst <vop23<0x2d, 0x294>, "v_cvt_pknorm_i16_f32",
|
||||
VOP_I32_F32_F32
|
||||
>;
|
||||
defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst <vop23<0x2e, 0x295>, "v_cvt_pknorm_u16_f32",
|
||||
VOP_I32_F32_F32
|
||||
>;
|
||||
defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
|
||||
VOP_I32_F32_F32, int_SI_packf16
|
||||
>;
|
||||
defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst <vop23<0x30, 0x297>, "v_cvt_pk_u16_u32",
|
||||
VOP_I32_I32_I32
|
||||
>;
|
||||
defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst <vop23<0x31, 0x298>, "v_cvt_pk_i16_i32",
|
||||
VOP_I32_I32_I32
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Pseudo Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -11,163 +11,6 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class VOP3a_vi <bits<10> op> : Enc64 {
|
||||
bits<2> src0_modifiers;
|
||||
bits<9> src0;
|
||||
bits<2> src1_modifiers;
|
||||
bits<9> src1;
|
||||
bits<2> src2_modifiers;
|
||||
bits<9> src2;
|
||||
bits<1> clamp;
|
||||
bits<2> omod;
|
||||
|
||||
let Inst{8} = src0_modifiers{1};
|
||||
let Inst{9} = src1_modifiers{1};
|
||||
let Inst{10} = src2_modifiers{1};
|
||||
let Inst{15} = clamp;
|
||||
let Inst{25-16} = op;
|
||||
let Inst{31-26} = 0x34; //encoding
|
||||
let Inst{40-32} = src0;
|
||||
let Inst{49-41} = src1;
|
||||
let Inst{58-50} = src2;
|
||||
let Inst{60-59} = omod;
|
||||
let Inst{61} = src0_modifiers{0};
|
||||
let Inst{62} = src1_modifiers{0};
|
||||
let Inst{63} = src2_modifiers{0};
|
||||
}
|
||||
|
||||
class VOP3e_vi <bits<10> op> : VOP3a_vi <op> {
|
||||
bits<8> vdst;
|
||||
|
||||
let Inst{7-0} = vdst;
|
||||
}
|
||||
|
||||
class VOP3be_vi <bits<10> op> : Enc64 {
|
||||
bits<8> vdst;
|
||||
bits<2> src0_modifiers;
|
||||
bits<9> src0;
|
||||
bits<2> src1_modifiers;
|
||||
bits<9> src1;
|
||||
bits<2> src2_modifiers;
|
||||
bits<9> src2;
|
||||
bits<7> sdst;
|
||||
bits<2> omod;
|
||||
bits<1> clamp;
|
||||
|
||||
let Inst{7-0} = vdst;
|
||||
let Inst{14-8} = sdst;
|
||||
let Inst{15} = clamp;
|
||||
let Inst{25-16} = op;
|
||||
let Inst{31-26} = 0x34; //encoding
|
||||
let Inst{40-32} = src0;
|
||||
let Inst{49-41} = src1;
|
||||
let Inst{58-50} = src2;
|
||||
let Inst{60-59} = omod;
|
||||
let Inst{61} = src0_modifiers{0};
|
||||
let Inst{62} = src1_modifiers{0};
|
||||
let Inst{63} = src2_modifiers{0};
|
||||
}
|
||||
|
||||
class VOP_DPP <dag outs, dag ins, string asm, list<dag> pattern, bit HasMods = 0> :
|
||||
VOPAnyCommon <outs, ins, asm, pattern> {
|
||||
let DPP = 1;
|
||||
let Size = 8;
|
||||
|
||||
let AsmMatchConverter = !if(!eq(HasMods,1), "cvtDPP", "");
|
||||
let AsmVariantName = AMDGPUAsmVariants.DPP;
|
||||
}
|
||||
|
||||
class VOP_DPPe : Enc64 {
|
||||
bits<2> src0_modifiers;
|
||||
bits<8> src0;
|
||||
bits<2> src1_modifiers;
|
||||
bits<9> dpp_ctrl;
|
||||
bits<1> bound_ctrl;
|
||||
bits<4> bank_mask;
|
||||
bits<4> row_mask;
|
||||
|
||||
let Inst{39-32} = src0;
|
||||
let Inst{48-40} = dpp_ctrl;
|
||||
let Inst{51} = bound_ctrl;
|
||||
let Inst{52} = src0_modifiers{0}; // src0_neg
|
||||
let Inst{53} = src0_modifiers{1}; // src0_abs
|
||||
let Inst{54} = src1_modifiers{0}; // src1_neg
|
||||
let Inst{55} = src1_modifiers{1}; // src1_abs
|
||||
let Inst{59-56} = bank_mask;
|
||||
let Inst{63-60} = row_mask;
|
||||
}
|
||||
|
||||
class VOP1_DPPe <bits<8> op> : VOP_DPPe {
|
||||
bits<8> vdst;
|
||||
|
||||
let Inst{8-0} = 0xfa; // dpp
|
||||
let Inst{16-9} = op;
|
||||
let Inst{24-17} = vdst;
|
||||
let Inst{31-25} = 0x3f; //encoding
|
||||
}
|
||||
|
||||
class VOP2_DPPe <bits<6> op> : VOP_DPPe {
|
||||
bits<8> vdst;
|
||||
bits<8> src1;
|
||||
|
||||
let Inst{8-0} = 0xfa; //dpp
|
||||
let Inst{16-9} = src1;
|
||||
let Inst{24-17} = vdst;
|
||||
let Inst{30-25} = op;
|
||||
let Inst{31} = 0x0; //encoding
|
||||
}
|
||||
|
||||
class VOP_SDWA <dag outs, dag ins, string asm, list<dag> pattern, bit HasMods = 0> :
|
||||
VOPAnyCommon <outs, ins, asm, pattern> {
|
||||
let SDWA = 1;
|
||||
let Size = 8;
|
||||
let AsmVariantName = AMDGPUAsmVariants.SDWA;
|
||||
}
|
||||
|
||||
class VOP_SDWAe : Enc64 {
|
||||
bits<8> src0;
|
||||
bits<3> src0_sel;
|
||||
bits<2> src0_fmodifiers; // {abs,neg}
|
||||
bits<1> src0_imodifiers; // sext
|
||||
bits<3> src1_sel;
|
||||
bits<2> src1_fmodifiers;
|
||||
bits<1> src1_imodifiers;
|
||||
bits<3> dst_sel;
|
||||
bits<2> dst_unused;
|
||||
bits<1> clamp;
|
||||
|
||||
let Inst{39-32} = src0;
|
||||
let Inst{42-40} = dst_sel;
|
||||
let Inst{44-43} = dst_unused;
|
||||
let Inst{45} = clamp;
|
||||
let Inst{50-48} = src0_sel;
|
||||
let Inst{53-52} = src0_fmodifiers;
|
||||
let Inst{51} = src0_imodifiers;
|
||||
let Inst{58-56} = src1_sel;
|
||||
let Inst{61-60} = src1_fmodifiers;
|
||||
let Inst{59} = src1_imodifiers;
|
||||
}
|
||||
|
||||
class VOP1_SDWAe <bits<8> op> : VOP_SDWAe {
|
||||
bits<8> vdst;
|
||||
|
||||
let Inst{8-0} = 0xf9; // sdwa
|
||||
let Inst{16-9} = op;
|
||||
let Inst{24-17} = vdst;
|
||||
let Inst{31-25} = 0x3f; // encoding
|
||||
}
|
||||
|
||||
class VOP2_SDWAe <bits<6> op> : VOP_SDWAe {
|
||||
bits<8> vdst;
|
||||
bits<8> src1;
|
||||
|
||||
let Inst{8-0} = 0xf9; // sdwa
|
||||
let Inst{16-9} = src1;
|
||||
let Inst{24-17} = vdst;
|
||||
let Inst{30-25} = op;
|
||||
let Inst{31} = 0x0; // encoding
|
||||
}
|
||||
|
||||
class EXPe_vi : EXPe {
|
||||
let Inst{31-26} = 0x31; //encoding
|
||||
}
|
||||
|
@ -8,109 +8,3 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction definitions for VI and newer.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI in {
|
||||
|
||||
let DisableSIDecoder = 1 in {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP1 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm V_CVT_F16_U16 : VOP1Inst <vop1<0, 0x39>, "v_cvt_f16_u16", VOP_F16_I16>;
|
||||
defm V_CVT_F16_I16 : VOP1Inst <vop1<0, 0x3a>, "v_cvt_f16_i16", VOP_F16_I16>;
|
||||
defm V_CVT_U16_F16 : VOP1Inst <vop1<0, 0x3b>, "v_cvt_u16_f16", VOP_I16_F16>;
|
||||
defm V_CVT_I16_F16 : VOP1Inst <vop1<0, 0x3c>, "v_cvt_i16_f16", VOP_I16_F16>;
|
||||
defm V_RCP_F16 : VOP1Inst <vop1<0, 0x3d>, "v_rcp_f16", VOP_F16_F16>;
|
||||
defm V_SQRT_F16 : VOP1Inst <vop1<0, 0x3e>, "v_sqrt_f16", VOP_F16_F16>;
|
||||
defm V_RSQ_F16 : VOP1Inst <vop1<0, 0x3f>, "v_rsq_f16", VOP_F16_F16>;
|
||||
defm V_LOG_F16 : VOP1Inst <vop1<0, 0x40>, "v_log_f16", VOP_F16_F16>;
|
||||
defm V_EXP_F16 : VOP1Inst <vop1<0, 0x41>, "v_exp_f16", VOP_F16_F16>;
|
||||
defm V_FREXP_MANT_F16 : VOP1Inst <vop1<0, 0x42>, "v_frexp_mant_f16",
|
||||
VOP_F16_F16
|
||||
>;
|
||||
defm V_FREXP_EXP_I16_F16 : VOP1Inst <vop1<0, 0x43>, "v_frexp_exp_i16_f16",
|
||||
VOP_I16_F16
|
||||
>;
|
||||
defm V_FLOOR_F16 : VOP1Inst <vop1<0, 0x44>, "v_floor_f16", VOP_F16_F16>;
|
||||
defm V_CEIL_F16 : VOP1Inst <vop1<0, 0x45>, "v_ceil_f16", VOP_F16_F16>;
|
||||
defm V_TRUNC_F16 : VOP1Inst <vop1<0, 0x46>, "v_trunc_f16", VOP_F16_F16>;
|
||||
defm V_RNDNE_F16 : VOP1Inst <vop1<0, 0x47>, "v_rndne_f16", VOP_F16_F16>;
|
||||
defm V_FRACT_F16 : VOP1Inst <vop1<0, 0x48>, "v_fract_f16", VOP_F16_F16>;
|
||||
defm V_SIN_F16 : VOP1Inst <vop1<0, 0x49>, "v_sin_f16", VOP_F16_F16>;
|
||||
defm V_COS_F16 : VOP1Inst <vop1<0, 0x4a>, "v_cos_f16", VOP_F16_F16>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP2 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let isCommutable = 1 in {
|
||||
|
||||
defm V_ADD_F16 : VOP2Inst <vop2<0, 0x1f>, "v_add_f16", VOP_F16_F16_F16>;
|
||||
defm V_SUB_F16 : VOP2Inst <vop2<0, 0x20>, "v_sub_f16", VOP_F16_F16_F16>;
|
||||
defm V_SUBREV_F16 : VOP2Inst <vop2<0, 0x21>, "v_subrev_f16", VOP_F16_F16_F16,
|
||||
null_frag, "v_sub_f16"
|
||||
>;
|
||||
defm V_MUL_F16 : VOP2Inst <vop2<0, 0x22>, "v_mul_f16", VOP_F16_F16_F16>;
|
||||
defm V_MAC_F16 : VOP2Inst <vop2<0, 0x23>, "v_mac_f16", VOP_F16_F16_F16>;
|
||||
} // End isCommutable = 1
|
||||
defm V_MADMK_F16 : VOP2MADK <vop2<0,0x24>, "v_madmk_f16", VOP_MADMK>;
|
||||
let isCommutable = 1 in {
|
||||
defm V_MADAK_F16 : VOP2MADK <vop2<0,0x25>, "v_madak_f16", VOP_MADAK>;
|
||||
defm V_ADD_U16 : VOP2Inst <vop2<0,0x26>, "v_add_u16", VOP_I16_I16_I16>;
|
||||
defm V_SUB_U16 : VOP2Inst <vop2<0,0x27>, "v_sub_u16" , VOP_I16_I16_I16>;
|
||||
defm V_SUBREV_U16 : VOP2Inst <vop2<0,0x28>, "v_subrev_u16", VOP_I16_I16_I16>;
|
||||
defm V_MUL_LO_U16 : VOP2Inst <vop2<0,0x29>, "v_mul_lo_u16", VOP_I16_I16_I16>;
|
||||
} // End isCommutable = 1
|
||||
defm V_LSHLREV_B16 : VOP2Inst <vop2<0,0x2a>, "v_lshlrev_b16", VOP_I16_I16_I16>;
|
||||
defm V_LSHRREV_B16 : VOP2Inst <vop2<0,0x2b>, "v_lshrrev_b16", VOP_I16_I16_I16>;
|
||||
defm V_ASHRREV_B16 : VOP2Inst <vop2<0,0x2c>, "v_ashrrev_b16", VOP_I16_I16_I16>;
|
||||
let isCommutable = 1 in {
|
||||
defm V_MAX_F16 : VOP2Inst <vop2<0,0x2d>, "v_max_f16", VOP_F16_F16_F16>;
|
||||
defm V_MIN_F16 : VOP2Inst <vop2<0,0x2e>, "v_min_f16", VOP_F16_F16_F16>;
|
||||
defm V_MAX_U16 : VOP2Inst <vop2<0,0x2f>, "v_max_u16", VOP_I16_I16_I16>;
|
||||
defm V_MAX_I16 : VOP2Inst <vop2<0,0x30>, "v_max_i16", VOP_I16_I16_I16>;
|
||||
defm V_MIN_U16 : VOP2Inst <vop2<0,0x31>, "v_min_u16", VOP_I16_I16_I16>;
|
||||
defm V_MIN_I16 : VOP2Inst <vop2<0,0x32>, "v_min_i16", VOP_I16_I16_I16>;
|
||||
} // End isCommutable = 1
|
||||
defm V_LDEXP_F16 : VOP2Inst <vop2<0,0x33>, "v_ldexp_f16", VOP_F16_F16_I16>;
|
||||
|
||||
} // let DisableSIDecoder = 1
|
||||
|
||||
// Aliases to simplify matching of floating-point instructions that
|
||||
// are VOP2 on SI and VOP3 on VI.
|
||||
|
||||
class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
|
||||
name#" $dst, $src0, $src1",
|
||||
(inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0)
|
||||
>, PredicateControl {
|
||||
let UseInstAsmMatchConverter = 0;
|
||||
let AsmVariantName = AMDGPUAsmVariants.VOP3;
|
||||
}
|
||||
|
||||
def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
|
||||
def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
|
||||
def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
|
||||
def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
|
||||
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
|
||||
|
||||
} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
|
||||
|
||||
let Predicates = [isVI] in {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// DPP Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : Pat <
|
||||
(int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
|
||||
imm:$bound_ctrl),
|
||||
(V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),
|
||||
(as_i32imm $bank_mask), (as_i1imm $bound_ctrl))
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Misc Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
} // End Predicates = [isVI]
|
||||
|
lib/Target/AMDGPU/VOP1Instructions.td (new file, 539 lines)
@ -0,0 +1,539 @@
|
||||
//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP1 Classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
|
||||
bits<8> vdst;
|
||||
bits<9> src0;
|
||||
|
||||
let Inst{8-0} = !if(P.HasSrc0, src0{8-0}, 0);
|
||||
let Inst{16-9} = op;
|
||||
let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
|
||||
let Inst{31-25} = 0x3f; //encoding
|
||||
}
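
// Worked example (illustration only, assuming the usual SI/VI VOP1 layout
// described by the fields above): for "v_mov_b32_e32 v1, v0" the fields are
// src0 = 0x100 (VGPR0), op = 0x1, vdst = 1, giving the 32-bit word
//   (0x3f << 25) | (1 << 17) | (1 << 9) | 0x100 = 0x7e020300.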
|
||||
|
||||
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
|
||||
InstSI <P.Outs32, P.Ins32, "", pattern>,
|
||||
VOP <opName>,
|
||||
SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
|
||||
MnemonicAlias<opName#"_e32", opName> {
|
||||
|
||||
let isPseudo = 1;
|
||||
let isCodeGenOnly = 1;
|
||||
let UseNamedOperandTable = 1;
|
||||
|
||||
string Mnemonic = opName;
|
||||
string AsmOperands = P.Asm32;
|
||||
|
||||
let Size = 4;
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
let hasSideEffects = 0;
|
||||
let SubtargetPredicate = isGCN;
|
||||
|
||||
let VOP1 = 1;
|
||||
let VALU = 1;
|
||||
let Uses = [EXEC];
|
||||
|
||||
let AsmVariantName = AMDGPUAsmVariants.Default;
|
||||
|
||||
VOPProfile Pfl = P;
|
||||
}
|
||||
|
||||
class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
|
||||
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
|
||||
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
|
||||
|
||||
let isPseudo = 0;
|
||||
let isCodeGenOnly = 0;
|
||||
|
||||
// copy relevant pseudo op flags
|
||||
let SubtargetPredicate = ps.SubtargetPredicate;
|
||||
let AsmMatchConverter = ps.AsmMatchConverter;
|
||||
let AsmVariantName = ps.AsmVariantName;
|
||||
let Constraints = ps.Constraints;
|
||||
let DisableEncoding = ps.DisableEncoding;
|
||||
let TSFlags = ps.TSFlags;
|
||||
}

class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret = !if(P.HasModifiers,
      [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
                                            i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
      [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]);
}

multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag> {
  def _e32 : VOP1_Pseudo <opName, P>;
  def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
}
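
// Illustrative note (not part of the original patch): one VOP1Inst defm
// produces both encodings of an opcode. For example, a hypothetical
//   defm V_EXAMPLE_F32 : VOP1Inst <"v_example_f32", VOP_F32_F32, fexp2>;
// would create V_EXAMPLE_F32_e32 (a VOP1_Pseudo with no pattern) and
// V_EXAMPLE_F32_e64 (a VOP3_Pseudo carrying the getVOP1Pat64 selection
// pattern, including source modifiers when the profile has them).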
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP1 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let VOPAsmPrefer32Bit = 1 in {
|
||||
defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
|
||||
}
|
||||
|
||||
let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
||||
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
|
||||
} // End isMoveImm = 1
|
||||
|
||||
// FIXME: Specify SchedRW for READFIRSTLANE_B32
|
||||
// TODO: Make profile for this, there is VOP3 encoding also
|
||||
def V_READFIRSTLANE_B32 :
|
||||
InstSI <(outs SReg_32:$vdst),
|
||||
(ins VGPR_32:$src0),
|
||||
"v_readfirstlane_b32 $vdst, $src0",
|
||||
[(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>,
|
||||
Enc32 {
|
||||
|
||||
let isCodeGenOnly = 0;
|
||||
let UseNamedOperandTable = 1;
|
||||
|
||||
let Size = 4;
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
let hasSideEffects = 0;
|
||||
let SubtargetPredicate = isGCN;
|
||||
|
||||
let VOP1 = 1;
|
||||
let VALU = 1;
|
||||
let Uses = [EXEC];
|
||||
let isConvergent = 1;
|
||||
|
||||
bits<8> vdst;
|
||||
bits<9> src0;
|
||||
|
||||
let Inst{8-0} = src0;
|
||||
let Inst{16-9} = 0x2;
|
||||
let Inst{24-17} = vdst;
|
||||
let Inst{31-25} = 0x3f; //encoding
|
||||
}
|
||||
|
||||
let SchedRW = [WriteQuarterRate32] in {
|
||||
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
|
||||
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>;
|
||||
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
|
||||
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
|
||||
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
|
||||
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
|
||||
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>;
|
||||
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>;
|
||||
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
|
||||
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
|
||||
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
|
||||
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
|
||||
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
|
||||
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>;
|
||||
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>;
|
||||
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>;
|
||||
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>;
|
||||
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
|
||||
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>;
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
|
||||
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
|
||||
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
|
||||
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
|
||||
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;
|
||||
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
|
||||
|
||||
let SchedRW = [WriteQuarterRate32] in {
|
||||
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
|
||||
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
|
||||
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32>;
|
||||
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
let SchedRW = [WriteDouble] in {
|
||||
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
|
||||
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
|
||||
} // End SchedRW = [WriteDouble];
|
||||
|
||||
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>;
|
||||
|
||||
let SchedRW = [WriteDouble] in {
|
||||
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>;
|
||||
} // End SchedRW = [WriteDouble]
|
||||
|
||||
let SchedRW = [WriteQuarterRate32] in {
|
||||
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
|
||||
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
|
||||
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>;
|
||||
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>;
|
||||
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>;
|
||||
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>;
|
||||
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
|
||||
|
||||
let SchedRW = [WriteDoubleAdd] in {
|
||||
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
|
||||
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
|
||||
} // End SchedRW = [WriteDoubleAdd]
|
||||
|
||||
defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
|
||||
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;
|
||||
|
||||
let VOPAsmPrefer32Bit = 1 in {
|
||||
defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
|
||||
}

// Restrict src0 to be VGPR
def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;

  let HasExt = 0;
}

// Special case because there are no true output operands. Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
  let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);

  let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_modifiers, VCSrc_b32:$src0,
                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);

  let Asm32 = getAsm32<1, 1>.ret;
  let Asm64 = getAsm64<1, 1, 0>.ret;
  let AsmDPP = getAsmDPP<1, 1, 0>.ret;
  let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;

  let HasExt = 0;
  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

let Uses = [M0, EXEC] in {
// v_movreld_b32 is a special case because the destination output
// register is really a source. It isn't actually read (but may be
// written), and is only to provide the base register to start
// indexing from. Tablegen seems to not let you define an implicit
// virtual register output for the super register being written into,
// so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
} // End Uses = [M0, EXEC]
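
// Usage sketch (illustration only): the relative-addressing moves index the
// register file through M0, which is why M0 appears in Uses above, e.g.
//   s_mov_b32 m0, 3
//   v_movreld_b32 v0, v7      ; writes v[0 + M0], i.e. v3, from v7
//   v_movrels_b32 v8, v0      ; reads v[0 + M0], i.e. v3, into v8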
|
||||
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
|
||||
let SchedRW = [WriteQuarterRate32] in {
|
||||
defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
|
||||
defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
|
||||
defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
|
||||
defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
|
||||
defm V_RSQ_CLAMP_F32 : VOP1Inst <"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
|
||||
defm V_RSQ_LEGACY_F32 : VOP1Inst <"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
let SchedRW = [WriteDouble] in {
|
||||
defm V_RCP_CLAMP_F64 : VOP1Inst <"v_rcp_clamp_f64", VOP_F64_F64>;
|
||||
defm V_RSQ_CLAMP_F64 : VOP1Inst <"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
|
||||
} // End SchedRW = [WriteDouble]
|
||||
|
||||
} // End SubtargetPredicate = isSICI
|
||||
|
||||
|
||||
let SubtargetPredicate = isCIVI in {
|
||||
|
||||
let SchedRW = [WriteDoubleAdd] in {
|
||||
defm V_TRUNC_F64 : VOP1Inst <"v_trunc_f64", VOP_F64_F64, ftrunc>;
|
||||
defm V_CEIL_F64 : VOP1Inst <"v_ceil_f64", VOP_F64_F64, fceil>;
|
||||
defm V_FLOOR_F64 : VOP1Inst <"v_floor_f64", VOP_F64_F64, ffloor>;
|
||||
defm V_RNDNE_F64 : VOP1Inst <"v_rndne_f64", VOP_F64_F64, frint>;
|
||||
} // End SchedRW = [WriteDoubleAdd]
|
||||
|
||||
let SchedRW = [WriteQuarterRate32] in {
|
||||
defm V_LOG_LEGACY_F32 : VOP1Inst <"v_log_legacy_f32", VOP_F32_F32>;
|
||||
defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
} // End SubtargetPredicate = isCIVI
|
||||
|
||||
|
||||
let SubtargetPredicate = isVI in {
|
||||
|
||||
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16>;
|
||||
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16>;
|
||||
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16>;
|
||||
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16>;
|
||||
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16>;
|
||||
defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16>;
|
||||
defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16>;
|
||||
defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16>;
|
||||
defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16>;
|
||||
defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16>;
|
||||
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16>;
|
||||
defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16>;
|
||||
defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16>;
|
||||
defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16>;
|
||||
defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16>;
|
||||
defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16>;
|
||||
defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16>;
|
||||
defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16>;
|
||||
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Target
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SI
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass VOP1_Real_si <bits<9> op> {
|
||||
let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
|
||||
def _e32_si :
|
||||
VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
|
||||
VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
|
||||
def _e64_si :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
|
||||
VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
|
||||
}
|
||||
}
|
||||
|
||||
defm V_NOP : VOP1_Real_si <0x0>;
|
||||
defm V_MOV_B32 : VOP1_Real_si <0x1>;
|
||||
defm V_CVT_I32_F64 : VOP1_Real_si <0x3>;
|
||||
defm V_CVT_F64_I32 : VOP1_Real_si <0x4>;
|
||||
defm V_CVT_F32_I32 : VOP1_Real_si <0x5>;
|
||||
defm V_CVT_F32_U32 : VOP1_Real_si <0x6>;
|
||||
defm V_CVT_U32_F32 : VOP1_Real_si <0x7>;
|
||||
defm V_CVT_I32_F32 : VOP1_Real_si <0x8>;
|
||||
defm V_MOV_FED_B32 : VOP1_Real_si <0x9>;
|
||||
defm V_CVT_F16_F32 : VOP1_Real_si <0xa>;
|
||||
defm V_CVT_F32_F16 : VOP1_Real_si <0xb>;
|
||||
defm V_CVT_RPI_I32_F32 : VOP1_Real_si <0xc>;
|
||||
defm V_CVT_FLR_I32_F32 : VOP1_Real_si <0xd>;
|
||||
defm V_CVT_OFF_F32_I4 : VOP1_Real_si <0xe>;
|
||||
defm V_CVT_F32_F64 : VOP1_Real_si <0xf>;
|
||||
defm V_CVT_F64_F32 : VOP1_Real_si <0x10>;
|
||||
defm V_CVT_F32_UBYTE0 : VOP1_Real_si <0x11>;
|
||||
defm V_CVT_F32_UBYTE1 : VOP1_Real_si <0x12>;
|
||||
defm V_CVT_F32_UBYTE2 : VOP1_Real_si <0x13>;
|
||||
defm V_CVT_F32_UBYTE3 : VOP1_Real_si <0x14>;
|
||||
defm V_CVT_U32_F64 : VOP1_Real_si <0x15>;
|
||||
defm V_CVT_F64_U32 : VOP1_Real_si <0x16>;
|
||||
defm V_FRACT_F32 : VOP1_Real_si <0x20>;
|
||||
defm V_TRUNC_F32 : VOP1_Real_si <0x21>;
|
||||
defm V_CEIL_F32 : VOP1_Real_si <0x22>;
|
||||
defm V_RNDNE_F32 : VOP1_Real_si <0x23>;
|
||||
defm V_FLOOR_F32 : VOP1_Real_si <0x24>;
|
||||
defm V_EXP_F32 : VOP1_Real_si <0x25>;
|
||||
defm V_LOG_CLAMP_F32 : VOP1_Real_si <0x26>;
|
||||
defm V_LOG_F32 : VOP1_Real_si <0x27>;
|
||||
defm V_RCP_CLAMP_F32 : VOP1_Real_si <0x28>;
|
||||
defm V_RCP_LEGACY_F32 : VOP1_Real_si <0x29>;
|
||||
defm V_RCP_F32 : VOP1_Real_si <0x2a>;
|
||||
defm V_RCP_IFLAG_F32 : VOP1_Real_si <0x2b>;
|
||||
defm V_RSQ_CLAMP_F32 : VOP1_Real_si <0x2c>;
|
||||
defm V_RSQ_LEGACY_F32 : VOP1_Real_si <0x2d>;
|
||||
defm V_RSQ_F32 : VOP1_Real_si <0x2e>;
|
||||
defm V_RCP_F64 : VOP1_Real_si <0x2f>;
|
||||
defm V_RCP_CLAMP_F64 : VOP1_Real_si <0x30>;
|
||||
defm V_RSQ_F64 : VOP1_Real_si <0x31>;
|
||||
defm V_RSQ_CLAMP_F64 : VOP1_Real_si <0x32>;
|
||||
defm V_SQRT_F32 : VOP1_Real_si <0x33>;
|
||||
defm V_SQRT_F64 : VOP1_Real_si <0x34>;
|
||||
defm V_SIN_F32 : VOP1_Real_si <0x35>;
|
||||
defm V_COS_F32 : VOP1_Real_si <0x36>;
|
||||
defm V_NOT_B32 : VOP1_Real_si <0x37>;
|
||||
defm V_BFREV_B32 : VOP1_Real_si <0x38>;
|
||||
defm V_FFBH_U32 : VOP1_Real_si <0x39>;
|
||||
defm V_FFBL_B32 : VOP1_Real_si <0x3a>;
|
||||
defm V_FFBH_I32 : VOP1_Real_si <0x3b>;
|
||||
defm V_FREXP_EXP_I32_F64 : VOP1_Real_si <0x3c>;
|
||||
defm V_FREXP_MANT_F64 : VOP1_Real_si <0x3d>;
|
||||
defm V_FRACT_F64 : VOP1_Real_si <0x3e>;
|
||||
defm V_FREXP_EXP_I32_F32 : VOP1_Real_si <0x3f>;
|
||||
defm V_FREXP_MANT_F32 : VOP1_Real_si <0x40>;
|
||||
defm V_CLREXCP : VOP1_Real_si <0x41>;
|
||||
defm V_MOVRELD_B32 : VOP1_Real_si <0x42>;
|
||||
defm V_MOVRELS_B32 : VOP1_Real_si <0x43>;
|
||||
defm V_MOVRELSD_B32 : VOP1_Real_si <0x44>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CI
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass VOP1_Real_ci <bits<9> op> {
|
||||
let AssemblerPredicates = [isCIOnly], DecoderNamespace = "CI" in {
|
||||
def _e32_ci :
|
||||
VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
|
||||
VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
|
||||
def _e64_ci :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
|
||||
VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
|
||||
}
|
||||
}
|
||||
|
||||
defm V_TRUNC_F64 : VOP1_Real_ci <0x17>;
|
||||
defm V_CEIL_F64 : VOP1_Real_ci <0x18>;
|
||||
defm V_FLOOR_F64 : VOP1_Real_ci <0x1A>;
|
||||
defm V_RNDNE_F64 : VOP1_Real_ci <0x19>;
|
||||
defm V_LOG_LEGACY_F32 : VOP1_Real_ci <0x45>;
|
||||
defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VI
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class VOP1_SDWA <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
|
||||
VOP_SDWA <ps.OpName, P> {
|
||||
let Defs = ps.Defs;
|
||||
let Uses = ps.Uses;
|
||||
let SchedRW = ps.SchedRW;
|
||||
let hasSideEffects = ps.hasSideEffects;
|
||||
let AsmMatchConverter = "cvtSdwaVOP1";
|
||||
|
||||
bits<8> vdst;
|
||||
let Inst{8-0} = 0xf9; // sdwa
|
||||
let Inst{16-9} = op;
|
||||
let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
|
||||
let Inst{31-25} = 0x3f; // encoding
|
||||
}
|
||||
|
||||
class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
|
||||
VOP_DPP <ps.OpName, P> {
|
||||
let Defs = ps.Defs;
|
||||
let Uses = ps.Uses;
|
||||
let SchedRW = ps.SchedRW;
|
||||
let hasSideEffects = ps.hasSideEffects;
|
||||
|
||||
bits<8> vdst;
|
||||
let Inst{8-0} = 0xfa; // dpp
|
||||
let Inst{16-9} = op;
|
||||
let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
|
||||
let Inst{31-25} = 0x3f; //encoding
|
||||
}

multiclass VOP1_Real_vi <bits<10> op> {
  let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
    def _e32_vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_vi :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
      VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }

  // For now the SDWA/DPP variants are provided only for asm/disasm.
  // TODO: add corresponding pseudo
  def _sdwa : VOP1_SDWA<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
  def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
}
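
// Expansion sketch (illustration only): with this multiclass a line such as
//   defm V_MOV_B32 : VOP1_Real_vi <0x1>;
// emits V_MOV_B32_e32_vi (32-bit VOP1 encoding, opcode 0x1),
// V_MOV_B32_e64_vi (VOP3 encoding, opcode 0x141 = 0x140 + 0x1), and the
// _sdwa/_dpp variants used only by the assembler and disassembler.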
|
||||
|
||||
defm V_NOP : VOP1_Real_vi <0x0>;
|
||||
defm V_MOV_B32 : VOP1_Real_vi <0x1>;
|
||||
defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>;
|
||||
defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>;
|
||||
defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
|
||||
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
|
||||
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
|
||||
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
|
||||
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
|
||||
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
|
||||
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
|
||||
defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>;
|
||||
defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>;
|
||||
defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>;
|
||||
defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>;
|
||||
defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>;
|
||||
defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>;
|
||||
defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>;
|
||||
defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>;
|
||||
defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>;
|
||||
defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>;
|
||||
defm V_FRACT_F32 : VOP1_Real_vi <0x1b>;
|
||||
defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>;
|
||||
defm V_CEIL_F32 : VOP1_Real_vi <0x1d>;
|
||||
defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>;
|
||||
defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>;
|
||||
defm V_EXP_F32 : VOP1_Real_vi <0x20>;
|
||||
defm V_LOG_F32 : VOP1_Real_vi <0x21>;
|
||||
defm V_RCP_F32 : VOP1_Real_vi <0x22>;
|
||||
defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>;
|
||||
defm V_RSQ_F32 : VOP1_Real_vi <0x24>;
|
||||
defm V_RCP_F64 : VOP1_Real_vi <0x25>;
|
||||
defm V_RSQ_F64 : VOP1_Real_vi <0x26>;
|
||||
defm V_SQRT_F32 : VOP1_Real_vi <0x27>;
|
||||
defm V_SQRT_F64 : VOP1_Real_vi <0x28>;
|
||||
defm V_SIN_F32 : VOP1_Real_vi <0x29>;
|
||||
defm V_COS_F32 : VOP1_Real_vi <0x2a>;
|
||||
defm V_NOT_B32 : VOP1_Real_vi <0x2b>;
|
||||
defm V_BFREV_B32 : VOP1_Real_vi <0x2c>;
|
||||
defm V_FFBH_U32 : VOP1_Real_vi <0x2d>;
|
||||
defm V_FFBL_B32 : VOP1_Real_vi <0x2e>;
|
||||
defm V_FFBH_I32 : VOP1_Real_vi <0x2f>;
|
||||
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
|
||||
defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>;
|
||||
defm V_FRACT_F64 : VOP1_Real_vi <0x32>;
|
||||
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
|
||||
defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>;
|
||||
defm V_CLREXCP : VOP1_Real_vi <0x35>;
|
||||
defm V_MOVRELD_B32 : VOP1_Real_vi <0x36>;
|
||||
defm V_MOVRELS_B32 : VOP1_Real_vi <0x37>;
|
||||
defm V_MOVRELSD_B32 : VOP1_Real_vi <0x38>;
|
||||
defm V_TRUNC_F64 : VOP1_Real_vi <0x17>;
|
||||
defm V_CEIL_F64 : VOP1_Real_vi <0x18>;
|
||||
defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>;
|
||||
defm V_RNDNE_F64 : VOP1_Real_vi <0x19>;
|
||||
defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>;
|
||||
defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>;
|
||||
defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>;
|
||||
defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>;
|
||||
defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>;
|
||||
defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>;
|
||||
defm V_RCP_F16 : VOP1_Real_vi <0x3d>;
|
||||
defm V_SQRT_F16 : VOP1_Real_vi <0x3e>;
|
||||
defm V_RSQ_F16 : VOP1_Real_vi <0x3f>;
|
||||
defm V_LOG_F16 : VOP1_Real_vi <0x40>;
|
||||
defm V_EXP_F16 : VOP1_Real_vi <0x41>;
|
||||
defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>;
|
||||
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
|
||||
defm V_FLOOR_F16 : VOP1_Real_vi <0x44>;
|
||||
defm V_CEIL_F16 : VOP1_Real_vi <0x45>;
|
||||
defm V_TRUNC_F16 : VOP1_Real_vi <0x46>;
|
||||
defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
|
||||
defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
|
||||
defm V_SIN_F16 : VOP1_Real_vi <0x49>;
|
||||
defm V_COS_F16 : VOP1_Real_vi <0x4a>;
|
||||
|
||||
let Predicates = [isVI] in {
|
||||
|
||||
def : Pat <
|
||||
(int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
|
||||
imm:$bound_ctrl),
|
||||
(V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),
|
||||
(as_i32imm $bank_mask), (as_i1imm $bound_ctrl))
|
||||
>;
|
||||
|
||||
} // End Predicates = [isVI]
|
lib/Target/AMDGPU/VOP2Instructions.td (new file, 608 lines)
@ -0,0 +1,608 @@
|
||||
//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP2 Classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
|
||||
bits<8> vdst;
|
||||
bits<9> src0;
|
||||
bits<8> src1;
|
||||
|
||||
let Inst{8-0} = !if(P.HasSrc0, src0, 0);
|
||||
let Inst{16-9} = !if(P.HasSrc1, src1, 0);
|
||||
let Inst{24-17} = !if(P.EmitDst, vdst, 0);
|
||||
let Inst{30-25} = op;
|
||||
let Inst{31} = 0x0; //encoding
|
||||
}
|
||||
|
||||
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
|
||||
bits<8> vdst;
|
||||
bits<9> src0;
|
||||
bits<8> src1;
|
||||
bits<32> imm;
|
||||
|
||||
let Inst{8-0} = !if(P.HasSrc0, src0, 0);
|
||||
let Inst{16-9} = !if(P.HasSrc1, src1, 0);
|
||||
let Inst{24-17} = !if(P.EmitDst, vdst, 0);
|
||||
let Inst{30-25} = op;
|
||||
let Inst{31} = 0x0; // encoding
|
||||
let Inst{63-32} = imm;
|
||||
}
|
||||
|
||||
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
|
||||
InstSI <P.Outs32, P.Ins32, "", pattern>,
|
||||
VOP <opName>,
|
||||
SIMCInstr <opName#suffix, SIEncodingFamily.NONE>,
|
||||
MnemonicAlias<opName#suffix, opName> {
|
||||
|
||||
let isPseudo = 1;
|
||||
let isCodeGenOnly = 1;
|
||||
let UseNamedOperandTable = 1;
|
||||
|
||||
string Mnemonic = opName;
|
||||
string AsmOperands = P.Asm32;
|
||||
|
||||
let Size = 4;
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
let hasSideEffects = 0;
|
||||
let SubtargetPredicate = isGCN;
|
||||
|
||||
let VOP2 = 1;
|
||||
let VALU = 1;
|
||||
let Uses = [EXEC];
|
||||
|
||||
let AsmVariantName = AMDGPUAsmVariants.Default;
|
||||
|
||||
VOPProfile Pfl = P;
|
||||
}
|
||||
|
||||
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
|
||||
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
|
||||
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
|
||||
|
||||
let isPseudo = 0;
|
||||
let isCodeGenOnly = 0;
|
||||
|
||||
// copy relevant pseudo op flags
|
||||
let SubtargetPredicate = ps.SubtargetPredicate;
|
||||
let AsmMatchConverter = ps.AsmMatchConverter;
|
||||
let AsmVariantName = ps.AsmVariantName;
|
||||
let Constraints = ps.Constraints;
|
||||
let DisableEncoding = ps.DisableEncoding;
|
||||
let TSFlags = ps.TSFlags;
|
||||
}
|
||||
|
||||
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
|
||||
list<dag> ret = !if(P.HasModifiers,
|
||||
[(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
|
||||
}

multiclass VOP2Inst <string opName,
                     VOPProfile P,
                     SDPatternOperator node = null_frag,
                     string revOp = opName> {

  def _e32 : VOP2_Pseudo <opName, P>,
             Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;

  def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
             Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
}
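
// Note (illustration only): the revOp parameter ties an instruction to its
// reversed-operand form through Commutable_REV. For example,
//   defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
// marks v_subrev_f32 as the operand-swapped counterpart of v_sub_f32, so one
// can stand in for the other when operands are commuted.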
|
||||
|
||||
multiclass VOP2bInst <string opName,
|
||||
VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOp = opName,
|
||||
bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
|
||||
|
||||
let SchedRW = [Write32Bit, WriteSALU] in {
|
||||
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
|
||||
def _e32 : VOP2_Pseudo <opName, P>,
|
||||
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||
}
|
||||
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
|
||||
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass VOP2eInst <string opName,
|
||||
VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOp = opName,
|
||||
bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
|
||||
|
||||
let SchedRW = [Write32Bit] in {
|
||||
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
|
||||
def _e32 : VOP2_Pseudo <opName, P>,
|
||||
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||
}
|
||||
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
|
||||
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
}
|
||||
}
|
||||
|
||||
def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
|
||||
field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, f32kimm:$imm);
|
||||
field string Asm32 = "$vdst, $src0, $src1, $imm";
|
||||
field bit HasExt = 0;
|
||||
}
|
||||
|
||||
def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
|
||||
field dag Ins32 = (ins VCSrc_f32:$src0, f32kimm:$imm, VGPR_32:$src1);
|
||||
field string Asm32 = "$vdst, $src0, $imm, $src1";
|
||||
field bit HasExt = 0;
|
||||
}
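
// Semantics sketch (illustration only, following the operand layouts above):
//   v_madmk_f32 v0, v1, 0x40400000, v2   ; v0 = v1 * 3.0 + v2 (K takes the src1 slot)
//   v_madak_f32 v0, v1, v2, 0x40400000   ; v0 = v1 * v2 + 3.0 (K takes the src2 slot)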
|
||||
|
||||
def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
|
||||
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
|
||||
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
|
||||
HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
|
||||
let InsDPP = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0,
|
||||
FP32InputMods:$src1_modifiers, Src1RC32:$src1,
|
||||
VGPR_32:$src2, // stub argument
|
||||
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
|
||||
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
|
||||
let InsSDWA = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0,
|
||||
FP32InputMods:$src1_modifiers, Src1RC32:$src1,
|
||||
VGPR_32:$src2, // stub argument
|
||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel, src1_sel:$src1_sel);
|
||||
let Asm32 = getAsm32<1, 2, f32>.ret;
|
||||
let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
|
||||
let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret;
|
||||
let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
|
||||
let HasSrc2 = 0;
|
||||
let HasSrc2Mods = 0;
|
||||
}
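
// Note (illustration only): VOP_MAC keeps a VGPR "stub" src2 so that
//   v_mac_f32 v0, v1, v2   ; v0 = v1 * v2 + v0
// can tie the accumulator to the destination (see the "$vdst = $src2"
// constraint where V_MAC_F32 is defined below).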
|
||||
|
||||
// Write out to vcc or arbitrary SGPR.
|
||||
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
|
||||
let Asm32 = "$vdst, vcc, $src0, $src1";
|
||||
let Asm64 = "$vdst, $sdst, $src0, $src1";
|
||||
let Outs32 = (outs DstRC:$vdst);
|
||||
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
|
||||
}
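
// Asm sketch (illustration only): the two encodings differ in where the
// carry-out goes, matching Asm32/Asm64 above:
//   v_add_i32_e32 v0, vcc, v1, v2        ; carry-out is implicitly VCC
//   v_add_i32_e64 v0, s[4:5], v1, v2     ; carry-out in an arbitrary SGPR pair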

// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
  // We use VCSrc_b32 to exclude literal constants, even though the
  // encoding normally allows them since the implicit VCC use means
  // using one would always violate the constant bus
  // restriction. SGPRs are still allowed because it should
  // technically be possible to use VCC again as src0.
  let Src0RC32 = VCSrc_b32;
  let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
  let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
}

// Read in from vcc or arbitrary SGPR
def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
  let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
  let Asm32 = "$vdst, $src0, $src1, vcc";
  let Asm64 = "$vdst, $src0, $src1, $src2";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
}
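
// Constant-bus example (illustration only): with the implicit VCC read,
//   v_addc_u32_e32 v0, vcc, 0x12345678, v1, vcc
// would need both a literal and VCC on the constant bus in one instruction,
// which the hardware forbids; VCSrc_b32 rules the literal form out up front.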
|
||||
|
||||
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
|
||||
let Outs32 = (outs SReg_32:$vdst);
|
||||
let Outs64 = Outs32;
|
||||
let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1);
|
||||
let Ins64 = Ins32;
|
||||
let Asm32 = " $vdst, $src0, $src1";
|
||||
let Asm64 = Asm32;
|
||||
}
|
||||
|
||||
def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> {
|
||||
let Outs32 = (outs VGPR_32:$vdst);
|
||||
let Outs64 = Outs32;
|
||||
let Ins32 = (ins SReg_32:$src0, SCSrc_b32:$src1);
|
||||
let Ins64 = Ins32;
|
||||
let Asm32 = " $vdst, $src0, $src1";
|
||||
let Asm64 = Asm32;
|
||||
}
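
// Usage sketch (illustration only): readlane/writelane move data between a
// single lane of a VGPR and an SGPR, which is why the profiles above mix
// SReg_32 and VGPR_32 operands:
//   v_readlane_b32 s4, v0, 5    ; s4 = lane 5 of v0
//   v_writelane_b32 v0, s4, 5   ; lane 5 of v0 = s4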
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP2 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let SubtargetPredicate = isGCN in {
|
||||
|
||||
defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
|
||||
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
|
||||
defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
|
||||
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
|
||||
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
|
||||
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
|
||||
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
|
||||
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
|
||||
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
|
||||
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
|
||||
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum>;
|
||||
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum>;
|
||||
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_I32_I32_I32>;
|
||||
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_I32_I32_I32>;
|
||||
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_I32_I32_I32>;
|
||||
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_I32_I32_I32>;
|
||||
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
|
||||
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
|
||||
defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">;
|
||||
defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_I32_I32_I32>;
|
||||
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_I32_I32_I32>;
|
||||
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_I32_I32_I32>;
|
||||
|
||||
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
|
||||
isConvertibleToThreeAddress = 1 in {
|
||||
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC>;
|
||||
}
|
||||
|
||||
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK>;
|
||||
|
||||
// No patterns so that the scalar instructions are always selected.
|
||||
// The scalar versions will be replaced with vector when needed later.
|
||||
|
||||
// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
|
||||
// but the VI instructions behave the same as the SI versions.
|
||||
defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32>;
|
||||
defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32>;
|
||||
defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32">;
|
||||
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
|
||||
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>;
|
||||
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
|
||||
} // End isCommutable = 1
|
||||
|
||||
// These are special and do not read the exec mask.
|
||||
let isConvergent = 1, Uses = []<Register> in {
|
||||
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
|
||||
[(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))], "">;
|
||||
|
||||
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, [], "">;
|
||||
} // End isConvergent = 1
|
||||
|
||||
defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
|
||||
defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32>;
|
||||
defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
|
||||
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
|
||||
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;
|
||||
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst"
|
||||
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>;
|
||||
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>;
|
||||
defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, int_SI_packf16>;
|
||||
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>;
|
||||
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>;
|
||||
|
||||
} // End SubtargetPredicate = isGCN
|
||||
|
||||
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
|
||||
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
|
||||
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
|
||||
defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
|
||||
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
|
||||
defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
|
||||
} // End isCommutable = 1
|
||||
|
||||
} // End let SubtargetPredicate = isSICI
|
||||
|
||||
let SubtargetPredicate = isVI in {
|
||||
|
||||
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK>;
|
||||
defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
|
||||
defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>;
|
||||
defm V_ASHRREV_B16 : VOP2Inst <"v_ashrrev_b16", VOP_I16_I16_I16>;
|
||||
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I16>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16>;
|
||||
defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16>;
|
||||
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
|
||||
defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16>;
|
||||
defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_F16_F16_F16>;
|
||||
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK>;
|
||||
defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
|
||||
defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
|
||||
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16>;
|
||||
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
|
||||
defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16>;
|
||||
defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16>;
|
||||
defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>;
|
||||
defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>;
|
||||
defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>;
|
||||
defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16>;
|
||||
} // End isCommutable = 1
|
||||
|
||||
} // End SubtargetPredicate = isVI
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SI
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
|
||||
|
||||
multiclass VOP2_Real_si <bits<6> op> {
|
||||
def _si :
|
||||
VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
|
||||
VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
|
||||
multiclass VOP2_Real_MADK_si <bits<6> op> {
|
||||
def _si : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
|
||||
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
|
||||
multiclass VOP2_Real_e32_si <bits<6> op> {
|
||||
def _e32_si :
|
||||
VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
|
||||
VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
|
||||
}
|
||||
|
||||
multiclass VOP2_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
|
||||
def _e64_si :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
|
||||
VOP3e_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
|
||||
}
|
||||
|
||||
multiclass VOP2be_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
|
||||
def _e64_si :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
|
||||
VOP3be_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
|
||||
}
|
||||
|
||||
} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
|
||||
|
||||
defm V_CNDMASK_B32 : VOP2_Real_e32e64_si <0x0>;
|
||||
defm V_ADD_F32 : VOP2_Real_e32e64_si <0x3>;
|
||||
defm V_SUB_F32 : VOP2_Real_e32e64_si <0x4>;
|
||||
defm V_SUBREV_F32 : VOP2_Real_e32e64_si <0x5>;
|
||||
defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_si <0x7>;
|
||||
defm V_MUL_F32 : VOP2_Real_e32e64_si <0x8>;
|
||||
defm V_MUL_I32_I24 : VOP2_Real_e32e64_si <0x9>;
|
||||
defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_si <0xa>;
|
||||
defm V_MUL_U32_U24 : VOP2_Real_e32e64_si <0xb>;
|
||||
defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_si <0xc>;
|
||||
defm V_MIN_F32 : VOP2_Real_e32e64_si <0xf>;
|
||||
defm V_MAX_F32 : VOP2_Real_e32e64_si <0x10>;
|
||||
defm V_MIN_I32 : VOP2_Real_e32e64_si <0x11>;
|
||||
defm V_MAX_I32 : VOP2_Real_e32e64_si <0x12>;
|
||||
defm V_MIN_U32 : VOP2_Real_e32e64_si <0x13>;
|
||||
defm V_MAX_U32 : VOP2_Real_e32e64_si <0x14>;
|
||||
defm V_LSHRREV_B32 : VOP2_Real_e32e64_si <0x16>;
|
||||
defm V_ASHRREV_I32 : VOP2_Real_e32e64_si <0x18>;
|
||||
defm V_LSHLREV_B32 : VOP2_Real_e32e64_si <0x1a>;
|
||||
defm V_AND_B32 : VOP2_Real_e32e64_si <0x1b>;
|
||||
defm V_OR_B32 : VOP2_Real_e32e64_si <0x1c>;
|
||||
defm V_XOR_B32 : VOP2_Real_e32e64_si <0x1d>;
|
||||
defm V_MAC_F32 : VOP2_Real_e32e64_si <0x1f>;
|
||||
defm V_MADMK_F32 : VOP2_Real_MADK_si <0x20>;
|
||||
defm V_MADAK_F32 : VOP2_Real_MADK_si <0x21>;
|
||||
defm V_ADD_I32 : VOP2be_Real_e32e64_si <0x25>;
|
||||
defm V_SUB_I32 : VOP2be_Real_e32e64_si <0x26>;
|
||||
defm V_SUBREV_I32 : VOP2be_Real_e32e64_si <0x27>;
|
||||
defm V_ADDC_U32 : VOP2be_Real_e32e64_si <0x28>;
|
||||
defm V_SUBB_U32 : VOP2be_Real_e32e64_si <0x29>;
|
||||
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>;
|
||||
|
||||
defm V_READLANE_B32 : VOP2_Real_si <0x01>;
|
||||
defm V_WRITELANE_B32 : VOP2_Real_si <0x02>;
|
||||
|
||||
defm V_MAC_LEGACY_F32 : VOP2_Real_e32e64_si <0x6>;
|
||||
defm V_MIN_LEGACY_F32 : VOP2_Real_e32e64_si <0xd>;
|
||||
defm V_MAX_LEGACY_F32 : VOP2_Real_e32e64_si <0xe>;
|
||||
defm V_LSHR_B32 : VOP2_Real_e32e64_si <0x15>;
|
||||
defm V_ASHR_I32 : VOP2_Real_e32e64_si <0x17>;
|
||||
defm V_LSHL_B32 : VOP2_Real_e32e64_si <0x19>;
|
||||
|
||||
defm V_BFM_B32 : VOP2_Real_e32e64_si <0x1e>;
|
||||
defm V_BCNT_U32_B32 : VOP2_Real_e32e64_si <0x22>;
|
||||
defm V_MBCNT_LO_U32_B32 : VOP2_Real_e32e64_si <0x23>;
|
||||
defm V_MBCNT_HI_U32_B32 : VOP2_Real_e32e64_si <0x24>;
|
||||
defm V_LDEXP_F32 : VOP2_Real_e32e64_si <0x2b>;
|
||||
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e32e64_si <0x2c>;
|
||||
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e32e64_si <0x2d>;
|
||||
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e32e64_si <0x2e>;
|
||||
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e32e64_si <0x2f>;
|
||||
defm V_CVT_PK_U16_U32 : VOP2_Real_e32e64_si <0x30>;
|
||||
defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VI
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class VOP2_SDWA <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
VOP_SDWA <ps.OpName, P> {
let Defs = ps.Defs;
let Uses = ps.Uses;
let SchedRW = ps.SchedRW;
let hasSideEffects = ps.hasSideEffects;
let AsmMatchConverter = "cvtSdwaVOP2";

bits<8> vdst;
bits<8> src1;
let Inst{8-0} = 0xf9; // sdwa
let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
let Inst{30-25} = op;
let Inst{31} = 0x0; // encoding
}

class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
VOP_DPP <ps.OpName, P> {
let Defs = ps.Defs;
let Uses = ps.Uses;
let SchedRW = ps.SchedRW;
let hasSideEffects = ps.hasSideEffects;

bits<8> vdst;
bits<8> src1;
let Inst{8-0} = 0xfa; //dpp
let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
let Inst{30-25} = op;
let Inst{31} = 0x0; //encoding
}
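
// Note on the two classes above: the VOP2 src0 field (Inst{8-0}) carries the
// magic values 0xf9 (SDWA) and 0xfa (DPP) that select the extended encodings;
// the real src0 operand lives in the extra dword provided by VOP_SDWAe /
// VOP_DPPe, while vdst, src1 and the 6-bit VOP2 opcode keep their usual VOP2
// bit positions.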

let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {

multiclass VOP32_Real_vi <bits<10> op> {
def _vi :
VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3e_vi<op, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

multiclass VOP2_Real_MADK_vi <bits<6> op> {
def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

multiclass VOP2_Real_e32_vi <bits<6> op> {
def _e32_vi :
VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
}

multiclass VOP2_Real_e64_vi <bits<10> op> {
def _e64_vi :
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

multiclass VOP2be_Real_e32e64_vi <bits<6> op> : VOP2_Real_e32_vi<op> {
def _e64_vi :
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
VOP2_Real_e32_vi<op>,
VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
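
// Note: the {0, 1, 0, 0, op{5-0}} concatenation used above forms the 10-bit
// VOP3 opcode 0x100 + op, i.e. the VI VOP3 encoding of the corresponding
// VOP2 instruction.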

} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"

multiclass VOP2_Real_e32e64_vi <bits<6> op> :
Base_VOP2_Real_e32e64_vi<op> {
// For now, SDWA/DPP variants are defined only for the assembler/disassembler.
// TODO: add corresponding pseudo
def _sdwa : VOP2_SDWA<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
}

defm V_CNDMASK_B32 : Base_VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>;
defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>;
defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>;
defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>;
defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>;
defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>;
defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>;
defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>;
defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>;
defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>;
defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>;
defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>;
defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>;
defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>;
defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>;
defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>;
defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>;
defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>;
defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>;
defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>;
defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>;
defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>;
defm V_ADD_I32 : VOP2be_Real_e32e64_vi <0x19>;
defm V_SUB_I32 : VOP2be_Real_e32e64_vi <0x1a>;
defm V_SUBREV_I32 : VOP2be_Real_e32e64_vi <0x1b>;
defm V_ADDC_U32 : VOP2be_Real_e32e64_vi <0x1c>;
defm V_SUBB_U32 : VOP2be_Real_e32e64_vi <0x1d>;
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>;

defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;

defm V_BFM_B32 : VOP2_Real_e64_vi <0x293>;
defm V_BCNT_U32_B32 : VOP2_Real_e64_vi <0x28b>;
defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64_vi <0x28c>;
defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64_vi <0x28d>;
defm V_LDEXP_F32 : VOP2_Real_e64_vi <0x288>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64_vi <0x1f0>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64_vi <0x294>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64_vi <0x295>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64_vi <0x296>;
defm V_CVT_PK_U16_U32 : VOP2_Real_e64_vi <0x297>;
defm V_CVT_PK_I16_I32 : VOP2_Real_e64_vi <0x298>;

defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>;
defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>;
defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>;
defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>;
defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>;
defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>;
defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>;
defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>;
defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>;
defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>;
defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>;
defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>;
defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>;
defm V_ASHRREV_B16 : VOP2_Real_e32e64_vi <0x2c>;
defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>;
defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>;
defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>;
defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>;
defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>;
defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>;
defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>;

let SubtargetPredicate = isVI in {

// Aliases to simplify matching of floating-point instructions that
// are VOP2 on SI and VOP3 on VI.
class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
name#" $dst, $src0, $src1",
(inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0)
>, PredicateControl {
let UseInstAsmMatchConverter = 0;
let AsmVariantName = AMDGPUAsmVariants.VOP3;
}

def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;

} // End SubtargetPredicate = isVI

@@ -39,7 +39,7 @@ class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
}

class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
VOP3_PseudoNew<OpName, P,
VOP3_Pseudo<OpName, P,
!if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret),
VOP3Only>;

@@ -118,7 +118,7 @@ let Uses = [VCC, EXEC] in {
// if (vcc)
// result *= 2^32
//
def V_DIV_FMAS_F32 : VOP3_PseudoNew <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
getVOP3VCC<VOP_F32_F32_F32_F32_VCC, AMDGPUdiv_fmas>.ret> {
let SchedRW = [WriteFloatFMA];
}
@@ -127,7 +127,7 @@ def V_DIV_FMAS_F32 : VOP3_PseudoNew <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
// if (vcc)
// result *= 2^64
//
def V_DIV_FMAS_F64 : VOP3_PseudoNew <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
getVOP3VCC<VOP_F64_F64_F64_F64_VCC, AMDGPUdiv_fmas>.ret> {
let SchedRW = [WriteDouble];
}
@@ -165,12 +165,12 @@ def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_
def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
} // End SchedRW = [WriteDoubleAdd]

def V_DIV_SCALE_F32 : VOP3_PseudoNew <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
let SchedRW = [WriteFloatFMA, WriteSALU];
}

// Double precision division pre-scale.
def V_DIV_SCALE_F64 : VOP3_PseudoNew <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
let SchedRW = [WriteDouble, WriteSALU];
}

@@ -234,13 +234,13 @@ let isCommutable = 1 in {
let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {

multiclass VOP3_Real_si<bits<9> op> {
def _si : VOP3_Real<!cast<VOP3_PseudoNew>(NAME), SIEncodingFamily.SI>,
VOP3e_siNew <op, !cast<VOP3_PseudoNew>(NAME).Pfl>;
def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP3e_si <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}

multiclass VOP3be_Real_si<bits<9> op> {
def _si : VOP3_Real<!cast<VOP3_PseudoNew>(NAME), SIEncodingFamily.SI>,
VOP3be_siNew <op, !cast<VOP3_PseudoNew>(NAME).Pfl>;
def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP3be_si <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}

} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
@@ -303,8 +303,8 @@ defm V_TRIG_PREOP_F64 : VOP3_Real_si <0x174>;
//===----------------------------------------------------------------------===//

multiclass VOP3_Real_ci<bits<9> op> {
def _ci : VOP3_Real<!cast<VOP3_PseudoNew>(NAME), SIEncodingFamily.SI>,
VOP3e_siNew <op, !cast<VOP3_PseudoNew>(NAME).Pfl> {
def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP3e_si <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
let AssemblerPredicates = [isCIOnly];
let DecoderNamespace = "CI";
}
@@ -323,13 +323,13 @@ defm V_MAD_I64_I32 : VOP3_Real_ci <0x177>;
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {

multiclass VOP3_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP3_PseudoNew>(NAME), SIEncodingFamily.VI>,
VOP3e_viNew <op, !cast<VOP3_PseudoNew>(NAME).Pfl>;
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}

multiclass VOP3be_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP3_PseudoNew>(NAME), SIEncodingFamily.VI>,
VOP3be_viNew <op, !cast<VOP3_PseudoNew>(NAME).Pfl>;
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3be_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}

} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"

@@ -80,7 +80,7 @@ class VOPC_Real <VOPC_Pseudo ps, int EncodingFamily> :
}

// This class is used only with VOPC instructions. Use $sdst for out operand
class VOPCInstAlias <VOP3_PseudoNew ps, Instruction inst, VOPProfile p = ps.Pfl> :
class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl {

field bit isCompare;
@@ -128,7 +128,7 @@ multiclass VOPC_Pseudos <string opName,
let isCompare = 1;
let isCommutable = 1;
}
def _e64 : VOP3_PseudoNew<opName, P,
def _e64 : VOP3_Pseudo<opName, P,
!if(P.HasModifiers,
[(set i1:$sdst,
(setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -398,10 +398,11 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> :
VOPC_Profile<sched, vt, i32> {
let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
let Asm64 = "$sdst, $src0_modifiers, $src1";
let InsSDWA = (ins Src0Mod:$src0_fmodifiers, Src0RC64:$src0,
Int32InputMods:$src1_imodifiers, Src1RC64:$src1,
let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0,
Int32InputMods:$src1_modifiers, Src1RC64:$src1,
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
let AsmSDWA = " vcc, $src0_fmodifiers, $src1_imodifiers$clamp $src0_sel $src1_sel";
let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
let HasSrc1Mods = 0;
let HasClamp = 0;
let HasOMod = 0;
}
@@ -422,7 +423,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec> {
let SchedRW = p.Schedule;
let isConvergent = DefExec;
}
def _e64 : VOP3_PseudoNew<opName, p, getVOPCClassPat64<p>.ret> {
def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = p.Schedule;
}
@@ -533,15 +534,15 @@ multiclass VOPC_Real_si <bits<9> op> {
VOPCe<op{7-0}>;

def _e64_si :
VOP3_Real<!cast<VOP3_PseudoNew>(NAME#"_e64"), SIEncodingFamily.SI>,
VOP3a_siNew <op, !cast<VOP3_PseudoNew>(NAME#"_e64").Pfl> {
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
VOP3a_si <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
// Encoding used for VOPC instructions encoded as VOP3
// Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
bits<8> sdst;
let Inst{7-0} = sdst;
}
}
def : VOPCInstAlias <!cast<VOP3_PseudoNew>(NAME#"_e64"),
def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
!cast<Instruction>(NAME#"_e32_si")> {
let AssemblerPredicate = isSICI;
}
@@ -764,9 +765,15 @@ defm V_CMPX_CLASS_F64 : VOPC_Real_si <0xb8>;
// VI
//===----------------------------------------------------------------------===//

class VOPC_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAeNew<P> {
bits<8> src1;
class VOPC_SDWA<bits<8> op, VOPC_Pseudo ps, VOPProfile P = ps.Pfl> :
VOP_SDWA <ps.OpName, P> {
let Defs = ps.Defs;
let hasSideEffects = ps.hasSideEffects;
let AsmMatchConverter = "cvtSdwaVOPC";
let isCompare = ps.isCompare;
let isCommutable = ps.isCommutable;

bits<8> src1;
let Inst{8-0} = 0xf9; // sdwa
let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
let Inst{24-17} = op;
@@ -777,21 +784,6 @@ class VOPC_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAeNew<P> {
let Inst{44-43} = SDWA_UNUSED_PRESERVE;
}

class VOPC_SDWA<bits<8> op, VOPC_Pseudo ps, VOPProfile p = ps.Pfl> :
VOP_SDWA <p.OutsSDWA, p.InsSDWA, ps.OpName#p.AsmSDWA, [], p.HasModifiers>,
VOPC_SDWAe <op, p> {
let Defs = ps.Defs;
let hasSideEffects = ps.hasSideEffects;
let AsmMatchConverter = "cvtSdwaVOPC";
let SubtargetPredicate = isVI;
let AssemblerPredicate = !if(p.HasExt, isVI, DisableInst);
let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA,
AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA";
let isCompare = ps.isCompare;
let isCommutable = ps.isCommutable;
}

multiclass VOPC_Real_vi <bits<10> op> {
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
def _e32_vi :
@@ -799,8 +791,8 @@ multiclass VOPC_Real_vi <bits<10> op> {
VOPCe<op{7-0}>;

def _e64_vi :
VOP3_Real<!cast<VOP3_PseudoNew>(NAME#"_e64"), SIEncodingFamily.VI>,
VOP3a_viNew <op, !cast<VOP3_PseudoNew>(NAME#"_e64").Pfl> {
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
VOP3a_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
// Encoding used for VOPC instructions encoded as VOP3
// Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
bits<8> sdst;
@@ -812,7 +804,7 @@ multiclass VOPC_Real_vi <bits<10> op> {
// TODO: add corresponding pseudo
def _sdwa : VOPC_SDWA<op{7-0}, !cast<VOPC_Pseudo>(NAME#"_e32")>;

def : VOPCInstAlias <!cast<VOP3_PseudoNew>(NAME#"_e64"),
def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
!cast<Instruction>(NAME#"_e32_vi")> {
let AssemblerPredicate = isVI;
}
@@ -7,7 +7,70 @@
//
//===----------------------------------------------------------------------===//

class VOP3_PseudoNew <string opName, VOPProfile P, list<dag> pattern, bit VOP3Only = 0> :
// dummies for outer let
class LetDummies {
bit isCommutable;
bit isConvertibleToThreeAddress;
bit isMoveImm;
bit isReMaterializable;
bit isAsCheapAsAMove;
bit VOPAsmPrefer32Bit;
Predicate SubtargetPredicate;
string Constraints;
string DisableEncoding;
list<SchedReadWrite> SchedRW;
list<Register> Uses;
list<Register> Defs;
}

class VOP <string opName> {
string OpName = opName;
}

class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {

let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
let VALU = 1;
}

class VOP3Common <dag outs, dag ins, string asm = "",
list<dag> pattern = [], bit HasMods = 0,
bit VOP3Only = 0> :
VOPAnyCommon <outs, ins, asm, pattern> {

// Using complex patterns gives VOP3 patterns a very high complexity rating,
// but standalone patterns are almost always preferred, so we need to adjust the
// priority lower. The goal is to use a high number to reduce complexity to
// zero (or less than zero).
let AddedComplexity = -1000;

let VOP3 = 1;
let VALU = 1;
let Uses = [EXEC];

let AsmMatchConverter =
!if(!eq(VOP3Only,1),
"cvtVOP3",
!if(!eq(HasMods,1), "cvtVOP3_2_mod", ""));

let AsmVariantName = AMDGPUAsmVariants.VOP3;

let isCodeGenOnly = 0;

int Size = 8;

// Because SGPRs may be allowed if there are multiple operands, we
// need a post-isel hook to insert copies in order to avoid
// violating constant bus requirements.
let hasPostISelHook = 1;
}

class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3Only = 0> :
InstSI <P.Outs64, P.Ins64, "", pattern>,
VOP <opName>,
SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
@@ -50,7 +113,7 @@ class VOP3_PseudoNew <string opName, VOPProfile P, list<dag> pattern, bit VOP3On
VOPProfile Pfl = P;
}

class VOP3_Real <VOP3_PseudoNew ps, int EncodingFamily> :
class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, EncodingFamily> {

@@ -66,7 +129,7 @@ class VOP3_Real <VOP3_PseudoNew ps, int EncodingFamily> :
let TSFlags = ps.TSFlags;
}

class VOP3aNew<VOPProfile P> : Enc64 {
class VOP3a<VOPProfile P> : Enc64 {
bits<2> src0_modifiers;
bits<9> src0;
bits<2> src1_modifiers;
@@ -81,7 +144,7 @@ class VOP3aNew<VOPProfile P> : Enc64 {
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);

let Inst{31-26} = 0x34; //encoding
let Inst{40-32} = src0;
let Inst{40-32} = !if(P.HasSrc0, src0, 0);
let Inst{49-41} = !if(P.HasSrc1, src1, 0);
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
let Inst{60-59} = !if(P.HasOMod, omod, 0);
@@ -90,27 +153,27 @@ class VOP3aNew<VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}

class VOP3a_siNew <bits<9> op, VOPProfile P> : VOP3aNew<P> {
class VOP3a_si <bits<9> op, VOPProfile P> : VOP3a<P> {
let Inst{25-17} = op;
let Inst{11} = !if(P.HasClamp, clamp, 0);
let Inst{11} = !if(P.HasClamp, clamp{0}, 0);
}

class VOP3a_viNew <bits<10> op, VOPProfile P> : VOP3aNew<P> {
class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a<P> {
let Inst{25-16} = op;
let Inst{15} = !if(P.HasClamp, clamp, 0);
let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
}

class VOP3e_siNew <bits<9> op, VOPProfile P> : VOP3a_siNew <op, P> {
class VOP3e_si <bits<9> op, VOPProfile P> : VOP3a_si <op, P> {
bits<8> vdst;
let Inst{7-0} = vdst;
let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
}

class VOP3e_viNew <bits<10> op, VOPProfile P> : VOP3a_viNew <op, P> {
class VOP3e_vi <bits<10> op, VOPProfile P> : VOP3a_vi <op, P> {
bits<8> vdst;
let Inst{7-0} = vdst;
let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
}

class VOP3beNew <VOPProfile P> : Enc64 {
class VOP3be <VOPProfile P> : Enc64 {
bits<8> vdst;
bits<2> src0_modifiers;
bits<9> src0;
@@ -133,24 +196,22 @@ class VOP3beNew <VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}

class VOP3be_siNew <bits<9> op, VOPProfile P> : VOP3beNew<P> {
class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be<P> {
let Inst{25-17} = op;
}

class VOP3be_viNew <bits<10> op, VOPProfile P> : VOP3beNew<P> {
class VOP3be_vi <bits<10> op, VOPProfile P> : VOP3be<P> {
bits<1> clamp;
let Inst{25-16} = op;
let Inst{15} = !if(P.HasClamp, clamp, 0);
let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
}

class VOP_SDWAeNew<VOPProfile P> : Enc64 {
class VOP_SDWAe<VOPProfile P> : Enc64 {
bits<8> src0;
bits<3> src0_sel;
bits<2> src0_fmodifiers; // {abs,neg}
bits<1> src0_imodifiers; // sext
bits<2> src0_modifiers; // float: {abs,neg}, int {sext}
bits<3> src1_sel;
bits<2> src1_fmodifiers;
bits<1> src1_imodifiers;
bits<2> src1_modifiers;
bits<3> dst_sel;
bits<2> dst_unused;
bits<1> clamp;
@@ -159,16 +220,77 @@ class VOP_SDWAeNew<VOPProfile P> : Enc64 {
bits<2> SDWA_UNUSED_PRESERVE = 2;

let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
let Inst{42-40} = !if(P.HasDst, dst_sel{2-0}, SDWA_DWORD{2-0});
let Inst{44-43} = !if(P.HasDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0});
let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA_DWORD{2-0});
let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0});
let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0);
let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA_DWORD{2-0});
let Inst{53-52} = !if(P.HasSrc0Mods, src0_fmodifiers{1-0}, 0);
let Inst{51} = !if(P.HasSrc0IntMods, src0_imodifiers{0}, 0);
let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA_DWORD{2-0});
let Inst{61-60} = !if(P.HasSrc1Mods, src1_fmodifiers{1-0}, 0);
let Inst{59} = !if(P.HasSrc1IntMods, src1_imodifiers{0}, 0);
let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
}
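
// Note: VOP_SDWAe only defines the second dword of an SDWA instruction
// (Inst{63-32}): the real src0, dst_sel/dst_unused, clamp, and the per-source
// sel and modifier fields. The first dword, including the 0xf9 marker in the
// src0 field, is supplied by the encoding-specific classes that inherit from
// it (e.g. VOP2_SDWA and VOPC_SDWA above).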

class VOP_SDWA <string OpName, VOPProfile P> :
InstSI <P.OutsSDWA, P.InsSDWA, OpName#P.AsmSDWA, []>,
VOP_SDWAe<P> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
let VALU = 1;
let SDWA = 1;
let Size = 8;

let SubtargetPredicate = isVI;
let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA";
}

class VOP_DPPe<VOPProfile P> : Enc64 {
bits<2> src0_modifiers;
bits<8> src0;
bits<2> src1_modifiers;
bits<9> dpp_ctrl;
bits<1> bound_ctrl;
bits<4> bank_mask;
bits<4> row_mask;

let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
let Inst{48-40} = dpp_ctrl;
let Inst{51} = bound_ctrl;
let Inst{52} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg
let Inst{53} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs
let Inst{54} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // src1_neg
let Inst{55} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // src1_abs
let Inst{59-56} = bank_mask;
let Inst{63-60} = row_mask;
}

class VOP_DPP <string OpName, VOPProfile P> :
InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, []>,
VOP_DPPe<P> {

let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let UseNamedOperandTable = 1;

let VALU = 1;
let DPP = 1;
let Size = 8;

let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
let SubtargetPredicate = isVI;
let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
let DecoderNamespace = "DPP";
}

include "VOPCInstructions.td"
include "VOP1Instructions.td"
include "VOP2Instructions.td"
include "VOP3Instructions.td"