mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[AMDGPU][MC][GFX9] Added support of VOP3 'op_sel' modifier
See bug 33591: https://bugs.llvm.org/show_bug.cgi?id=33591 Reviewers: vpykhtin, artem.tamazov, SamWot, arsenm Differential Revision: https://reviews.llvm.org/D35424 llvm-svn: 308740
This commit is contained in:
parent
030b7675c5
commit
a902d05ab9
@ -174,6 +174,14 @@ private:
|
||||
bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
|
||||
SDValue &Clamp) const;
|
||||
|
||||
bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
|
||||
bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
|
||||
SDValue &Clamp) const;
|
||||
|
||||
bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
|
||||
bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
|
||||
SDValue &Clamp) const;
|
||||
|
||||
void SelectADD_SUB_I64(SDNode *N);
|
||||
void SelectUADDO_USUBO(SDNode *N);
|
||||
void SelectDIV_SCALE(SDNode *N);
|
||||
@ -1864,6 +1872,42 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
|
||||
return SelectVOP3PMods(In, Src, SrcMods);
|
||||
}
|
||||
|
||||
/// Complex-pattern selector for a VOP3 source operand that takes op_sel.
///
/// \param In      The value being matched.
/// \param Src     Receives \p In unchanged.
/// \param SrcMods Receives an i32 target constant of 0 (no modifier bits).
/// \returns Always true; the match cannot fail.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  const SDLoc DL(In);
  SrcMods = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Src = In;
  return true;
}
|
||||
|
||||
/// Variant of SelectVOP3OpSel that additionally produces the clamp operand.
///
/// \param In      The value being matched.
/// \param Src     Filled in by SelectVOP3OpSel.
/// \param SrcMods Filled in by SelectVOP3OpSel.
/// \param Clamp   Receives an i32 target constant of 0 (clamp not set).
/// \returns Whatever SelectVOP3OpSel returns.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3OpSel(In, Src, SrcMods);
}
|
||||
|
||||
/// Complex-pattern selector for a VOP3 source operand that takes both the
/// regular source modifiers and op_sel.
///
/// Currently this only delegates to SelectVOP3Mods; no op_sel bit is derived
/// from \p In here.
///
/// \returns The result of SelectVOP3Mods.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  const bool Matched = SelectVOP3Mods(In, Src, SrcMods);
  return Matched;
}
|
||||
|
||||
/// Variant of SelectVOP3OpSelMods that additionally produces the clamp
/// operand.
///
/// \param In      The value being matched.
/// \param Src     Filled in by SelectVOP3OpSelMods.
/// \param SrcMods Filled in by SelectVOP3OpSelMods.
/// \param Clamp   Receives an i32 target constant of 0 (clamp not set).
/// \returns Whatever SelectVOP3OpSelMods returns.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Clamp) const {
  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3OpSelMods(In, Src, SrcMods);
}
|
||||
|
||||
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
|
||||
const AMDGPUTargetLowering& Lowering =
|
||||
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
|
||||
|
@ -1060,6 +1060,7 @@ public:
|
||||
|
||||
void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
|
||||
OptionalImmIndexMap &OptionalIdx);
|
||||
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
|
||||
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
|
||||
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
|
||||
|
||||
@ -2688,7 +2689,7 @@ OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
|
||||
|
||||
// FIXME: How to verify the number of elements matches the number of src
|
||||
// operands?
|
||||
for (int I = 0; I < 3; ++I) {
|
||||
for (int I = 0; I < 4; ++I) {
|
||||
if (I != 0) {
|
||||
if (getLexer().is(AsmToken::RBrac))
|
||||
break;
|
||||
@ -4088,6 +4089,30 @@ OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands)
|
||||
return MatchOperand_NoMatch;
|
||||
}
|
||||
|
||||
/// Asm-match converter for VOP3 instructions that accept op_sel.
///
/// Runs the cvtVOP3P conversion first, then looks at the op_sel bit that
/// follows the per-source bits (bit index == number of source operands). When
/// that bit is set, SISrcMods::DST_OP_SEL is OR-ed into src0_modifiers.
///
/// \param Inst     Instruction being built; modified in place.
/// \param Operands Parsed assembler operands, forwarded to cvtVOP3P.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  const int Opcode = Inst.getOpcode();

  // Count how many of src0..src2 this opcode has, stopping at the first one
  // that is missing.
  const int SrcNames[] = { AMDGPU::OpName::src0,
                           AMDGPU::OpName::src1,
                           AMDGPU::OpName::src2 };
  int NumSrcs = 0;
  while (NumSrcs < 3 &&
         AMDGPU::getNamedOperandIdx(Opcode, SrcNames[NumSrcs]) != -1)
    ++NumSrcs;
  assert(NumSrcs > 0);

  const int OpSelOperandIdx =
      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::op_sel);
  const unsigned OpSelBits = Inst.getOperand(OpSelOperandIdx).getImm();

  // The bit right after the source bits is the destination selector.
  const bool DstSelected = (OpSelBits & (1 << NumSrcs)) != 0;
  if (!DstSelected)
    return;

  const int Src0ModsIdx =
      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0_modifiers);
  auto &Src0Mods = Inst.getOperand(Src0ModsIdx);
  const uint32_t PrevMods = Src0Mods.getImm();
  Src0Mods.setImm(PrevMods | SISrcMods::DST_OP_SEL);
}
|
||||
|
||||
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
|
||||
// 1. This operand is input modifiers
|
||||
return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
|
||||
@ -4172,7 +4197,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
|
||||
int Opc = Inst.getOpcode();
|
||||
|
||||
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
|
||||
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
|
||||
|
||||
int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
|
||||
if (OpSelHiIdx != -1) {
|
||||
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
|
||||
}
|
||||
|
||||
int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
|
||||
if (NegLoIdx != -1) {
|
||||
@ -4188,13 +4217,16 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
|
||||
AMDGPU::OpName::src2_modifiers };
|
||||
|
||||
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
|
||||
int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
|
||||
|
||||
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
|
||||
unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
|
||||
unsigned OpSelHi = 0;
|
||||
unsigned NegLo = 0;
|
||||
unsigned NegHi = 0;
|
||||
|
||||
if (OpSelHiIdx != -1) {
|
||||
OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
|
||||
}
|
||||
|
||||
if (NegLoIdx != -1) {
|
||||
int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
|
||||
NegLo = Inst.getOperand(NegLoIdx).getImm();
|
||||
|
@ -803,7 +803,8 @@ void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
|
||||
}
|
||||
}
|
||||
|
||||
static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod) {
|
||||
static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod,
|
||||
bool HasDstSel) {
|
||||
int DefaultValue = (Mod == SISrcMods::OP_SEL_1);
|
||||
|
||||
for (int I = 0; I < NumOps; ++I) {
|
||||
@ -811,11 +812,16 @@ static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (HasDstSel && (Ops[0] & SISrcMods::DST_OP_SEL) != 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
|
||||
raw_ostream &O) {
|
||||
void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
|
||||
StringRef Name,
|
||||
unsigned Mod,
|
||||
raw_ostream &O) {
|
||||
unsigned Opc = MI->getOpcode();
|
||||
int NumOps = 0;
|
||||
int Ops[3];
|
||||
@ -830,7 +836,12 @@ static void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
|
||||
Ops[NumOps++] = MI->getOperand(Idx).getImm();
|
||||
}
|
||||
|
||||
if (allOpsDefaultValue(Ops, NumOps, Mod))
|
||||
const bool HasDstSel =
|
||||
NumOps > 0 &&
|
||||
Mod == SISrcMods::OP_SEL_0 &&
|
||||
MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3_OPSEL;
|
||||
|
||||
if (allOpsDefaultValue(Ops, NumOps, Mod, HasDstSel))
|
||||
return;
|
||||
|
||||
O << Name;
|
||||
@ -841,6 +852,10 @@ static void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
|
||||
O << !!(Ops[I] & Mod);
|
||||
}
|
||||
|
||||
if (HasDstSel) {
|
||||
O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
|
||||
}
|
||||
|
||||
O << ']';
|
||||
}
|
||||
|
||||
|
@ -127,6 +127,8 @@ private:
|
||||
const MCSubtargetInfo &STI, raw_ostream &O);
|
||||
void printSDWADstUnused(const MCInst *MI, unsigned OpNo,
|
||||
const MCSubtargetInfo &STI, raw_ostream &O);
|
||||
void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
|
||||
raw_ostream &O);
|
||||
void printOpSel(const MCInst *MI, unsigned OpNo,
|
||||
const MCSubtargetInfo &STI, raw_ostream &O);
|
||||
void printOpSelHi(const MCInst *MI, unsigned OpNo,
|
||||
|
@ -67,7 +67,8 @@ enum : uint64_t {
|
||||
SCALAR_STORE = UINT64_C(1) << 39,
|
||||
FIXED_SIZE = UINT64_C(1) << 40,
|
||||
VOPAsmPrefer32Bit = UINT64_C(1) << 41,
|
||||
HasFPClamp = UINT64_C(1) << 42
|
||||
HasFPClamp = UINT64_C(1) << 42,
|
||||
VOP3_OPSEL = UINT64_C(1) << 43
|
||||
};
|
||||
|
||||
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
|
||||
@ -137,7 +138,8 @@ namespace SISrcMods {
|
||||
SEXT = 1 << 0, // Integer sign-extend modifier
|
||||
NEG_HI = ABS, // Floating-point negate high packed component modifier.
|
||||
OP_SEL_0 = 1 << 2,
|
||||
OP_SEL_1 = 1 << 3
|
||||
OP_SEL_1 = 1 << 3,
|
||||
DST_OP_SEL = 1 << 3 // VOP3 dst op_sel (share mask with OP_SEL_1)
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -83,6 +83,10 @@ class InstSI <dag outs, dag ins, string asm = "",
|
||||
// the clamp modifier has floating point semantics.
|
||||
field bit FPClamp = 0;
|
||||
|
||||
// This bit indicates that this is a VOP3 opcode which supports op_sel
|
||||
// modifier (gfx9 only).
|
||||
field bit VOP3_OPSEL = 0;
|
||||
|
||||
// These need to be kept in sync with the enum in SIInstrFlags.
|
||||
let TSFlags{0} = SALU;
|
||||
let TSFlags{1} = VALU;
|
||||
@ -127,6 +131,7 @@ class InstSI <dag outs, dag ins, string asm = "",
|
||||
let TSFlags{40} = FixedSize;
|
||||
let TSFlags{41} = VOPAsmPrefer32Bit;
|
||||
let TSFlags{42} = FPClamp;
|
||||
let TSFlags{43} = VOP3_OPSEL;
|
||||
|
||||
let SchedRW = [Write32Bit];
|
||||
|
||||
|
@ -659,6 +659,15 @@ class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass>
|
||||
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
|
||||
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
|
||||
|
||||
// Assembler operand class for source operands of VOP3 op_sel instructions.
// Operands of this class are parsed with parseRegOrImm and accepted when
// isRegOrImm holds.
class OpSelModsMatchClass : AsmOperandClass {
|
||||
let Name = "OpSelMods";
|
||||
let ParserMethod = "parseRegOrImm";
|
||||
let PredicateMethod = "isRegOrImm";
|
||||
}
|
||||
|
||||
def IntOpSelModsMatchClass : OpSelModsMatchClass;
|
||||
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;
|
||||
|
||||
def FPRegSDWAInputModsMatchClass : AsmOperandClass {
|
||||
let Name = "SDWARegWithFPInputMods";
|
||||
let ParserMethod = "parseRegWithFPInputMods";
|
||||
@ -750,6 +759,11 @@ def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
|
||||
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
|
||||
def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
|
||||
|
||||
def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
|
||||
def VOP3OpSel0 : ComplexPattern<untyped, 3, "SelectVOP3OpSel0">;
|
||||
|
||||
def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
|
||||
def VOP3OpSelMods0 : ComplexPattern<untyped, 3, "SelectVOP3OpSelMods0">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SI assembler operands
|
||||
@ -771,6 +785,7 @@ def SRCMODS {
|
||||
int NEG_HI = ABS;
|
||||
int OP_SEL_0 = 4;
|
||||
int OP_SEL_1 = 8;
|
||||
int DST_OP_SEL = 8;
|
||||
}
|
||||
|
||||
def DSTCLAMP {
|
||||
@ -1020,6 +1035,10 @@ class getSrcMod <ValueType VT> {
|
||||
);
|
||||
}
|
||||
|
||||
// Return the modifiers operand type used for a source of value type VT in a
// VOP3 op_sel instruction: FP16InputMods when VT is f16, IntOpSelMods for
// every other type.
class getOpSelMod <ValueType VT> {
|
||||
Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
|
||||
}
|
||||
|
||||
// Return type of input modifiers operand specified input operand for DPP
|
||||
class getSrcModExt <ValueType VT> {
|
||||
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
|
||||
@ -1133,6 +1152,37 @@ class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
|
||||
);
|
||||
}
|
||||
|
||||
// Build the input operand list (ins dag) for a VOP3 instruction that takes
// op_sel. Each source is preceded by its modifiers operand; an optional
// clampmod:$clamp follows the sources, and op_sel:$op_sel is always last.
//
// Only two shapes are produced: the two-source list when NumSrcArgs == 2 and
// the three-source list for any other NumSrcArgs value.
// NOTE(review): there is no dedicated single-source form here — confirm that
// no one-source profile relies on this dag.
class getInsVOP3OpSel <RegisterOperand Src0RC,
|
||||
RegisterOperand Src1RC,
|
||||
RegisterOperand Src2RC,
|
||||
int NumSrcArgs,
|
||||
bit HasClamp,
|
||||
Operand Src0Mod,
|
||||
Operand Src1Mod,
|
||||
Operand Src2Mod> {
|
||||
dag ret = !if (!eq(NumSrcArgs, 2),
|
||||
!if (HasClamp,
|
||||
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
||||
clampmod:$clamp,
|
||||
op_sel:$op_sel),
|
||||
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
||||
op_sel:$op_sel)),
|
||||
// else NumSrcArgs == 3
|
||||
!if (HasClamp,
|
||||
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
||||
Src2Mod:$src2_modifiers, Src2RC:$src2,
|
||||
clampmod:$clamp,
|
||||
op_sel:$op_sel),
|
||||
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
||||
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
||||
Src2Mod:$src2_modifiers, Src2RC:$src2,
|
||||
op_sel:$op_sel))
|
||||
);
|
||||
}
|
||||
|
||||
class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
|
||||
bit HasModifiers, Operand Src0Mod, Operand Src1Mod> {
|
||||
|
||||
@ -1279,6 +1329,34 @@ class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
|
||||
string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
|
||||
}
|
||||
|
||||
// Build the assembly operand string for a VOP3 instruction that takes op_sel.
// The result is " $vdst, " followed by one entry per source, then "$op_sel",
// then "$clamp" when HasClamp is set.
//
// For each source, SrcNHasMods picks between the plain operand name ($srcN)
// and the modifiers operand name ($srcN_modifiers). A trailing comma is
// appended to every source except the last one, based on NumSrcArgs.
class getAsmVOP3OpSel <int NumSrcArgs,
|
||||
bit HasClamp,
|
||||
bit Src0HasMods,
|
||||
bit Src1HasMods,
|
||||
bit Src2HasMods> {
|
||||
string dst = " $vdst";
|
||||
|
||||
// Source strings used when the source has no modifiers.
string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
|
||||
string isrc1 = !if(!eq(NumSrcArgs, 1), "",
|
||||
!if(!eq(NumSrcArgs, 2), " $src1",
|
||||
" $src1,"));
|
||||
string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
|
||||
|
||||
// Source strings used when the source has modifiers.
string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
|
||||
string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
|
||||
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
|
||||
" $src1_modifiers,"));
|
||||
string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
|
||||
|
||||
string src0 = !if(Src0HasMods, fsrc0, isrc0);
|
||||
string src1 = !if(Src1HasMods, fsrc1, isrc1);
|
||||
string src2 = !if(Src2HasMods, fsrc2, isrc2);
|
||||
|
||||
string clamp = !if(HasClamp, "$clamp", "");
|
||||
|
||||
string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp;
|
||||
}
|
||||
|
||||
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
|
||||
string dst = !if(HasDst,
|
||||
!if(!eq(DstVT.Size, 1),
|
||||
@ -1462,7 +1540,12 @@ class VOPProfile <list<ValueType> _ArgVT> {
|
||||
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
|
||||
NumSrcArgs, HasClamp,
|
||||
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
|
||||
|
||||
field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
|
||||
NumSrcArgs,
|
||||
HasClamp,
|
||||
getOpSelMod<Src0VT>.ret,
|
||||
getOpSelMod<Src1VT>.ret,
|
||||
getOpSelMod<Src2VT>.ret>.ret;
|
||||
field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
|
||||
HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
|
||||
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
|
||||
@ -1473,6 +1556,11 @@ class VOPProfile <list<ValueType> _ArgVT> {
|
||||
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
|
||||
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret;
|
||||
field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
|
||||
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
|
||||
HasClamp,
|
||||
HasSrc0FloatMods,
|
||||
HasSrc1FloatMods,
|
||||
HasSrc2FloatMods>.ret;
|
||||
field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
|
||||
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
|
||||
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
|
||||
@ -1495,6 +1583,8 @@ def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
|
||||
def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
|
||||
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
|
||||
|
||||
def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
|
||||
|
||||
def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
|
||||
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
|
||||
def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
|
||||
|
@ -1288,12 +1288,32 @@ class FPMed3Pat<ValueType vt,
|
||||
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
// Selection pattern mapping
//   fmaxnum(fminnum(src0, src1), fminnum(fmaxnum(src0, src1), src2))
// onto med3Inst. Every source is matched through VOP3Mods_nnan, and the inner
// min/max nodes must have a single use (the *_oneuse fragments).
//
// The output passes DSTCLAMP.NONE as its last operand and has no omod
// operand.
class FP16Med3Pat<ValueType vt,
|
||||
Instruction med3Inst> : Pat<
|
||||
(fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
|
||||
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
|
||||
(fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
|
||||
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
|
||||
(vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
|
||||
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
|
||||
>;
|
||||
|
||||
// Selection pattern mapping
//   max(min(src0, src1), min(max(src0, src1), src2))
// onto med3Inst, with the min/max operators supplied by the instantiation.
// The inner nodes use the *_oneuse operators passed in.
//
// The output passes SRCMODS.NONE for every source modifiers operand and
// DSTCLAMP.NONE for clamp. vt defaults to i32.
class Int16Med3Pat<Instruction med3Inst,
|
||||
SDPatternOperator max,
|
||||
SDPatternOperator max_oneuse,
|
||||
SDPatternOperator min_oneuse,
|
||||
ValueType vt = i32> : Pat<
|
||||
(max (min_oneuse vt:$src0, vt:$src1),
|
||||
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
|
||||
(med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
|
||||
>;
|
||||
|
||||
def : FPMed3Pat<f32, V_MED3_F32>;
|
||||
|
||||
let Predicates = [isGFX9] in {
|
||||
def : FPMed3Pat<f16, V_MED3_F16>;
|
||||
def : IntMed3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
|
||||
def : IntMed3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
|
||||
def : FP16Med3Pat<f16, V_MED3_F16>;
|
||||
def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
|
||||
def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
|
||||
} // End Predicates = [isGFX9]
|
||||
|
||||
//============================================================================//
|
||||
|
@ -53,6 +53,46 @@ class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
|
||||
ret1));
|
||||
}
|
||||
|
||||
// Selection patterns for VOP3 op_sel instructions whose sources are matched
// with the VOP3OpSel complex pattern.
//
// ret3, ret2 and ret1 are the three-, two- and one-source forms; ret picks
// one of them from P.NumSrcArgs. In ret3 and ret2, src0 goes through
// VOP3OpSel0 (which also binds $clamp) when P.HasClamp is set, and through
// VOP3OpSel otherwise. ret1 always uses VOP3OpSel0.
class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
|
||||
list<dag> ret3 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
|
||||
(VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
|
||||
(P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
(P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
|
||||
|
||||
list<dag> ret2 = [(set P.DstVT:$vdst,
|
||||
(node !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
|
||||
(P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
|
||||
(P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
|
||||
|
||||
list<dag> ret1 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
|
||||
|
||||
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
|
||||
!if(!eq(P.NumSrcArgs, 2), ret2,
|
||||
ret1));
|
||||
}
|
||||
|
||||
// Selection patterns for VOP3 op_sel instructions whose sources are matched
// with the VOP3OpSelMods complex pattern.
//
// ret3, ret2 and ret1 are the three-, two- and one-source forms; ret picks
// one of them from P.NumSrcArgs. In ret3 and ret2, src0 goes through
// VOP3OpSelMods0 (which also binds $clamp) when P.HasClamp is set, and
// through VOP3OpSelMods otherwise. ret1 always uses VOP3OpSelMods0.
class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
|
||||
list<dag> ret3 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
|
||||
(VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
|
||||
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
(P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
|
||||
|
||||
list<dag> ret2 = [(set P.DstVT:$vdst,
|
||||
(node !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
|
||||
(P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
|
||||
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
|
||||
|
||||
list<dag> ret1 = [(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
|
||||
|
||||
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
|
||||
!if(!eq(P.NumSrcArgs, 2), ret2,
|
||||
ret1));
|
||||
}
|
||||
|
||||
class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
|
||||
list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
|
||||
list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
|
||||
@ -67,6 +107,16 @@ class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag,
|
||||
!if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret),
|
||||
VOP3Only>;
|
||||
|
||||
// Pseudo instruction for a VOP3 opcode that takes op_sel.
//
// The selection pattern comes from getVOP3OpSelModPat when the src0 type is a
// float type and from getVOP3OpSelPat otherwise. The trailing "1, 0, 1"
// arguments to VOP3_Pseudo are VOP3Only, isVOP3P and isVop3OpSel. Assembler
// matches are converted with cvtVOP3OpSel.
class VOP3OpSelInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
|
||||
VOP3_Pseudo<OpName, P,
|
||||
!if(isFloatType<P.Src0VT>.ret,
|
||||
getVOP3OpSelModPat<P, node>.ret,
|
||||
getVOP3OpSelPat<P, node>.ret),
|
||||
1, 0, 1> {
|
||||
|
||||
let AsmMatchConverter = "cvtVOP3OpSel";
|
||||
}
|
||||
|
||||
// Special case for v_div_fmas_{f32|f64}, since it seems to be the
|
||||
// only VOP instruction that implicitly reads VCC.
|
||||
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
|
||||
@ -93,6 +143,11 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
|
||||
let Asm64 = " " # P.Asm64;
|
||||
}
|
||||
|
||||
// VOP3 profile for instructions that take op_sel: same as VOP3_Profile<P>
// but with HasClamp and HasOpSel both forced to 1.
class VOP3OpSel_Profile<VOPProfile P> : VOP3_Profile<P> {
|
||||
let HasClamp = 1;
|
||||
let HasOpSel = 1;
|
||||
}
|
||||
|
||||
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
|
||||
// v_div_scale_{f32|f64} do not support input modifiers.
|
||||
let HasModifiers = 0;
|
||||
@ -303,7 +358,7 @@ defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
|
||||
} // End Predicates = [Has16BitInsts]
|
||||
|
||||
let SubtargetPredicate = isGFX9 in {
|
||||
def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16>>;
|
||||
def V_PACK_B32_F16 : VOP3OpSelInst <"v_pack_b32_f16", VOP3OpSel_Profile<VOP_B32_F16_F16>>;
|
||||
def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
|
||||
def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
|
||||
def V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
|
||||
@ -313,17 +368,26 @@ def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
|
||||
|
||||
def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
|
||||
|
||||
def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
|
||||
def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
|
||||
def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
|
||||
def V_MED3_F16 : VOP3OpSelInst <"v_med3_f16", VOP3OpSel_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
|
||||
def V_MED3_I16 : VOP3OpSelInst <"v_med3_i16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
|
||||
def V_MED3_U16 : VOP3OpSelInst <"v_med3_u16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
|
||||
|
||||
def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmin3>;
|
||||
def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmin3>;
|
||||
def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumin3>;
|
||||
def V_MIN3_F16 : VOP3OpSelInst <"v_min3_f16", VOP3OpSel_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmin3>;
|
||||
def V_MIN3_I16 : VOP3OpSelInst <"v_min3_i16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmin3>;
|
||||
def V_MIN3_U16 : VOP3OpSelInst <"v_min3_u16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUumin3>;
|
||||
|
||||
def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmax3>;
|
||||
def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmax3>;
|
||||
def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumax3>;
|
||||
def V_MAX3_F16 : VOP3OpSelInst <"v_max3_f16", VOP3OpSel_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmax3>;
|
||||
def V_MAX3_I16 : VOP3OpSelInst <"v_max3_i16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmax3>;
|
||||
def V_MAX3_U16 : VOP3OpSelInst <"v_max3_u16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUumax3>;
|
||||
|
||||
def V_ADD_I16 : VOP3OpSelInst <"v_add_i16", VOP3OpSel_Profile<VOP_I16_I16_I16>>;
|
||||
def V_SUB_I16 : VOP3OpSelInst <"v_sub_i16", VOP3OpSel_Profile<VOP_I16_I16_I16>>;
|
||||
|
||||
def V_MAD_U32_U16 : VOP3OpSelInst <"v_mad_u32_u16", VOP3OpSel_Profile<VOP_I32_I16_I16_I32>>;
|
||||
def V_MAD_I32_I16 : VOP3OpSelInst <"v_mad_i32_i16", VOP3OpSel_Profile<VOP_I32_I16_I16_I32>>;
|
||||
|
||||
def V_CVT_PKNORM_I16_F16 : VOP3OpSelInst <"v_cvt_pknorm_i16_f16", VOP3OpSel_Profile<VOP_B32_F16_F16>>;
|
||||
def V_CVT_PKNORM_U16_F16 : VOP3OpSelInst <"v_cvt_pknorm_u16_f16", VOP3OpSel_Profile<VOP_B32_F16_F16>>;
|
||||
} // End SubtargetPredicate = isGFX9
|
||||
|
||||
|
||||
@ -443,6 +507,11 @@ multiclass VOP3be_Real_vi<bits<10> op> {
|
||||
VOP3be_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
|
||||
// Define the real "_vi" instruction for a VOP3 op_sel pseudo named NAME.
// It uses the VI encoding family and the VOP3OpSel_gfx9 encoding with opcode
// op; the profile is taken from the pseudo's Pfl field.
multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
|
||||
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
|
||||
VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
|
||||
} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
|
||||
|
||||
defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
|
||||
@ -527,18 +596,27 @@ defm V_ADD3_U32 : VOP3_Real_vi <0x1ff>;
|
||||
defm V_LSHL_OR_B32 : VOP3_Real_vi <0x200>;
|
||||
defm V_AND_OR_B32 : VOP3_Real_vi <0x201>;
|
||||
defm V_OR3_B32 : VOP3_Real_vi <0x202>;
|
||||
defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
|
||||
defm V_PACK_B32_F16 : VOP3OpSel_Real_gfx9 <0x2a0>;
|
||||
|
||||
defm V_XAD_U32 : VOP3_Real_vi <0x1f3>;
|
||||
|
||||
defm V_MIN3_F16 : VOP3_Real_vi <0x1f4>;
|
||||
defm V_MIN3_I16 : VOP3_Real_vi <0x1f5>;
|
||||
defm V_MIN3_U16 : VOP3_Real_vi <0x1f6>;
|
||||
defm V_MIN3_F16 : VOP3OpSel_Real_gfx9 <0x1f4>;
|
||||
defm V_MIN3_I16 : VOP3OpSel_Real_gfx9 <0x1f5>;
|
||||
defm V_MIN3_U16 : VOP3OpSel_Real_gfx9 <0x1f6>;
|
||||
|
||||
defm V_MAX3_F16 : VOP3_Real_vi <0x1f7>;
|
||||
defm V_MAX3_I16 : VOP3_Real_vi <0x1f8>;
|
||||
defm V_MAX3_U16 : VOP3_Real_vi <0x1f9>;
|
||||
defm V_MAX3_F16 : VOP3OpSel_Real_gfx9 <0x1f7>;
|
||||
defm V_MAX3_I16 : VOP3OpSel_Real_gfx9 <0x1f8>;
|
||||
defm V_MAX3_U16 : VOP3OpSel_Real_gfx9 <0x1f9>;
|
||||
|
||||
defm V_MED3_F16 : VOP3_Real_vi <0x1fa>;
|
||||
defm V_MED3_I16 : VOP3_Real_vi <0x1fb>;
|
||||
defm V_MED3_U16 : VOP3_Real_vi <0x1fc>;
|
||||
defm V_MED3_F16 : VOP3OpSel_Real_gfx9 <0x1fa>;
|
||||
defm V_MED3_I16 : VOP3OpSel_Real_gfx9 <0x1fb>;
|
||||
defm V_MED3_U16 : VOP3OpSel_Real_gfx9 <0x1fc>;
|
||||
|
||||
defm V_ADD_I16 : VOP3OpSel_Real_gfx9 <0x29e>;
|
||||
defm V_SUB_I16 : VOP3OpSel_Real_gfx9 <0x29f>;
|
||||
|
||||
defm V_MAD_U32_U16 : VOP3OpSel_Real_gfx9 <0x1f1>;
|
||||
defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx9 <0x1f2>;
|
||||
|
||||
defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>;
|
||||
defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;
|
||||
|
@ -65,8 +65,13 @@ class VOP3Common <dag outs, dag ins, string asm = "",
|
||||
}
|
||||
|
||||
class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
|
||||
bit VOP3Only = 0, bit isVOP3P = 0> :
|
||||
InstSI <P.Outs64, !if(!and(isVOP3P, P.IsPacked), P.InsVOP3P, P.Ins64), "", pattern>,
|
||||
bit VOP3Only = 0, bit isVOP3P = 0, bit isVop3OpSel = 0> :
|
||||
InstSI <P.Outs64,
|
||||
!if(isVop3OpSel,
|
||||
P.InsVOP3OpSel,
|
||||
!if(!and(isVOP3P, P.IsPacked), P.InsVOP3P, P.Ins64)),
|
||||
"",
|
||||
pattern>,
|
||||
VOP <opName>,
|
||||
SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
|
||||
MnemonicAlias<opName#"_e64", opName> {
|
||||
@ -74,9 +79,12 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
|
||||
let isPseudo = 1;
|
||||
let isCodeGenOnly = 1;
|
||||
let UseNamedOperandTable = 1;
|
||||
let VOP3_OPSEL = isVop3OpSel;
|
||||
|
||||
string Mnemonic = opName;
|
||||
string AsmOperands = !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64);
|
||||
string AsmOperands = !if(isVop3OpSel,
|
||||
P.AsmVOP3OpSel,
|
||||
!if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64));
|
||||
|
||||
let Size = 8;
|
||||
let mayLoad = 0;
|
||||
@ -146,11 +154,11 @@ class VOP3P_Real<VOP3P_Pseudo ps, int EncodingFamily> :
|
||||
VOP3_Real<ps, EncodingFamily>;
|
||||
|
||||
class VOP3a<VOPProfile P> : Enc64 {
|
||||
bits<2> src0_modifiers;
|
||||
bits<4> src0_modifiers;
|
||||
bits<9> src0;
|
||||
bits<2> src1_modifiers;
|
||||
bits<3> src1_modifiers;
|
||||
bits<9> src1;
|
||||
bits<2> src2_modifiers;
|
||||
bits<3> src2_modifiers;
|
||||
bits<9> src2;
|
||||
bits<1> clamp;
|
||||
bits<2> omod;
|
||||
@ -189,6 +197,13 @@ class VOP3e_vi <bits<10> op, VOPProfile P> : VOP3a_vi <op, P> {
|
||||
let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
|
||||
}
|
||||
|
||||
// VOP3 encoding with op_sel bits layered on top of VOP3e_vi.
//
// Inst{11}, Inst{12} and Inst{13} take bit 2 of the src0, src1 and src2
// modifiers respectively, or 0 when the profile lacks that source.
// Inst{14} takes bit 3 of src0_modifiers, or 0 when the profile has no
// destination.
class VOP3OpSel_gfx9 <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
|
||||
let Inst{11} = !if(P.HasSrc0, src0_modifiers{2}, 0);
|
||||
let Inst{12} = !if(P.HasSrc1, src1_modifiers{2}, 0);
|
||||
let Inst{13} = !if(P.HasSrc2, src2_modifiers{2}, 0);
|
||||
let Inst{14} = !if(P.HasDst, src0_modifiers{3}, 0);
|
||||
}
|
||||
|
||||
class VOP3be <VOPProfile P> : Enc64 {
|
||||
bits<8> vdst;
|
||||
bits<2> src0_modifiers;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -31,6 +31,15 @@ v_pack_b32_f16 v1, v2, v3
|
||||
// GFX9: v_pack_b32_f16 v1, v2, v3 ; encoding: [0x01,0x00,0xa0,0xd2,0x02,0x07,0x02,0x00]
|
||||
// NOVI: :1: error: instruction not supported on this GPU
|
||||
|
||||
v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0]
|
||||
// GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0xa0,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_pack_b32_f16 v5, v1, v2 op_sel:[0,1,0]
|
||||
// GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[0,1,0] ; encoding: [0x05,0x10,0xa0,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_pack_b32_f16 v5, v1, v2 op_sel:[0,0,1]
|
||||
// GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[0,0,1] ; encoding: [0x05,0x40,0xa0,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_xad_u32 v1, v2, v3, v4
|
||||
// GFX9: v_xad_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf3,0xd1,0x02,0x07,0x12,0x04]
|
||||
// NOVI: :1: error: instruction not supported on this GPU
|
||||
@ -51,10 +60,46 @@ v_max3_f16 v1, v2, v3, v4
|
||||
// GFX9: v_max3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf7,0xd1,0x02,0x07,0x12,0x04]
|
||||
// NOVI: :1: error: instruction not supported on this GPU
|
||||
|
||||
v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,0]
|
||||
// GFX9: v_max3_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf7,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_f16 v5, v1, v2, v3 op_sel:[1,0,0,0]
|
||||
// GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf7,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_f16 v5, v1, v2, v3 op_sel:[0,1,0,0]
|
||||
// GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf7,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,1,0]
|
||||
// GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf7,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,1]
|
||||
// GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf7,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1]
|
||||
// GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf7,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_i16 v1, v2, v3, v4
|
||||
// GFX9: v_max3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf8,0xd1,0x02,0x07,0x12,0x04]
|
||||
// NOVI: :1: error: instruction not supported on this GPU
|
||||
|
||||
v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,0]
|
||||
// GFX9: v_max3_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf8,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_i16 v5, v1, v2, v3 op_sel:[1,0,0,0]
|
||||
// GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf8,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_i16 v5, v1, v2, v3 op_sel:[0,1,0,0]
|
||||
// GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf8,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,1,0]
|
||||
// GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf8,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,1]
|
||||
// GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf8,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_i16 v5, v1, v2, v3 op_sel:[1,1,1,1]
|
||||
// GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf8,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_max3_u16 v1, v2, v3, v4
|
||||
// GFX9: v_max3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf9,0xd1,0x02,0x07,0x12,0x04]
|
||||
// NOVI: :1: error: instruction not supported on this GPU
|
||||
@ -70,3 +115,78 @@ v_med3_i16 v1, v2, v3, v4
|
||||
v_med3_u16 v1, v2, v3, v4
|
||||
// GFX9: v_med3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfc,0xd1,0x02,0x07,0x12,0x04]
|
||||
// NOVI: :1: error: instruction not supported on this GPU
|
||||
|
||||
v_mad_u32_u16 v5, v1, v2, v3
|
||||
// GFX9: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf1,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,0,0,0]
|
||||
// GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf1,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,1,0,0]
|
||||
// GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf1,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,1,0]
|
||||
// GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf1,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,0,1]
|
||||
// GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf1,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,1,1,1]
|
||||
// GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf1,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_i32_i16 v5, v1, v2, v3
|
||||
// GFX9: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf2,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_i32_i16 v5, v1, v2, v3 op_sel:[0,0,0,1]
|
||||
// GFX9: v_mad_i32_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf2,0xd1,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_cvt_pknorm_i16_f16 v5, v1, v2
|
||||
// GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_cvt_pknorm_i16_f16 v5, -v1, v2
|
||||
// GFX9: v_cvt_pknorm_i16_f16 v5, -v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x20]
|
||||
|
||||
v_cvt_pknorm_i16_f16 v5, v1, -v2
|
||||
// GFX9: v_cvt_pknorm_i16_f16 v5, v1, -v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x40]
|
||||
|
||||
v_cvt_pknorm_i16_f16 v5, -v1, -v2
|
||||
// GFX9: v_cvt_pknorm_i16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x60]
|
||||
|
||||
v_cvt_pknorm_i16_f16 v5, |v1|, v2
|
||||
// GFX9: v_cvt_pknorm_i16_f16 v5, |v1|, v2 ; encoding: [0x05,0x01,0x99,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_cvt_pknorm_i16_f16 v5, v1, |v2|
|
||||
// GFX9: v_cvt_pknorm_i16_f16 v5, v1, |v2| ; encoding: [0x05,0x02,0x99,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[0,0,0]
|
||||
// GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,0,0]
|
||||
// GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x99,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,1,1]
|
||||
// GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x99,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_cvt_pknorm_u16_f16 v5, -v1, -v2
|
||||
// GFX9: v_cvt_pknorm_u16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x9a,0xd2,0x01,0x05,0x02,0x60]
|
||||
|
||||
v_cvt_pknorm_u16_f16 v5, |v1|, |v2|
|
||||
// GFX9: v_cvt_pknorm_u16_f16 v5, |v1|, |v2| ; encoding: [0x05,0x03,0x9a,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_cvt_pknorm_u16_f16 v5, v1, v2 op_sel:[1,1,1]
|
||||
// GFX9: v_cvt_pknorm_u16_f16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9a,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_add_i16 v5, v1, v2
|
||||
// GFX9: v_add_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x9e,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_add_i16 v5, v1, v2 op_sel:[1,1,1]
|
||||
// GFX9: v_add_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9e,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_sub_i16 v5, v1, v2
|
||||
// GFX9: v_sub_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x9f,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_sub_i16 v5, v1, v2 op_sel:[1,1,1]
|
||||
// GFX9: v_sub_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9f,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_sub_i16 v5, v1, v2 clamp
|
||||
// GFX9: v_sub_i16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x9f,0xd2,0x01,0x05,0x02,0x00]
|
||||
|
@ -36,8 +36,8 @@ v_pk_add_u16 v1, v2, v3 op_sel:[-1,0]
|
||||
// GFX9: 35: error: failed parsing operand.
|
||||
v_pk_add_u16 v1, v2, v3 op_sel:[0,-1]
|
||||
|
||||
// GFX9: 40: error: not a valid operand.
|
||||
v_pk_add_u16 v1, v2, v3 op_sel:[0,0,0,0]
|
||||
// GFX9: 42: error: not a valid operand.
|
||||
v_pk_add_u16 v1, v2, v3 op_sel:[0,0,0,0,0]
|
||||
|
||||
// XXGFX9: invalid operand for instruction
|
||||
v_pk_add_u16 v1, v2, v3 neg_lo:[0,0]
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user