1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[AMDGPU] Add MC layer support for v_fmac_legacy_f32

This instruction was introduced in GFX10.3, reusing the opcode of
v_mac_legacy_f32 from GFX10.1.

Differential Revision: https://reviews.llvm.org/D89247
This commit is contained in:
Jay Foad 2020-10-09 16:37:01 +01:00
parent 9f80ca3aba
commit bfbf8b669b
7 changed files with 70 additions and 18 deletions

View File

@@ -6894,6 +6894,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
Opc == AMDGPU::V_MAC_F16_e64_vi ||
Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F32_e64_vi ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
auto it = Inst.begin();
std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));

View File

@@ -390,6 +390,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),

View File

@@ -672,14 +672,23 @@ let SubtargetPredicate = HasDLInsts in {
defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>;
let Constraints = "$vdst = $src2",
DisableEncoding="$src2",
DisableEncoding = "$src2",
isConvertibleToThreeAddress = 1,
isCommutable = 1 in {
isCommutable = 1 in
defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
}
} // End SubtargetPredicate = HasDLInsts
let SubtargetPredicate = HasNoMadMacF32Insts in {
let Constraints = "$vdst = $src2",
DisableEncoding = "$src2",
isConvertibleToThreeAddress = 1,
isCommutable = 1 in
defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;
} // End SubtargetPredicate = HasNoMadMacF32Insts
let Constraints = "$vdst = $src2",
DisableEncoding="$src2",
isConvertibleToThreeAddress = 1,
@@ -1147,21 +1156,25 @@ multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>;
defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>;
defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>;
defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>;
defm V_ADD_F16 : VOP2_Real_gfx10<0x032>;
defm V_SUB_F16 : VOP2_Real_gfx10<0x033>;
defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
defm V_MUL_F16 : VOP2_Real_gfx10<0x035>;
defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>;
defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>;
defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>;
defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
// NB: Same opcode as v_mac_legacy_f32
let DecoderNamespace = "GFX10_B" in
defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;
defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>;
defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>;
defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>;
defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>;
defm V_ADD_F16 : VOP2_Real_gfx10<0x032>;
defm V_SUB_F16 : VOP2_Real_gfx10<0x033>;
defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
defm V_MUL_F16 : VOP2_Real_gfx10<0x035>;
defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>;
defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>;
defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>;
defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
// VOP2 no carry-in, carry-out.
defm V_ADD_NC_U32 :

View File

@@ -22,6 +22,9 @@ s_getreg_b32 s2, hwreg(HW_REG_SHADER_CYCLES)
v_fma_legacy_f32 v0, v1, v2, v3
// GFX10: error: instruction not supported on this GPU
v_fmac_legacy_f32 v0, v1, v2
// GFX10: error: instruction not supported on this GPU
image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
// GFX10: error: instruction not supported on this GPU

View File

@@ -61,6 +61,15 @@ v_fma_legacy_f32 v0, v1, |v2|, -v3
v_fma_legacy_f32 v0, s1, 2.0, -v3
// GFX10: encoding: [0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84]
v_fmac_legacy_f32 v0, v1, v2
// GFX10: encoding: [0x01,0x05,0x00,0x0c]
v_fmac_legacy_f32 v0, |v1|, -v2
// GFX10: encoding: [0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40]
v_fmac_legacy_f32 v0, s1, 2.0
// GFX10: encoding: [0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00]
image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00]

View File

@@ -0,0 +1,16 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s
//===----------------------------------------------------------------------===//
// Unsupported dpp variants.
//===----------------------------------------------------------------------===//
v_fmac_legacy_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: dpp variant of this instruction is not supported
//===----------------------------------------------------------------------===//
// Unsupported sdwa variants.
//===----------------------------------------------------------------------===//
v_fmac_legacy_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported

View File

@@ -52,6 +52,15 @@
# GFX10: v_fma_legacy_f32 v0, s1, 2.0, -v3
0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84
# GFX10: v_fmac_legacy_f32_e32 v0, v1, v2
0x01,0x05,0x00,0x0c
# GFX10: v_fmac_legacy_f32_e64 v0, |v1|, -v2
0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40
# GFX10: v_fmac_legacy_f32_e64 v0, s1, 2.0
0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00
# GFX10: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00