[AMDGPU] Add MC layer support for v_fmac_legacy_f32

This instruction was introduced in GFX10.3, reusing the opcode of v_mac_legacy_f32 from GFX10.1.

Differential Revision: https://reviews.llvm.org/D89247
2024-11-23 03:02:36 +01:00 · 2020-10-09 16:37:01 +01:00 · 2020-10-09 16:37:01 +01:00 · bfbf8b669b
commit bfbf8b669b
parent 9f80ca3aba
7 changed files with 70 additions and 18 deletions
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -6894,6 +6894,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
+      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -390,6 +390,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
+              MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
--- a/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/lib/Target/AMDGPU/VOP2Instructions.td
@@ -672,14 +672,23 @@ let SubtargetPredicate = HasDLInsts in {
 defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>;

 let Constraints = "$vdst = $src2",
-    DisableEncoding="$src2",
+    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
-    isCommutable = 1 in {
+    isCommutable = 1 in
 defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
-}

 } // End SubtargetPredicate = HasDLInsts

+let SubtargetPredicate = HasNoMadMacF32Insts in {
+
+let Constraints = "$vdst = $src2",
+    DisableEncoding = "$src2",
+    isConvertibleToThreeAddress = 1,
+    isCommutable = 1 in
+defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;
+
+} // End SubtargetPredicate = HasNoMadMacF32Insts
+
 let Constraints = "$vdst = $src2",
      DisableEncoding="$src2",
      isConvertibleToThreeAddress = 1,
@@ -1147,6 +1156,10 @@ multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
  VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;

+// NB: Same opcode as v_mac_legacy_f32
+let DecoderNamespace = "GFX10_B" in
+defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;
+
 defm V_XNOR_B32        : VOP2_Real_gfx10<0x01e>;
 defm V_FMAC_F32        : VOP2_Real_gfx10<0x02b>;
 defm V_FMAMK_F32       : VOP2Only_Real_MADK_gfx10<0x02c>;
--- a/test/MC/AMDGPU/gfx1011_err.s
+++ b/test/MC/AMDGPU/gfx1011_err.s
@@ -22,6 +22,9 @@ s_getreg_b32 s2, hwreg(HW_REG_SHADER_CYCLES)
 v_fma_legacy_f32 v0, v1, v2, v3
 // GFX10: error: instruction not supported on this GPU

+v_fmac_legacy_f32 v0, v1, v2
+// GFX10: error: instruction not supported on this GPU
+
 image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
 // GFX10: error: instruction not supported on this GPU

--- a/test/MC/AMDGPU/gfx1030_new.s
+++ b/test/MC/AMDGPU/gfx1030_new.s
@@ -61,6 +61,15 @@ v_fma_legacy_f32 v0, v1, |v2|, -v3
 v_fma_legacy_f32 v0, s1, 2.0, -v3
 // GFX10: encoding: [0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84]

+v_fmac_legacy_f32 v0, v1, v2
+// GFX10: encoding: [0x01,0x05,0x00,0x0c]
+
+v_fmac_legacy_f32 v0, |v1|, -v2
+// GFX10: encoding: [0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40]
+
+v_fmac_legacy_f32 v0, s1, 2.0
+// GFX10: encoding: [0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00]
+
 image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
 // GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00]

--- a/test/MC/AMDGPU/gfx1030_unsupported.s
+++ b/test/MC/AMDGPU/gfx1030_unsupported.s
@@ -0,0 +1,16 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s
+
+//===----------------------------------------------------------------------===//
+// Unsupported dpp variants.
+//===----------------------------------------------------------------------===//
+
+v_fmac_legacy_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: dpp variant of this instruction is not supported
+
+//===----------------------------------------------------------------------===//
+// Unsupported sdwa variants.
+//===----------------------------------------------------------------------===//
+
+v_fmac_legacy_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
--- a/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt
+++ b/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt
@@ -52,6 +52,15 @@
 # GFX10: v_fma_legacy_f32 v0, s1, 2.0, -v3
 0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84

+# GFX10: v_fmac_legacy_f32_e32 v0, v1, v2
+0x01,0x05,0x00,0x0c
+
+# GFX10: v_fmac_legacy_f32_e64 v0, |v1|, -v2
+0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40
+
+# GFX10: v_fmac_legacy_f32_e64 v0, s1, 2.0
+0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00
+
 # GFX10: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
 0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00