mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[AMDGPU] Add MC layer support for v_fmac_legacy_f32
This instruction was introduced in GFX10.3, reusing the opcode of v_mac_legacy_f32 from GFX10.1. Differential Revision: https://reviews.llvm.org/D89247
This commit is contained in:
parent
9f80ca3aba
commit
bfbf8b669b
@ -6894,6 +6894,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
|
||||
Opc == AMDGPU::V_MAC_F16_e64_vi ||
|
||||
Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
|
||||
Opc == AMDGPU::V_FMAC_F32_e64_vi ||
|
||||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
|
||||
Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
|
||||
auto it = Inst.begin();
|
||||
std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
|
||||
|
@ -390,6 +390,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
|
||||
MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
|
||||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
|
||||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
|
||||
MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
|
||||
MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
|
||||
// Insert dummy unused src2_modifiers.
|
||||
insertNamedMCOperand(MI, MCOperand::createImm(0),
|
||||
|
@ -672,14 +672,23 @@ let SubtargetPredicate = HasDLInsts in {
|
||||
defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>;
|
||||
|
||||
let Constraints = "$vdst = $src2",
|
||||
DisableEncoding="$src2",
|
||||
DisableEncoding = "$src2",
|
||||
isConvertibleToThreeAddress = 1,
|
||||
isCommutable = 1 in {
|
||||
isCommutable = 1 in
|
||||
defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
|
||||
}
|
||||
|
||||
} // End SubtargetPredicate = HasDLInsts
|
||||
|
||||
let SubtargetPredicate = HasNoMadMacF32Insts in {
|
||||
|
||||
let Constraints = "$vdst = $src2",
|
||||
DisableEncoding = "$src2",
|
||||
isConvertibleToThreeAddress = 1,
|
||||
isCommutable = 1 in
|
||||
defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;
|
||||
|
||||
} // End SubtargetPredicate = HasNoMadMacF32Insts
|
||||
|
||||
let Constraints = "$vdst = $src2",
|
||||
DisableEncoding="$src2",
|
||||
isConvertibleToThreeAddress = 1,
|
||||
@ -1147,21 +1156,25 @@ multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
|
||||
VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
|
||||
VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
|
||||
|
||||
defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>;
|
||||
defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>;
|
||||
defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>;
|
||||
defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>;
|
||||
defm V_ADD_F16 : VOP2_Real_gfx10<0x032>;
|
||||
defm V_SUB_F16 : VOP2_Real_gfx10<0x033>;
|
||||
defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
|
||||
defm V_MUL_F16 : VOP2_Real_gfx10<0x035>;
|
||||
defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>;
|
||||
defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>;
|
||||
defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>;
|
||||
defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
|
||||
defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
|
||||
defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
|
||||
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
|
||||
// NB: Same opcode as v_mac_legacy_f32
|
||||
let DecoderNamespace = "GFX10_B" in
|
||||
defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;
|
||||
|
||||
defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>;
|
||||
defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>;
|
||||
defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>;
|
||||
defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>;
|
||||
defm V_ADD_F16 : VOP2_Real_gfx10<0x032>;
|
||||
defm V_SUB_F16 : VOP2_Real_gfx10<0x033>;
|
||||
defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
|
||||
defm V_MUL_F16 : VOP2_Real_gfx10<0x035>;
|
||||
defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>;
|
||||
defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>;
|
||||
defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>;
|
||||
defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
|
||||
defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
|
||||
defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
|
||||
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
|
||||
|
||||
// VOP2 no carry-in, carry-out.
|
||||
defm V_ADD_NC_U32 :
|
||||
|
@ -22,6 +22,9 @@ s_getreg_b32 s2, hwreg(HW_REG_SHADER_CYCLES)
|
||||
v_fma_legacy_f32 v0, v1, v2, v3
|
||||
// GFX10: error: instruction not supported on this GPU
|
||||
|
||||
v_fmac_legacy_f32 v0, v1, v2
|
||||
// GFX10: error: instruction not supported on this GPU
|
||||
|
||||
image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
|
||||
// GFX10: error: instruction not supported on this GPU
|
||||
|
||||
|
@ -61,6 +61,15 @@ v_fma_legacy_f32 v0, v1, |v2|, -v3
|
||||
v_fma_legacy_f32 v0, s1, 2.0, -v3
|
||||
// GFX10: encoding: [0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84]
|
||||
|
||||
v_fmac_legacy_f32 v0, v1, v2
|
||||
// GFX10: encoding: [0x01,0x05,0x00,0x0c]
|
||||
|
||||
v_fmac_legacy_f32 v0, |v1|, -v2
|
||||
// GFX10: encoding: [0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40]
|
||||
|
||||
v_fmac_legacy_f32 v0, s1, 2.0
|
||||
// GFX10: encoding: [0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00]
|
||||
|
||||
image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
|
||||
// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00]
|
||||
|
||||
|
16
test/MC/AMDGPU/gfx1030_unsupported.s
Normal file
16
test/MC/AMDGPU/gfx1030_unsupported.s
Normal file
@ -0,0 +1,16 @@
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Unsupported dpp variants.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
v_fmac_legacy_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: dpp variant of this instruction is not supported
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Unsupported sdwa variants.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
v_fmac_legacy_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
|
@ -52,6 +52,15 @@
|
||||
# GFX10: v_fma_legacy_f32 v0, s1, 2.0, -v3
|
||||
0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84
|
||||
|
||||
# GFX10: v_fmac_legacy_f32_e32 v0, v1, v2
|
||||
0x01,0x05,0x00,0x0c
|
||||
|
||||
# GFX10: v_fmac_legacy_f32_e64 v0, |v1|, -v2
|
||||
0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40
|
||||
|
||||
# GFX10: v_fmac_legacy_f32_e64 v0, s1, 2.0
|
||||
0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00
|
||||
|
||||
# GFX10: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7]
|
||||
0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user