1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[AMDGPU] CodeGen: check dst operand type to determine if omod is supported for VOP3 instructions

Summary:
Previously, CodeGen checked first src operand type to determine if omod is supported by instruction. This isn't correct for some instructions: e.g. V_CMP_EQ_F32 has floating-point src operands but desn't support omod.
Changed .td files to check if dst operand instead of src operand.

Reviewers: arsenm, vpykhtin

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye

Differential Revision: https://reviews.llvm.org/D35350

llvm-svn: 308179
This commit is contained in:
Sam Kolton 2017-07-17 14:23:38 +00:00
parent 0c7889542c
commit c4d6857d3f
11 changed files with 81 additions and 51 deletions

View File

@ -1436,7 +1436,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit IsPacked = isPackedType<Src0VT>.ret;
field bit HasOpSel = IsPacked;
field bit HasOMod = !if(HasOpSel, 0, HasModifiers);
field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret);
field bit HasSDWAOMod = isFloatType<DstVT>.ret;
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;

View File

@ -1060,7 +1060,7 @@ def : Pat <
class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : Pat <
(i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
(i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
(i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))
>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;

View File

@ -117,7 +117,10 @@ class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set P.DstVT:$vdst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
(node (P.Src0VT
!if(P.HasOMod,
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}
@ -813,9 +816,11 @@ let SubtargetPredicate = isVI in {
// Aliases to simplify matching of floating-point instructions that
// are VOP2 on SI and VOP3 on VI.
class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias <
name#" $dst, $src0, $src1",
(inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0)
!if(inst.Pfl.HasOMod,
(inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0),
(inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0))
>, PredicateControl {
let UseInstAsmMatchConverter = 0;
let AsmVariantName = AMDGPUAsmVariants.VOP3;

View File

@ -12,17 +12,21 @@
//===----------------------------------------------------------------------===//
class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
dag src0 = !if(P.HasOMod,
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
list<dag> ret3 = [(set P.DstVT:$vdst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
(node (P.Src0VT src0),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
(node (P.Src0VT src0),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod))))];
(node (P.Src0VT src0)))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,

View File

@ -148,6 +148,19 @@ class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
let SubtargetPredicate = AssemblerPredicate;
}
class getVOPCPat64 <PatLeaf cond, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set i1:$sdst,
(setcc (P.Src0VT
!if(P.HasOMod,
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]);
}
multiclass VOPC_Pseudos <string opName,
VOPC_Profile P,
PatLeaf cond = COND_NULL,
@ -163,14 +176,7 @@ multiclass VOPC_Pseudos <string opName,
let isCommutable = 1;
}
def _e64 : VOP3_Pseudo<opName, P,
!if(P.HasModifiers,
[(set i1:$sdst,
(setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))])>,
def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = P.Schedule;
@ -634,7 +640,7 @@ class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : Pat <
(i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
(inst $src0_modifiers, $src0, $src1_modifiers, $src1,
DSTCLAMP.NONE, DSTOMOD.NONE)
DSTCLAMP.NONE)
>;
def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;

View File

@ -136,6 +136,8 @@ class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
let TSFlags = ps.TSFlags;
let UseNamedOperandTable = ps.UseNamedOperandTable;
let Uses = ps.Uses;
VOPProfile Pfl = ps.Pfl;
}
// XXX - Is there any reason to distingusih this from regular VOP3

View File

@ -34,7 +34,7 @@ body: |
bb.0:
successors: %bb.2, %bb.1
%7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, 0, implicit %exec
%7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, implicit %exec
%vcc = COPY killed %7
S_CBRANCH_VCCZ %bb.2, implicit killed %vcc

View File

@ -332,7 +332,7 @@ body: |
# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
# VI: %{{[0-9]+}} = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 0, implicit-def %exec, implicit %exec
# VI: %{{[0-9]+}} = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %3, 0, 6, 4, implicit-def %vcc, implicit %exec
# VI: %{{[0-9]+}} = V_CMPX_EQ_I32_e64 23, killed %{{[0-9]+}}, implicit-def %exec, implicit %exec
@ -345,20 +345,21 @@ body: |
# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 2, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 2, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, 2, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 0, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 2, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 2, implicit %exec
# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, 2, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, implicit-def %exec, implicit %exec
name: vopc_instructions
@ -415,28 +416,28 @@ body: |
V_CMPX_EQ_I32_e32 123, killed %13, implicit-def %vcc, implicit-def %exec, implicit %exec
%14 = V_AND_B32_e64 %5, %3, implicit %exec
%vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, 0, implicit %exec
%vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, implicit %exec
%15 = V_AND_B32_e64 %5, %3, implicit %exec
%18 = V_CMPX_GT_F32_e64 0, 23, 0, killed %15, 0, 0, implicit-def %exec, implicit %exec
%18 = V_CMPX_GT_F32_e64 0, 23, 0, killed %15, 0, implicit-def %exec, implicit %exec
%16 = V_AND_B32_e64 %5, %3, implicit %exec
%vcc = V_CMP_LT_I32_e64 %6, killed %16, implicit %exec
%17 = V_AND_B32_e64 %5, %3, implicit %exec
%19 = V_CMPX_EQ_I32_e64 23, killed %17, implicit-def %exec, implicit %exec
%20 = V_AND_B32_e64 %5, %3, implicit %exec
%vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %20, 1, 0, implicit %exec
%vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %20, 1, implicit %exec
%21 = V_AND_B32_e64 %5, %3, implicit %exec
%vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %21, 0, 2, implicit-def %exec, implicit %exec
%vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %21, 0, implicit-def %exec, implicit %exec
%23 = V_AND_B32_e64 %5, %3, implicit %exec
%vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %23, 1, 2, implicit %exec
%vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %23, 1, implicit %exec
%24 = V_AND_B32_e64 %5, %3, implicit %exec
%vcc = V_CMPX_GT_F32_e64 1, 23, 0, killed %24, 0, 0, implicit-def %exec, implicit %exec
%vcc = V_CMPX_GT_F32_e64 1, 23, 0, killed %24, 0, implicit-def %exec, implicit %exec
%25 = V_AND_B32_e64 %5, %3, implicit %exec
%vcc = V_CMPX_GT_F32_e64 0, 23, 1, killed %25, 0, 0, implicit-def %exec, implicit %exec
%vcc = V_CMPX_GT_F32_e64 0, 23, 1, killed %25, 0, implicit-def %exec, implicit %exec
%26 = V_AND_B32_e64 %5, %3, implicit %exec
%vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %26, 0, 0, implicit-def %exec, implicit %exec
%vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %26, 0, implicit-def %exec, implicit %exec
%27 = V_AND_B32_e64 %5, %3, implicit %exec
%vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %27, 1, 2, implicit-def %exec, implicit %exec
%vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %27, 1, implicit-def %exec, implicit %exec
%100 = V_MOV_B32_e32 %vcc_lo, implicit %exec

View File

@ -8,7 +8,7 @@
# GCN: %{{[0-9]+}} = V_BCNT_U32_B32_e64 %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
# GCN: %{{[0-9]+}} = V_BFM_B32_e64 %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
# GCN: %{{[0-9]+}} = V_CVT_PKNORM_I16_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 0, 0, implicit-def %vcc, implicit %exec
# GCN: %{{[0-9]+}} = V_CVT_PKNORM_I16_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 0, implicit-def %vcc, implicit %exec
# GCN: %{{[0-9]+}} = V_READLANE_B32 killed %{{[0-9]+}}, 0, implicit-def %vcc, implicit %exec
---
@ -50,7 +50,7 @@ body: |
%15 = V_BFM_B32_e64 %13, killed %14, implicit-def %vcc, implicit %exec
%16 = V_LSHRREV_B32_e64 16, %15, implicit %exec
%17 = V_CVT_PKNORM_I16_F32_e64 0, %15, 0, killed %16, 0, 0, implicit-def %vcc, implicit %exec
%17 = V_CVT_PKNORM_I16_F32_e64 0, %15, 0, killed %16, 0, implicit-def %vcc, implicit %exec
%18 = V_LSHRREV_B32_e64 16, %17, implicit %exec
%19 = V_READLANE_B32 killed %18, 0, implicit-def %vcc, implicit %exec

View File

@ -81,7 +81,7 @@ body: |
%sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
%sgpr7 = S_MOV_B32 61440
%sgpr6 = S_MOV_B32 -1
%vcc = V_CMP_EQ_F32_e64 0, 0, 0, %sgpr2, 0, 0, implicit %exec
%vcc = V_CMP_EQ_F32_e64 0, 0, 0, %sgpr2, 0, implicit %exec
S_CBRANCH_VCCZ %bb.1.else, implicit killed %vcc
bb.2.if:

View File

@ -1,35 +1,47 @@
// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s
// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN
// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN
// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN
v_add_f32_e64 v0, v1
// CHECK: error: too few operands for instruction
// GCN: error: too few operands for instruction
v_div_scale_f32 v24, vcc, v22, 1.1, v22
// CHECK: error: invalid operand for instruction
// GCN: error: invalid operand for instruction
v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3]
// CHECK: error: instruction not supported on this GPU
// GFX67: error: instruction not supported on this GPU
// GFX89: error: destination must be different than all sources
v_mqsad_pk_u16_u8 v[0:1], v[1:2], v9, v[4:5]
// CHECK: error: destination must be different than all sources
// GCN: error: destination must be different than all sources
v_mqsad_pk_u16_u8 v[1:2], v[1:2], v9, v[4:5]
// CHECK: error: destination must be different than all sources
// GCN: error: destination must be different than all sources
v_mqsad_pk_u16_u8 v[2:3], v[1:2], v9, v[4:5]
// CHECK: error: destination must be different than all sources
// GCN: error: destination must be different than all sources
v_mqsad_pk_u16_u8 v[3:4], v[0:1], v9, v[4:5]
// CHECK: error: destination must be different than all sources
// GCN: error: destination must be different than all sources
v_mqsad_pk_u16_u8 v[4:5], v[1:2], v9, v[4:5]
// CHECK: error: destination must be different than all sources
// GCN: error: destination must be different than all sources
v_mqsad_pk_u16_u8 v[5:6], v[1:2], v9, v[4:5]
// CHECK: error: destination must be different than all sources
// GCN: error: destination must be different than all sources
v_mqsad_pk_u16_u8 v[8:9], v[1:2], v9, v[4:5]
// CHECK: error: destination must be different than all sources
// GCN: error: destination must be different than all sources
v_mqsad_pk_u16_u8 v[9:10], v[1:2], v9, v[4:5]
// CHECK: error: destination must be different than all sources
// GCN: error: destination must be different than all sources
v_cmp_eq_f32_e64 vcc, v0, v1 mul:2
// GCN: error: invalid operand for instruction
v_cmp_le_f64_e64 vcc, v0, v1 mul:4
// GCN: error: invalid operand for instruction
v_cvt_u32_f32_e64 v0, v1 div:2
// GCN: error: invalid operand for instruction