mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 19:12:56 +02:00
[AMDGPU][MC] Corrected several VI opcodes to avoid printing _e64
See bug 32936: https://bugs.llvm.org/show_bug.cgi?id=32936
Reviewers: artem.tamazov, vpykhtin
Differential Revision: https://reviews.llvm.org/D33123
llvm-svn: 303070
This commit is contained in:
parent
61ca3be831
commit
5a5f736ba9
@ -657,6 +657,17 @@ multiclass VOP2_Real_e64_vi <bits<10> op> {
|
||||
VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
|
||||
}
|
||||
|
||||
// Defines the real instruction record `<NAME>_e64_vi` from the `<NAME>_e64`
// VOP3 pseudo. The record derives from VOP3_Real (SIEncodingFamily.VI) and
// from VOP3e_vi, parameterized by the 10-bit opcode `op` and the pseudo's Pfl.
// NOTE(review): the "e64only" name suggests these opcodes have no e32 real
// definition on VI -- confirm against the rest of the file.
multiclass VOP2_Real_e64only_vi <bits<10> op> {
|
||||
def _e64_vi :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
|
||||
VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
|
||||
// Hack to stop printing _e64: AsmString is rebuilt below from the pseudo's
// bare Mnemonic and AsmOperands, so no "_e64" suffix is concatenated here.
|
||||
// Local handle on the `<NAME>_e64` pseudo whose fields are read below.
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME#"_e64");
|
||||
// Override the output operand list with a single VGPR_32 operand, $vdst.
let OutOperandList = (outs VGPR_32:$vdst);
|
||||
// Assembly string is "<mnemonic> <operands>", taken from the pseudo.
let AsmString = ps.Mnemonic # " " # ps.AsmOperands;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass Base_VOP2be_Real_e32e64_vi <bits<6> op> : VOP2_Real_e32_vi<op> {
|
||||
def _e64_vi :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
|
||||
@ -724,17 +735,17 @@ defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>;
|
||||
defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
|
||||
defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
|
||||
|
||||
defm V_BFM_B32 : VOP2_Real_e64_vi <0x293>;
|
||||
defm V_BCNT_U32_B32 : VOP2_Real_e64_vi <0x28b>;
|
||||
defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64_vi <0x28c>;
|
||||
defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64_vi <0x28d>;
|
||||
defm V_LDEXP_F32 : VOP2_Real_e64_vi <0x288>;
|
||||
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64_vi <0x1f0>;
|
||||
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64_vi <0x294>;
|
||||
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64_vi <0x295>;
|
||||
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64_vi <0x296>;
|
||||
defm V_CVT_PK_U16_U32 : VOP2_Real_e64_vi <0x297>;
|
||||
defm V_CVT_PK_I16_I32 : VOP2_Real_e64_vi <0x298>;
|
||||
defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;
|
||||
defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>;
|
||||
defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>;
|
||||
defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>;
|
||||
defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>;
|
||||
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>;
|
||||
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>;
|
||||
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>;
|
||||
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>;
|
||||
defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>;
|
||||
defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>;
|
||||
|
||||
defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>;
|
||||
defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>;
|
||||
|
@ -25,7 +25,7 @@ define amdgpu_kernel void @fold_mi_s_and_0(i32 addrspace(1)* %out, i32 %x) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fold_mi_v_or_0:
|
||||
; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]]
|
||||
; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
|
||||
; GCN-NOT: [[RESULT]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define amdgpu_kernel void @fold_mi_v_or_0(i32 addrspace(1)* %out) {
|
||||
@ -50,7 +50,7 @@ define amdgpu_kernel void @fold_mi_s_or_0(i32 addrspace(1)* %out, i32 %x) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fold_mi_v_xor_0:
|
||||
; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]]
|
||||
; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
|
||||
; GCN-NOT: [[RESULT]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define amdgpu_kernel void @fold_mi_v_xor_0(i32 addrspace(1)* %out) {
|
||||
@ -86,8 +86,8 @@ define amdgpu_kernel void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fold_mi_v_not_0:
|
||||
; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
|
||||
; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
|
||||
; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]]
|
||||
; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}}
|
||||
; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
|
||||
@ -104,8 +104,8 @@ define amdgpu_kernel void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
|
||||
; GCN: buffer_load_dwordx2
|
||||
; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}}
|
||||
|
||||
; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
|
||||
; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
|
||||
; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]]
|
||||
; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
|
||||
|
@ -25,7 +25,7 @@ define amdgpu_kernel void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val)
|
||||
; XXX - Why 0 in register?
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 0
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
@ -40,9 +40,9 @@ define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrs
|
||||
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
|
||||
; GCN: buffer_load_dword [[VAL1:v[0-9]+]],
|
||||
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
|
||||
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
|
||||
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
|
||||
; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
@ -61,7 +61,7 @@ define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out,
|
||||
; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
|
||||
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
|
||||
; GCN-NEXT: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
|
||||
@ -73,8 +73,8 @@ define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out,
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_v2i32:
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
@ -87,10 +87,10 @@ define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_v4i32:
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
@ -105,14 +105,14 @@ define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_v8i32:
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
@ -131,22 +131,22 @@ define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_v16i32:
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}}
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
@ -174,7 +174,7 @@ define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out,
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
@ -189,7 +189,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noa
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
@ -206,7 +206,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)*
|
||||
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN-DAG: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
|
||||
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
|
||||
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
|
||||
; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
|
||||
@ -220,7 +220,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %ou
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
|
||||
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
@ -236,7 +236,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
|
||||
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
@ -253,7 +253,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %ou
|
||||
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], {{0$}}
|
||||
; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16
|
||||
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
|
@ -26,9 +26,9 @@ define amdgpu_kernel void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val)
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i64:
|
||||
; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
|
||||
; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
|
||||
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
|
||||
; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
|
||||
; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
|
||||
@ -41,9 +41,9 @@ define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrs
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i64_user:
|
||||
; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
|
||||
; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
|
||||
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
|
||||
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
|
||||
; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
|
||||
; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
|
||||
; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
|
||||
@ -171,11 +171,11 @@ define amdgpu_kernel void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val)
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i128:
|
||||
; GCN: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
|
||||
; GCN-DAG: v_bcnt_u32_b32_e64 [[MIDRESULT0:v[0-9]+]], v{{[0-9]+}}, 0
|
||||
; GCN-DAG: v_bcnt_u32_b32{{_e32|_e64}} [[MIDRESULT1:v[0-9]+]], v[[VAL3]], [[MIDRESULT0]]
|
||||
; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT0:v[0-9]+]], v{{[0-9]+}}, 0
|
||||
; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT1:v[0-9]+]], v[[VAL3]], [[MIDRESULT0]]
|
||||
|
||||
; GCN-DAG: v_bcnt_u32_b32_e64 [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0
|
||||
; GCN-DAG: v_bcnt_u32_b32{{_e32|_e64}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]]
|
||||
; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0
|
||||
; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]]
|
||||
|
||||
; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]]
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}}
|
||||
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]]
|
||||
; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]]
|
||||
; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[VY]]
|
||||
; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[X]], [[VY]]
|
||||
define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 {
|
||||
%result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
|
||||
store <2 x half> %result, <2 x half> addrspace(1)* %out
|
||||
@ -16,7 +16,7 @@ define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out,
|
||||
|
||||
; GCN-LABEL: {{^}}s_cvt_pkrtz_samereg_v2f16_f32:
|
||||
; GCN: s_load_dword [[X:s[0-9]+]]
|
||||
; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[X]]
|
||||
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
|
||||
define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1)* %out, float %x) #0 {
|
||||
%result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %x)
|
||||
store <2 x half> %result, <2 x half> addrspace(1)* %out
|
||||
@ -39,7 +39,7 @@ define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out
|
||||
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
|
||||
; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
|
||||
; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], [[B]]
|
||||
; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[A]], [[B]]
|
||||
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -55,7 +55,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out,
|
||||
|
||||
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_reg_imm:
|
||||
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], 1.0
|
||||
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0
|
||||
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -70,7 +70,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)
|
||||
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_imm_reg:
|
||||
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
|
||||
; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, 1.0, [[A]]
|
||||
; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, 1.0, [[A]]
|
||||
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -85,7 +85,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)
|
||||
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo:
|
||||
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
|
||||
; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -[[A]], [[B]]
|
||||
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]]
|
||||
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -103,7 +103,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1)
|
||||
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_hi:
|
||||
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
|
||||
; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], -[[B]]
|
||||
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]]
|
||||
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -121,7 +121,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1)
|
||||
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo_hi:
|
||||
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
|
||||
; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -[[A]], -[[B]]
|
||||
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]]
|
||||
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -140,7 +140,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace
|
||||
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi:
|
||||
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
|
||||
; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -|[[A]]|, -[[B]]
|
||||
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]]
|
||||
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
@ -2,9 +2,9 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}mbcnt_intrinsics:
|
||||
; GCN: v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0
|
||||
; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[LO:v[0-9]+]], -1, 0
|
||||
; SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
|
||||
; VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]]
|
||||
; VI: v_mbcnt_hi_u32_b32 {{v[0-9]+}}, -1, [[LO]]
|
||||
define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3) {
|
||||
main_body:
|
||||
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
|
@ -243,7 +243,7 @@ v_or_b32_e32 v1, v2, v3
|
||||
v_xor_b32_e32 v1, v2, v3
|
||||
|
||||
// SICI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x3c,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_bfm_b32_e64 v1, v2, v3
|
||||
|
||||
// SICI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e]
|
||||
@ -259,15 +259,15 @@ v_madmk_f32 v1, v2, 64.0, v3
|
||||
v_madak_f32 v1, v2, v3, 64.0
|
||||
|
||||
// SICI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_bcnt_u32_b32_e64 v1, v2, v3
|
||||
|
||||
// SICI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x46,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_mbcnt_lo_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_mbcnt_lo_u32_b32_e64 v1, v2, v3
|
||||
|
||||
// SICI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_mbcnt_hi_u32_b32_e64 v1, v2, v3
|
||||
|
||||
// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
|
||||
@ -376,31 +376,31 @@ v_subbrev_u32 v1, vcc, v2, v3, vcc
|
||||
v_subbrev_u32 v1, s[0:1], v2, v3, vcc
|
||||
|
||||
// SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
|
||||
// VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_ldexp_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_ldexp_f32 v1, v2, v3
|
||||
|
||||
// SICI: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58]
|
||||
// VI: v_cvt_pkaccum_u8_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00]
|
||||
// VI: v_cvt_pkaccum_u8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pkaccum_u8_f32 v1, v2, v3
|
||||
|
||||
// SICI: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a]
|
||||
// VI: v_cvt_pknorm_i16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_cvt_pknorm_i16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pknorm_i16_f32 v1, v2, v3
|
||||
|
||||
// SICI: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c]
|
||||
// VI: v_cvt_pknorm_u16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_cvt_pknorm_u16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pknorm_u16_f32 v1, v2, v3
|
||||
|
||||
// SICI: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e]
|
||||
// VI: v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_cvt_pkrtz_f16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pkrtz_f16_f32 v1, v2, v3
|
||||
|
||||
// SICI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x60,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_cvt_pk_u16_u32 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pk_u16_u32_e64 v1, v2, v3
|
||||
|
||||
// SICI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x62,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_cvt_pk_i16_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pk_i16_i32_e64 v1, v2, v3
|
||||
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
|
@ -288,31 +288,31 @@ v_or_b32 v1, v2, v3
|
||||
v_xor_b32 v1, v2, v3
|
||||
|
||||
// SICI: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
|
||||
// VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_bfm_b32 v1, v2, v3
|
||||
|
||||
// SICI: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44]
|
||||
// VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_bcnt_u32_b32 v1, v2, v3
|
||||
|
||||
// SICI: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46]
|
||||
// VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_mbcnt_lo_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_mbcnt_lo_u32_b32 v1, v2, v3
|
||||
|
||||
// SICI: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48]
|
||||
// VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_mbcnt_hi_u32_b32 v1, v2, v3
|
||||
|
||||
// SICI: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60]
|
||||
// VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_cvt_pk_u16_u32 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pk_u16_u32 v1, v2, v3
|
||||
|
||||
// SICI: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62]
|
||||
// VI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_cvt_pk_i16_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_cvt_pk_i16_i32 v1, v2, v3
|
||||
|
||||
// SICI: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
|
||||
// VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
|
||||
v_bfm_b32 v1, v2, v3
|
||||
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
|
@ -72,7 +72,7 @@
|
||||
# VI: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a]
|
||||
0x02 0x07 0x02 0x2a
|
||||
|
||||
# VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
|
||||
# VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x93 0xd2 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c]
|
||||
@ -84,13 +84,13 @@
|
||||
# VI: v_madak_f32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42]
|
||||
0x02 0x07 0x02 0x30 0x00 0x00 0x80 0x42
|
||||
|
||||
# VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
|
||||
# VI: v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x8b 0xd2 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
|
||||
# VI: v_mbcnt_lo_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x8c 0xd2 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
|
||||
# VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x8d 0xd2 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
|
||||
@ -171,25 +171,25 @@
|
||||
# VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0xaa,0x01]
|
||||
0x01 0x00 0x1e 0xd1 0x02 0x07 0xaa 0x01
|
||||
|
||||
# VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]
|
||||
# VI: v_ldexp_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x88 0xd2 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_cvt_pkaccum_u8_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00]
|
||||
# VI: v_cvt_pkaccum_u8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0xf0 0xd1 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_cvt_pknorm_i16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00]
|
||||
# VI: v_cvt_pknorm_i16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x94 0xd2 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_cvt_pknorm_u16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00]
|
||||
# VI: v_cvt_pknorm_u16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x95 0xd2 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00]
|
||||
# VI: v_cvt_pkrtz_f16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x96 0xd2 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
|
||||
# VI: v_cvt_pk_u16_u32 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x97 0xd2 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
|
||||
# VI: v_cvt_pk_i16_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x98 0xd2 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_add_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e]
|
||||
|
Loading…
Reference in New Issue
Block a user