mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU : Fix mqsad_u32_u8 instruction incorrect data type.
Differential Revision: http://reviews.llvm.org/D23700 llvm-svn: 281081
This commit is contained in:
parent
762a182e7c
commit
876390a884
@ -538,7 +538,7 @@ def int_amdgcn_mqsad_pk_u16_u8 :
|
|||||||
|
|
||||||
def int_amdgcn_mqsad_u32_u8 :
|
def int_amdgcn_mqsad_u32_u8 :
|
||||||
GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
|
GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
|
||||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
def int_amdgcn_cvt_pk_u8_f32 :
|
def int_amdgcn_cvt_pk_u8_f32 :
|
||||||
GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
|
GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
|
||||||
|
@ -58,7 +58,7 @@ defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x172, 0x1e5>, "v_qsad_pk_u16_u8",
|
|||||||
VOP_I64_I64_I32_I64, int_amdgcn_qsad_pk_u16_u8>;
|
VOP_I64_I64_I32_I64, int_amdgcn_qsad_pk_u16_u8>;
|
||||||
|
|
||||||
defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x174, 0x1e7>, "v_mqsad_u32_u8",
|
defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x174, 0x1e7>, "v_mqsad_u32_u8",
|
||||||
VOP_I32_I32_I32_I32, int_amdgcn_mqsad_u32_u8>;
|
VOP_V4I32_I64_I32_V4I32, int_amdgcn_mqsad_u32_u8>;
|
||||||
|
|
||||||
let isCommutable = 1 in {
|
let isCommutable = 1 in {
|
||||||
defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
|
defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
|
||||||
|
@ -624,9 +624,10 @@ class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
|
|||||||
// instructions for the given VT.
|
// instructions for the given VT.
|
||||||
class getVALUDstForVT<ValueType VT> {
|
class getVALUDstForVT<ValueType VT> {
|
||||||
RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
|
RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
|
||||||
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
|
!if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
|
||||||
!if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
|
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
|
||||||
VOPDstOperand<SReg_64>))); // else VT == i1
|
!if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
|
||||||
|
VOPDstOperand<SReg_64>)))); // else VT == i1
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the register class to use for source 0 of VOP[12C]
|
// Returns the register class to use for source 0 of VOP[12C]
|
||||||
@ -636,14 +637,15 @@ class getVOPSrc0ForVT<ValueType VT> {
|
|||||||
!if(!eq(VT.Value, f32.Value), 1,
|
!if(!eq(VT.Value, f32.Value), 1,
|
||||||
!if(!eq(VT.Value, f64.Value), 1,
|
!if(!eq(VT.Value, f64.Value), 1,
|
||||||
0)));
|
0)));
|
||||||
RegisterOperand ret = !if(isFP,
|
RegisterOperand ret = !if(isFP,
|
||||||
!if(!eq(VT.Size, 64), VSrc_f64, VSrc_f32),
|
!if(!eq(VT.Size, 64), VSrc_f64, VSrc_f32),
|
||||||
!if(!eq(VT.Size, 64), VSrc_b64, VSrc_b32));
|
!if(!eq(VT.Size, 64), VSrc_b64, VSrc_b32));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the vreg register class to use for source operand given VT
|
// Returns the vreg register class to use for source operand given VT
|
||||||
class getVregSrcForVT<ValueType VT> {
|
class getVregSrcForVT<ValueType VT> {
|
||||||
RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
|
RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
|
||||||
|
!if(!eq(VT.Size, 64), VReg_64, VGPR_32));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -655,6 +657,8 @@ class getVOP3SrcForVT<ValueType VT> {
|
|||||||
!if(!eq(VT.Value, f64.Value), 1,
|
!if(!eq(VT.Value, f64.Value), 1,
|
||||||
0)));
|
0)));
|
||||||
RegisterOperand ret =
|
RegisterOperand ret =
|
||||||
|
!if(!eq(VT.Size, 128),
|
||||||
|
VSrc_128,
|
||||||
!if(!eq(VT.Size, 64),
|
!if(!eq(VT.Size, 64),
|
||||||
!if(isFP,
|
!if(isFP,
|
||||||
VCSrc_f64,
|
VCSrc_f64,
|
||||||
@ -665,7 +669,8 @@ class getVOP3SrcForVT<ValueType VT> {
|
|||||||
VCSrc_f32,
|
VCSrc_f32,
|
||||||
VCSrc_b32)
|
VCSrc_b32)
|
||||||
)
|
)
|
||||||
);
|
)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns 1 if the source arguments have modifiers, 0 if they do not.
|
// Returns 1 if the source arguments have modifiers, 0 if they do not.
|
||||||
@ -779,7 +784,7 @@ class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
|
|||||||
// VOP1 without input operands (V_NOP)
|
// VOP1 without input operands (V_NOP)
|
||||||
(ins),
|
(ins),
|
||||||
!if(!eq(NumSrcArgs, 1),
|
!if(!eq(NumSrcArgs, 1),
|
||||||
!if(HasFloatModifiers,
|
!if(HasFloatModifiers,
|
||||||
// VOP1_SDWA with float modifiers
|
// VOP1_SDWA with float modifiers
|
||||||
(ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
|
(ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
|
||||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||||
@ -800,7 +805,7 @@ class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
|
|||||||
Src1Mod:$src1_fmodifiers, Src1RC:$src1,
|
Src1Mod:$src1_fmodifiers, Src1RC:$src1,
|
||||||
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||||
src0_sel:$src0_sel, src1_sel:$src1_sel)),
|
src0_sel:$src0_sel, src1_sel:$src1_sel)),
|
||||||
|
|
||||||
!if(!eq(DstVT.Size, 1),
|
!if(!eq(DstVT.Size, 1),
|
||||||
// VOPC_SDWA with int modifiers
|
// VOPC_SDWA with int modifiers
|
||||||
(ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
|
(ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
|
||||||
@ -1163,6 +1168,7 @@ def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
|
|||||||
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
|
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
|
||||||
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
|
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
|
||||||
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
|
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
|
||||||
|
def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
|
||||||
|
|
||||||
// This class is used only with VOPC instructions. Use $sdst for out operand
|
// This class is used only with VOPC instructions. Use $sdst for out operand
|
||||||
class SIInstAlias <string asm, Instruction inst, VOPProfile p,
|
class SIInstAlias <string asm, Instruction inst, VOPProfile p,
|
||||||
|
@ -412,6 +412,8 @@ defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ;
|
|||||||
|
|
||||||
defm VSrc : RegImmOperand<"VS", "VSrc">;
|
defm VSrc : RegImmOperand<"VS", "VSrc">;
|
||||||
|
|
||||||
|
def VSrc_128 : RegisterOperand<VReg_128>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// VSrc_* Operands with an VGPR
|
// VSrc_* Operands with an VGPR
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -1,21 +1,47 @@
|
|||||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||||
|
|
||||||
declare i32 @llvm.amdgcn.mqsad.u32.u8(i32, i32, i32) #0
|
declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}v_mqsad_u32_u8:
|
; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_non_inline_constant:
|
||||||
; GCN: v_mqsad_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
|
||||||
define void @v_mqsad_u32_u8(i32 addrspace(1)* %out, i32 %src) {
|
define void @v_mqsad_u32_u8_use_non_inline_constant(<4 x i32> addrspace(1)* %out, i64 %src) {
|
||||||
%result= call i32 @llvm.amdgcn.mqsad.u32.u8(i32 %src, i32 100, i32 100) #0
|
%result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 100, <4 x i32> <i32 100, i32 100, i32 100, i32 100>) #0
|
||||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate:
|
; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate:
|
||||||
; GCN: v_mqsad_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
|
||||||
define void @v_mqsad_u32_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) {
|
define void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) {
|
||||||
%result= call i32 @llvm.amdgcn.mqsad.u32.u8(i32 %src, i32 %a, i32 %b) #0
|
%result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %b) #0
|
||||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate:
|
||||||
|
; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
|
||||||
|
define void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {
|
||||||
|
%result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> <i32 10, i32 20, i32 30, i32 40>) #0
|
||||||
|
store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_fp_immediate:
|
||||||
|
; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
|
||||||
|
define void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {
|
||||||
|
%result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> <i32 1065353216, i32 0, i32 0, i32 0>) #0
|
||||||
|
store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_sgpr_vgpr:
|
||||||
|
; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
|
||||||
|
define void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) {
|
||||||
|
%in = load <4 x i32>, <4 x i32> addrspace(1) * %input
|
||||||
|
|
||||||
|
%result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %in) #0
|
||||||
|
store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,3 +6,6 @@ v_add_f32_e64 v0, v1
|
|||||||
|
|
||||||
v_div_scale_f32 v24, vcc, v22, 1.1, v22
|
v_div_scale_f32 v24, vcc, v22, 1.1, v22
|
||||||
// CHECK: error: invalid operand for instruction
|
// CHECK: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3]
|
||||||
|
// CHECK: error: instruction not supported on this GPU
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
|
// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI
|
||||||
// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI
|
// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
|
||||||
|
|
||||||
|
// RUN: llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck %s --check-prefix=CI
|
||||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
|
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
|
||||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI
|
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI
|
||||||
|
|
||||||
@ -357,3 +359,8 @@ v_div_scale_f32 v24, vcc, v22, v22, 0xc0000000
|
|||||||
v_mad_f32 v9, 0.5, v5, -v8
|
v_mad_f32 v9, 0.5, v5, -v8
|
||||||
// SICI: v_mad_f32 v9, 0.5, v5, -v8 ; encoding: [0x09,0x00,0x82,0xd2,0xf0,0x0a,0x22,0x84]
|
// SICI: v_mad_f32 v9, 0.5, v5, -v8 ; encoding: [0x09,0x00,0x82,0xd2,0xf0,0x0a,0x22,0x84]
|
||||||
// VI: v_mad_f32 v9, 0.5, v5, -v8 ; encoding: [0x09,0x00,0xc1,0xd1,0xf0,0x0a,0x22,0x84]
|
// VI: v_mad_f32 v9, 0.5, v5, -v8 ; encoding: [0x09,0x00,0xc1,0xd1,0xf0,0x0a,0x22,0x84]
|
||||||
|
|
||||||
|
v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3]
|
||||||
|
// CI: v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3] ; encoding: [0x00,0x00,0xe8,0xd2,0x02,0x08,0x02,0x04]
|
||||||
|
// VI: v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3] ; encoding: [0x00,0x00,0xe7,0xd1,0x02,0x08,0x02,0x04]
|
||||||
|
// NOSI: error: instruction not supported on this GPU
|
||||||
|
Loading…
Reference in New Issue
Block a user