diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index f69da2dcdf0..a57966b70df 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -530,11 +530,11 @@ def int_amdgcn_sad_u16 : def int_amdgcn_qsad_pk_u16_u8 : GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; def int_amdgcn_mqsad_pk_u16_u8 : GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; def int_amdgcn_mqsad_u32_u8 : GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">, diff --git a/lib/Target/AMDGPU/CIInstructions.td b/lib/Target/AMDGPU/CIInstructions.td index 3b45c31e7be..b38b3d422b1 100644 --- a/lib/Target/AMDGPU/CIInstructions.td +++ b/lib/Target/AMDGPU/CIInstructions.td @@ -55,7 +55,7 @@ defm V_MQSAD_U16_U8 : VOP3Inst , "v_mqsad_u16_u8", >; defm V_QSAD_PK_U16_U8 : VOP3Inst , "v_qsad_pk_u16_u8", - VOP_I32_I32_I32_I32, int_amdgcn_qsad_pk_u16_u8>; + VOP_I64_I64_I32_I64, int_amdgcn_qsad_pk_u16_u8>; defm V_MQSAD_U32_U8 : VOP3Inst , "v_mqsad_u32_u8", VOP_I32_I32_I32_I32, int_amdgcn_mqsad_u32_u8>; diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 298f953b799..805a3284e39 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -1606,6 +1606,7 @@ def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>; +def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>; // This class is used only with VOPC instructions. Use $sdst for out operand class SIInstAlias : diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 15d161fe9c7..a30ce2f5e76 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1732,7 +1732,7 @@ defm V_MSAD_U8 : VOP3Inst , "v_msad_u8", VOP_I32_I32_I32_I32, int_amdgcn_msad_u8>; defm V_MQSAD_PK_U16_U8 : VOP3Inst , "v_mqsad_pk_u16_u8", - VOP_I32_I32_I32_I32, int_amdgcn_mqsad_pk_u16_u8>; + VOP_I64_I64_I32_I64, int_amdgcn_mqsad_pk_u16_u8>; //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>; diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll index fb13b19fd63..7c2495e096e 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll @@ -1,21 +1,21 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -declare i32 @llvm.amdgcn.mqsad.pk.u16.u8(i32, i32, i32) #0 +declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8: -; GCN: v_mqsad_pk_u16_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define void @v_mqsad_pk_u16_u8(i32 addrspace(1)* %out, i32 %src) { - %result= call i32 @llvm.amdgcn.mqsad.pk.u16.u8(i32 %src, i32 100, i32 100) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 +; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +define void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { + %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 + store i64 %result, i64 addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8_non_immediate: -; GCN: v_mqsad_pk_u16_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define void @v_mqsad_pk_u16_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) { - %result= call i32 @llvm.amdgcn.mqsad.pk.u16.u8(i32 %src, i32 %a, i32 %b) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 +; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +define void @v_mqsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { + %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 + store i64 %result, i64 addrspace(1)* %out, align 4 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll index 801b59461b6..ece4224f6e6 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll @@ -1,21 +1,21 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -declare i32 @llvm.amdgcn.qsad.pk.u16.u8(i32, i32, i32) #0 +declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8: -; GCN: v_qsad_pk_u16_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define void @v_qsad_pk_u16_u8(i32 addrspace(1)* %out, i32 %src) { - %result= call i32 @llvm.amdgcn.qsad.pk.u16.u8(i32 %src, i32 100, i32 100) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 +; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +define void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { + %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 + store i64 %result, i64 addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8_non_immediate: -; GCN: v_qsad_pk_u16_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define void @v_qsad_pk_u16_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) { - %result= call i32 @llvm.amdgcn.qsad.pk.u16.u8(i32 %src, i32 %a, i32 %b) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 +; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +define void @v_qsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { + %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 + store i64 %result, i64 addrspace(1)* %out, align 4 ret void }