1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00
llvm-mirror/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
Tim Corringham 4837270633 [AMDGPU] Fix for vector element insertion
Summary:
Incorrect code was generated when lowering insertelement operations
for vectors with 8 or 16 bit elements.  The value being inserted was
not adjusted for the position of the element within the 32 bit word
and so only the low element within each 32 bit word could receive
the intended value.

Fixed by simply replicating the value to each element of a
congruent vector before the mask and or operation used to
update the intended element.

A number of affected LIT tests have been updated appropriately.

before the mask & or into the intended

Reviewers: arsenm, nhaehnle

Reviewed By: arsenm

Subscribers: llvm-commits, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D57588

llvm-svn: 352885
2019-02-01 16:51:09 +00:00

324 lines
12 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
; GCN-LABEL: {{^}}float4_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 [[CC1:[^,]+]], [[IDX:s[0-9]+]], 3
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC2:[^,]+]], [[IDX]], 2
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC2]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC3:[^,]+]], [[IDX]], 1
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC3]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC4:[^,]+]], [[IDX]], 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC4]]
; GCN: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST]]:[[ELT_LAST]]]
define amdgpu_kernel void @float4_inselt(<4 x float> addrspace(1)* %out, <4 x float> %vec, i32 %sel) {
entry:
%v = insertelement <4 x float> %vec, float 1.000000e+00, i32 %sel
store <4 x float> %v, <4 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}float4_inselt_undef:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-NOT: v_cmp_
; GCN-NOT: v_cndmask_
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]]
; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]]
; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]]
define amdgpu_kernel void @float4_inselt_undef(<4 x float> addrspace(1)* %out, i32 %sel) {
entry:
%v = insertelement <4 x float> undef, float 1.000000e+00, i32 %sel
store <4 x float> %v, <4 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}int4_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 [[CC1:[^,]+]], [[IDX:s[0-9]+]], 3
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST:[0-9]+]], 1, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC2:[^,]+]], [[IDX]], 2
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}, [[CC2]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC3:[^,]+]], [[IDX]], 1
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}, [[CC3]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC4:[^,]+]], [[IDX]], 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST:[0-9]+]], 1, v{{[0-9]+}}, [[CC4]]
; GCN: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST]]:[[ELT_LAST]]]
define amdgpu_kernel void @int4_inselt(<4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %sel) {
entry:
%v = insertelement <4 x i32> %vec, i32 1, i32 %sel
store <4 x i32> %v, <4 x i32> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}float2_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 [[CC1:[^,]+]], [[IDX:s[0-9]+]], 1
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC2:[^,]+]], [[IDX]], 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC2]]
; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST]]:[[ELT_LAST]]]
define amdgpu_kernel void @float2_inselt(<2 x float> addrspace(1)* %out, <2 x float> %vec, i32 %sel) {
entry:
%v = insertelement <2 x float> %vec, float 1.000000e+00, i32 %sel
store <2 x float> %v, <2 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}float8_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 [[CC1:[^,]+]], [[IDX:s[0-9]+]], 3
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST0:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC2:[^,]+]], [[IDX]], 2
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC2]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC3:[^,]+]], [[IDX]], 1
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC3]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC4:[^,]+]], [[IDX]], 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST0:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC4]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC5:[^,]+]], [[IDX:s[0-9]+]], 7
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_LAST1:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC5]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC6:[^,]+]], [[IDX]], 6
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC6]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC7:[^,]+]], [[IDX]], 5
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, [[CC7]]
; GCN-DAG: v_cmp_ne_u32_e64 [[CC8:[^,]+]], [[IDX]], 4
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST1:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC8]]
; GCN-DAG: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST0]]:[[ELT_LAST0]]]
; GCN-DAG: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST1]]:[[ELT_LAST1]]]
define amdgpu_kernel void @float8_inselt(<8 x float> addrspace(1)* %out, <8 x float> %vec, i32 %sel) {
entry:
%v = insertelement <8 x float> %vec, float 1.000000e+00, i32 %sel
store <8 x float> %v, <8 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}float16_inselt:
; GCN: v_movreld_b32
define amdgpu_kernel void @float16_inselt(<16 x float> addrspace(1)* %out, <16 x float> %vec, i32 %sel) {
entry:
%v = insertelement <16 x float> %vec, float 1.000000e+00, i32 %sel
store <16 x float> %v, <16 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}half4_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], [[SEL]]
; GCN: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], [[K]]
; GCN: v_bfi_b32 v{{[0-9]+}}, s{{[0-9]+}}, [[V]], v{{[0-9]+}}
; GCN: v_bfi_b32 v{{[0-9]+}}, s{{[0-9]+}}, [[V]], v{{[0-9]+}}
define amdgpu_kernel void @half4_inselt(<4 x half> addrspace(1)* %out, <4 x half> %vec, i32 %sel) {
entry:
%v = insertelement <4 x half> %vec, half 1.000000e+00, i32 %sel
store <4 x half> %v, <4 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}half2_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b32 [[V:s[0-9]+]], 0xffff, [[SEL]]
; GCN: v_bfi_b32 v{{[0-9]+}}, [[V]], v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @half2_inselt(<2 x half> addrspace(1)* %out, <2 x half> %vec, i32 %sel) {
entry:
%v = insertelement <2 x half> %vec, half 1.000000e+00, i32 %sel
store <2 x half> %v, <2 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}half8_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 0
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 1
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 2
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 3
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 4
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 5
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 6
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 7
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
define amdgpu_kernel void @half8_inselt(<8 x half> addrspace(1)* %out, <8 x half> %vec, i32 %sel) {
entry:
%v = insertelement <8 x half> %vec, half 1.000000e+00, i32 %sel
store <8 x half> %v, <8 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}short2_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x10001
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b32 [[V:s[0-9]+]], 0xffff, [[SEL]]
; GCN: v_bfi_b32 v{{[0-9]+}}, [[V]], [[K]], v{{[0-9]+}}
define amdgpu_kernel void @short2_inselt(<2 x i16> addrspace(1)* %out, <2 x i16> %vec, i32 %sel) {
entry:
%v = insertelement <2 x i16> %vec, i16 1, i32 %sel
store <2 x i16> %v, <2 x i16> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}short4_inselt:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshl_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], [[SEL]]
; GCN: s_mov_b32 [[K:s[0-9]+]], 0x10001
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], [[K]]
; GCN: v_bfi_b32 v{{[0-9]+}}, s{{[0-9]+}}, [[V]], v{{[0-9]+}}
; GCN: v_bfi_b32 v{{[0-9]+}}, s{{[0-9]+}}, [[V]], v{{[0-9]+}}
define amdgpu_kernel void @short4_inselt(<4 x i16> addrspace(1)* %out, <4 x i16> %vec, i32 %sel) {
entry:
%v = insertelement <4 x i16> %vec, i16 1, i32 %sel
store <4 x i16> %v, <4 x i16> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}byte8_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 3
; GCN: s_lshl_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], [[SEL]]
; GCN: s_mov_b32 [[K:s[0-9]+]], 0x1010101
; GCN: s_and_b32 s3, s1, [[K]]
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]]
; GCN: s_andn2_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
define amdgpu_kernel void @byte8_inselt(<8 x i8> addrspace(1)* %out, <8 x i8> %vec, i32 %sel) {
entry:
%v = insertelement <8 x i8> %vec, i8 1, i32 %sel
store <8 x i8> %v, <8 x i8> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}byte16_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 0
; GCN-DAG: v_cmp_ne_u32_e64 {{[^,]+}}, {{s[0-9]+}}, 15
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
; GCN-DAG: v_or_b32_sdwa
define amdgpu_kernel void @byte16_inselt(<16 x i8> addrspace(1)* %out, <16 x i8> %vec, i32 %sel) {
entry:
%v = insertelement <16 x i8> %vec, i8 1, i32 %sel
store <16 x i8> %v, <16 x i8> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}double2_inselt:
; GCN-NOT: v_movrel
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_eq_u32_e64 [[CC1:[^,]+]], [[IDX:s[0-9]+]], 1
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 0, [[CC1]]
; GCN-DAG: v_cmp_eq_u32_e64 [[CC2:[^,]+]], [[IDX]], 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC2]]
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 0, [[CC2]]
define amdgpu_kernel void @double2_inselt(<2 x double> addrspace(1)* %out, <2 x double> %vec, i32 %sel) {
entry:
%v = insertelement <2 x double> %vec, double 1.000000e+00, i32 %sel
store <2 x double> %v, <2 x double> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}double8_inselt:
; GCN-NOT: v_cndmask
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
define amdgpu_kernel void @double8_inselt(<8 x double> addrspace(1)* %out, <8 x double> %vec, i32 %sel) {
entry:
%v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel
store <8 x double> %v, <8 x double> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}bit4_inselt:
; GCN: buffer_store_byte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define amdgpu_kernel void @bit4_inselt(<4 x i1> addrspace(1)* %out, <4 x i1> %vec, i32 %sel) {
entry:
%v = insertelement <4 x i1> %vec, i1 1, i32 %sel
store <4 x i1> %v, <4 x i1> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}bit128_inselt:
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_ne_u32_e64 [[CC1:[^,]+]], s{{[0-9]+}}, 0
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}, [[CC1]]
; GCN-DAG: v_mov_b32_e32 [[LASTIDX:v[0-9]+]], 0x7f
; GCN-DAG: v_cmp_ne_u32_e32 [[CCL:[^,]+]], s{{[0-9]+}}, [[LASTIDX]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}, [[CCL]]
define amdgpu_kernel void @bit128_inselt(<128 x i1> addrspace(1)* %out, <128 x i1> %vec, i32 %sel) {
entry:
%v = insertelement <128 x i1> %vec, i1 1, i32 %sel
store <128 x i1> %v, <128 x i1> addrspace(1)* %out
ret void
}