1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/AMDGPU/store-global.ll
Tim Renouf c57b91da41 [AMDGPU] Support for v3i32/v3f32
Added support for dwordx3 for most load/store types, but not DS, and not
intrinsics yet.

SI (gfx6) does not have dwordx3 instructions, so they are not enabled
there.

Some of this patch is from Matt Arsenault, also of AMD.

Differential Revision: https://reviews.llvm.org/D58902

Change-Id: I913ef54f1433a7149da8d72f4af54dbb13436bd9
llvm-svn: 356659
2019-03-21 12:01:21 +00:00

407 lines
11 KiB
LLVM

; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}store_i1:
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR
; SIVI: buffer_store_byte
; GFX9: global_store_byte
define amdgpu_kernel void @store_i1(i1 addrspace(1)* %out) {
entry:
store i1 true, i1 addrspace(1)* %out
ret void
}
; i8 store
; FUNC-LABEL: {{^}}store_i8:
; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; EG-NOT: MEM_RAT MSKOR
; EG: VTX_READ_8
; EG: AND_INT
; EG: AND_INT
; EG: LSHL
; EG: LSHL
; EG: LSHL
; SIVI: buffer_store_byte
; GFX9: global_store_byte
define amdgpu_kernel void @store_i8(i8 addrspace(1)* %out, i8 %in) {
entry:
store i8 %in, i8 addrspace(1)* %out
ret void
}
; i16 store
; FUNC-LABEL: {{^}}store_i16:
; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; EG-NOT: MEM_RAT MSKOR
; EG: VTX_READ_16
; EG: AND_INT
; EG: AND_INT
; EG: LSHL
; EG: LSHL
; EG: LSHL
; SIVI: buffer_store_short
; GFX9: global_store_short
define amdgpu_kernel void @store_i16(i16 addrspace(1)* %out, i16 %in) {
entry:
store i16 %in, i16 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}store_i24:
; SIVI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; SIVI-DAG: buffer_store_byte
; SIVI-DAG: buffer_store_short
; GFX9-DAG: global_store_byte_d16_hi v{{\[[0-9]:[0-9]+\]}}, v{{[0-9]+}}, off offset:2
; GFX9-DAG: global_store_short
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
define amdgpu_kernel void @store_i24(i24 addrspace(1)* %out, i24 %in) {
entry:
store i24 %in, i24 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}store_i25:
; GCN: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 0x1ffffff{{$}}
; GCN: v_mov_b32_e32 [[VAND:v[0-9]+]], [[AND]]
; SIVI: buffer_store_dword [[VAND]]
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VAND]]
; EG: MEM_RAT_CACHELESS STORE_RAW
; EG-NOT: MEM_RAT
; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT
define amdgpu_kernel void @store_i25(i25 addrspace(1)* %out, i25 %in) {
entry:
store i25 %in, i25 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}store_v2i8:
; v2i8 is naturally 2B aligned
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR
; SIVI: buffer_store_short
; GFX9: global_store_short
define amdgpu_kernel void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
store <2 x i8> %0, <2 x i8> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}store_v2i8_unaligned:
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR
; SI: buffer_store_byte
define amdgpu_kernel void @store_v2i8_unaligned(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
store <2 x i8> %0, <2 x i8> addrspace(1)* %out, align 1
ret void
}
; FUNC-LABEL: {{^}}store_v2i16:
; EG: MEM_RAT_CACHELESS STORE_RAW
; CM: MEM_RAT_CACHELESS STORE_DWORD
; SIVI: buffer_store_dword
; GFX9: global_store_dword
define amdgpu_kernel void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
store <2 x i16> %0, <2 x i16> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}store_v2i16_unaligned:
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
; SIVI: buffer_store_short
; SIVI: buffer_store_short
; GFX9: global_store_short
; GFX9: global_store_short
define amdgpu_kernel void @store_v2i16_unaligned(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
store <2 x i16> %0, <2 x i16> addrspace(1)* %out, align 2
ret void
}
; FUNC-LABEL: {{^}}store_v4i8:
; EG: MEM_RAT_CACHELESS STORE_RAW
; CM: MEM_RAT_CACHELESS STORE_DWORD
; SIVI: buffer_store_dword
; GFX9: global_store_dword
define amdgpu_kernel void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
store <4 x i8> %0, <4 x i8> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}store_v4i8_unaligned:
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
define amdgpu_kernel void @store_v4i8_unaligned(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
store <4 x i8> %0, <4 x i8> addrspace(1)* %out, align 1
ret void
}
; FUNC-LABEL: {{^}}store_v4i8_halfaligned:
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
; SI: buffer_store_short
; SI: buffer_store_short
; SI-NOT: buffer_store_dword
define amdgpu_kernel void @store_v4i8_halfaligned(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
store <4 x i8> %0, <4 x i8> addrspace(1)* %out, align 2
ret void
}
; floating-point store
; FUNC-LABEL: {{^}}store_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
; SIVI: buffer_store_dword
; GFX9: global_store_dword
define amdgpu_kernel void @store_f32(float addrspace(1)* %out, float %in) {
store float %in, float addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}store_v4i16:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}
; SIVI: buffer_store_dwordx2
; GFX9: global_store_dwordx2
define amdgpu_kernel void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i16>
store <4 x i16> %0, <4 x i16> addrspace(1)* %out
ret void
}
; vec2 floating-point stores
; FUNC-LABEL: {{^}}store_v2f32:
; EG: MEM_RAT_CACHELESS STORE_RAW
; CM: MEM_RAT_CACHELESS STORE_DWORD
; SIVI: buffer_store_dwordx2
; GFX9: global_store_dwordx2
define amdgpu_kernel void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
entry:
%0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
%1 = insertelement <2 x float> %0, float %b, i32 1
store <2 x float> %1, <2 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}store_v3i32:
; SI-DAG: buffer_store_dword v
; SI-DAG: buffer_store_dwordx2
; VI: buffer_store_dwordx3
; GFX9: global_store_dwordx3
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}},
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.XY}}, {{T[0-9]+\.[XYZW]}},
define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a) nounwind {
store <3 x i32> %a, <3 x i32> addrspace(1)* %out, align 16
ret void
}
; FUNC-LABEL: {{^}}store_v4i32:
; EG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.XYZW}}
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
; SIVI: buffer_store_dwordx4
; GFX9: global_store_dwordx4
define amdgpu_kernel void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}store_v4i32_unaligned:
; EG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.XYZW}}
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
; SIVI: buffer_store_dwordx4
; GFX9: global_store_dwordx4
define amdgpu_kernel void @store_v4i32_unaligned(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
ret void
}
; v4f32 store
; FUNC-LABEL: {{^}}store_v4f32:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
; SIVI: buffer_store_dwordx4
; GFX9: global_store_dwordx4
define amdgpu_kernel void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%1 = load <4 x float>, <4 x float> addrspace(1) * %in
store <4 x float> %1, <4 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}store_i64_i8:
; EG: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; SIVI: buffer_store_byte
; GFX9: global_store_byte
define amdgpu_kernel void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i8
store i8 %0, i8 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}store_i64_i16:
; EG: MEM_RAT MSKOR
; SIVI: buffer_store_short
; GFX9: global_store_short
define amdgpu_kernel void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i16
store i16 %0, i16 addrspace(1)* %out
ret void
}
; The stores in this function are combined by the optimizer to create a
; 64-bit store with 32-bit alignment. This is legal and the legalizer
; should not try to split the 64-bit store back into 2 32-bit stores.
; FUNC-LABEL: {{^}}vecload2:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XY, T[0-9]+\.X}}, 1
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
; SIVI: buffer_store_dwordx2
; GFX9: global_store_dwordx2
define amdgpu_kernel void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* nocapture %mem) #0 {
entry:
%0 = load i32, i32 addrspace(4)* %mem, align 4
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(4)* %mem, i64 1
%1 = load i32, i32 addrspace(4)* %arrayidx1.i, align 4
store i32 %0, i32 addrspace(1)* %out, align 4
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
ret void
}
; When i128 was a legal type this program generated cannot select errors:
; FUNC-LABEL: {{^}}"i128-const-store":
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 1
; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}, T{{[0-9]+}}.X
; SIVI: buffer_store_dwordx4
; GFX9: global_store_dwordx4
define amdgpu_kernel void @i128-const-store(i32 addrspace(1)* %out) {
entry:
store i32 1, i32 addrspace(1)* %out, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 1, i32 addrspace(1)* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
store i32 2, i32 addrspace(1)* %arrayidx4, align 4
%arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
store i32 2, i32 addrspace(1)* %arrayidx6, align 4
ret void
}
attributes #0 = { nounwind }