1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00
llvm-mirror/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
Nicolai Haehnle e4544eb06f AMDGPU: use ComplexPattern for offsets in llvm.amdgcn.buffer.load/store.format
Summary:
We cannot easily deduce that an offset is in an SGPR, but the Mesa frontend
cannot easily make use of an explicit soffset parameter either. Furthermore,
it is likely that in the future, LLVM will be in a better position than the
frontend to choose an SGPR offset if possible.

Since there aren't any frontend uses of these intrinsics in upstream
repositories yet, I would like to take this opportunity to change the
intrinsic signatures to a single offset parameter, which is then selected
to immediate offsets or voffsets using a ComplexPattern.

Reviewers: arsenm, tstellarAMD, mareko

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18218

llvm-svn: 263790
2016-03-18 16:24:20 +00:00

79 lines
3.4 KiB
LLVM

;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: {{^}}buffer_store:
;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0
;CHECK: buffer_store_format_xyzw v[4:7], s[0:3], 0 glc
;CHECK: buffer_store_format_xyzw v[8:11], s[0:3], 0 slc
define void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) #0 {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i1 0, i1 1)
ret void
}
;CHECK-LABEL: {{^}}buffer_store_immoffs:
;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0 offset:42
define void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) #0 {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
ret void
}
;CHECK-LABEL: {{^}}buffer_store_idx:
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
define void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) #0 {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
ret void
}
;CHECK-LABEL: {{^}}buffer_store_ofs:
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 offen
define void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) #0 {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 %2, i1 0, i1 0)
ret void
}
;CHECK-LABEL: {{^}}buffer_store_both:
;CHECK: buffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 idxen offen
define void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) #0 {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 %3, i1 0, i1 0)
ret void
}
;CHECK-LABEL: {{^}}buffer_store_both_reversed:
;CHECK: v_mov_b32_e32 v6, v4
;CHECK: buffer_store_format_xyzw v[0:3], v[5:6], s[0:3], 0 idxen offen
define void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) #0 {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %3, i32 %2, i1 0, i1 0)
ret void
}
; Ideally, the register allocator would avoid the wait here
;
;CHECK-LABEL: {{^}}buffer_store_wait:
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
;CHECK: s_waitcnt vmcnt(0) expcnt(0)
;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_store_format_xyzw v[0:3], v6, s[0:3], 0 idxen
define void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) #0 {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
%data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %3, i32 0, i1 0, i1 0)
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %data, <4 x i32> %0, i32 %4, i32 0, i1 0, i1 0)
ret void
}
declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i1, i1) #2
attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }