mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
e4544eb06f
Summary: We cannot easily deduce that an offset is in an SGPR, but the Mesa frontend cannot easily make use of an explicit soffset parameter either. Furthermore, it is likely that in the future, LLVM will be in a better position than the frontend to choose an SGPR offset if possible. Since there aren't any frontend uses of these intrinsics in upstream repositories yet, I would like to take this opportunity to change the intrinsic signatures to a single offset parameter, which is then selected to immediate offsets or voffsets using a ComplexPattern. Reviewers: arsenm, tstellarAMD, mareko Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18218 llvm-svn: 263790
79 lines
3.4 KiB
LLVM
79 lines
3.4 KiB
LLVM
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
|
|
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
|
|
|
|
;CHECK-LABEL: {{^}}buffer_store:
|
|
;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0
|
|
;CHECK: buffer_store_format_xyzw v[4:7], s[0:3], 0 glc
|
|
;CHECK: buffer_store_format_xyzw v[8:11], s[0:3], 0 slc
|
|
define void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) #0 {
|
|
main_body:
|
|
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
|
|
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
|
|
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i1 0, i1 1)
|
|
ret void
|
|
}
|
|
|
|
;CHECK-LABEL: {{^}}buffer_store_immoffs:
|
|
;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0 offset:42
|
|
define void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) #0 {
|
|
main_body:
|
|
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
|
|
ret void
|
|
}
|
|
|
|
;CHECK-LABEL: {{^}}buffer_store_idx:
|
|
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
|
|
define void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) #0 {
|
|
main_body:
|
|
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
|
|
ret void
|
|
}
|
|
|
|
;CHECK-LABEL: {{^}}buffer_store_ofs:
|
|
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 offen
|
|
define void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) #0 {
|
|
main_body:
|
|
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 %2, i1 0, i1 0)
|
|
ret void
|
|
}
|
|
|
|
;CHECK-LABEL: {{^}}buffer_store_both:
|
|
;CHECK: buffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 idxen offen
|
|
define void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) #0 {
|
|
main_body:
|
|
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 %3, i1 0, i1 0)
|
|
ret void
|
|
}
|
|
|
|
;CHECK-LABEL: {{^}}buffer_store_both_reversed:
|
|
;CHECK: v_mov_b32_e32 v6, v4
|
|
;CHECK: buffer_store_format_xyzw v[0:3], v[5:6], s[0:3], 0 idxen offen
|
|
define void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) #0 {
|
|
main_body:
|
|
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %3, i32 %2, i1 0, i1 0)
|
|
ret void
|
|
}
|
|
|
|
; Ideally, the register allocator would avoid the wait here
|
|
;
|
|
;CHECK-LABEL: {{^}}buffer_store_wait:
|
|
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
|
|
;CHECK: s_waitcnt vmcnt(0) expcnt(0)
|
|
;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
|
|
;CHECK: s_waitcnt vmcnt(0)
|
|
;CHECK: buffer_store_format_xyzw v[0:3], v6, s[0:3], 0 idxen
|
|
define void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) #0 {
|
|
main_body:
|
|
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
|
|
%data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %3, i32 0, i1 0, i1 0)
|
|
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %data, <4 x i32> %0, i32 %4, i32 0, i1 0, i1 0)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
|
|
declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i1, i1) #2
|
|
|
|
attributes #0 = { "ShaderType"="0" }
|
|
attributes #1 = { nounwind }
|
|
attributes #2 = { nounwind readonly }
|