mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-21 20:12:56 +02:00
30a743add7
Summary: They correspond to BUFFER_LOAD/STORE_DWORD[_X2,X3,X4] and mostly behave like llvm.amdgcn.buffer.load/store.format. They will be used by Mesa for SSBO and atomic counters at least when robust buffer access behavior is desired. (These instructions perform no format conversion and do buffer range checking per component.) As a side effect of sharing patterns with llvm.amdgcn.buffer.store.format, it has become trivial to add support for the f32 and v2f32 variants of that intrinsic, so the patch does so. Also DAG-ify (and fix) some tests that I noticed intermittent failures in while developing this patch. Some tests were (temporarily) adjusted for the required mayLoad/hasSideEffects changes to the BUFFER_STORE_DWORD* instructions. See also http://reviews.llvm.org/D18291. Reviewers: arsenm, tstellarAMD, mareko Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18292 llvm-svn: 266126
131 lines
4.8 KiB
LLVM
131 lines
4.8 KiB
LLVM
; Codegen test for the signed absolute-value idiom max(x, 0 - x) on amdgcn.
; The two llc invocations below target the verde and tonga subtargets and use
; the same check prefixes, so the shared expectations must hold for both.
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
|
|
|
; s_ variant, i32: %val arrives directly as a function argument.
; The IR selects the larger (signed) of %val and 0 - %val, adds 2 and stores
; the sum to %out. The checks expect an s_abs_i32 followed by an s_add_i32.
; FUNC-LABEL: {{^}}s_abs_i32:
; GCN: s_abs_i32
; GCN: s_add_i32
define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
  %neg = sub i32 0, %val                       ; 0 - val
  %cond = icmp sgt i32 %val, %neg              ; signed: val > -val
  %res = select i1 %cond, i32 %val, i32 %neg   ; larger of val and -val
  %res2 = add i32 %res, 2
  store i32 %res2, i32 addrspace(1)* %out, align 4
  ret void
}
|
|
|
|
; v_ variant, i32: the operand is loaded from %src instead of being passed as
; an argument. The IR selects the larger (signed) of the loaded value and its
; negation, adds 2 and stores the sum to %out.
; The checks expect a v_sub_i32 from 0, a v_max_i32 whose inputs are that
; subtraction's result and source registers, and then a v_add_i32.
; FUNC-LABEL: {{^}}v_abs_i32:
; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
; GCN: v_add_i32
define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
  %val = load i32, i32 addrspace(1)* %src, align 4
  %neg = sub i32 0, %val                       ; 0 - val
  %cond = icmp sgt i32 %val, %neg              ; signed: val > -val
  %res = select i1 %cond, i32 %val, i32 %neg   ; larger of val and -val
  %res2 = add i32 %res, 2
  store i32 %res2, i32 addrspace(1)* %out, align 4
  ret void
}
|
|
|
|
; s_ variant, <2 x i32>: %val arrives directly as a function argument.
; %z1 is the all-zero vector and %t1 the vector with 2 in every lane; both are
; built lane by lane with insertelement. The IR then selects, per lane, the
; larger (signed) of %val and %z1 - %val, adds %t1 and stores the sum to %out.
; The checks expect two s_abs_i32 followed by two s_add_i32, in that order.
; FUNC-LABEL: {{^}}s_abs_v2i32:
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_add_i32
; GCN: s_add_i32
define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
  ; Constant operands: %z1 = <0, 0>, %t1 = <2, 2>.
  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  ; Per-lane larger of val and 0 - val, then + 2.
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}
|
|
|
|
; v_ variant, <2 x i32>: the operand is loaded from %src instead of being
; passed as an argument. %z1 is the all-zero vector and %t1 the vector with 2
; in every lane. The IR selects, per lane, the larger (signed) of the loaded
; value and %z1 - value, adds %t1 and stores the sum to %out.
; The checks expect, for each lane, a v_sub_i32 from 0 and a v_max_i32 whose
; inputs are that subtraction's result and source registers; these are
; DAG-style checks, so their relative order is not fixed. Two v_add_i32 must
; follow.
; FUNC-LABEL: {{^}}v_abs_v2i32:
; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]

; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]

; GCN: v_add_i32
; GCN: v_add_i32
define void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
  ; Constant operands: %z1 = <0, 0>, %t1 = <2, 2>.
  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  %val = load <2 x i32>, <2 x i32> addrspace(1)* %src, align 4
  ; Per-lane larger of val and 0 - val, then + 2.
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}
|
|
|
|
; s_ variant, <4 x i32>: %val arrives directly as a function argument.
; %z3 is the all-zero vector and %t3 the vector with 2 in every lane; both are
; built lane by lane with insertelement. The IR then selects, per lane, the
; larger (signed) of %val and %z3 - %val, adds %t3 and stores the sum to %out.
; The checks expect four s_abs_i32 followed by four s_add_i32, in that order.
; FUNC-LABEL: {{^}}s_abs_v4i32:
; Note: an earlier TODO here asked for s_abs_i32 to be used; the four checks
; below already require it, so nothing is left to do for that item.
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32

; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
  ; Constant operands: %z3 = <0, 0, 0, 0>, %t3 = <2, 2, 2, 2>.
  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
  ; Per-lane larger of val and 0 - val, then + 2.
  %neg = sub <4 x i32> %z3, %val
  %cond = icmp sgt <4 x i32> %val, %neg
  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
  %res2 = add <4 x i32> %res, %t3
  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}
|
|
|
|
; v_ variant, <4 x i32>: the operand is loaded from %src instead of being
; passed as an argument. %z3 is the all-zero vector and %t3 the vector with 2
; in every lane. The IR selects, per lane, the larger (signed) of the loaded
; value and %z3 - value, adds %t3 and stores the sum to %out.
; The checks expect, for each lane, a v_sub_i32 from 0 and a v_max_i32 whose
; inputs are that subtraction's result and source registers; these are
; DAG-style checks, so their relative order is not fixed. Four v_add_i32 must
; follow.
; FUNC-LABEL: {{^}}v_abs_v4i32:
; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
; GCN-DAG: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
; GCN-DAG: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]

; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]

; GCN: v_add_i32
; GCN: v_add_i32
; GCN: v_add_i32
; GCN: v_add_i32
define void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
  ; Constant operands: %z3 = <0, 0, 0, 0>, %t3 = <2, 2, 2, 2>.
  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
  %val = load <4 x i32>, <4 x i32> addrspace(1)* %src, align 4
  ; Per-lane larger of val and 0 - val, then + 2.
  %neg = sub <4 x i32> %z3, %val
  %cond = icmp sgt <4 x i32> %val, %neg
  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
  %res2 = add <4 x i32> %res, %t3
  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}
|