Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-11-23 03:02:36 +01:00)
[AMDGPU] Add support for 64-bit buffer atomic arithmetic instructions
Summary:
This adds support for 64-bit buffer atomic arithmetic instructions, but does not
include cmpswap, as that depends on a fix to the way register pairs are handled.

Change-Id: Ib207ea65fb69487ccad5066ea647ae8ddfe2ce61

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D58918

llvm-svn: 355520
parent 8c5cdae314
commit b43336b134
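In IR terms, every buffer atomic except cmpswap becomes overloaded on the data type, so the intrinsic name carries a type suffix and a 64-bit form is now legal. A minimal sketch of the new usage, mirroring the updated tests in this commit (function and value names here are illustrative, not from the patch):

    declare i64 @llvm.amdgcn.buffer.atomic.add.i64(i64, <4 x i32>, i32, i32, i1)

    define amdgpu_ps float @add64(<4 x i32> inreg %rsrc, i32 %data) {
    main_body:
      %v = sext i32 %data to i64
      ; the 64-bit form is expected to select to buffer_atomic_add_x2 and
      ; return the pre-op value in a VGPR pair
      %old = call i64 @llvm.amdgcn.buffer.atomic.add.i64(i64 %v, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
      %lo = trunc i64 %old to i32
      %r = bitcast i32 %lo to float
      ret float %r
    }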
@@ -1047,8 +1047,8 @@ def int_amdgcn_struct_tbuffer_store : Intrinsic <
   AMDGPURsrcIntrinsic<1>;
 
 class AMDGPUBufferAtomic : Intrinsic <
-  [llvm_i32_ty],
-  [llvm_i32_ty,       // vdata(VGPR)
+  [llvm_anyint_ty],
+  [LLVMMatchType<0>,  // vdata(VGPR)
    llvm_v4i32_ty,     // rsrc(SGPR)
    llvm_i32_ty,       // vindex(VGPR)
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
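Because the result and vdata slots are now llvm_anyint_ty / LLVMMatchType<0>, the intrinsic is overloaded and its name is mangled with the concrete type. Existing i32 users change spelling only; a before/after sketch of the rename that the test updates below apply mechanically:

    %orig = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)     ; before
    %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) ; after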
@@ -1201,35 +1201,36 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4i32, "BUFFER_STORE_DWORDX4">;
 // buffer_atomic patterns
 //===----------------------------------------------------------------------===//
 
-multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
+multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
+                                string opcode> {
   def : GCNPat<
-    (name i32:$vdata_in, v4i32:$rsrc, 0,
+    (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
           0, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, 0),
+          imm:$cachepolicy, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
                                           (as_i16imm $offset), (extract_slc $cachepolicy))
   >;
 
   def : GCNPat<
-    (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
+    (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
           0, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, imm),
+          imm:$cachepolicy, imm)),
     (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
                                           (as_i16imm $offset), (extract_slc $cachepolicy))
   >;
 
   def : GCNPat<
-    (name i32:$vdata_in, v4i32:$rsrc, 0,
+    (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
           i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, 0),
+          imm:$cachepolicy, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
                                           (as_i16imm $offset), (extract_slc $cachepolicy))
   >;
 
   def : GCNPat<
-    (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
+    (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
           i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, imm),
+          imm:$cachepolicy, imm)),
     (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
       $vdata_in,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1237,16 +1238,26 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
   >;
 }
 
-defm : BufferAtomicPatterns<SIbuffer_atomic_swap, "BUFFER_ATOMIC_SWAP">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_add, "BUFFER_ATOMIC_ADD">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_sub, "BUFFER_ATOMIC_SUB">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_smin, "BUFFER_ATOMIC_SMIN">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_umin, "BUFFER_ATOMIC_UMIN">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_smax, "BUFFER_ATOMIC_SMAX">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_umax, "BUFFER_ATOMIC_UMAX">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_and, "BUFFER_ATOMIC_AND">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_or, "BUFFER_ATOMIC_OR">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_xor, "BUFFER_ATOMIC_XOR">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i32, "BUFFER_ATOMIC_SWAP">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_add, i32, "BUFFER_ATOMIC_ADD">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i32, "BUFFER_ATOMIC_SUB">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i32, "BUFFER_ATOMIC_SMIN">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i32, "BUFFER_ATOMIC_UMIN">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i32, "BUFFER_ATOMIC_SMAX">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i32, "BUFFER_ATOMIC_UMAX">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_and, i32, "BUFFER_ATOMIC_AND">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_or, i32, "BUFFER_ATOMIC_OR">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i32, "BUFFER_ATOMIC_XOR">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i64, "BUFFER_ATOMIC_SWAP_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_add, i64, "BUFFER_ATOMIC_ADD_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i64, "BUFFER_ATOMIC_SUB_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i64, "BUFFER_ATOMIC_SMIN_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i64, "BUFFER_ATOMIC_UMIN_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i64, "BUFFER_ATOMIC_SMAX_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i64, "BUFFER_ATOMIC_UMAX_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_and, i64, "BUFFER_ATOMIC_AND_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_or, i64, "BUFFER_ATOMIC_OR_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">;
 
 def : GCNPat<
   (SIbuffer_atomic_cmpswap
@@ -1297,7 +1308,6 @@ def : GCNPat<
     sub0)
 >;
 
-
 class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
                               PatFrag constant_ld> : GCNPat <
   (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
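Each BufferAtomicPatterns instantiation emits four patterns, one per MUBUF addressing form, chosen by whether vindex and voffset are statically zero in the matched node. Sketched at the IR level for the new i64 variants (value names illustrative; test11 in the CodeGen test below exercises exactly these forms):

    %a = call i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64 %v, <4 x i32> %rsrc, i32 0,       i32 0,        i1 0) ; _OFFSET_RTN
    %b = call i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64 %a, <4 x i32> %rsrc, i32 %vindex, i32 0,        i1 0) ; _IDXEN_RTN
    %c = call i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64 %b, <4 x i32> %rsrc, i32 0,       i32 %voffset, i1 0) ; _OFFEN_RTN
    %d = call i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64 %c, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i1 0) ; _BOTHEN_RTN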
@@ -147,9 +147,7 @@ def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
 
 class SDBufferAtomic<string opcode> : SDNode <opcode,
   SDTypeProfile<1, 8,
-      [SDTCisVT<0, i32>,   // dst
-       SDTCisVT<1, i32>,   // vdata
-       SDTCisVT<2, v4i32>, // rsrc
+      [SDTCisVT<2, v4i32>, // rsrc
        SDTCisVT<3, i32>,   // vindex(VGPR)
        SDTCisVT<4, i32>,   // voffset(VGPR)
        SDTCisVT<5, i32>,   // soffset(SGPR)
@@ -1,81 +1,81 @@
 ;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
 define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.add(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.add.i32(
 define float @buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.sub(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.sub.i32(
 define float @buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smin(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smin.i32(
 define float @buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.smin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umin(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umin.i32(
 define float @buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.umin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smax(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smax.i32(
 define float @buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.smax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umax(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umax.i32(
 define float @buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.umax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.and(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.and.i32(
 define float @buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.and(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.or(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.or.i32(
 define float @buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.or(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.xor(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.xor.i32(
 define float @buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.xor(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
@@ -88,16 +88,16 @@ main_body:
   ret float %r
 }
 
-declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.and(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.or(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) #0
 declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
 
 attributes #0 = { nounwind }
@@ -1,81 +1,81 @@
 ;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
 define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.add(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.add.i32(
 define float @buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.sub(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.sub.i32(
 define float @buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smin(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smin.i32(
 define float @buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.smin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umin(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umin.i32(
 define float @buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.umin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smax(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smax.i32(
 define float @buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.smax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umax(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umax.i32(
 define float @buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.umax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.and(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.and.i32(
 define float @buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.and(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.or(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.or.i32(
 define float @buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.or(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
 
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.xor(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.xor.i32(
 define float @buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
 main_body:
-  %orig = call i32 @llvm.amdgcn.buffer.atomic.xor(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %orig = call i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %r = bitcast i32 %orig to float
   ret float %r
 }
@@ -88,16 +88,16 @@ main_body:
   ret float %r
 }
 
-declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.and(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.or(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) #0
 declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
 
 attributes #0 = { nounwind }
@@ -20,18 +20,51 @@
 ;CHECK: buffer_atomic_swap v0, off, s[0:3], 0{{$}}
 define amdgpu_ps float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) {
 main_body:
-  %o1 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
-  %o2 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %o1, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
-  %o3 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %o2, <4 x i32> %rsrc, i32 0, i32 %voffset, i1 0)
-  %o4 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %o3, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i1 0)
+  %o1 = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %o2 = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %o1, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %o3 = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %o2, <4 x i32> %rsrc, i32 0, i32 %voffset, i1 0)
+  %o4 = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %o3, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i1 0)
   %ofs.5 = add i32 %voffset, 42
-  %o5 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %o4, <4 x i32> %rsrc, i32 0, i32 %ofs.5, i1 0)
-  %o6 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %o5, <4 x i32> %rsrc, i32 0, i32 8192, i1 0)
-  %unused = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %o6, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %o5 = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %o4, <4 x i32> %rsrc, i32 0, i32 %ofs.5, i1 0)
+  %o6 = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %o5, <4 x i32> %rsrc, i32 0, i32 8192, i1 0)
+  %unused = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %o6, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %out = bitcast i32 %o6 to float
   ret float %out
 }
 
+;CHECK-LABEL: {{^}}test11:
+;CHECK-NOT: s_waitcnt
+;CHECK: buffer_atomic_swap_x2 v[3:4], off, s[0:3], 0 glc
+;VI: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_swap_x2 v[3:4], v1, s[0:3], 0 idxen glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_swap_x2 v[3:4], v2, s[0:3], 0 offen glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_swap_x2 v[3:4], v[1:2], s[0:3], 0 idxen offen glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_swap_x2 v[3:4], v2, s[0:3], 0 offen offset:42 glc
+;CHECK-DAG: s_waitcnt vmcnt(0)
+;SICI: buffer_atomic_swap_x2 v[3:4], v0, s[0:3], 0 offen glc
+;VI: buffer_atomic_swap_x2 v[3:4], off, s[0:3], [[SOFS]] offset:4 glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_swap_x2 v[3:4], off, s[0:3], 0{{$}}
+define amdgpu_ps float @test11(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) {
+main_body:
+  %o0 = sext i32 %data to i64
+  %o1 = call i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64 %o0, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %o2 = call i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64 %o1, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %o3 = call i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64 %o2, <4 x i32> %rsrc, i32 0, i32 %voffset, i1 0)
+  %o4 = call i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64 %o3, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i1 0)
+  %ofs.5 = add i32 %voffset, 42
+  %o5 = call i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64 %o4, <4 x i32> %rsrc, i32 0, i32 %ofs.5, i1 0)
+  %o6 = call i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64 %o5, <4 x i32> %rsrc, i32 0, i32 8192, i1 0)
+  %unused = call i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64 %o6, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+  %o7 = trunc i64 %o6 to i32
+  %out = bitcast i32 %o7 to float
+  ret float %out
+}
+
 ;CHECK-LABEL: {{^}}test2:
 ;CHECK-NOT: s_waitcnt
 ;CHECK: buffer_atomic_add v0, v1, s[0:3], 0 idxen glc
@@ -53,24 +86,60 @@ main_body:
 ;CHECK: buffer_atomic_xor v0, v1, s[0:3], 0 idxen glc
 define amdgpu_ps float @test2(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
 main_body:
-  %t1 = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
-  %t2 = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 %t1, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
-  %t3 = call i32 @llvm.amdgcn.buffer.atomic.smin(i32 %t2, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
-  %t4 = call i32 @llvm.amdgcn.buffer.atomic.umin(i32 %t3, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
-  %t5 = call i32 @llvm.amdgcn.buffer.atomic.smax(i32 %t4, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
-  %t6 = call i32 @llvm.amdgcn.buffer.atomic.umax(i32 %t5, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
-  %t7 = call i32 @llvm.amdgcn.buffer.atomic.and(i32 %t6, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
-  %t8 = call i32 @llvm.amdgcn.buffer.atomic.or(i32 %t7, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
-  %t9 = call i32 @llvm.amdgcn.buffer.atomic.xor(i32 %t8, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t1 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t2 = call i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32 %t1, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t3 = call i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32 %t2, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t4 = call i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32 %t3, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t5 = call i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32 %t4, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t6 = call i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32 %t5, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t7 = call i32 @llvm.amdgcn.buffer.atomic.and.i32(i32 %t6, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t8 = call i32 @llvm.amdgcn.buffer.atomic.or.i32(i32 %t7, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t9 = call i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32 %t8, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
   %out = bitcast i32 %t9 to float
   ret float %out
 }
 
+;CHECK-LABEL: {{^}}test3:
+;CHECK-NOT: s_waitcnt
+;CHECK: buffer_atomic_add_x2 v[0:1], v2, s[0:3], 0 idxen glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_sub_x2 v[0:1], v2, s[0:3], 0 idxen glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_smin_x2 v[0:1], v2, s[0:3], 0 idxen glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_umin_x2 v[0:1], v2, s[0:3], 0 idxen glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_smax_x2 v[0:1], v2, s[0:3], 0 idxen glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_umax_x2 v[0:1], v2, s[0:3], 0 idxen glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_and_x2 v[0:1], v2, s[0:3], 0 idxen glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_or_x2 v[0:1], v2, s[0:3], 0 idxen glc
+;CHECK: s_waitcnt vmcnt(0)
+;CHECK: buffer_atomic_xor_x2 v[0:1], v2, s[0:3], 0 idxen glc
+define amdgpu_ps float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
+main_body:
+  %t0 = sext i32 %data to i64
+  %t1 = call i64 @llvm.amdgcn.buffer.atomic.add.i64(i64 %t0, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t2 = call i64 @llvm.amdgcn.buffer.atomic.sub.i64(i64 %t1, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t3 = call i64 @llvm.amdgcn.buffer.atomic.smin.i64(i64 %t2, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t4 = call i64 @llvm.amdgcn.buffer.atomic.umin.i64(i64 %t3, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t5 = call i64 @llvm.amdgcn.buffer.atomic.smax.i64(i64 %t4, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t6 = call i64 @llvm.amdgcn.buffer.atomic.umax.i64(i64 %t5, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t7 = call i64 @llvm.amdgcn.buffer.atomic.and.i64(i64 %t6, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t8 = call i64 @llvm.amdgcn.buffer.atomic.or.i64(i64 %t7, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t9 = call i64 @llvm.amdgcn.buffer.atomic.xor.i64(i64 %t8, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
+  %t10 = trunc i64 %t9 to i32
+  %out = bitcast i32 %t10 to float
+  ret float %out
+}
+
 ; Ideally, we would teach tablegen & friends that cmpswap only modifies the
 ; first vgpr. Since we don't do that yet, the register allocator will have to
 ; create copies which we don't bother to track here.
 ;
-;CHECK-LABEL: {{^}}test3:
+;CHECK-LABEL: {{^}}test4:
 ;CHECK-NOT: s_waitcnt
 ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc
 ;CHECK: s_waitcnt vmcnt(0)
@@ -85,7 +154,7 @@ main_body:
 ;CHECK-DAG: s_waitcnt vmcnt(0)
 ;SICI: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen glc
 ;VI: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:4 glc
-define amdgpu_ps float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) {
+define amdgpu_ps float @test4(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) {
 main_body:
   %o1 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
   %o2 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %o1, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
@@ -105,25 +174,35 @@ main_body:
   ret float %out
 }
 
-;CHECK-LABEL: {{^}}test4:
+;CHECK-LABEL: {{^}}test7:
 ;CHECK: buffer_atomic_add v0,
-define amdgpu_ps float @test4() {
+define amdgpu_ps float @test7() {
 main_body:
-  %v = call i32 @llvm.amdgcn.buffer.atomic.add(i32 1, <4 x i32> undef, i32 0, i32 4, i1 false)
+  %v = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 1, <4 x i32> undef, i32 0, i32 4, i1 false)
   %v.float = bitcast i32 %v to float
   ret float %v.float
 }
 
-declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.and(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.or(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) #0
 declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
+declare i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64, <4 x i32>, i32, i32, i1) #0
+declare i64 @llvm.amdgcn.buffer.atomic.add.i64(i64, <4 x i32>, i32, i32, i1) #0
+declare i64 @llvm.amdgcn.buffer.atomic.sub.i64(i64, <4 x i32>, i32, i32, i1) #0
+declare i64 @llvm.amdgcn.buffer.atomic.smin.i64(i64, <4 x i32>, i32, i32, i1) #0
+declare i64 @llvm.amdgcn.buffer.atomic.umin.i64(i64, <4 x i32>, i32, i32, i1) #0
+declare i64 @llvm.amdgcn.buffer.atomic.smax.i64(i64, <4 x i32>, i32, i32, i1) #0
+declare i64 @llvm.amdgcn.buffer.atomic.umax.i64(i64, <4 x i32>, i32, i32, i1) #0
+declare i64 @llvm.amdgcn.buffer.atomic.and.i64(i64, <4 x i32>, i32, i32, i1) #0
+declare i64 @llvm.amdgcn.buffer.atomic.or.i64(i64, <4 x i32>, i32, i32, i1) #0
+declare i64 @llvm.amdgcn.buffer.atomic.xor.i64(i64, <4 x i32>, i32, i32, i1) #0
 
 attributes #0 = { nounwind }