mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU: add missing llvm.amdgcn.{raw,struct}.buffer.atomic.{inc,dec}
Summary: Wrapping increment/decrement. These aren't exposed by many APIs...

Change-Id: I1df25c7889de5a5ba76468ad8e8a2597efa9af6c
Reviewers: arsenm, tpr, dstuttard
Subscribers: kzhuravl, jvesely, wdng, yaxunl, t-tye, jfb, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65283
llvm-svn: 367821
This commit is contained in:
parent
5a4b1ab0cd
commit
496b39a5b9
@ -837,9 +837,6 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
|
||||
defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
|
||||
defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
|
||||
defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;
|
||||
|
||||
// TODO: INC/DEC are weird: they seem to have a vdata argument in hardware,
|
||||
// even though it clearly shouldn't be needed
|
||||
defm int_amdgcn_image_atomic_inc : AMDGPUImageDimAtomic<"ATOMIC_INC">;
|
||||
defm int_amdgcn_image_atomic_dec : AMDGPUImageDimAtomic<"ATOMIC_DEC">;
|
||||
|
||||
@ -963,6 +960,8 @@ def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
|
||||
def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
|
||||
def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
|
||||
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
|
||||
def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic;
|
||||
def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic;
|
||||
def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
|
||||
[llvm_anyint_ty],
|
||||
[LLVMMatchType<0>, // src(VGPR)
|
||||
@ -994,6 +993,8 @@ def int_amdgcn_struct_buffer_atomic_umax : AMDGPUStructBufferAtomic;
|
||||
def int_amdgcn_struct_buffer_atomic_and : AMDGPUStructBufferAtomic;
|
||||
def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
|
||||
def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
|
||||
def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic;
|
||||
def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic;
|
||||
def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
|
||||
[llvm_anyint_ty],
|
||||
[LLVMMatchType<0>, // src(VGPR)
|
||||
|
@ -4354,6 +4354,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_AND)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_OR)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_INC)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_DEC)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_PK_FADD)
|
||||
|
@ -532,6 +532,8 @@ enum NodeType : unsigned {
|
||||
BUFFER_ATOMIC_AND,
|
||||
BUFFER_ATOMIC_OR,
|
||||
BUFFER_ATOMIC_XOR,
|
||||
BUFFER_ATOMIC_INC,
|
||||
BUFFER_ATOMIC_DEC,
|
||||
BUFFER_ATOMIC_CMPSWAP,
|
||||
BUFFER_ATOMIC_FADD,
|
||||
BUFFER_ATOMIC_PK_FADD,
|
||||
|
@ -81,6 +81,8 @@ def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umax>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_and>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_or>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_inc>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_dec>;
|
||||
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
|
||||
@ -92,6 +94,8 @@ def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umax>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_and>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_or>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_inc>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_dec>;
|
||||
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
|
||||
def : SourceOfDivergence<int_amdgcn_ps_live>;
|
||||
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
|
||||
|
@ -1316,6 +1316,8 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i32, "BUFFER_ATOMIC_UMAX">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_and, i32, "BUFFER_ATOMIC_AND">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_or, i32, "BUFFER_ATOMIC_OR">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i32, "BUFFER_ATOMIC_XOR">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i32, "BUFFER_ATOMIC_INC">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i32, "BUFFER_ATOMIC_DEC">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i64, "BUFFER_ATOMIC_SWAP_X2">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_add, i64, "BUFFER_ATOMIC_ADD_X2">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i64, "BUFFER_ATOMIC_SUB_X2">;
|
||||
@ -1326,6 +1328,8 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i64, "BUFFER_ATOMIC_UMAX_X2">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_and, i64, "BUFFER_ATOMIC_AND_X2">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_or, i64, "BUFFER_ATOMIC_OR_X2">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i64, "BUFFER_ATOMIC_INC_X2">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i64, "BUFFER_ATOMIC_DEC_X2">;
|
||||
|
||||
multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
|
||||
string opcode> {
|
||||
|
@ -6414,7 +6414,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_umax:
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_and:
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_or:
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_xor: {
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_xor:
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_inc:
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_dec: {
|
||||
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
|
||||
SDValue Ops[] = {
|
||||
Op.getOperand(0), // Chain
|
||||
@ -6463,6 +6465,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_xor:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
|
||||
break;
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_inc:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_INC;
|
||||
break;
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_dec:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_DEC;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("unhandled atomic opcode");
|
||||
}
|
||||
@ -6479,7 +6487,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_umax:
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_and:
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_or:
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_xor: {
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_xor:
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_inc:
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_dec: {
|
||||
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
|
||||
SDValue Ops[] = {
|
||||
Op.getOperand(0), // Chain
|
||||
@ -6528,6 +6538,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_xor:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
|
||||
break;
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_inc:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_INC;
|
||||
break;
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_dec:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_DEC;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("unhandled atomic opcode");
|
||||
}
|
||||
|
@ -198,6 +198,8 @@ def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
|
||||
def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
|
||||
def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
|
||||
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
|
||||
def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
|
||||
def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
|
||||
def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
|
||||
def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;
|
||||
|
||||
|
@ -44,6 +44,10 @@ main_body:
|
||||
;CHECK: buffer_atomic_or v0, v1, s[0:3], 0 offen glc slc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_xor v0, v1, s[0:3], 0 offen glc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_inc v0, v1, s[0:3], 0 offen glc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_dec v0, v1, s[0:3], 0 offen glc
|
||||
define amdgpu_ps float @test2(<4 x i32> inreg %rsrc, i32 %data, i32 %voffset) {
|
||||
main_body:
|
||||
%t1 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
|
||||
@ -55,7 +59,9 @@ main_body:
|
||||
%t7 = call i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32 %t6, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
|
||||
%t8 = call i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32 %t7, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 2)
|
||||
%t9 = call i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32 %t8, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
|
||||
%out = bitcast i32 %t9 to float
|
||||
%t10 = call i32 @llvm.amdgcn.raw.buffer.atomic.inc.i32(i32 %t9, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
|
||||
%t11 = call i32 @llvm.amdgcn.raw.buffer.atomic.dec.i32(i32 %t10, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
|
||||
%out = bitcast i32 %t11 to float
|
||||
ret float %out
|
||||
}
|
||||
|
||||
@ -110,6 +116,8 @@ declare i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i3
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.inc.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.dec.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -50,6 +50,10 @@ main_body:
|
||||
;CHECK: buffer_atomic_or v0, v1, s[0:3], 0 idxen glc slc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_xor v0, v1, s[0:3], 0 idxen glc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_inc v0, v1, s[0:3], 0 idxen glc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_dec v0, v1, s[0:3], 0 idxen glc
|
||||
define amdgpu_ps float @test2(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
|
||||
main_body:
|
||||
%t1 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
|
||||
@ -61,7 +65,9 @@ main_body:
|
||||
%t7 = call i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32 %t6, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
|
||||
%t8 = call i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32 %t7, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 2)
|
||||
%t9 = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 %t8, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
|
||||
%out = bitcast i32 %t9 to float
|
||||
%t10 = call i32 @llvm.amdgcn.struct.buffer.atomic.inc.i32(i32 %t9, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
|
||||
%t11 = call i32 @llvm.amdgcn.struct.buffer.atomic.dec.i32(i32 %t10, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
|
||||
%out = bitcast i32 %t11 to float
|
||||
ret float %out
|
||||
}
|
||||
|
||||
@ -122,6 +128,8 @@ declare i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32,
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.inc.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.dec.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
Loading…
Reference in New Issue
Block a user