mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
AMDGPU: Lower buffer store and atomic intrinsics manually
Summary:
Without this, SIMemoryLegalizer inserts s_waitcnt vmcnt(0) before every buffer store and atomic instruction.

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D39060

llvm-svn: 317754
This commit is contained in:
parent
fd39065f1d
commit
3170d60e41
@ -3960,6 +3960,19 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(ATOMIC_DEC)
|
||||
NODE_NAME_CASE(BUFFER_LOAD)
|
||||
NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
|
||||
NODE_NAME_CASE(BUFFER_STORE)
|
||||
NODE_NAME_CASE(BUFFER_STORE_FORMAT)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_SWAP)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_ADD)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_SUB)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_SMIN)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_UMIN)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_SMAX)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_UMAX)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_AND)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_OR)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
|
||||
NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
|
||||
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
|
||||
}
|
||||
return nullptr;
|
||||
|
@ -445,6 +445,19 @@ enum NodeType : unsigned {
|
||||
ATOMIC_DEC,
|
||||
BUFFER_LOAD,
|
||||
BUFFER_LOAD_FORMAT,
|
||||
BUFFER_STORE,
|
||||
BUFFER_STORE_FORMAT,
|
||||
BUFFER_ATOMIC_SWAP,
|
||||
BUFFER_ATOMIC_ADD,
|
||||
BUFFER_ATOMIC_SUB,
|
||||
BUFFER_ATOMIC_SMIN,
|
||||
BUFFER_ATOMIC_UMIN,
|
||||
BUFFER_ATOMIC_SMAX,
|
||||
BUFFER_ATOMIC_UMAX,
|
||||
BUFFER_ATOMIC_AND,
|
||||
BUFFER_ATOMIC_OR,
|
||||
BUFFER_ATOMIC_XOR,
|
||||
BUFFER_ATOMIC_CMPSWAP,
|
||||
LAST_AMDGPU_ISD_NUMBER
|
||||
};
|
||||
|
||||
|
@ -966,12 +966,12 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||
>;
|
||||
}
|
||||
|
||||
defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
|
||||
defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
|
||||
defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
|
||||
defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, f32, "BUFFER_STORE_DWORD">;
|
||||
defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
|
||||
defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// buffer_atomic patterns
|
||||
@ -1013,19 +1013,19 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
|
||||
>;
|
||||
}
|
||||
|
||||
defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_swap, "BUFFER_ATOMIC_SWAP">;
|
||||
defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_add, "BUFFER_ATOMIC_ADD">;
|
||||
defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_sub, "BUFFER_ATOMIC_SUB">;
|
||||
defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smin, "BUFFER_ATOMIC_SMIN">;
|
||||
defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umin, "BUFFER_ATOMIC_UMIN">;
|
||||
defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smax, "BUFFER_ATOMIC_SMAX">;
|
||||
defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umax, "BUFFER_ATOMIC_UMAX">;
|
||||
defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_and, "BUFFER_ATOMIC_AND">;
|
||||
defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_or, "BUFFER_ATOMIC_OR">;
|
||||
defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_xor, "BUFFER_ATOMIC_XOR">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_swap, "BUFFER_ATOMIC_SWAP">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_add, "BUFFER_ATOMIC_ADD">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_sub, "BUFFER_ATOMIC_SUB">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_smin, "BUFFER_ATOMIC_SMIN">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_umin, "BUFFER_ATOMIC_UMIN">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_smax, "BUFFER_ATOMIC_SMAX">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_umax, "BUFFER_ATOMIC_UMAX">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_and, "BUFFER_ATOMIC_AND">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_or, "BUFFER_ATOMIC_OR">;
|
||||
defm : BufferAtomicPatterns<SIbuffer_atomic_xor, "BUFFER_ATOMIC_XOR">;
|
||||
|
||||
def : GCNPat<
|
||||
(int_amdgcn_buffer_atomic_cmpswap
|
||||
(SIbuffer_atomic_cmpswap
|
||||
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
|
||||
(MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
|
||||
imm:$slc),
|
||||
@ -1037,7 +1037,7 @@ def : GCNPat<
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(int_amdgcn_buffer_atomic_cmpswap
|
||||
(SIbuffer_atomic_cmpswap
|
||||
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
|
||||
(MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
|
||||
imm:$slc),
|
||||
@ -1049,7 +1049,7 @@ def : GCNPat<
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(int_amdgcn_buffer_atomic_cmpswap
|
||||
(SIbuffer_atomic_cmpswap
|
||||
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
|
||||
(MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
|
||||
imm:$slc),
|
||||
@ -1061,7 +1061,7 @@ def : GCNPat<
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(int_amdgcn_buffer_atomic_cmpswap
|
||||
(SIbuffer_atomic_cmpswap
|
||||
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
|
||||
(MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
|
||||
imm:$slc),
|
||||
|
@ -4238,6 +4238,95 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
|
||||
Op->getVTList(), Ops, VT, MMO);
|
||||
}
|
||||
case Intrinsic::amdgcn_buffer_atomic_swap:
|
||||
case Intrinsic::amdgcn_buffer_atomic_add:
|
||||
case Intrinsic::amdgcn_buffer_atomic_sub:
|
||||
case Intrinsic::amdgcn_buffer_atomic_smin:
|
||||
case Intrinsic::amdgcn_buffer_atomic_umin:
|
||||
case Intrinsic::amdgcn_buffer_atomic_smax:
|
||||
case Intrinsic::amdgcn_buffer_atomic_umax:
|
||||
case Intrinsic::amdgcn_buffer_atomic_and:
|
||||
case Intrinsic::amdgcn_buffer_atomic_or:
|
||||
case Intrinsic::amdgcn_buffer_atomic_xor: {
|
||||
SDValue Ops[] = {
|
||||
Op.getOperand(0), // Chain
|
||||
Op.getOperand(2), // vdata
|
||||
Op.getOperand(3), // rsrc
|
||||
Op.getOperand(4), // vindex
|
||||
Op.getOperand(5), // offset
|
||||
Op.getOperand(6) // slc
|
||||
};
|
||||
EVT VT = Op.getOperand(3).getValueType();
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
MachinePointerInfo(),
|
||||
MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOVolatile,
|
||||
VT.getStoreSize(), 4);
|
||||
unsigned Opcode = 0;
|
||||
|
||||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_buffer_atomic_swap:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
|
||||
break;
|
||||
case Intrinsic::amdgcn_buffer_atomic_add:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
|
||||
break;
|
||||
case Intrinsic::amdgcn_buffer_atomic_sub:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
|
||||
break;
|
||||
case Intrinsic::amdgcn_buffer_atomic_smin:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
|
||||
break;
|
||||
case Intrinsic::amdgcn_buffer_atomic_umin:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
|
||||
break;
|
||||
case Intrinsic::amdgcn_buffer_atomic_smax:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
|
||||
break;
|
||||
case Intrinsic::amdgcn_buffer_atomic_umax:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
|
||||
break;
|
||||
case Intrinsic::amdgcn_buffer_atomic_and:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
|
||||
break;
|
||||
case Intrinsic::amdgcn_buffer_atomic_or:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
|
||||
break;
|
||||
case Intrinsic::amdgcn_buffer_atomic_xor:
|
||||
Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("unhandled atomic opcode");
|
||||
}
|
||||
|
||||
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO);
|
||||
}
|
||||
|
||||
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
|
||||
SDValue Ops[] = {
|
||||
Op.getOperand(0), // Chain
|
||||
Op.getOperand(2), // src
|
||||
Op.getOperand(3), // cmp
|
||||
Op.getOperand(4), // rsrc
|
||||
Op.getOperand(5), // vindex
|
||||
Op.getOperand(6), // offset
|
||||
Op.getOperand(7) // slc
|
||||
};
|
||||
EVT VT = Op.getOperand(4).getValueType();
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
MachinePointerInfo(),
|
||||
MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOVolatile,
|
||||
VT.getStoreSize(), 4);
|
||||
|
||||
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
|
||||
Op->getVTList(), Ops, VT, MMO);
|
||||
}
|
||||
|
||||
// Basic sample.
|
||||
case Intrinsic::amdgcn_image_sample:
|
||||
case Intrinsic::amdgcn_image_sample_cl:
|
||||
@ -4465,6 +4554,30 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
Op->getVTList(), Ops, VT, MMO);
|
||||
}
|
||||
|
||||
case Intrinsic::amdgcn_buffer_store:
|
||||
case Intrinsic::amdgcn_buffer_store_format: {
|
||||
SDValue Ops[] = {
|
||||
Chain,
|
||||
Op.getOperand(2), // vdata
|
||||
Op.getOperand(3), // rsrc
|
||||
Op.getOperand(4), // vindex
|
||||
Op.getOperand(5), // offset
|
||||
Op.getOperand(6), // glc
|
||||
Op.getOperand(7) // slc
|
||||
};
|
||||
EVT VT = Op.getOperand(3).getValueType();
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
MachinePointerInfo(),
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable,
|
||||
VT.getStoreSize(), 4);
|
||||
|
||||
unsigned Opcode = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
|
||||
AMDGPUISD::BUFFER_STORE :
|
||||
AMDGPUISD::BUFFER_STORE_FORMAT;
|
||||
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO);
|
||||
}
|
||||
|
||||
default:
|
||||
return Op;
|
||||
}
|
||||
|
@ -93,6 +93,53 @@ def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
|
||||
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
|
||||
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
|
||||
|
||||
def SDTBufferStore : SDTypeProfile<0, 6,
|
||||
[ // vdata
|
||||
SDTCisVT<1, v4i32>, // rsrc
|
||||
SDTCisVT<2, i32>, // vindex
|
||||
SDTCisVT<3, i32>, // offset
|
||||
SDTCisVT<4, i1>, // glc
|
||||
SDTCisVT<5, i1>]>; // slc
|
||||
|
||||
def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
|
||||
[SDNPMemOperand, SDNPHasChain, SDNPMayStore]>;
|
||||
def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT", SDTBufferStore,
|
||||
[SDNPMemOperand, SDNPHasChain, SDNPMayStore]>;
|
||||
|
||||
class SDBufferAtomic<string opcode> : SDNode <opcode,
|
||||
SDTypeProfile<1, 5,
|
||||
[SDTCisVT<0, i32>, // dst
|
||||
SDTCisVT<1, i32>, // vdata
|
||||
SDTCisVT<2, v4i32>, // rsrc
|
||||
SDTCisVT<3, i32>, // vindex
|
||||
SDTCisVT<4, i32>, // offset
|
||||
SDTCisVT<5, i1>]>, // slc
|
||||
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
|
||||
>;
|
||||
|
||||
def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
|
||||
def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
|
||||
def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
|
||||
def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
|
||||
def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
|
||||
def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
|
||||
def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
|
||||
def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
|
||||
def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
|
||||
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
|
||||
|
||||
def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
|
||||
SDTypeProfile<1, 6,
|
||||
[SDTCisVT<0, i32>, // dst
|
||||
SDTCisVT<1, i32>, // src
|
||||
SDTCisVT<2, i32>, // cmp
|
||||
SDTCisVT<3, v4i32>, // rsrc
|
||||
SDTCisVT<4, i32>, // vindex
|
||||
SDTCisVT<5, i32>, // offset
|
||||
SDTCisVT<6, i1>]>, // slc
|
||||
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
|
||||
>;
|
||||
|
||||
class SDSample<string opcode> : SDNode <opcode,
|
||||
SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>,
|
||||
SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
|
||||
|
@ -2,6 +2,7 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=VI
|
||||
|
||||
;CHECK-LABEL: {{^}}test1:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc
|
||||
;VI: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
@ -32,6 +33,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}test2:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_atomic_add v0, v1, s[0:3], 0 idxen glc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_sub v0, v1, s[0:3], 0 idxen glc
|
||||
@ -69,6 +71,7 @@ main_body:
|
||||
; create copies which we don't bother to track here.
|
||||
;
|
||||
;CHECK-LABEL: {{^}}test3:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;VI: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc
|
||||
|
@ -2,6 +2,7 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_format_xyzw v[0:3], off, s[0:3], 0
|
||||
;CHECK: buffer_store_format_xyzw v[4:7], off, s[0:3], 0 glc
|
||||
;CHECK: buffer_store_format_xyzw v[8:11], off, s[0:3], 0 slc
|
||||
@ -14,6 +15,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_immoffs:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_format_xyzw v[0:3], off, s[0:3], 0 offset:42
|
||||
define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
|
||||
main_body:
|
||||
@ -22,6 +24,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_idx:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
|
||||
define amdgpu_ps void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) {
|
||||
main_body:
|
||||
@ -30,6 +33,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_ofs:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 offen
|
||||
define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) {
|
||||
main_body:
|
||||
@ -38,6 +42,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_both:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 idxen offen
|
||||
define amdgpu_ps void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) {
|
||||
main_body:
|
||||
@ -47,6 +52,7 @@ main_body:
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_both_reversed:
|
||||
;CHECK: v_mov_b32_e32 v6, v4
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_format_xyzw v[0:3], v[5:6], s[0:3], 0 idxen offen
|
||||
define amdgpu_ps void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) {
|
||||
main_body:
|
||||
@ -57,6 +63,7 @@ main_body:
|
||||
; Ideally, the register allocator would avoid the wait here
|
||||
;
|
||||
;CHECK-LABEL: {{^}}buffer_store_wait:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
|
||||
;CHECK: s_waitcnt expcnt(0)
|
||||
;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
|
||||
@ -71,6 +78,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_x1:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_format_x v0, v1, s[0:3], 0 idxen
|
||||
define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %index) {
|
||||
main_body:
|
||||
@ -79,6 +87,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_x2:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_format_xy v[0:1], v2, s[0:3], 0 idxen
|
||||
define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %index) {
|
||||
main_body:
|
||||
|
@ -2,6 +2,7 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
||||
;CHECK: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
|
||||
;CHECK: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
|
||||
@ -14,6 +15,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_immoffs:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
|
||||
define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
|
||||
main_body:
|
||||
@ -22,6 +24,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_idx:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen
|
||||
define amdgpu_ps void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) {
|
||||
main_body:
|
||||
@ -30,6 +33,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_ofs:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
|
||||
define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) {
|
||||
main_body:
|
||||
@ -38,6 +42,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_both:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 idxen offen
|
||||
define amdgpu_ps void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) {
|
||||
main_body:
|
||||
@ -47,6 +52,7 @@ main_body:
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_both_reversed:
|
||||
;CHECK: v_mov_b32_e32 v6, v4
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 idxen offen
|
||||
define amdgpu_ps void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) {
|
||||
main_body:
|
||||
@ -57,6 +63,7 @@ main_body:
|
||||
; Ideally, the register allocator would avoid the wait here
|
||||
;
|
||||
;CHECK-LABEL: {{^}}buffer_store_wait:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen
|
||||
;CHECK: s_waitcnt expcnt(0)
|
||||
;CHECK: buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 idxen
|
||||
@ -71,6 +78,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_x1:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dword v0, v1, s[0:3], 0 idxen
|
||||
define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %index) {
|
||||
main_body:
|
||||
@ -79,6 +87,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_x2:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 idxen
|
||||
define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %index) #0 {
|
||||
main_body:
|
||||
|
@ -2,6 +2,7 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=VI
|
||||
|
||||
;CHECK-LABEL: {{^}}image_atomic_swap:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;SI: image_atomic_swap v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x04,0x00,0x00]
|
||||
;VI: image_atomic_swap v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x04,0x00,0x00]
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
@ -13,6 +14,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_atomic_swap_v2i32:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;SI: image_atomic_swap v2, v[0:1], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x02,0x00,0x00]
|
||||
;VI: image_atomic_swap v2, v[0:1], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x02,0x00,0x00]
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
@ -24,6 +26,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_atomic_swap_i32:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;SI: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x01,0x00,0x00]
|
||||
;VI: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x01,0x00,0x00]
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
@ -35,6 +38,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_atomic_cmpswap:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;SI: image_atomic_cmpswap v[4:5], v[0:3], s[0:7] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x40,0xf0,0x00,0x04,0x00,0x00]
|
||||
;VI: image_atomic_cmpswap v[4:5], v[0:3], s[0:7] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x44,0xf0,0x00,0x04,0x00,0x00]
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
@ -47,6 +51,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_atomic_add:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;SI: image_atomic_add v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x44,0xf0,0x00,0x04,0x00,0x00]
|
||||
;VI: image_atomic_add v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0x00,0x04,0x00,0x00]
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
@ -58,6 +63,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_atomic_sub:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;SI: image_atomic_sub v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0x00,0x04,0x00,0x00]
|
||||
;VI: image_atomic_sub v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x4c,0xf0,0x00,0x04,0x00,0x00]
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
@ -69,6 +75,7 @@ main_body:
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}image_atomic_unchanged:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: image_atomic_smin v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x50,0xf0,0x00,0x04,0x00,0x00]
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: image_atomic_umin v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x54,0xf0,0x00,0x04,0x00,0x00]
|
||||
|
@ -2,6 +2,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}image_load_v4i32:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
|
||||
@ -11,6 +12,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_load_v2i32:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
|
||||
@ -20,6 +22,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_load_i32:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) #0 {
|
||||
@ -29,6 +32,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_load_mip:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
|
||||
@ -38,6 +42,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_load_1:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
|
||||
@ -48,6 +53,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_load_f32_v2i32:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
|
||||
@ -57,6 +63,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_load_v2f32_v4i32:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
|
||||
@ -66,6 +73,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_store_v4i32:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
|
||||
main_body:
|
||||
@ -74,6 +82,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_store_v2i32:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) #0 {
|
||||
main_body:
|
||||
@ -82,6 +91,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_store_i32:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) #0 {
|
||||
main_body:
|
||||
@ -90,6 +100,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_store_f32_i32:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
|
||||
define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) #0 {
|
||||
main_body:
|
||||
@ -98,6 +109,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_store_v2f32_v4i32:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
|
||||
define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) #0 {
|
||||
main_body:
|
||||
@ -106,6 +118,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}image_store_mip:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
|
||||
define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
|
||||
main_body:
|
||||
@ -114,6 +127,7 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}getresinfo:
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
|
||||
define amdgpu_ps void @getresinfo() #0 {
|
||||
main_body:
|
||||
|
@ -2,6 +2,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: {{^}}test1:
|
||||
; CHECK-NOT: s_waitcnt
|
||||
; CHECK: image_store
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0){{$}}
|
||||
; CHECK-NEXT: image_store
|
||||
@ -17,6 +18,7 @@ define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float>
|
||||
; emitted as late as possible.
|
||||
;
|
||||
; CHECK-LABEL: {{^}}test2:
|
||||
; CHECK-NOT: s_waitcnt
|
||||
; CHECK: image_load
|
||||
; CHECK-NEXT: s_waitcnt
|
||||
; CHECK: s_waitcnt vmcnt(0){{$}}
|
||||
|
Loading…
Reference in New Issue
Block a user