mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 18:42:46 +02:00
AMDGPU: Correct behavior of f16/i16 non-format store intrinsics
Previously, a non-format buffer store intrinsic with an f16 type was incorrectly lowered to a format store. This also fixes i16/f16 stores on targets without legal f16. The corresponding loads also need to be fixed. llvm-svn: 367872
This commit is contained in:
parent
47a677c669
commit
edc817e382
@ -1256,8 +1256,12 @@ let SubtargetPredicate = HasPackedD16VMem in {
|
||||
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, i32, "BUFFER_STORE_DWORD">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i16, "BUFFER_STORE_DWORD">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f16, "BUFFER_STORE_DWORD">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i32, "BUFFER_STORE_DWORDX2">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4i16, "BUFFER_STORE_DWORDX2">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f16, "BUFFER_STORE_DWORDX2">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v3f32, "BUFFER_STORE_DWORDX3">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v3i32, "BUFFER_STORE_DWORDX3">;
|
||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
|
||||
|
@ -218,28 +218,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
|
||||
|
||||
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
|
||||
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
|
||||
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
|
||||
@ -688,6 +666,30 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::SELECT, VT, Custom);
|
||||
}
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::f16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
|
||||
|
||||
setTargetDAGCombine(ISD::ADD);
|
||||
setTargetDAGCombine(ISD::ADDCARRY);
|
||||
setTargetDAGCombine(ISD::SUB);
|
||||
@ -6877,10 +6879,22 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
|
||||
case Intrinsic::amdgcn_raw_buffer_store:
|
||||
case Intrinsic::amdgcn_raw_buffer_store_format: {
|
||||
const bool IsFormat =
|
||||
IntrinsicID == Intrinsic::amdgcn_raw_buffer_store_format;
|
||||
|
||||
SDValue VData = Op.getOperand(2);
|
||||
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
|
||||
EVT VDataVT = VData.getValueType();
|
||||
EVT EltType = VDataVT.getScalarType();
|
||||
bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
|
||||
if (IsD16)
|
||||
VData = handleD16VData(VData, DAG);
|
||||
|
||||
if (!isTypeLegal(VDataVT)) {
|
||||
VData =
|
||||
DAG.getNode(ISD::BITCAST, DL,
|
||||
getEquivalentMemType(*DAG.getContext(), VDataVT), VData);
|
||||
}
|
||||
|
||||
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
|
||||
SDValue Ops[] = {
|
||||
Chain,
|
||||
@ -6893,15 +6907,14 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
Op.getOperand(6), // cachepolicy
|
||||
DAG.getConstant(0, DL, MVT::i1), // idxen
|
||||
};
|
||||
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_raw_buffer_store ?
|
||||
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
|
||||
unsigned Opc =
|
||||
IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE;
|
||||
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
|
||||
MemSDNode *M = cast<MemSDNode>(Op);
|
||||
|
||||
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
|
||||
EVT VDataType = VData.getValueType().getScalarType();
|
||||
if (VDataType == MVT::i8 || VDataType == MVT::i16)
|
||||
return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
|
||||
if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32)
|
||||
return handleByteShortBufferStores(DAG, VDataVT, DL, Ops, M);
|
||||
|
||||
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
|
||||
M->getMemoryVT(), M->getMemOperand());
|
||||
@ -6909,10 +6922,23 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
|
||||
case Intrinsic::amdgcn_struct_buffer_store:
|
||||
case Intrinsic::amdgcn_struct_buffer_store_format: {
|
||||
const bool IsFormat =
|
||||
IntrinsicID == Intrinsic::amdgcn_struct_buffer_store_format;
|
||||
|
||||
SDValue VData = Op.getOperand(2);
|
||||
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
|
||||
EVT VDataVT = VData.getValueType();
|
||||
EVT EltType = VDataVT.getScalarType();
|
||||
bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
|
||||
|
||||
if (IsD16)
|
||||
VData = handleD16VData(VData, DAG);
|
||||
|
||||
if (!isTypeLegal(VDataVT)) {
|
||||
VData =
|
||||
DAG.getNode(ISD::BITCAST, DL,
|
||||
getEquivalentMemType(*DAG.getContext(), VDataVT), VData);
|
||||
}
|
||||
|
||||
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
|
||||
SDValue Ops[] = {
|
||||
Chain,
|
||||
@ -6932,7 +6958,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
|
||||
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
|
||||
EVT VDataType = VData.getValueType().getScalarType();
|
||||
if (VDataType == MVT::i8 || VDataType == MVT::i16)
|
||||
if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32)
|
||||
return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
|
||||
|
||||
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
|
||||
@ -7107,6 +7133,9 @@ SDValue SITargetLowering::handleByteShortBufferStores(SelectionDAG &DAG,
|
||||
EVT VDataType, SDLoc DL,
|
||||
SDValue Ops[],
|
||||
MemSDNode *M) const {
|
||||
if (VDataType == MVT::f16)
|
||||
Ops[1] = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Ops[1]);
|
||||
|
||||
SDValue BufferStoreExt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Ops[1]);
|
||||
Ops[1] = BufferStoreExt;
|
||||
unsigned Opc = (VDataType == MVT::i8) ? AMDGPUISD::BUFFER_STORE_BYTE :
|
||||
|
@ -215,6 +215,67 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}raw_buffer_store_f16:
|
||||
;CHECK-NEXT: %bb.
|
||||
;CHECK-NOT: v0
|
||||
;CHECK-NEXT: buffer_store_short v0, off, s[0:3], 0
|
||||
;CHECK-NEXT: s_endpgm
|
||||
define amdgpu_ps void @raw_buffer_store_f16(<4 x i32> inreg %rsrc, i32 %v1) {
|
||||
main_body:
|
||||
%trunc = trunc i32 %v1 to i16
|
||||
%cast = bitcast i16 %trunc to half
|
||||
call void @llvm.amdgcn.raw.buffer.store.f16(half %cast, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_v2f16:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dword v0, v1, s[0:3], 0 offen
|
||||
define amdgpu_ps void @buffer_store_v2f16(<4 x i32> inreg %rsrc, <2 x half> %data, i32 %offset) {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_v4f16:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
|
||||
define amdgpu_ps void @buffer_store_v4f16(<4 x i32> inreg %rsrc, <4 x half> %data, i32 %offset) #0 {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}raw_buffer_store_i16:
|
||||
;CHECK-NEXT: %bb.
|
||||
;CHECK-NOT: v0
|
||||
;CHECK-NEXT: buffer_store_short v0, off, s[0:3], 0
|
||||
;CHECK-NEXT: s_endpgm
|
||||
define amdgpu_ps void @raw_buffer_store_i16(<4 x i32> inreg %rsrc, i32 %v1) {
|
||||
main_body:
|
||||
%trunc = trunc i32 %v1 to i16
|
||||
call void @llvm.amdgcn.raw.buffer.store.i16(i16 %trunc, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_v2i16:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dword v0, v1, s[0:3], 0 offen
|
||||
define amdgpu_ps void @buffer_store_v2i16(<4 x i32> inreg %rsrc, <2 x i16> %data, i32 %offset) {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_store_v4i16:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
|
||||
define amdgpu_ps void @buffer_store_v4i16(<4 x i32> inreg %rsrc, <4 x i16> %data, i32 %offset) #0 {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16> %data, <4 x i32> %rsrc, i32 %offset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
|
||||
@ -223,7 +284,12 @@ declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32,
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) #0
|
||||
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
|
||||
declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16>, <4 x i32>, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16>, <4 x i32>, i32, i32, i32) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
@ -121,12 +121,39 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}struct_buffer_store_short:
|
||||
;CHECK-LABEL: {{^}}struct_buffer_store_f16:
|
||||
;CHECK-NEXT: %bb.
|
||||
;CHECK-NEXT: v_cvt_f16_f32_e32 v{{[0-9]}}, v{{[0-9]}}
|
||||
;CHECK-NEXT: buffer_store_short v{{[0-9]}}, v{{[0-9]}}, s[0:3], 0 idxen
|
||||
;CHECK-NEXT: s_endpgm
|
||||
define amdgpu_ps void @struct_buffer_store_f16(<4 x i32> inreg %rsrc, float %v1, i32 %index) {
|
||||
%v2 = fptrunc float %v1 to half
|
||||
call void @llvm.amdgcn.struct.buffer.store.f16(half %v2, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}struct_buffer_store_v2f16:
|
||||
;CHECK-NEXT: %bb.
|
||||
;CHECK: buffer_store_dword v0, {{v[0-9]+}}, s[0:3], 0 idxen
|
||||
define amdgpu_ps void @struct_buffer_store_v2f16(<4 x i32> inreg %rsrc, <2 x half> %v1, i32 %index) {
|
||||
call void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half> %v1, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}struct_buffer_store_v4f16:
|
||||
;CHECK-NEXT: %bb.
|
||||
;CHECK: buffer_store_dwordx2 v[0:1], {{v[0-9]+}}, s[0:3], 0 idxen
|
||||
define amdgpu_ps void @struct_buffer_store_v4f16(<4 x i32> inreg %rsrc, <4 x half> %v1, i32 %index) {
|
||||
call void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half> %v1, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}struct_buffer_store_i16:
|
||||
;CHECK-NEXT: %bb.
|
||||
;CHECK-NEXT: v_cvt_u32_f32_e32 v{{[0-9]}}, v{{[0-9]}}
|
||||
;CHECK-NEXT: buffer_store_short v{{[0-9]}}, v{{[0-9]}}, s[0:3], 0 idxen
|
||||
;CHECK-NEXT: s_endpgm
|
||||
define amdgpu_ps void @struct_buffer_store_short(<4 x i32> inreg %rsrc, float %v1, i32 %index) {
|
||||
define amdgpu_ps void @struct_buffer_store_i16(<4 x i32> inreg %rsrc, float %v1, i32 %index) {
|
||||
main_body:
|
||||
%v2 = fptoui float %v1 to i32
|
||||
%v3 = trunc i32 %v2 to i16
|
||||
@ -134,6 +161,22 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}struct_buffer_store_vif16:
|
||||
;CHECK-NEXT: %bb.
|
||||
;CHECK: buffer_store_dword v0, {{v[0-9]+}}, s[0:3], 0 idxen
|
||||
define amdgpu_ps void @struct_buffer_store_vif16(<4 x i32> inreg %rsrc, <2 x i16> %v1, i32 %index) {
|
||||
call void @llvm.amdgcn.struct.buffer.store.v2i16(<2 x i16> %v1, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}struct_buffer_store_v4i16:
|
||||
;CHECK-NEXT: %bb.
|
||||
;CHECK: buffer_store_dwordx2 v[0:1], {{v[0-9]+}}, s[0:3], 0 idxen
|
||||
define amdgpu_ps void @struct_buffer_store_v4i16(<4 x i32> inreg %rsrc, <4 x i16> %v1, i32 %index) {
|
||||
call void @llvm.amdgcn.struct.buffer.store.v4i16(<4 x i16> %v1, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
|
||||
@ -143,6 +186,12 @@ declare void @llvm.amdgcn.struct.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i
|
||||
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
|
||||
declare void @llvm.amdgcn.struct.buffer.store.i8(i8, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.struct.buffer.store.i16(i16, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.struct.buffer.store.v2i16(<2 x i16>, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.struct.buffer.store.v4i16(<4 x i16>, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.struct.buffer.store.f16(half, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32) #0
|
||||
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
Loading…
Reference in New Issue
Block a user