mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
AMDGPU: Overload return type of llvm.amdgcn.buffer.load.format
Summary: Allow the selection of BUFFER_LOAD_FORMAT_X and BUFFER_LOAD_FORMAT_XY. Do this now, before the frontend patches land in Mesa. Eventually, we may want to automatically reduce the size of loads at the LLVM IR level, which requires such overloads, and in some cases Mesa can generate them directly. Reviewers: tstellarAMD, arsenm. Subscribers: arsenm, llvm-commits. Differential Revision: http://reviews.llvm.org/D18255 llvm-svn: 263792
This commit is contained in:
parent
a279ac7cfa
commit
f1aca63ee8
@ -206,7 +206,7 @@ def int_amdgcn_image_atomic_cmpswap : Intrinsic <
|
||||
[]>;
|
||||
|
||||
def int_amdgcn_buffer_load_format : Intrinsic <
|
||||
[llvm_v4f32_ty],
|
||||
[llvm_anyfloat_ty],
|
||||
[llvm_v4i32_ty, // rsrc(SGPR)
|
||||
llvm_i32_ty, // vindex(VGPR)
|
||||
llvm_i32_ty, // offset(SGPR/VGPR/imm)
|
||||
|
@ -2108,45 +2108,52 @@ def : Pat <
|
||||
//===----------------------------------------------------------------------===//
|
||||
// buffer_load/store_format patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass MUBUF_LoadIntrinsicPat<ValueType vt, string opcode> {
|
||||
def : Pat<
|
||||
(int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
|
||||
(vt (int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
|
||||
(MUBUFIntrinsicOffset i32:$soffset,
|
||||
i16:$offset),
|
||||
imm:$glc, imm:$slc),
|
||||
(BUFFER_LOAD_FORMAT_XYZW_OFFSET $rsrc, $soffset, (as_i16imm $offset),
|
||||
imm:$glc, imm:$slc)),
|
||||
(!cast<MUBUF>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i1imm $glc), (as_i1imm $slc), 0)
|
||||
>;
|
||||
|
||||
def : Pat<
|
||||
(int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
|
||||
(vt (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
|
||||
(MUBUFIntrinsicOffset i32:$soffset,
|
||||
i16:$offset),
|
||||
imm:$glc, imm:$slc),
|
||||
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $vindex, $rsrc, $soffset, (as_i16imm $offset),
|
||||
imm:$glc, imm:$slc)),
|
||||
(!cast<MUBUF>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i1imm $glc), (as_i1imm $slc), 0)
|
||||
>;
|
||||
|
||||
def : Pat<
|
||||
(int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
|
||||
(vt (int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
|
||||
(MUBUFIntrinsicVOffset i32:$soffset,
|
||||
i16:$offset,
|
||||
i32:$voffset),
|
||||
imm:$glc, imm:$slc),
|
||||
(BUFFER_LOAD_FORMAT_XYZW_OFFEN $voffset, $rsrc, $soffset, (as_i16imm $offset),
|
||||
imm:$glc, imm:$slc)),
|
||||
(!cast<MUBUF>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i1imm $glc), (as_i1imm $slc), 0)
|
||||
>;
|
||||
|
||||
def : Pat<
|
||||
(int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
|
||||
(vt (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
|
||||
(MUBUFIntrinsicVOffset i32:$soffset,
|
||||
i16:$offset,
|
||||
i32:$voffset),
|
||||
imm:$glc, imm:$slc),
|
||||
(BUFFER_LOAD_FORMAT_XYZW_BOTHEN
|
||||
imm:$glc, imm:$slc)),
|
||||
(!cast<MUBUF>(opcode # _BOTHEN)
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
$rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i1imm $glc), (as_i1imm $slc), 0)
|
||||
>;
|
||||
}
|
||||
|
||||
defm : MUBUF_LoadIntrinsicPat<f32, "BUFFER_LOAD_FORMAT_X">;
|
||||
defm : MUBUF_LoadIntrinsicPat<v2f32, "BUFFER_LOAD_FORMAT_XY">;
|
||||
defm : MUBUF_LoadIntrinsicPat<v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
|
||||
|
||||
def : Pat<
|
||||
(int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, 0,
|
||||
|
@ -8,9 +8,9 @@
|
||||
;CHECK: s_waitcnt
|
||||
define {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) #0 {
|
||||
main_body:
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
|
||||
%data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
|
||||
%data_slc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 0, i1 1)
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
|
||||
%data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
|
||||
%data_slc = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 0, i1 1)
|
||||
%r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
|
||||
%r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
|
||||
%r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
|
||||
@ -22,7 +22,7 @@ main_body:
|
||||
;CHECK: s_waitcnt
|
||||
define <4 x float> @buffer_load_immoffs(<4 x i32> inreg) #0 {
|
||||
main_body:
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
|
||||
ret <4 x float> %data
|
||||
}
|
||||
|
||||
@ -35,9 +35,9 @@ main_body:
|
||||
;CHECK: s_waitcnt
|
||||
define <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) #0 {
|
||||
main_body:
|
||||
%d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4156, i1 0, i1 0)
|
||||
%d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 36860, i1 0, i1 0)
|
||||
%d.2 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 36864, i1 0, i1 0)
|
||||
%d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4156, i1 0, i1 0)
|
||||
%d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 36860, i1 0, i1 0)
|
||||
%d.2 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 36864, i1 0, i1 0)
|
||||
%d.3 = fadd <4 x float> %d.0, %d.1
|
||||
%data = fadd <4 x float> %d.2, %d.3
|
||||
ret <4 x float> %data
|
||||
@ -51,8 +51,8 @@ main_body:
|
||||
;CHECK: s_waitcnt
|
||||
define <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) #0 {
|
||||
main_body:
|
||||
%d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4160, i1 0, i1 0)
|
||||
%d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4176, i1 0, i1 0)
|
||||
%d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4160, i1 0, i1 0)
|
||||
%d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4176, i1 0, i1 0)
|
||||
%data = fadd <4 x float> %d.0, %d.1
|
||||
ret <4 x float> %data
|
||||
}
|
||||
@ -62,7 +62,7 @@ main_body:
|
||||
;CHECK: s_waitcnt
|
||||
define <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) #0 {
|
||||
main_body:
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i1 0, i1 0)
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 0, i1 0, i1 0)
|
||||
ret <4 x float> %data
|
||||
}
|
||||
|
||||
@ -71,7 +71,7 @@ main_body:
|
||||
;CHECK: s_waitcnt
|
||||
define <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) #0 {
|
||||
main_body:
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 %1, i1 0, i1 0)
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %1, i1 0, i1 0)
|
||||
ret <4 x float> %data
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ main_body:
|
||||
define <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) #0 {
|
||||
main_body:
|
||||
%ofs = add i32 %1, 58
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0)
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0)
|
||||
ret <4 x float> %data
|
||||
}
|
||||
|
||||
@ -90,7 +90,7 @@ main_body:
|
||||
;CHECK: s_waitcnt
|
||||
define <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) #0 {
|
||||
main_body:
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 %2, i1 0, i1 0)
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 %2, i1 0, i1 0)
|
||||
ret <4 x float> %data
|
||||
}
|
||||
|
||||
@ -100,11 +100,31 @@ main_body:
|
||||
;CHECK: s_waitcnt
|
||||
define <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) #0 {
|
||||
main_body:
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %2, i32 %1, i1 0, i1 0)
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %2, i32 %1, i1 0, i1 0)
|
||||
ret <4 x float> %data
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i1, i1) #1
|
||||
;CHECK-LABEL: {{^}}buffer_load_x:
|
||||
;CHECK: buffer_load_format_x v0, s[0:3], 0
|
||||
;CHECK: s_waitcnt
|
||||
define float @buffer_load_x(<4 x i32> inreg %rsrc) #0 {
|
||||
main_body:
|
||||
%data = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
|
||||
ret float %data
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_load_xy:
|
||||
;CHECK: buffer_load_format_xy v[0:1], s[0:3], 0
|
||||
;CHECK: s_waitcnt
|
||||
define <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) #0 {
|
||||
main_body:
|
||||
%data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
|
||||
ret <2 x float> %data
|
||||
}
|
||||
|
||||
declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #1
|
||||
declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
@ -65,13 +65,13 @@ main_body:
|
||||
define void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) #0 {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %3, i32 0, i1 0, i1 0)
|
||||
%data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %3, i32 0, i1 0, i1 0)
|
||||
call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %data, <4 x i32> %0, i32 %4, i32 0, i1 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
|
||||
declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #2
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
attributes #1 = { nounwind }
|
||||
|
Loading…
Reference in New Issue
Block a user