mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 10:42:36 +01:00
gl: Fix D24X8 accelerated encode/decode
- PS3 D24X8 is swapped as a full word, unlike PC.
- Add missing paths to handle custom swap behavior.
This commit is contained in:
parent
81fa3da101
commit
362a26a404
@ -207,7 +207,8 @@ namespace gl
|
|||||||
compute_task::run(cmd, num_invocations);
|
compute_task::run(cmd, num_invocations);
|
||||||
}
|
}
|
||||||
|
|
||||||
cs_shuffle_d32fx8_to_x8d24f::cs_shuffle_d32fx8_to_x8d24f()
|
template <bool SwapBytes>
|
||||||
|
cs_shuffle_d32fx8_to_x8d24f<SwapBytes>::cs_shuffle_d32fx8_to_x8d24f()
|
||||||
{
|
{
|
||||||
uniforms = "uniform uint in_ptr, out_ptr;\n";
|
uniforms = "uniform uint in_ptr, out_ptr;\n";
|
||||||
|
|
||||||
@ -223,15 +224,22 @@ namespace gl
|
|||||||
" value |= stencil;\n"
|
" value |= stencil;\n"
|
||||||
" data[index + out_ptr] = bswap_u32(value);\n";
|
" data[index + out_ptr] = bswap_u32(value);\n";
|
||||||
|
|
||||||
|
if constexpr (!SwapBytes)
|
||||||
|
{
|
||||||
|
work_kernel = fmt::replace_all(work_kernel, "bswap_u32(value)", "value", 1);
|
||||||
|
}
|
||||||
|
|
||||||
cs_shuffle_base::build("");
|
cs_shuffle_base::build("");
|
||||||
}
|
}
|
||||||
|
|
||||||
void cs_shuffle_d32fx8_to_x8d24f::bind_resources()
|
template <bool SwapBytes>
|
||||||
|
void cs_shuffle_d32fx8_to_x8d24f<SwapBytes>::bind_resources()
|
||||||
{
|
{
|
||||||
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
|
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cs_shuffle_d32fx8_to_x8d24f::run(gl::command_context& cmd, const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
|
template <bool SwapBytes>
|
||||||
|
void cs_shuffle_d32fx8_to_x8d24f<SwapBytes>::run(gl::command_context& cmd, const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
|
||||||
{
|
{
|
||||||
u32 data_offset;
|
u32 data_offset;
|
||||||
if (src_offset > dst_offset)
|
if (src_offset > dst_offset)
|
||||||
@ -250,7 +258,11 @@ namespace gl
|
|||||||
cs_shuffle_base::run(cmd, data, num_texels * 4, data_offset);
|
cs_shuffle_base::run(cmd, data, num_texels * 4, data_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
cs_shuffle_x8d24f_to_d32fx8::cs_shuffle_x8d24f_to_d32fx8()
|
template cs_shuffle_d32fx8_to_x8d24f<true>;
|
||||||
|
template cs_shuffle_d32fx8_to_x8d24f<false>;
|
||||||
|
|
||||||
|
template <bool SwapBytes>
|
||||||
|
cs_shuffle_x8d24f_to_d32fx8<SwapBytes>::cs_shuffle_x8d24f_to_d32fx8()
|
||||||
{
|
{
|
||||||
uniforms = "uniform uint texel_count, in_ptr, out_ptr;\n";
|
uniforms = "uniform uint texel_count, in_ptr, out_ptr;\n";
|
||||||
|
|
||||||
@ -267,15 +279,22 @@ namespace gl
|
|||||||
" data[index * 2 + out_offset] = d24f_to_f32(depth);\n"
|
" data[index * 2 + out_offset] = d24f_to_f32(depth);\n"
|
||||||
" data[index * 2 + (out_offset + 1)] = stencil;\n";
|
" data[index * 2 + (out_offset + 1)] = stencil;\n";
|
||||||
|
|
||||||
|
if constexpr (!SwapBytes)
|
||||||
|
{
|
||||||
|
work_kernel = fmt::replace_all(work_kernel, "value = bswap_u32(value)", "// value = bswap_u32(value)", 1);
|
||||||
|
}
|
||||||
|
|
||||||
cs_shuffle_base::build("");
|
cs_shuffle_base::build("");
|
||||||
}
|
}
|
||||||
|
|
||||||
void cs_shuffle_x8d24f_to_d32fx8::bind_resources()
|
template <bool SwapBytes>
|
||||||
|
void cs_shuffle_x8d24f_to_d32fx8<SwapBytes>::bind_resources()
|
||||||
{
|
{
|
||||||
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
|
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cs_shuffle_x8d24f_to_d32fx8::run(gl::command_context& cmd, const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
|
template <bool SwapBytes>
|
||||||
|
void cs_shuffle_x8d24f_to_d32fx8<SwapBytes>::run(gl::command_context& cmd, const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
|
||||||
{
|
{
|
||||||
u32 data_offset;
|
u32 data_offset;
|
||||||
if (src_offset > dst_offset)
|
if (src_offset > dst_offset)
|
||||||
@ -294,6 +313,9 @@ namespace gl
|
|||||||
cs_shuffle_base::run(cmd, data, num_texels * 4, data_offset);
|
cs_shuffle_base::run(cmd, data, num_texels * 4, data_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template cs_shuffle_x8d24f_to_d32fx8<true>;
|
||||||
|
template cs_shuffle_x8d24f_to_d32fx8<false>;
|
||||||
|
|
||||||
cs_d24x8_to_ssbo::cs_d24x8_to_ssbo()
|
cs_d24x8_to_ssbo::cs_d24x8_to_ssbo()
|
||||||
{
|
{
|
||||||
initialize();
|
initialize();
|
||||||
@ -332,11 +354,11 @@ namespace gl
|
|||||||
}
|
}
|
||||||
|
|
||||||
// This method is callable in sensitive code and must restore the GL state on exit
|
// This method is callable in sensitive code and must restore the GL state on exit
|
||||||
gl::saved_sampler_state save_0(GL_COMPUTE_BUFFER_SLOT(0), m_sampler);
|
gl::saved_sampler_state save_sampler0(GL_COMPUTE_BUFFER_SLOT(0), m_sampler);
|
||||||
gl::saved_sampler_state save_1(GL_COMPUTE_BUFFER_SLOT(1), m_sampler);
|
gl::saved_sampler_state save_sampler1(GL_COMPUTE_BUFFER_SLOT(1), m_sampler);
|
||||||
|
|
||||||
gl::bind_image_view_safe(cmd, GL_COMPUTE_BUFFER_SLOT(0), depth_view);
|
gl::bind_image_view_safe save_image1(cmd, GL_COMPUTE_BUFFER_SLOT(0), depth_view);
|
||||||
gl::bind_image_view_safe(cmd, GL_COMPUTE_BUFFER_SLOT(1), stencil_view);
|
gl::bind_image_view_safe save_image2(cmd, GL_COMPUTE_BUFFER_SLOT(1), stencil_view);
|
||||||
|
|
||||||
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(2), out_offset, row_pitch * 4 * region.height);
|
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(2), out_offset, row_pitch * 4 * region.height);
|
||||||
|
|
||||||
@ -383,8 +405,8 @@ namespace gl
|
|||||||
}
|
}
|
||||||
|
|
||||||
// This method is callable in sensitive code and must restore the GL state on exit
|
// This method is callable in sensitive code and must restore the GL state on exit
|
||||||
gl::saved_sampler_state save(GL_COMPUTE_BUFFER_SLOT(0), m_sampler);
|
gl::saved_sampler_state save_sampler(GL_COMPUTE_BUFFER_SLOT(0), m_sampler);
|
||||||
gl::bind_image_view_safe(cmd, GL_COMPUTE_BUFFER_SLOT(0), data_view);
|
gl::bind_image_view_safe save_image(cmd, GL_COMPUTE_BUFFER_SLOT(0), data_view);
|
||||||
|
|
||||||
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(1), out_offset, row_pitch * 4 * region.height);
|
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(1), out_offset, row_pitch * 4 * region.height);
|
||||||
|
|
||||||
|
@ -78,6 +78,7 @@ namespace gl
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <bool SwapBytes>
|
||||||
struct cs_shuffle_d32fx8_to_x8d24f : cs_shuffle_base
|
struct cs_shuffle_d32fx8_to_x8d24f : cs_shuffle_base
|
||||||
{
|
{
|
||||||
u32 m_ssbo_length = 0;
|
u32 m_ssbo_length = 0;
|
||||||
@ -89,6 +90,7 @@ namespace gl
|
|||||||
void run(gl::command_context& cmd, const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels);
|
void run(gl::command_context& cmd, const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <bool SwapBytes>
|
||||||
struct cs_shuffle_x8d24f_to_d32fx8 : cs_shuffle_base
|
struct cs_shuffle_x8d24f_to_d32fx8 : cs_shuffle_base
|
||||||
{
|
{
|
||||||
u32 m_ssbo_length = 0;
|
u32 m_ssbo_length = 0;
|
||||||
|
@ -289,10 +289,10 @@ namespace gl
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (auto as_vi = dynamic_cast<const gl::viewable_image*>(src);
|
if (auto as_vi = dynamic_cast<const gl::viewable_image*>(src);
|
||||||
gl::get_driver_caps().vendor_AMD &&
|
|
||||||
src->get_target() == gl::texture::target::texture2D &&
|
src->get_target() == gl::texture::target::texture2D &&
|
||||||
as_vi)
|
as_vi)
|
||||||
{
|
{
|
||||||
|
// RGBA8 <-> D24X8 bitcasts are some very common conversions due to some PS3 coding hacks & workarounds.
|
||||||
switch (src->get_internal_format())
|
switch (src->get_internal_format())
|
||||||
{
|
{
|
||||||
case gl::texture::internal_format::depth24_stencil8:
|
case gl::texture::internal_format::depth24_stencil8:
|
||||||
@ -337,8 +337,16 @@ namespace gl
|
|||||||
mem_info->memory_required = (mem_info->image_size_in_texels * 6);
|
mem_info->memory_required = (mem_info->image_size_in_texels * 6);
|
||||||
ensure(!initialize_scratch_mem());
|
ensure(!initialize_scratch_mem());
|
||||||
|
|
||||||
get_compute_task<cs_fconvert_task<f32, f16, false, true>>()->run(cmd, dst, dst_offset,
|
if (pack_info.swap_bytes) [[ likely ]]
|
||||||
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_bytes));
|
{
|
||||||
|
get_compute_task<cs_fconvert_task<f32, f16, false, true>>()->run(cmd, dst, dst_offset,
|
||||||
|
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_bytes));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
get_compute_task<cs_fconvert_task<f32, f16, false, false>>()->run(cmd, dst, dst_offset,
|
||||||
|
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_bytes));
|
||||||
|
}
|
||||||
result = reinterpret_cast<void*>(mem_info->image_size_in_bytes + dst_offset);
|
result = reinterpret_cast<void*>(mem_info->image_size_in_bytes + dst_offset);
|
||||||
}
|
}
|
||||||
else if (pack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
|
else if (pack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
|
||||||
@ -347,8 +355,16 @@ namespace gl
|
|||||||
mem_info->memory_required = (mem_info->image_size_in_texels * 12);
|
mem_info->memory_required = (mem_info->image_size_in_texels * 12);
|
||||||
ensure(!initialize_scratch_mem());
|
ensure(!initialize_scratch_mem());
|
||||||
|
|
||||||
get_compute_task<cs_shuffle_d32fx8_to_x8d24f>()->run(cmd, dst, dst_offset,
|
if (pack_info.swap_bytes)
|
||||||
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_texels));
|
{
|
||||||
|
get_compute_task<cs_shuffle_d32fx8_to_x8d24f<true>>()->run(cmd, dst, dst_offset,
|
||||||
|
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_texels));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
get_compute_task<cs_shuffle_d32fx8_to_x8d24f<false>>()->run(cmd, dst, dst_offset,
|
||||||
|
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_texels));
|
||||||
|
}
|
||||||
result = reinterpret_cast<void*>(mem_info->image_size_in_bytes + dst_offset);
|
result = reinterpret_cast<void*>(mem_info->image_size_in_bytes + dst_offset);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -501,7 +517,6 @@ namespace gl
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Stencil format on NV. Use driver upload path
|
// Stencil format on NV. Use driver upload path
|
||||||
|
|
||||||
if (unpack_info.type == GL_UNSIGNED_INT_24_8)
|
if (unpack_info.type == GL_UNSIGNED_INT_24_8)
|
||||||
{
|
{
|
||||||
if (auto job = get_trivial_transform_job(unpack_info))
|
if (auto job = get_trivial_transform_job(unpack_info))
|
||||||
@ -517,7 +532,15 @@ namespace gl
|
|||||||
{
|
{
|
||||||
mem_info->memory_required = (mem_info->image_size_in_texels * 8);
|
mem_info->memory_required = (mem_info->image_size_in_texels * 8);
|
||||||
initialize_scratch_mem();
|
initialize_scratch_mem();
|
||||||
get_compute_task<cs_shuffle_x8d24f_to_d32fx8>()->run(cmd, transfer_buf, in_offset, out_offset, static_cast<u32>(mem_info->image_size_in_texels));
|
|
||||||
|
if (unpack_info.swap_bytes)
|
||||||
|
{
|
||||||
|
get_compute_task<cs_shuffle_x8d24f_to_d32fx8<true>>()->run(cmd, transfer_buf, in_offset, out_offset, static_cast<u32>(mem_info->image_size_in_texels));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
get_compute_task<cs_shuffle_x8d24f_to_d32fx8<false>>()->run(cmd, transfer_buf, in_offset, out_offset, static_cast<u32>(mem_info->image_size_in_texels));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -14,8 +14,8 @@ R"(
|
|||||||
#define FMT_GL_BGR5_A1 0x99F0
|
#define FMT_GL_BGR5_A1 0x99F0
|
||||||
#define FMT_GL_RGBA4 0x8056
|
#define FMT_GL_RGBA4 0x8056
|
||||||
|
|
||||||
#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8
|
#define bswap_u16(bits) (bits & 0xFFu) << 8u | (bits & 0xFF00u) >> 8u | (bits & 0xFF0000u) << 8u | (bits & 0xFF000000u) >> 8u
|
||||||
#define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24
|
#define bswap_u32(bits) (bits & 0xFFu) << 24u | (bits & 0xFF00u) << 8u | (bits & 0xFF0000u) >> 8u | (bits & 0xFF000000u) >> 24u
|
||||||
|
|
||||||
layout(location=0) out vec4 outColor;
|
layout(location=0) out vec4 outColor;
|
||||||
|
|
||||||
@ -73,18 +73,10 @@ uint readUint32(const in uint address)
|
|||||||
|
|
||||||
uvec2 readUint24_8(const in uint address)
|
uvec2 readUint24_8(const in uint address)
|
||||||
{
|
{
|
||||||
const uint raw_value = data[address];
|
const uint raw_value = readUint32(address);
|
||||||
const uint stencil = bitfieldExtract(raw_value, 0, 8);
|
|
||||||
|
|
||||||
if (swap_bytes != 0)
|
|
||||||
{
|
|
||||||
const uint depth = min(bswap_u32(raw_value), 0xffffff);
|
|
||||||
return uvec2(depth, stencil);
|
|
||||||
}
|
|
||||||
|
|
||||||
return uvec2(
|
return uvec2(
|
||||||
bitfieldExtract(raw_value, 8, 24),
|
bitfieldExtract(raw_value, 8, 24),
|
||||||
stencil
|
bitfieldExtract(raw_value, 0, 8)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,8 @@ layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
|
|||||||
#define IMAGE_LOCATION(x) (x + %loc)
|
#define IMAGE_LOCATION(x) (x + %loc)
|
||||||
#define SSBO_LOCATION IMAGE_LOCATION(2)
|
#define SSBO_LOCATION IMAGE_LOCATION(2)
|
||||||
|
|
||||||
|
#define bswap_u32(bits) (bits & 0xFFu) << 24u | (bits & 0xFF00u) << 8u | (bits & 0xFF0000u) >> 8u | (bits & 0xFF000000u) >> 24u
|
||||||
|
|
||||||
layout(%set, binding=IMAGE_LOCATION(0)) uniform sampler2D depthData;
|
layout(%set, binding=IMAGE_LOCATION(0)) uniform sampler2D depthData;
|
||||||
layout(%set, binding=IMAGE_LOCATION(1)) uniform usampler2D stencilData;
|
layout(%set, binding=IMAGE_LOCATION(1)) uniform usampler2D stencilData;
|
||||||
|
|
||||||
@ -62,13 +64,15 @@ void main()
|
|||||||
float depth = texelFetch(depthData, coord, 0).x;
|
float depth = texelFetch(depthData, coord, 0).x;
|
||||||
uint stencil = texelFetch(stencilData, coord, 0).x;
|
uint stencil = texelFetch(stencilData, coord, 0).x;
|
||||||
uint depth_bytes = uint(depth * 0xffffff);
|
uint depth_bytes = uint(depth * 0xffffff);
|
||||||
|
uint value = (depth_bytes << 8) | stencil;
|
||||||
|
|
||||||
if (swap_bytes != 0)
|
if (swap_bytes != 0)
|
||||||
{
|
{
|
||||||
depth_bytes = (bitfieldExtract(depth_bytes, 0, 8) << 16u) | (bitfieldExtract(depth_bytes, 16, 8) << 0u) | depth_bytes & 0xFF00u;
|
// PS3-style byteswap (full word). PC byteswap is slightly different.
|
||||||
|
value = bswap_u32(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
data[input_coord_to_output_id(coord)] = (depth_bytes << 8) | stencil;
|
data[input_coord_to_output_id(coord)] = value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)"
|
)"
|
||||||
|
Loading…
Reference in New Issue
Block a user