mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-25 12:12:50 +01:00
gl: Fix D24X8 accelerated encode/decode
- PS3 D24X8 is swapped as a full word, unlike PC.
- Add missing paths to handle custom swap behavior.
This commit is contained in:
parent
81fa3da101
commit
362a26a404
@ -207,7 +207,8 @@ namespace gl
|
||||
compute_task::run(cmd, num_invocations);
|
||||
}
|
||||
|
||||
cs_shuffle_d32fx8_to_x8d24f::cs_shuffle_d32fx8_to_x8d24f()
|
||||
template <bool SwapBytes>
|
||||
cs_shuffle_d32fx8_to_x8d24f<SwapBytes>::cs_shuffle_d32fx8_to_x8d24f()
|
||||
{
|
||||
uniforms = "uniform uint in_ptr, out_ptr;\n";
|
||||
|
||||
@ -223,15 +224,22 @@ namespace gl
|
||||
" value |= stencil;\n"
|
||||
" data[index + out_ptr] = bswap_u32(value);\n";
|
||||
|
||||
if constexpr (!SwapBytes)
|
||||
{
|
||||
work_kernel = fmt::replace_all(work_kernel, "bswap_u32(value)", "value", 1);
|
||||
}
|
||||
|
||||
cs_shuffle_base::build("");
|
||||
}
|
||||
|
||||
void cs_shuffle_d32fx8_to_x8d24f::bind_resources()
|
||||
template <bool SwapBytes>
|
||||
void cs_shuffle_d32fx8_to_x8d24f<SwapBytes>::bind_resources()
|
||||
{
|
||||
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
|
||||
}
|
||||
|
||||
void cs_shuffle_d32fx8_to_x8d24f::run(gl::command_context& cmd, const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
|
||||
template <bool SwapBytes>
|
||||
void cs_shuffle_d32fx8_to_x8d24f<SwapBytes>::run(gl::command_context& cmd, const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
|
||||
{
|
||||
u32 data_offset;
|
||||
if (src_offset > dst_offset)
|
||||
@ -250,7 +258,11 @@ namespace gl
|
||||
cs_shuffle_base::run(cmd, data, num_texels * 4, data_offset);
|
||||
}
|
||||
|
||||
cs_shuffle_x8d24f_to_d32fx8::cs_shuffle_x8d24f_to_d32fx8()
|
||||
template cs_shuffle_d32fx8_to_x8d24f<true>;
|
||||
template cs_shuffle_d32fx8_to_x8d24f<false>;
|
||||
|
||||
template <bool SwapBytes>
|
||||
cs_shuffle_x8d24f_to_d32fx8<SwapBytes>::cs_shuffle_x8d24f_to_d32fx8()
|
||||
{
|
||||
uniforms = "uniform uint texel_count, in_ptr, out_ptr;\n";
|
||||
|
||||
@ -267,15 +279,22 @@ namespace gl
|
||||
" data[index * 2 + out_offset] = d24f_to_f32(depth);\n"
|
||||
" data[index * 2 + (out_offset + 1)] = stencil;\n";
|
||||
|
||||
if constexpr (!SwapBytes)
|
||||
{
|
||||
work_kernel = fmt::replace_all(work_kernel, "value = bswap_u32(value)", "// value = bswap_u32(value)", 1);
|
||||
}
|
||||
|
||||
cs_shuffle_base::build("");
|
||||
}
|
||||
|
||||
void cs_shuffle_x8d24f_to_d32fx8::bind_resources()
|
||||
template <bool SwapBytes>
|
||||
void cs_shuffle_x8d24f_to_d32fx8<SwapBytes>::bind_resources()
|
||||
{
|
||||
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
|
||||
}
|
||||
|
||||
void cs_shuffle_x8d24f_to_d32fx8::run(gl::command_context& cmd, const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
|
||||
template <bool SwapBytes>
|
||||
void cs_shuffle_x8d24f_to_d32fx8<SwapBytes>::run(gl::command_context& cmd, const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
|
||||
{
|
||||
u32 data_offset;
|
||||
if (src_offset > dst_offset)
|
||||
@ -294,6 +313,9 @@ namespace gl
|
||||
cs_shuffle_base::run(cmd, data, num_texels * 4, data_offset);
|
||||
}
|
||||
|
||||
template cs_shuffle_x8d24f_to_d32fx8<true>;
|
||||
template cs_shuffle_x8d24f_to_d32fx8<false>;
|
||||
|
||||
cs_d24x8_to_ssbo::cs_d24x8_to_ssbo()
|
||||
{
|
||||
initialize();
|
||||
@ -332,11 +354,11 @@ namespace gl
|
||||
}
|
||||
|
||||
// This method is callable in sensitive code and must restore the GL state on exit
|
||||
gl::saved_sampler_state save_0(GL_COMPUTE_BUFFER_SLOT(0), m_sampler);
|
||||
gl::saved_sampler_state save_1(GL_COMPUTE_BUFFER_SLOT(1), m_sampler);
|
||||
gl::saved_sampler_state save_sampler0(GL_COMPUTE_BUFFER_SLOT(0), m_sampler);
|
||||
gl::saved_sampler_state save_sampler1(GL_COMPUTE_BUFFER_SLOT(1), m_sampler);
|
||||
|
||||
gl::bind_image_view_safe(cmd, GL_COMPUTE_BUFFER_SLOT(0), depth_view);
|
||||
gl::bind_image_view_safe(cmd, GL_COMPUTE_BUFFER_SLOT(1), stencil_view);
|
||||
gl::bind_image_view_safe save_image1(cmd, GL_COMPUTE_BUFFER_SLOT(0), depth_view);
|
||||
gl::bind_image_view_safe save_image2(cmd, GL_COMPUTE_BUFFER_SLOT(1), stencil_view);
|
||||
|
||||
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(2), out_offset, row_pitch * 4 * region.height);
|
||||
|
||||
@ -383,8 +405,8 @@ namespace gl
|
||||
}
|
||||
|
||||
// This method is callable in sensitive code and must restore the GL state on exit
|
||||
gl::saved_sampler_state save(GL_COMPUTE_BUFFER_SLOT(0), m_sampler);
|
||||
gl::bind_image_view_safe(cmd, GL_COMPUTE_BUFFER_SLOT(0), data_view);
|
||||
gl::saved_sampler_state save_sampler(GL_COMPUTE_BUFFER_SLOT(0), m_sampler);
|
||||
gl::bind_image_view_safe save_image(cmd, GL_COMPUTE_BUFFER_SLOT(0), data_view);
|
||||
|
||||
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(1), out_offset, row_pitch * 4 * region.height);
|
||||
|
||||
|
@ -78,6 +78,7 @@ namespace gl
|
||||
}
|
||||
};
|
||||
|
||||
template <bool SwapBytes>
|
||||
struct cs_shuffle_d32fx8_to_x8d24f : cs_shuffle_base
|
||||
{
|
||||
u32 m_ssbo_length = 0;
|
||||
@ -89,6 +90,7 @@ namespace gl
|
||||
void run(gl::command_context& cmd, const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels);
|
||||
};
|
||||
|
||||
template <bool SwapBytes>
|
||||
struct cs_shuffle_x8d24f_to_d32fx8 : cs_shuffle_base
|
||||
{
|
||||
u32 m_ssbo_length = 0;
|
||||
|
@ -289,10 +289,10 @@ namespace gl
|
||||
}
|
||||
|
||||
if (auto as_vi = dynamic_cast<const gl::viewable_image*>(src);
|
||||
gl::get_driver_caps().vendor_AMD &&
|
||||
src->get_target() == gl::texture::target::texture2D &&
|
||||
as_vi)
|
||||
{
|
||||
// RGBA8 <-> D24X8 bitcasts are some very common conversions due to some PS3 coding hacks & workarounds.
|
||||
switch (src->get_internal_format())
|
||||
{
|
||||
case gl::texture::internal_format::depth24_stencil8:
|
||||
@ -337,8 +337,16 @@ namespace gl
|
||||
mem_info->memory_required = (mem_info->image_size_in_texels * 6);
|
||||
ensure(!initialize_scratch_mem());
|
||||
|
||||
get_compute_task<cs_fconvert_task<f32, f16, false, true>>()->run(cmd, dst, dst_offset,
|
||||
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_bytes));
|
||||
if (pack_info.swap_bytes) [[ likely ]]
|
||||
{
|
||||
get_compute_task<cs_fconvert_task<f32, f16, false, true>>()->run(cmd, dst, dst_offset,
|
||||
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_bytes));
|
||||
}
|
||||
else
|
||||
{
|
||||
get_compute_task<cs_fconvert_task<f32, f16, false, false>>()->run(cmd, dst, dst_offset,
|
||||
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_bytes));
|
||||
}
|
||||
result = reinterpret_cast<void*>(mem_info->image_size_in_bytes + dst_offset);
|
||||
}
|
||||
else if (pack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
|
||||
@ -347,8 +355,16 @@ namespace gl
|
||||
mem_info->memory_required = (mem_info->image_size_in_texels * 12);
|
||||
ensure(!initialize_scratch_mem());
|
||||
|
||||
get_compute_task<cs_shuffle_d32fx8_to_x8d24f>()->run(cmd, dst, dst_offset,
|
||||
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_texels));
|
||||
if (pack_info.swap_bytes)
|
||||
{
|
||||
get_compute_task<cs_shuffle_d32fx8_to_x8d24f<true>>()->run(cmd, dst, dst_offset,
|
||||
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_texels));
|
||||
}
|
||||
else
|
||||
{
|
||||
get_compute_task<cs_shuffle_d32fx8_to_x8d24f<false>>()->run(cmd, dst, dst_offset,
|
||||
static_cast<u32>(mem_info->image_size_in_bytes), static_cast<u32>(mem_info->image_size_in_texels));
|
||||
}
|
||||
result = reinterpret_cast<void*>(mem_info->image_size_in_bytes + dst_offset);
|
||||
}
|
||||
else
|
||||
@ -501,7 +517,6 @@ namespace gl
|
||||
else
|
||||
{
|
||||
// Stencil format on NV. Use driver upload path
|
||||
|
||||
if (unpack_info.type == GL_UNSIGNED_INT_24_8)
|
||||
{
|
||||
if (auto job = get_trivial_transform_job(unpack_info))
|
||||
@ -517,7 +532,15 @@ namespace gl
|
||||
{
|
||||
mem_info->memory_required = (mem_info->image_size_in_texels * 8);
|
||||
initialize_scratch_mem();
|
||||
get_compute_task<cs_shuffle_x8d24f_to_d32fx8>()->run(cmd, transfer_buf, in_offset, out_offset, static_cast<u32>(mem_info->image_size_in_texels));
|
||||
|
||||
if (unpack_info.swap_bytes)
|
||||
{
|
||||
get_compute_task<cs_shuffle_x8d24f_to_d32fx8<true>>()->run(cmd, transfer_buf, in_offset, out_offset, static_cast<u32>(mem_info->image_size_in_texels));
|
||||
}
|
||||
else
|
||||
{
|
||||
get_compute_task<cs_shuffle_x8d24f_to_d32fx8<false>>()->run(cmd, transfer_buf, in_offset, out_offset, static_cast<u32>(mem_info->image_size_in_texels));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -14,8 +14,8 @@ R"(
|
||||
#define FMT_GL_BGR5_A1 0x99F0
|
||||
#define FMT_GL_RGBA4 0x8056
|
||||
|
||||
#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8
|
||||
#define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24
|
||||
#define bswap_u16(bits) (bits & 0xFFu) << 8u | (bits & 0xFF00u) >> 8u | (bits & 0xFF0000u) << 8u | (bits & 0xFF000000u) >> 8u
|
||||
#define bswap_u32(bits) (bits & 0xFFu) << 24u | (bits & 0xFF00u) << 8u | (bits & 0xFF0000u) >> 8u | (bits & 0xFF000000u) >> 24u
|
||||
|
||||
layout(location=0) out vec4 outColor;
|
||||
|
||||
@ -73,18 +73,10 @@ uint readUint32(const in uint address)
|
||||
|
||||
uvec2 readUint24_8(const in uint address)
|
||||
{
|
||||
const uint raw_value = data[address];
|
||||
const uint stencil = bitfieldExtract(raw_value, 0, 8);
|
||||
|
||||
if (swap_bytes != 0)
|
||||
{
|
||||
const uint depth = min(bswap_u32(raw_value), 0xffffff);
|
||||
return uvec2(depth, stencil);
|
||||
}
|
||||
|
||||
const uint raw_value = readUint32(address);
|
||||
return uvec2(
|
||||
bitfieldExtract(raw_value, 8, 24),
|
||||
stencil
|
||||
bitfieldExtract(raw_value, 0, 8)
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@ layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
|
||||
#define IMAGE_LOCATION(x) (x + %loc)
|
||||
#define SSBO_LOCATION IMAGE_LOCATION(2)
|
||||
|
||||
#define bswap_u32(bits) (bits & 0xFFu) << 24u | (bits & 0xFF00u) << 8u | (bits & 0xFF0000u) >> 8u | (bits & 0xFF000000u) >> 24u
|
||||
|
||||
layout(%set, binding=IMAGE_LOCATION(0)) uniform sampler2D depthData;
|
||||
layout(%set, binding=IMAGE_LOCATION(1)) uniform usampler2D stencilData;
|
||||
|
||||
@ -62,13 +64,15 @@ void main()
|
||||
float depth = texelFetch(depthData, coord, 0).x;
|
||||
uint stencil = texelFetch(stencilData, coord, 0).x;
|
||||
uint depth_bytes = uint(depth * 0xffffff);
|
||||
uint value = (depth_bytes << 8) | stencil;
|
||||
|
||||
if (swap_bytes != 0)
|
||||
{
|
||||
depth_bytes = (bitfieldExtract(depth_bytes, 0, 8) << 16u) | (bitfieldExtract(depth_bytes, 16, 8) << 0u) | depth_bytes & 0xFF00u;
|
||||
// PS3-style byteswap (full word). PC byteswap is slightly different.
|
||||
value = bswap_u32(value);
|
||||
}
|
||||
|
||||
data[input_coord_to_output_id(coord)] = (depth_bytes << 8) | stencil;
|
||||
data[input_coord_to_output_id(coord)] = value;
|
||||
}
|
||||
}
|
||||
)"
|
||||
|
Loading…
Reference in New Issue
Block a user