1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2025-01-31 12:31:45 +01:00

rsx: Fix blit transfers when texel sizes mismatch

- Also refactors some bpp handling code
- Simplify texture intersection test to use a normalized/uniform coordinate space
- Fix broken bounds checking as well
This commit is contained in:
kd-11 2019-03-21 01:55:30 +03:00 committed by kd-11
parent b879b32271
commit adc59f9810
7 changed files with 68 additions and 94 deletions

View File

@ -38,6 +38,16 @@ namespace rsx
u16 dst_y = 0;
u16 width = 0;
u16 height = 0;
// Builds a coordi from {src_x, src_y} and {width, height} and hands it back
// as an areai via the implicit coordi -> areai conversion.
areai get_src_area() const
{
	const coordi src_rect{ {src_x, src_y}, {width, height} };
	return src_rect;
}
// Builds a coordi from {dst_x, dst_y} and {width, height} and hands it back
// as an areai via the implicit coordi -> areai conversion.
areai get_dst_area() const
{
	const coordi dst_rect{ {dst_x, dst_y}, {width, height} };
	return dst_rect;
}
};
struct surface_format_info
@ -89,6 +99,11 @@ namespace rsx
virtual u16 get_native_pitch() const = 0;
virtual bool is_depth_surface() const = 0;
// Returns the native pitch divided by the surface width, truncated to u8
// (going by the name, the per-texel size of this surface).
// NOTE(review): no guard against a zero surface width -- callers are assumed
// to query only surfaces with a non-zero width; confirm.
u8 get_bpp() const
{
	const auto row_bytes = get_native_pitch();
	const auto texels_per_row = get_surface_width();
	return static_cast<u8>(row_bytes / texels_per_row);
}
void save_aa_mode()
{
read_aa_mode = write_aa_mode;
@ -890,7 +905,7 @@ namespace rsx
}
template <typename commandbuffer_type>
std::vector<surface_overlap_info> get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch)
std::vector<surface_overlap_info> get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u8 required_bpp)
{
std::vector<surface_overlap_info> result;
std::vector<std::pair<u32, bool>> dirty;
@ -930,16 +945,26 @@ namespace rsx
surface_format_info surface_info{};
Traits::get_surface_info(surface, &surface_info);
if (this_address < texaddr)
const auto normalized_surface_width = (surface_info.surface_width * scale_x * surface_info.bpp) / required_bpp;
const auto normalized_surface_height = surface_info.surface_height * scale_y;
if (LIKELY(this_address >= texaddr))
{
const auto offset = this_address - texaddr;
info.src_x = 0;
info.src_y = 0;
info.dst_y = (offset / required_pitch);
info.dst_x = (offset % required_pitch) / required_bpp;
info.width = std::min<u32>(normalized_surface_width, required_width - info.dst_x);
info.height = std::min<u32>(normalized_surface_height, required_height - info.dst_y);
}
else
{
const auto int_required_width = required_width / scale_x;
const auto int_required_height = required_height / scale_y;
const auto offset = texaddr - this_address;
info.src_y = (offset / required_pitch) / scale_y;
info.src_x = (offset % required_pitch) / surface_info.bpp / scale_x;
info.src_y = (offset / required_pitch);
info.src_x = (offset % required_pitch) / required_bpp;
if (UNLIKELY(info.src_x >= surface_info.surface_width || info.src_y >= surface_info.surface_height))
if (UNLIKELY(info.src_x >= normalized_surface_width || info.src_y >= normalized_surface_height))
{
// Region lies outside the actual texture area, but inside the 'tile'
// In this case, a small region lies to the top-left corner, partially occupying the target
@ -948,31 +973,26 @@ namespace rsx
info.dst_x = 0;
info.dst_y = 0;
info.width = std::min<u32>(int_required_width, surface_info.surface_width - info.src_x);
info.height = std::min<u32>(int_required_height, surface_info.surface_height - info.src_y);
info.is_clipped = (info.width < int_required_width || info.height < int_required_height);
info.width = std::min<u32>(required_width, normalized_surface_width - info.src_x);
info.height = std::min<u32>(required_height, normalized_surface_height - info.src_y);
}
else
info.is_clipped = (info.width < required_width || info.height < required_height);
if (UNLIKELY(surface_info.bpp != required_bpp))
{
const auto int_surface_width = surface_info.surface_width * scale_x;
const auto int_surface_height = surface_info.surface_height * scale_y;
// Width is calculated in the coordinate-space of the requester; normalize
info.src_x = (info.src_x * required_bpp) / surface_info.bpp;
info.width = (info.width * required_bpp) / surface_info.bpp;
}
const auto offset = this_address - texaddr;
info.dst_y = (offset / required_pitch);
info.dst_x = (offset % required_pitch) / surface_info.bpp;
if (UNLIKELY(info.dst_x >= int_surface_width || info.dst_y >= int_surface_height))
{
// False positive
continue;
}
info.src_x = 0;
info.src_y = 0;
info.width = std::min<u32>(int_surface_width, required_width - info.dst_x);
info.height = std::min<u32>(int_surface_height, required_height - info.dst_y);
info.is_clipped = (info.width < required_width || info.height < required_height);
if (UNLIKELY(scale_x > 1))
{
info.src_x /= scale_x;
info.dst_x /= scale_x;
info.width /= scale_x;
info.src_y /= scale_y;
info.dst_y /= scale_y;
info.height /= scale_y;
}

View File

@ -2124,7 +2124,8 @@ namespace rsx
break;
}
const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch);
const auto bpp = get_format_block_size_in_bytes(format);
const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch, bpp);
if (!overlapping_fbos.empty() || !overlapping_locals.empty())
{
@ -2266,9 +2267,9 @@ namespace rsx
src_address += (src.width - src_w) * src_bpp;
}
auto rtt_lookup = [&m_rtts, &cmd](u32 address, u32 width, u32 height, u32 pitch, bool allow_clipped) -> typename surface_store_type::surface_overlap_info
auto rtt_lookup = [&m_rtts, &cmd](u32 address, u32 width, u32 height, u32 pitch, u32 bpp, bool allow_clipped) -> typename surface_store_type::surface_overlap_info
{
const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch);
const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp);
if (list.empty() || (list.back().is_clipped && !allow_clipped))
{
return {};
@ -2278,11 +2279,11 @@ namespace rsx
};
// Check if src/dst are parts of render targets
auto dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, false);
auto dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, false);
dst_is_render_target = dst_subres.surface != nullptr;
// TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, true);
auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, true);
src_is_render_target = src_subres.surface != nullptr;
// Always use GPU blit if src or dst is in the surface store
@ -2319,7 +2320,7 @@ namespace rsx
src_subres.surface->read_barrier(cmd);
const auto surf = src_subres.surface;
auto bpp = surf->get_native_pitch() / surf->get_surface_width();
const auto bpp = surf->get_bpp();
if (bpp != src_bpp)
{
//Enable type scaling in src
@ -2327,14 +2328,6 @@ namespace rsx
typeless_info.src_is_depth = src_subres.is_depth;
typeless_info.src_scaling_hint = (f32)bpp / src_bpp;
typeless_info.src_gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
src_w = (u16)(src_w / typeless_info.src_scaling_hint);
if (!src_subres.is_clipped)
src_subres.width = (u16)(src_subres.width / typeless_info.src_scaling_hint);
else
src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, true);
verify(HERE), src_subres.surface != nullptr;
}
}
@ -2343,7 +2336,7 @@ namespace rsx
// Full barrier is required in case of partial transfers
dst_subres.surface->read_barrier(cmd);
auto bpp = dst_subres.surface->get_native_pitch() / dst_subres.surface->get_surface_width();
auto bpp = dst_subres.surface->get_bpp();
if (bpp != dst_bpp)
{
//Enable type scaling in dst
@ -2351,14 +2344,6 @@ namespace rsx
typeless_info.dst_is_depth = dst_subres.is_depth;
typeless_info.dst_scaling_hint = (f32)bpp / dst_bpp;
typeless_info.dst_gcm_format = dst_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
dst_w = (u16)(dst_w / typeless_info.dst_scaling_hint);
if (!dst_subres.is_clipped)
dst_subres.width = (u16)(dst_subres.width / typeless_info.dst_scaling_hint);
else
dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, false);
verify(HERE), dst_subres.surface != nullptr;
}
}
@ -2379,7 +2364,7 @@ namespace rsx
{
// Optimizations table based on common width/height pairings. If we guess wrong, the upload resolver will fix it anyway
// TODO: Add more entries based on empirical data
if (LIKELY(dst.width == 1280))
if (LIKELY(dst_dimensions.width == 1280))
{
dst_dimensions.height = std::max<s32>(dst.height, 720);
}
@ -2450,18 +2435,7 @@ namespace rsx
else
{
// Destination dimensions are relaxed (true)
dst_area.x1 = dst_subres.src_x;
dst_area.y1 = dst_subres.src_y;
dst_area.x2 += dst_subres.src_x;
dst_area.y2 += dst_subres.src_y;
f32 scale_x = get_internal_scaling_x(dst_subres.surface);
f32 scale_y = get_internal_scaling_y(dst_subres.surface);
dst_area.x1 = s32(scale_x * dst_area.x1);
dst_area.x2 = s32(scale_x * dst_area.x2);
dst_area.y1 = s32(scale_y * dst_area.y1);
dst_area.y2 = s32(scale_y * dst_area.y2);
dst_area = dst_subres.get_src_area();
dest_texture = dst_subres.surface->get_surface();
typeless_info.dst_context = texture_upload_context::framebuffer_storage;
@ -2585,27 +2559,7 @@ namespace rsx
}
else
{
if (LIKELY(!dst_is_render_target))
{
u16 src_subres_w = src_subres.width;
u16 src_subres_h = src_subres.height;
get_rsx_dimensions(src_subres_w, src_subres_h, src_subres.surface);
const int dst_width = (int)(src_subres_w * scale_x * typeless_info.src_scaling_hint);
const int dst_height = (int)(src_subres_h * scale_y);
dst_area.x2 = dst_area.x1 + dst_width;
dst_area.y2 = dst_area.y1 + dst_height;
}
src_area.x2 = src_subres.width;
src_area.y2 = src_subres.height;
src_area.x1 = src_subres.src_x;
src_area.y1 = src_subres.src_y;
src_area.x2 += src_subres.src_x;
src_area.y2 += src_subres.src_y;
src_area = src_subres.get_src_area();
vram_texture = src_subres.surface->get_surface();
typeless_info.src_context = texture_upload_context::framebuffer_storage;
}

View File

@ -1645,7 +1645,7 @@ void GLGSRender::flip(int buffer)
else
{
gl::command_context cmd = { gl_state };
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch);
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp());
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{

View File

@ -613,8 +613,8 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
return;
}
auto src_bpp = src_texture->get_native_pitch() / src_texture->get_surface_width();
auto dst_bpp = get_native_pitch() / get_surface_width();
const auto src_bpp = src_texture->get_bpp();
const auto dst_bpp = get_bpp();
rsx::typeless_xfer typeless_info{};
const bool dst_is_depth = is_depth(get_internal_format());

View File

@ -217,7 +217,7 @@ struct gl_render_target_traits
info->native_pitch = surface->get_native_pitch();
info->surface_width = surface->get_surface_width();
info->surface_height = surface->get_surface_height();
info->bpp = static_cast<u8>(info->native_pitch / info->surface_width);
info->bpp = surface->get_bpp();
}
static void prepare_rtt_for_drawing(void *, gl::render_target *rtt) { rtt->reset_refs(); }

View File

@ -3288,7 +3288,7 @@ void VKGSRender::flip(int buffer)
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch);
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp());
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range

View File

@ -106,8 +106,8 @@ namespace vk
return;
}
auto src_bpp = src_texture->get_native_pitch() / src_texture->get_surface_width();
auto dst_bpp = get_native_pitch() / get_surface_width();
const auto src_bpp = src_texture->get_bpp();
const auto dst_bpp = get_bpp();
rsx::typeless_xfer typeless_info{};
const auto region = rsx::get_transferable_region(this);
@ -259,7 +259,7 @@ namespace rsx
info->native_pitch = surface->native_pitch;
info->surface_width = surface->get_surface_width();
info->surface_height = surface->get_surface_height();
info->bpp = static_cast<u8>(info->native_pitch / info->surface_width);
info->bpp = surface->get_bpp();
}
static void prepare_rtt_for_drawing(vk::command_buffer* pcmd, vk::render_target *surface)