From adc59f98104cd7af37471ecca7024f0fbfb8122a Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 21 Mar 2019 01:55:30 +0300 Subject: [PATCH] rsx: Fix blit transfers when texel sizes mismatch - Also refactors some bpp handling code - Simplify texture intersection test to use a normalized/uniform coordinate space - Fix broken bounds checking as well --- rpcs3/Emu/RSX/Common/surface_store.h | 78 +++++++++++++++++----------- rpcs3/Emu/RSX/Common/texture_cache.h | 68 ++++-------------------- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 2 +- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 4 +- rpcs3/Emu/RSX/GL/GLRenderTargets.h | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 2 +- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 6 +-- 7 files changed, 68 insertions(+), 94 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index c04c98374f..5eea30072f 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -38,6 +38,16 @@ namespace rsx u16 dst_y = 0; u16 width = 0; u16 height = 0; + + areai get_src_area() const + { + return coordi{ {src_x, src_y}, {width, height} }; + } + + areai get_dst_area() const + { + return coordi{ {dst_x, dst_y}, {width, height} }; + } }; struct surface_format_info @@ -89,6 +99,11 @@ namespace rsx virtual u16 get_native_pitch() const = 0; virtual bool is_depth_surface() const = 0; + u8 get_bpp() const + { + return u8(get_native_pitch() / get_surface_width()); + } + void save_aa_mode() { read_aa_mode = write_aa_mode; @@ -890,7 +905,7 @@ namespace rsx } template - std::vector get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch) + std::vector get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u8 required_bpp) { std::vector result; std::vector> dirty; @@ -930,16 +945,26 @@ namespace rsx surface_format_info surface_info{}; Traits::get_surface_info(surface, &surface_info); - if (this_address < texaddr) + const auto normalized_surface_width = (surface_info.surface_width * scale_x * surface_info.bpp) / required_bpp; + const auto normalized_surface_height = surface_info.surface_height * scale_y; + + if (LIKELY(this_address >= texaddr)) + { + const auto offset = this_address - texaddr; + info.src_x = 0; + info.src_y = 0; + info.dst_y = (offset / required_pitch); + info.dst_x = (offset % required_pitch) / required_bpp; + info.width = std::min(normalized_surface_width, required_width - info.dst_x); + info.height = std::min(normalized_surface_height, required_height - info.dst_y); + } + else { - const auto int_required_width = required_width / scale_x; - const auto int_required_height = required_height / scale_y; - const auto offset = texaddr - this_address; - info.src_y = (offset / required_pitch) / scale_y; - info.src_x = (offset % required_pitch) / surface_info.bpp / scale_x; + info.src_y = (offset / required_pitch); + info.src_x = (offset % required_pitch) / required_bpp; - if (UNLIKELY(info.src_x >= surface_info.surface_width || info.src_y >= surface_info.surface_height)) + if (UNLIKELY(info.src_x >= normalized_surface_width || info.src_y >= normalized_surface_height)) { // Region lies outside the actual texture area, but inside the 'tile' // In this case, a small region lies to the top-left corner, partially occupying the target @@ -948,31 +973,26 @@ namespace rsx info.dst_x = 0; info.dst_y = 0; - info.width = std::min(int_required_width, surface_info.surface_width - info.src_x); - info.height = std::min(int_required_height, surface_info.surface_height - info.src_y); - info.is_clipped = (info.width < int_required_width || info.height < int_required_height); + info.width = std::min(required_width, normalized_surface_width - info.src_x); + info.height = std::min(required_height, normalized_surface_height - info.src_y); } - else + + info.is_clipped = (info.width < required_width || info.height < required_height); + + if (UNLIKELY(surface_info.bpp != required_bpp)) { - const auto int_surface_width = surface_info.surface_width * scale_x; - const auto int_surface_height = surface_info.surface_height * scale_y; + // Width is calculated in the coordinate-space of the requester; normalize + info.src_x = (info.src_x * required_bpp) / surface_info.bpp; + info.width = (info.width * required_bpp) / surface_info.bpp; + } - const auto offset = this_address - texaddr; - info.dst_y = (offset / required_pitch); - info.dst_x = (offset % required_pitch) / surface_info.bpp; - - if (UNLIKELY(info.dst_x >= int_surface_width || info.dst_y >= int_surface_height)) - { - // False positive - continue; - } - - info.src_x = 0; - info.src_y = 0; - info.width = std::min(int_surface_width, required_width - info.dst_x); - info.height = std::min(int_surface_height, required_height - info.dst_y); - info.is_clipped = (info.width < required_width || info.height < required_height); + if (UNLIKELY(scale_x > 1)) + { + info.src_x /= scale_x; + info.dst_x /= scale_x; info.width /= scale_x; + info.src_y /= scale_y; + info.dst_y /= scale_y; info.height /= scale_y; } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 338d5e8638..d6a01f98c9 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2124,7 +2124,8 @@ namespace rsx break; } - const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch); + const auto bpp = get_format_block_size_in_bytes(format); + const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch, bpp); if (!overlapping_fbos.empty() || !overlapping_locals.empty()) { @@ -2266,9 +2267,9 @@ namespace rsx src_address += (src.width - src_w) * src_bpp; } - auto rtt_lookup = [&m_rtts, &cmd](u32 address, u32 width, u32 height, u32 pitch, bool allow_clipped) -> typename surface_store_type::surface_overlap_info + auto rtt_lookup = [&m_rtts, &cmd](u32 address, u32 width, u32 height, u32 pitch, u32 bpp, bool allow_clipped) -> typename surface_store_type::surface_overlap_info { - const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch); + const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp); if (list.empty() || (list.back().is_clipped && !allow_clipped)) { return {}; @@ -2278,11 +2279,11 @@ namespace rsx }; // Check if src/dst are parts of render targets - auto dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, false); + auto dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, false); dst_is_render_target = dst_subres.surface != nullptr; // TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate - auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, true); + auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, true); src_is_render_target = src_subres.surface != nullptr; // Always use GPU blit if src or dst is in the surface store @@ -2319,7 +2320,7 @@ namespace rsx src_subres.surface->read_barrier(cmd); const auto surf = src_subres.surface; - auto bpp = surf->get_native_pitch() / surf->get_surface_width(); + const auto bpp = surf->get_bpp(); if (bpp != src_bpp) { //Enable type scaling in src @@ -2327,14 +2328,6 @@ namespace rsx typeless_info.src_is_depth = src_subres.is_depth; typeless_info.src_scaling_hint = (f32)bpp / src_bpp; typeless_info.src_gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; - - src_w = (u16)(src_w / typeless_info.src_scaling_hint); - if (!src_subres.is_clipped) - src_subres.width = (u16)(src_subres.width / typeless_info.src_scaling_hint); - else - src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, true); - - verify(HERE), src_subres.surface != nullptr; } } @@ -2343,7 +2336,7 @@ namespace rsx // Full barrier is required in case of partial transfers dst_subres.surface->read_barrier(cmd); - auto bpp = dst_subres.surface->get_native_pitch() / dst_subres.surface->get_surface_width(); + auto bpp = dst_subres.surface->get_bpp(); if (bpp != dst_bpp) { //Enable type scaling in dst @@ -2351,14 +2344,6 @@ namespace rsx typeless_info.dst_is_depth = dst_subres.is_depth; typeless_info.dst_scaling_hint = (f32)bpp / dst_bpp; typeless_info.dst_gcm_format = dst_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; - - dst_w = (u16)(dst_w / typeless_info.dst_scaling_hint); - if (!dst_subres.is_clipped) - dst_subres.width = (u16)(dst_subres.width / typeless_info.dst_scaling_hint); - else - dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, false); - - verify(HERE), dst_subres.surface != nullptr; } } @@ -2379,7 +2364,7 @@ namespace rsx { // Optimizations table based on common width/height pairings. If we guess wrong, the upload resolver will fix it anyway // TODO: Add more entries based on empirical data - if (LIKELY(dst.width == 1280)) + if (LIKELY(dst_dimensions.width == 1280)) { dst_dimensions.height = std::max(dst.height, 720); } @@ -2450,18 +2435,7 @@ namespace rsx else { // Destination dimensions are relaxed (true) - dst_area.x1 = dst_subres.src_x; - dst_area.y1 = dst_subres.src_y; - dst_area.x2 += dst_subres.src_x; - dst_area.y2 += dst_subres.src_y; - - f32 scale_x = get_internal_scaling_x(dst_subres.surface); - f32 scale_y = get_internal_scaling_y(dst_subres.surface); - - dst_area.x1 = s32(scale_x * dst_area.x1); - dst_area.x2 = s32(scale_x * dst_area.x2); - dst_area.y1 = s32(scale_y * dst_area.y1); - dst_area.y2 = s32(scale_y * dst_area.y2); + dst_area = dst_subres.get_src_area(); dest_texture = dst_subres.surface->get_surface(); typeless_info.dst_context = texture_upload_context::framebuffer_storage; @@ -2585,27 +2559,7 @@ namespace rsx } else { - if (LIKELY(!dst_is_render_target)) - { - u16 src_subres_w = src_subres.width; - u16 src_subres_h = src_subres.height; - get_rsx_dimensions(src_subres_w, src_subres_h, src_subres.surface); - - const int dst_width = (int)(src_subres_w * scale_x * typeless_info.src_scaling_hint); - const int dst_height = (int)(src_subres_h * scale_y); - - dst_area.x2 = dst_area.x1 + dst_width; - dst_area.y2 = dst_area.y1 + dst_height; - } - - src_area.x2 = src_subres.width; - src_area.y2 = src_subres.height; - - src_area.x1 = src_subres.src_x; - src_area.y1 = src_subres.src_y; - src_area.x2 += src_subres.src_x; - src_area.y2 += src_subres.src_y; - + src_area = src_subres.get_src_area(); vram_texture = src_subres.surface->get_surface(); typeless_info.src_context = texture_upload_context::framebuffer_storage; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 6edf225490..b0304a0d17 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1645,7 +1645,7 @@ void GLGSRender::flip(int buffer) else { gl::command_context cmd = { gl_state }; - const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch); + const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp()); if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture) { diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index c779bb37fb..136a4ba09d 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -613,8 +613,8 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init return; } - auto src_bpp = src_texture->get_native_pitch() / src_texture->get_surface_width(); - auto dst_bpp = get_native_pitch() / get_surface_width(); + const auto src_bpp = src_texture->get_bpp(); + const auto dst_bpp = get_bpp(); rsx::typeless_xfer typeless_info{}; const bool dst_is_depth = is_depth(get_internal_format()); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index f57017fb06..a6238ab440 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -217,7 +217,7 @@ struct gl_render_target_traits info->native_pitch = surface->get_native_pitch(); info->surface_width = surface->get_surface_width(); info->surface_height = surface->get_surface_height(); - info->bpp = static_cast(info->native_pitch / info->surface_width); + info->bpp = surface->get_bpp(); } static void prepare_rtt_for_drawing(void *, gl::render_target *rtt) { rtt->reset_refs(); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 789e0e9b0d..09fc2d8dd8 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -3288,7 +3288,7 @@ void VKGSRender::flip(int buffer) } else { - const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch); + const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp()); if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture) { // Confirmed to be the newest data source in that range diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 3826f4ec84..f61cfd8001 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -106,8 +106,8 @@ namespace vk return; } - auto src_bpp = src_texture->get_native_pitch() / src_texture->get_surface_width(); - auto dst_bpp = get_native_pitch() / get_surface_width(); + const auto src_bpp = src_texture->get_bpp(); + const auto dst_bpp = get_bpp(); rsx::typeless_xfer typeless_info{}; const auto region = rsx::get_transferable_region(this); @@ -259,7 +259,7 @@ namespace rsx info->native_pitch = surface->native_pitch; info->surface_width = surface->get_surface_width(); info->surface_height = surface->get_surface_height(); - info->bpp = static_cast(info->native_pitch / info->surface_width); + info->bpp = surface->get_bpp(); } static void prepare_rtt_for_drawing(vk::command_buffer* pcmd, vk::render_target *surface)