From b788e05396f40fea7e5c34efccef5be9061af468 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 20 Jun 2023 19:09:15 +0300 Subject: [PATCH] rsx: Implement GPU copy elision through coordinate transform - TODO: Some corner cases still exist where format may not be a match after a cache merge. --- rpcs3/Emu/RSX/Common/texture_cache.h | 41 +++++++++-- rpcs3/Emu/RSX/Common/texture_cache_helpers.h | 71 ++++++++++++++++++-- rpcs3/Emu/RSX/GL/GLTextureCache.h | 3 +- rpcs3/Emu/RSX/VK/VKTextureCache.h | 3 +- 4 files changed, 104 insertions(+), 14 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index e822d4e5dc..f41e48ee3b 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -26,6 +26,7 @@ namespace rsx using image_view_type = typename traits::image_view_type; using image_storage_type = typename traits::image_storage_type; using texture_format = typename traits::texture_format; + using viewable_image_type = typename traits::viewable_image_type; using predictor_type = texture_cache_predictor; using ranged_storage = rsx::ranged_storage; @@ -161,6 +162,11 @@ namespace rsx { static_cast(*this) = attr; } + + viewable_image_type as_viewable() const + { + return static_cast(external_handle); + } }; struct sampled_image_descriptor : public sampled_image_descriptor_base @@ -1904,6 +1910,17 @@ namespace rsx auto new_attr = attr; new_attr.gcm_format = gcm_format; + if (last->get_gcm_format() == attr.gcm_format && attr.edge_clamped) + { + // Clipped view + auto viewed_image = last->get_raw_texture(); + sampled_image_descriptor result = { viewed_image->get_view(encoded_remap, remap), last->get_context(), + viewed_image->format_class(), scale, extended_dimension, false, viewed_image->samples() }; + + helpers::calculate_sample_clip_parameters(result, position2i(0, 0), size2i(attr.width, attr.height), size2i(normalized_width, last->get_height())); + return result; + } + return { 
last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {}, last->get_context(), classify_format(gcm_format), scale, extended_dimension, remap }; } @@ -1912,15 +1929,27 @@ namespace rsx auto result = helpers::merge_cache_resources( cmd, overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, encoded_remap, remap, _pool); + const bool is_simple_subresource_copy = + (result.external_subresource_desc.op == deferred_request_command::copy_image_static) || + (result.external_subresource_desc.op == deferred_request_command::copy_image_dynamic); + + if (is_simple_subresource_copy && attr.edge_clamped) + { + helpers::convert_image_copy_to_clip_descriptor( + result, + position2i(result.external_subresource_desc.x, result.external_subresource_desc.y), + size2i(result.external_subresource_desc.width, result.external_subresource_desc.height), /* desired_dimensions: width x height of the copied subresource */ + size2i(result.external_subresource_desc.external_handle->width(), result.external_subresource_desc.external_handle->height()), + encoded_remap, remap, false /*FIXME*/); + + return result; + } + if (options.skip_texture_merge) { - switch (result.external_subresource_desc.op) + if (is_simple_subresource_copy) { - case deferred_request_command::copy_image_static: - case deferred_request_command::copy_image_dynamic: return result; - default: - break; } return {}; @@ -2146,12 +2175,14 @@ namespace rsx attributes.depth = 1; attributes.height = 1; attributes.slice_h = 1; + attributes.edge_clamped = (tex.wrap_s() == rsx::texture_wrap_mode::clamp_to_edge); scale.height = scale.depth = 0.f; subsurface_count = 1; required_surface_height = 1; break; case rsx::texture_dimension_extended::texture_dimension_2d: attributes.depth = 1; + attributes.edge_clamped = (tex.wrap_s() == rsx::texture_wrap_mode::clamp_to_edge && tex.wrap_t() == rsx::texture_wrap_mode::clamp_to_edge); scale.depth = 0.f; subsurface_count = options.is_compressed_format? 
1 : tex.get_exact_mipmap_count(); attributes.slice_h = required_surface_height = attributes.height; diff --git a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h index 0686a84e53..79038b9bd8 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h @@ -58,6 +58,7 @@ namespace rsx u16 slice_h; u8 bpp; bool swizzled; + bool edge_clamped; }; struct blit_op_result @@ -501,6 +502,47 @@ namespace rsx return false; } + template + void calculate_sample_clip_parameters( + sampled_image_descriptor& desc, + const position2i& offset, + const size2i& desired_dimensions, + const size2i& actual_dimensions) + { + const f32 scale_x = f32(desired_dimensions.width) / actual_dimensions.width; + const f32 scale_y = f32(desired_dimensions.height) / actual_dimensions.height; + const f32 offset_x = f32(offset.x) / actual_dimensions.width; + const f32 offset_y = f32(offset.y) / actual_dimensions.height; + + desc.texcoord_xform.scale[0] *= scale_x; + desc.texcoord_xform.scale[1] *= scale_y; + desc.texcoord_xform.bias[0] += offset_x; + desc.texcoord_xform.bias[1] += offset_y; + desc.texcoord_xform.clamp_min[0] = offset_x; + desc.texcoord_xform.clamp_min[1] = offset_y; + desc.texcoord_xform.clamp_max[0] = offset_x + scale_x; + desc.texcoord_xform.clamp_max[1] = offset_y + scale_y; + desc.texcoord_xform.clamp = true; + } + + template + void convert_image_copy_to_clip_descriptor( + sampled_image_descriptor& desc, + const position2i& offset, + const size2i& desired_dimensions, + const size2i& actual_dimensions, + u32 encoded_remap, + const texture_channel_remap_t& decoded_remap, + bool cyclic_reference) + { + desc.image_handle = desc.external_subresource_desc.as_viewable()->get_view(encoded_remap, decoded_remap); + desc.is_cyclic_reference = cyclic_reference; + desc.samples = desc.external_subresource_desc.external_handle->samples(); + desc.external_subresource_desc = {}; + + 
calculate_sample_clip_parameters(desc, offset, desired_dimensions, actual_dimensions); + } + template sampled_image_descriptor process_framebuffer_resource_fast(commandbuffer_type& cmd, render_target_type texptr, @@ -557,22 +599,30 @@ namespace rsx ensure(attr.height == 1); } - bool requires_processing = false; + // A GPU operation must be performed on the data before sampling. Implies transfer_read access. + bool requires_processing = force_convert; + // A GPU clip operation may be performed by combining texture coordinate scaling with a clamp. + bool requires_clip = false; + rsx::surface_access access_type = rsx::surface_access::shader_read; - if (attr.width != surface_width || attr.height != surface_height || force_convert) + if (attr.width != surface_width || attr.height != surface_height) { - // A GPU operation must be performed on the data before sampling. Implies transfer_read access - requires_processing = true; + // If we can get away with clip only, do it + if (attr.edge_clamped) + requires_clip = true; + else + requires_processing = true; } - else if (surface_is_rop_target && g_cfg.video.strict_rendering_mode) + + if (surface_is_rop_target && g_cfg.video.strict_rendering_mode) { // Framebuffer feedback avoidance. 
For MSAA, we do not need to make copies; just use the resolve target if (texptr->samples() == 1) { requires_processing = true; } - else + else if (!requires_processing) { // Select resolve target instead of MSAA image access_type = rsx::surface_access::transfer_read; @@ -592,8 +642,15 @@ namespace rsx texptr->memory_barrier(cmd, access_type); auto viewed_surface = texptr->get_surface(access_type); - return { viewed_surface->get_view(encoded_remap, decoded_remap), texture_upload_context::framebuffer_storage, + sampled_image_descriptor result = { viewed_surface->get_view(encoded_remap, decoded_remap), texture_upload_context::framebuffer_storage, texptr->format_class(), scale, rsx::texture_dimension_extended::texture_dimension_2d, surface_is_rop_target, viewed_surface->samples() }; + + if (requires_clip) + { + calculate_sample_clip_parameters(result, position2i(0, 0), size2i(attr.width, attr.height), size2i(surface_width, surface_height)); + } + + return result; } texptr->memory_barrier(cmd, rsx::surface_access::transfer_read); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 62ecfc2e1e..9d60684d2a 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -26,6 +26,7 @@ namespace gl using image_view_type = gl::texture_view*; using image_storage_type = gl::texture; using texture_format = gl::texture::format; + using viewable_image_type = gl::viewable_image*; }; class cached_texture_section : public rsx::cached_texture_section @@ -388,7 +389,7 @@ namespace gl return vram_texture->get_view(remap_encoding, remap); } - gl::texture* get_raw_texture() const + gl::viewable_image* get_raw_texture() const { return managed_texture.get(); } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 5d16b47c45..e2aace5832 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -28,6 +28,7 @@ namespace vk using image_view_type = vk::image_view*; 
using image_storage_type = vk::image; using texture_format = VkFormat; + using viewable_image_type = vk::viewable_image*; }; class cached_texture_section : public rsx::cached_texture_section @@ -153,7 +154,7 @@ namespace vk return vram_texture->get_view(0xAAE4, rsx::default_remap_vector); } - vk::image* get_raw_texture() + vk::viewable_image* get_raw_texture() { return managed_texture.get(); }