1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 18:53:28 +01:00

rsx: Implement GPU copy elision through coordinate transform

- TODO: Some corner cases still exist where format may not be a match after a cache merge.
This commit is contained in:
kd-11 2023-06-20 19:09:15 +03:00 committed by kd-11
parent 5dc4e74c98
commit b788e05396
4 changed files with 104 additions and 14 deletions

View File

@ -26,6 +26,7 @@ namespace rsx
using image_view_type = typename traits::image_view_type;
using image_storage_type = typename traits::image_storage_type;
using texture_format = typename traits::texture_format;
using viewable_image_type = typename traits::viewable_image_type;
using predictor_type = texture_cache_predictor<traits>;
using ranged_storage = rsx::ranged_storage<traits>;
@ -161,6 +162,11 @@ namespace rsx
{
static_cast<image_section_attributes_t&>(*this) = attr;
}
// Returns external_handle converted to the backend's viewable image type.
// The conversion is a plain static_cast, so no runtime type check takes place.
// NOTE(review): presumably external_handle always refers to a viewable image
// whenever this is called -- confirm against the callers.
viewable_image_type as_viewable() const
{
	auto viewable = static_cast<viewable_image_type>(external_handle);
	return viewable;
}
};
struct sampled_image_descriptor : public sampled_image_descriptor_base
@ -1904,6 +1910,17 @@ namespace rsx
auto new_attr = attr;
new_attr.gcm_format = gcm_format;
if (last->get_gcm_format() == attr.gcm_format && attr.edge_clamped)
{
// Clipped view
auto viewed_image = last->get_raw_texture();
sampled_image_descriptor result = { viewed_image->get_view(encoded_remap, remap), last->get_context(),
viewed_image->format_class(), scale, extended_dimension, false, viewed_image->samples() };
helpers::calculate_sample_clip_parameters(result, position2i(0, 0), size2i(attr.width, attr.height), size2i(normalized_width, last->get_height()));
return result;
}
return { last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {},
last->get_context(), classify_format(gcm_format), scale, extended_dimension, remap };
}
@ -1912,15 +1929,27 @@ namespace rsx
auto result = helpers::merge_cache_resources<sampled_image_descriptor>(
cmd, overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, encoded_remap, remap, _pool);
// The merged result is either a static or a dynamic image copy request.
const bool is_simple_subresource_copy =
	(result.external_subresource_desc.op == deferred_request_command::copy_image_static) ||
	(result.external_subresource_desc.op == deferred_request_command::copy_image_dynamic);

if (is_simple_subresource_copy && attr.edge_clamped)
{
	// Replace the pending copy with a clipped view of the source image.
	// The requested region is (width x height) of the subresource. Passing the width for both
	// axes would derive the vertical scale and clamp window from the horizontal extent, which
	// mis-clips every non-square region.
	helpers::convert_image_copy_to_clip_descriptor(
		result,
		position2i(result.external_subresource_desc.x, result.external_subresource_desc.y),
		size2i(result.external_subresource_desc.width, result.external_subresource_desc.height),
		size2i(result.external_subresource_desc.external_handle->width(), result.external_subresource_desc.external_handle->height()),
		encoded_remap, remap, false /*FIXME*/);
	return result;
}
if (options.skip_texture_merge)
{
switch (result.external_subresource_desc.op)
if (is_simple_subresource_copy)
{
case deferred_request_command::copy_image_static:
case deferred_request_command::copy_image_dynamic:
return result;
default:
break;
}
return {};
@ -2146,12 +2175,14 @@ namespace rsx
attributes.depth = 1;
attributes.height = 1;
attributes.slice_h = 1;
attributes.edge_clamped = (tex.wrap_s() == rsx::texture_wrap_mode::clamp_to_edge);
scale.height = scale.depth = 0.f;
subsurface_count = 1;
required_surface_height = 1;
break;
case rsx::texture_dimension_extended::texture_dimension_2d:
attributes.depth = 1;
attributes.edge_clamped = (tex.wrap_s() == rsx::texture_wrap_mode::clamp_to_edge && tex.wrap_t() == rsx::texture_wrap_mode::clamp_to_edge);
scale.depth = 0.f;
subsurface_count = options.is_compressed_format? 1 : tex.get_exact_mipmap_count();
attributes.slice_h = required_surface_height = attributes.height;

View File

@ -58,6 +58,7 @@ namespace rsx
u16 slice_h;
u8 bpp;
bool swizzled;
bool edge_clamped;
};
struct blit_op_result
@ -501,6 +502,47 @@ namespace rsx
return false;
}
template <typename sampled_image_descriptor>
void calculate_sample_clip_parameters(
sampled_image_descriptor& desc,
const position2i& offset,
const size2i& desired_dimensions,
const size2i& actual_dimensions)
{
const f32 scale_x = f32(desired_dimensions.width) / actual_dimensions.width;
const f32 scale_y = f32(desired_dimensions.height) / actual_dimensions.height;
const f32 offset_x = f32(offset.x) / actual_dimensions.width;
const f32 offset_y = f32(offset.y) / actual_dimensions.height;
desc.texcoord_xform.scale[0] *= scale_x;
desc.texcoord_xform.scale[1] *= scale_y;
desc.texcoord_xform.bias[0] += offset_x;
desc.texcoord_xform.bias[1] += offset_y;
desc.texcoord_xform.clamp_min[0] = offset_x;
desc.texcoord_xform.clamp_min[1] = offset_y;
desc.texcoord_xform.clamp_max[0] = offset_x + scale_x;
desc.texcoord_xform.clamp_max[1] = offset_y + scale_y;
desc.texcoord_xform.clamp = true;
}
// Rewrites a descriptor that carries a pending subresource copy so that it samples a view of the
// source image directly, restricted to the requested region.
//
// desc               - descriptor to rewrite; its external_subresource_desc is read, then reset
// offset             - position of the region, forwarded to calculate_sample_clip_parameters
// desired_dimensions - extent of the region, forwarded to calculate_sample_clip_parameters
// actual_dimensions  - full extent used for normalization, forwarded as well
// encoded_remap      - encoded channel remap passed to get_view
// decoded_remap      - decoded channel remap passed to get_view
// cyclic_reference   - value stored into desc.is_cyclic_reference
template <typename sampled_image_descriptor>
void convert_image_copy_to_clip_descriptor(
	sampled_image_descriptor& desc,
	const position2i& offset,
	const size2i& desired_dimensions,
	const size2i& actual_dimensions,
	u32 encoded_remap,
	const texture_channel_remap_t& decoded_remap,
	bool cyclic_reference)
{
	auto& pending_copy = desc.external_subresource_desc;

	// Everything needed from the copy request must be read before it is cleared below.
	desc.image_handle = pending_copy.as_viewable()->get_view(encoded_remap, decoded_remap);
	desc.is_cyclic_reference = cyclic_reference;
	desc.samples = pending_copy.external_handle->samples();

	// Reset the copy request to its default-constructed state.
	pending_copy = {};

	calculate_sample_clip_parameters(desc, offset, desired_dimensions, actual_dimensions);
}
template <typename sampled_image_descriptor, typename commandbuffer_type, typename render_target_type>
sampled_image_descriptor process_framebuffer_resource_fast(commandbuffer_type& cmd,
render_target_type texptr,
@ -557,22 +599,30 @@ namespace rsx
ensure(attr.height == 1);
}
bool requires_processing = false;
// A GPU operation must be performed on the data before sampling. Implies transfer_read access.
bool requires_processing = force_convert;
// A GPU clip operation may be performed by combining texture coordinate scaling with a clamp.
bool requires_clip = false;
rsx::surface_access access_type = rsx::surface_access::shader_read;
if (attr.width != surface_width || attr.height != surface_height || force_convert)
if (attr.width != surface_width || attr.height != surface_height)
{
// A GPU operation must be performed on the data before sampling. Implies transfer_read access
requires_processing = true;
// If we can get away with clip only, do it
if (attr.edge_clamped)
requires_clip = true;
else
requires_processing = true;
}
else if (surface_is_rop_target && g_cfg.video.strict_rendering_mode)
if (surface_is_rop_target && g_cfg.video.strict_rendering_mode)
{
// Framebuffer feedback avoidance. For MSAA, we do not need to make copies; just use the resolve target
if (texptr->samples() == 1)
{
requires_processing = true;
}
else
else if (!requires_processing)
{
// Select resolve target instead of MSAA image
access_type = rsx::surface_access::transfer_read;
@ -592,8 +642,15 @@ namespace rsx
texptr->memory_barrier(cmd, access_type);
auto viewed_surface = texptr->get_surface(access_type);
return { viewed_surface->get_view(encoded_remap, decoded_remap), texture_upload_context::framebuffer_storage,
sampled_image_descriptor result = { viewed_surface->get_view(encoded_remap, decoded_remap), texture_upload_context::framebuffer_storage,
texptr->format_class(), scale, rsx::texture_dimension_extended::texture_dimension_2d, surface_is_rop_target, viewed_surface->samples() };
if (requires_clip)
{
calculate_sample_clip_parameters(result, position2i(0, 0), size2i(attr.width, attr.height), size2i(surface_width, surface_height));
}
return result;
}
texptr->memory_barrier(cmd, rsx::surface_access::transfer_read);

View File

@ -26,6 +26,7 @@ namespace gl
using image_view_type = gl::texture_view*;
using image_storage_type = gl::texture;
using texture_format = gl::texture::format;
using viewable_image_type = gl::viewable_image*;
};
class cached_texture_section : public rsx::cached_texture_section<gl::cached_texture_section, gl::texture_cache_traits>
@ -388,7 +389,7 @@ namespace gl
return vram_texture->get_view(remap_encoding, remap);
}
gl::texture* get_raw_texture() const
gl::viewable_image* get_raw_texture() const
{
return managed_texture.get();
}

View File

@ -28,6 +28,7 @@ namespace vk
using image_view_type = vk::image_view*;
using image_storage_type = vk::image;
using texture_format = VkFormat;
using viewable_image_type = vk::viewable_image*;
};
class cached_texture_section : public rsx::cached_texture_section<vk::cached_texture_section, vk::texture_cache_traits>
@ -153,7 +154,7 @@ namespace vk
return vram_texture->get_view(0xAAE4, rsx::default_remap_vector);
}
vk::image* get_raw_texture()
vk::viewable_image* get_raw_texture()
{
return managed_texture.get();
}