mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 18:53:28 +01:00
rsx: Implement GPU copy elision through coordinate transform
- TODO: Some corner cases still exist where the format may not match after a cache merge.
This commit is contained in:
parent
5dc4e74c98
commit
b788e05396
@ -26,6 +26,7 @@ namespace rsx
|
|||||||
using image_view_type = typename traits::image_view_type;
|
using image_view_type = typename traits::image_view_type;
|
||||||
using image_storage_type = typename traits::image_storage_type;
|
using image_storage_type = typename traits::image_storage_type;
|
||||||
using texture_format = typename traits::texture_format;
|
using texture_format = typename traits::texture_format;
|
||||||
|
using viewable_image_type = typename traits::viewable_image_type;
|
||||||
|
|
||||||
using predictor_type = texture_cache_predictor<traits>;
|
using predictor_type = texture_cache_predictor<traits>;
|
||||||
using ranged_storage = rsx::ranged_storage<traits>;
|
using ranged_storage = rsx::ranged_storage<traits>;
|
||||||
@ -161,6 +162,11 @@ namespace rsx
|
|||||||
{
|
{
|
||||||
static_cast<image_section_attributes_t&>(*this) = attr;
|
static_cast<image_section_attributes_t&>(*this) = attr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns external_handle converted to the traits' viewable_image_type via static_cast.
// NOTE(review): presumably only valid when external_handle actually refers to a viewable image - confirm at call sites.
viewable_image_type as_viewable() const
|
||||||
|
{
|
||||||
|
return static_cast<viewable_image_type>(external_handle);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct sampled_image_descriptor : public sampled_image_descriptor_base
|
struct sampled_image_descriptor : public sampled_image_descriptor_base
|
||||||
@ -1904,6 +1910,17 @@ namespace rsx
|
|||||||
auto new_attr = attr;
|
auto new_attr = attr;
|
||||||
new_attr.gcm_format = gcm_format;
|
new_attr.gcm_format = gcm_format;
|
||||||
|
|
||||||
|
if (last->get_gcm_format() == attr.gcm_format && attr.edge_clamped)
|
||||||
|
{
|
||||||
|
// Clipped view
|
||||||
|
auto viewed_image = last->get_raw_texture();
|
||||||
|
sampled_image_descriptor result = { viewed_image->get_view(encoded_remap, remap), last->get_context(),
|
||||||
|
viewed_image->format_class(), scale, extended_dimension, false, viewed_image->samples() };
|
||||||
|
|
||||||
|
helpers::calculate_sample_clip_parameters(result, position2i(0, 0), size2i(attr.width, attr.height), size2i(normalized_width, last->get_height()));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
return { last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {},
|
return { last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {},
|
||||||
last->get_context(), classify_format(gcm_format), scale, extended_dimension, remap };
|
last->get_context(), classify_format(gcm_format), scale, extended_dimension, remap };
|
||||||
}
|
}
|
||||||
@ -1912,15 +1929,27 @@ namespace rsx
|
|||||||
auto result = helpers::merge_cache_resources<sampled_image_descriptor>(
|
auto result = helpers::merge_cache_resources<sampled_image_descriptor>(
|
||||||
cmd, overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, encoded_remap, remap, _pool);
|
cmd, overlapping_fbos, overlapping_locals, attr, scale, extended_dimension, encoded_remap, remap, _pool);
|
||||||
|
|
||||||
|
const bool is_simple_subresource_copy =
|
||||||
|
(result.external_subresource_desc.op == deferred_request_command::copy_image_static) ||
|
||||||
|
(result.external_subresource_desc.op == deferred_request_command::copy_image_dynamic);
|
||||||
|
|
||||||
|
if (is_simple_subresource_copy && attr.edge_clamped)
|
||||||
|
{
|
||||||
|
helpers::convert_image_copy_to_clip_descriptor(
|
||||||
|
result,
|
||||||
|
position2i(result.external_subresource_desc.x, result.external_subresource_desc.y),
|
||||||
|
size2i(result.external_subresource_desc.width, result.external_subresource_desc.width),
|
||||||
|
size2i(result.external_subresource_desc.external_handle->width(), result.external_subresource_desc.external_handle->height()),
|
||||||
|
encoded_remap, remap, false /*FIXME*/);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
if (options.skip_texture_merge)
|
if (options.skip_texture_merge)
|
||||||
{
|
{
|
||||||
switch (result.external_subresource_desc.op)
|
if (is_simple_subresource_copy)
|
||||||
{
|
{
|
||||||
case deferred_request_command::copy_image_static:
|
|
||||||
case deferred_request_command::copy_image_dynamic:
|
|
||||||
return result;
|
return result;
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
@ -2146,12 +2175,14 @@ namespace rsx
|
|||||||
attributes.depth = 1;
|
attributes.depth = 1;
|
||||||
attributes.height = 1;
|
attributes.height = 1;
|
||||||
attributes.slice_h = 1;
|
attributes.slice_h = 1;
|
||||||
|
attributes.edge_clamped = (tex.wrap_s() == rsx::texture_wrap_mode::clamp_to_edge);
|
||||||
scale.height = scale.depth = 0.f;
|
scale.height = scale.depth = 0.f;
|
||||||
subsurface_count = 1;
|
subsurface_count = 1;
|
||||||
required_surface_height = 1;
|
required_surface_height = 1;
|
||||||
break;
|
break;
|
||||||
case rsx::texture_dimension_extended::texture_dimension_2d:
|
case rsx::texture_dimension_extended::texture_dimension_2d:
|
||||||
attributes.depth = 1;
|
attributes.depth = 1;
|
||||||
|
attributes.edge_clamped = (tex.wrap_s() == rsx::texture_wrap_mode::clamp_to_edge && tex.wrap_t() == rsx::texture_wrap_mode::clamp_to_edge);
|
||||||
scale.depth = 0.f;
|
scale.depth = 0.f;
|
||||||
subsurface_count = options.is_compressed_format? 1 : tex.get_exact_mipmap_count();
|
subsurface_count = options.is_compressed_format? 1 : tex.get_exact_mipmap_count();
|
||||||
attributes.slice_h = required_surface_height = attributes.height;
|
attributes.slice_h = required_surface_height = attributes.height;
|
||||||
|
@ -58,6 +58,7 @@ namespace rsx
|
|||||||
u16 slice_h;
|
u16 slice_h;
|
||||||
u8 bpp;
|
u8 bpp;
|
||||||
bool swizzled;
|
bool swizzled;
|
||||||
|
bool edge_clamped;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct blit_op_result
|
struct blit_op_result
|
||||||
@ -501,6 +502,47 @@ namespace rsx
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Adjusts desc.texcoord_xform so that sampling is restricted to a sub-rectangle of the image.
// - offset: origin of the sub-rectangle, in the same units as actual_dimensions.
// - desired_dimensions: size of the sub-rectangle to expose.
// - actual_dimensions: size of the image being sampled; used as the divisor for all ratios.
// The existing scale is multiplied and the existing bias is added to, while the clamp
// bounds are overwritten and clamping is enabled.
// NOTE(review): no zero check is done on actual_dimensions here; callers presumably guarantee non-zero sizes - confirm.
template <typename sampled_image_descriptor>
|
||||||
|
void calculate_sample_clip_parameters(
|
||||||
|
sampled_image_descriptor& desc,
|
||||||
|
const position2i& offset,
|
||||||
|
const size2i& desired_dimensions,
|
||||||
|
const size2i& actual_dimensions)
|
||||||
|
{
|
||||||
|
// Ratios of the requested region to the full image; the f32 cast forces floating-point division.
const f32 scale_x = f32(desired_dimensions.width) / actual_dimensions.width;
|
||||||
|
const f32 scale_y = f32(desired_dimensions.height) / actual_dimensions.height;
|
||||||
|
const f32 offset_x = f32(offset.x) / actual_dimensions.width;
|
||||||
|
const f32 offset_y = f32(offset.y) / actual_dimensions.height;
|
||||||
|
|
||||||
|
// Compose with whatever transform the descriptor already carries.
desc.texcoord_xform.scale[0] *= scale_x;
|
||||||
|
desc.texcoord_xform.scale[1] *= scale_y;
|
||||||
|
desc.texcoord_xform.bias[0] += offset_x;
|
||||||
|
desc.texcoord_xform.bias[1] += offset_y;
|
||||||
|
// Clamp window spans [offset, offset + scale] on each axis.
desc.texcoord_xform.clamp_min[0] = offset_x;
|
||||||
|
desc.texcoord_xform.clamp_min[1] = offset_y;
|
||||||
|
desc.texcoord_xform.clamp_max[0] = offset_x + scale_x;
|
||||||
|
desc.texcoord_xform.clamp_max[1] = offset_y + scale_y;
|
||||||
|
desc.texcoord_xform.clamp = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rewrites a descriptor that carries a deferred copy request (external_subresource_desc)
// into one that samples the source image directly through a view, then applies clip
// parameters for the requested region via calculate_sample_clip_parameters.
// - offset / desired_dimensions / actual_dimensions: forwarded unchanged to calculate_sample_clip_parameters.
// - encoded_remap / decoded_remap: passed to get_view() when creating the view.
// - cyclic_reference: stored into desc.is_cyclic_reference.
template <typename sampled_image_descriptor>
|
||||||
|
void convert_image_copy_to_clip_descriptor(
|
||||||
|
sampled_image_descriptor& desc,
|
||||||
|
const position2i& offset,
|
||||||
|
const size2i& desired_dimensions,
|
||||||
|
const size2i& actual_dimensions,
|
||||||
|
u32 encoded_remap,
|
||||||
|
const texture_channel_remap_t& decoded_remap,
|
||||||
|
bool cyclic_reference)
|
||||||
|
{
|
||||||
|
// Read everything needed from external_subresource_desc first; it is reset below.
desc.image_handle = desc.external_subresource_desc.as_viewable()->get_view(encoded_remap, decoded_remap);
|
||||||
|
desc.is_cyclic_reference = cyclic_reference;
|
||||||
|
desc.samples = desc.external_subresource_desc.external_handle->samples();
|
||||||
|
// Reset the deferred request to its default-initialized state.
desc.external_subresource_desc = {};
|
||||||
|
|
||||||
|
calculate_sample_clip_parameters(desc, offset, desired_dimensions, actual_dimensions);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename sampled_image_descriptor, typename commandbuffer_type, typename render_target_type>
|
template <typename sampled_image_descriptor, typename commandbuffer_type, typename render_target_type>
|
||||||
sampled_image_descriptor process_framebuffer_resource_fast(commandbuffer_type& cmd,
|
sampled_image_descriptor process_framebuffer_resource_fast(commandbuffer_type& cmd,
|
||||||
render_target_type texptr,
|
render_target_type texptr,
|
||||||
@ -557,22 +599,30 @@ namespace rsx
|
|||||||
ensure(attr.height == 1);
|
ensure(attr.height == 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool requires_processing = false;
|
// A GPU operation must be performed on the data before sampling. Implies transfer_read access.
|
||||||
|
bool requires_processing = force_convert;
|
||||||
|
// A GPU clip operation may be performed by combining texture coordinate scaling with a clamp.
|
||||||
|
bool requires_clip = false;
|
||||||
|
|
||||||
rsx::surface_access access_type = rsx::surface_access::shader_read;
|
rsx::surface_access access_type = rsx::surface_access::shader_read;
|
||||||
|
|
||||||
if (attr.width != surface_width || attr.height != surface_height || force_convert)
|
if (attr.width != surface_width || attr.height != surface_height)
|
||||||
{
|
{
|
||||||
// A GPU operation must be performed on the data before sampling. Implies transfer_read access
|
// If we can get away with clip only, do it
|
||||||
requires_processing = true;
|
if (attr.edge_clamped)
|
||||||
|
requires_clip = true;
|
||||||
|
else
|
||||||
|
requires_processing = true;
|
||||||
}
|
}
|
||||||
else if (surface_is_rop_target && g_cfg.video.strict_rendering_mode)
|
|
||||||
|
if (surface_is_rop_target && g_cfg.video.strict_rendering_mode)
|
||||||
{
|
{
|
||||||
// Framebuffer feedback avoidance. For MSAA, we do not need to make copies; just use the resolve target
|
// Framebuffer feedback avoidance. For MSAA, we do not need to make copies; just use the resolve target
|
||||||
if (texptr->samples() == 1)
|
if (texptr->samples() == 1)
|
||||||
{
|
{
|
||||||
requires_processing = true;
|
requires_processing = true;
|
||||||
}
|
}
|
||||||
else
|
else if (!requires_processing)
|
||||||
{
|
{
|
||||||
// Select resolve target instead of MSAA image
|
// Select resolve target instead of MSAA image
|
||||||
access_type = rsx::surface_access::transfer_read;
|
access_type = rsx::surface_access::transfer_read;
|
||||||
@ -592,8 +642,15 @@ namespace rsx
|
|||||||
|
|
||||||
texptr->memory_barrier(cmd, access_type);
|
texptr->memory_barrier(cmd, access_type);
|
||||||
auto viewed_surface = texptr->get_surface(access_type);
|
auto viewed_surface = texptr->get_surface(access_type);
|
||||||
return { viewed_surface->get_view(encoded_remap, decoded_remap), texture_upload_context::framebuffer_storage,
|
sampled_image_descriptor result = { viewed_surface->get_view(encoded_remap, decoded_remap), texture_upload_context::framebuffer_storage,
|
||||||
texptr->format_class(), scale, rsx::texture_dimension_extended::texture_dimension_2d, surface_is_rop_target, viewed_surface->samples() };
|
texptr->format_class(), scale, rsx::texture_dimension_extended::texture_dimension_2d, surface_is_rop_target, viewed_surface->samples() };
|
||||||
|
|
||||||
|
if (requires_clip)
|
||||||
|
{
|
||||||
|
calculate_sample_clip_parameters(result, position2i(0, 0), size2i(attr.width, attr.height), size2i(surface_width, surface_height));
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
texptr->memory_barrier(cmd, rsx::surface_access::transfer_read);
|
texptr->memory_barrier(cmd, rsx::surface_access::transfer_read);
|
||||||
|
@ -26,6 +26,7 @@ namespace gl
|
|||||||
using image_view_type = gl::texture_view*;
|
using image_view_type = gl::texture_view*;
|
||||||
using image_storage_type = gl::texture;
|
using image_storage_type = gl::texture;
|
||||||
using texture_format = gl::texture::format;
|
using texture_format = gl::texture::format;
|
||||||
|
using viewable_image_type = gl::viewable_image*;
|
||||||
};
|
};
|
||||||
|
|
||||||
class cached_texture_section : public rsx::cached_texture_section<gl::cached_texture_section, gl::texture_cache_traits>
|
class cached_texture_section : public rsx::cached_texture_section<gl::cached_texture_section, gl::texture_cache_traits>
|
||||||
@ -388,7 +389,7 @@ namespace gl
|
|||||||
return vram_texture->get_view(remap_encoding, remap);
|
return vram_texture->get_view(remap_encoding, remap);
|
||||||
}
|
}
|
||||||
|
|
||||||
gl::texture* get_raw_texture() const
|
gl::viewable_image* get_raw_texture() const
|
||||||
{
|
{
|
||||||
return managed_texture.get();
|
return managed_texture.get();
|
||||||
}
|
}
|
||||||
|
@ -28,6 +28,7 @@ namespace vk
|
|||||||
using image_view_type = vk::image_view*;
|
using image_view_type = vk::image_view*;
|
||||||
using image_storage_type = vk::image;
|
using image_storage_type = vk::image;
|
||||||
using texture_format = VkFormat;
|
using texture_format = VkFormat;
|
||||||
|
using viewable_image_type = vk::viewable_image*;
|
||||||
};
|
};
|
||||||
|
|
||||||
class cached_texture_section : public rsx::cached_texture_section<vk::cached_texture_section, vk::texture_cache_traits>
|
class cached_texture_section : public rsx::cached_texture_section<vk::cached_texture_section, vk::texture_cache_traits>
|
||||||
@ -153,7 +154,7 @@ namespace vk
|
|||||||
return vram_texture->get_view(0xAAE4, rsx::default_remap_vector);
|
return vram_texture->get_view(0xAAE4, rsx::default_remap_vector);
|
||||||
}
|
}
|
||||||
|
|
||||||
vk::image* get_raw_texture()
|
vk::viewable_image* get_raw_texture()
|
||||||
{
|
{
|
||||||
return managed_texture.get();
|
return managed_texture.get();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user