1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-25 12:12:50 +01:00

rsx/texture_cache: Add support for reusing dirty images if possible

- Avoids a silly situation where a texture is discarded and an identical copy is created immediately afterward.
  Unfortunately, allocating memory blocks is really slow, so avoid it as much as possible.
This commit is contained in:
kd-11 2021-01-26 23:46:32 +03:00 committed by kd-11
parent 0c10f47e85
commit bf66c36ba4
9 changed files with 158 additions and 70 deletions

View File

@ -1020,7 +1020,7 @@ namespace rsx
return nullptr;
}
section_storage_type* find_cached_texture(const address_range &range, u32 gcm_format, bool create_if_not_found, bool confirm_dimensions, u16 width = 0, u16 height = 0, u16 depth = 0, u16 mipmaps = 0)
section_storage_type* find_cached_texture(const address_range &range, const image_section_attributes_t& attr, bool create_if_not_found, bool confirm_dimensions, bool allow_dirty)
{
auto &block = m_storage.block_for(range);
@ -1036,9 +1036,9 @@ namespace rsx
{
if (tex.matches(range))
{
if (!tex.is_dirty())
if (allow_dirty || !tex.is_dirty())
{
if (!confirm_dimensions || tex.matches(gcm_format, width, height, depth, mipmaps))
if (!confirm_dimensions || tex.matches(attr.gcm_format, attr.width, attr.height, attr.depth, attr.mipmaps))
{
#ifndef TEXTURE_CACHE_DEBUG
return &tex;
@ -1073,7 +1073,7 @@ namespace rsx
{
auto &tex = *dimensions_mismatch;
rsx_log.warning("Cached object for address 0x%X was found, but it does not match stored parameters (width=%d vs %d; height=%d vs %d; depth=%d vs %d; mipmaps=%d vs %d)",
range.start, width, tex.get_width(), height, tex.get_height(), depth, tex.get_depth(), mipmaps, tex.get_mipmaps());
range.start, attr.width, tex.get_width(), attr.height, tex.get_height(), attr.depth, tex.get_depth(), attr.mipmaps, tex.get_mipmaps());
}
if (!create_if_not_found)
@ -1123,14 +1123,15 @@ namespace rsx
}
template <typename ...FlushArgs, typename ...Args>
void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, bool is_active_surface, u32 width, u32 height, u32 pitch, Args&&... extras)
void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, bool is_active_surface, u16 width, u16 height, u16 pitch, Args&&... extras)
{
AUDIT(g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer); // this method is only called when either WCB or WDB are enabled
std::lock_guard lock(m_cache_mutex);
// Find a cached section to use
section_storage_type& region = *find_cached_texture(rsx_range, RSX_GCM_FORMAT_IGNORED, true, true, width, height);
image_section_attributes_t search_desc = { .gcm_format = RSX_GCM_FORMAT_IGNORED, .width = width, .height = height };
section_storage_type& region = *find_cached_texture(rsx_range, search_desc, true, true, false);
// Prepare and initialize fbo region
if (region.exists() && region.get_context() != texture_upload_context::framebuffer_storage)
@ -1207,7 +1208,7 @@ namespace rsx
{
if (g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer)
{
auto* region_ptr = find_cached_texture(rsx_range, RSX_GCM_FORMAT_IGNORED, false, false);
auto* region_ptr = find_cached_texture(rsx_range, { .gcm_format = RSX_GCM_FORMAT_IGNORED }, false, false, false);
if (region_ptr && region_ptr->is_locked() && region_ptr->get_context() == texture_upload_context::framebuffer_storage)
{
ensure(region_ptr->get_protection() == utils::protection::no);
@ -1220,7 +1221,7 @@ namespace rsx
{
std::lock_guard lock(m_cache_mutex);
auto* region_ptr = find_cached_texture(memory_range, RSX_GCM_FORMAT_IGNORED, false, false);
auto* region_ptr = find_cached_texture(memory_range, { .gcm_format = RSX_GCM_FORMAT_IGNORED }, false, false, false);
if (region_ptr == nullptr)
{
AUDIT(m_flush_always_cache.find(memory_range) == m_flush_always_cache.end());
@ -2815,7 +2816,7 @@ namespace rsx
// Reset this object's synchronization status if it is locked
lock.upgrade();
if (const auto found = find_cached_texture(dst_subres.surface->get_memory_range(), RSX_GCM_FORMAT_IGNORED, false, false))
if (const auto found = find_cached_texture(dst_subres.surface->get_memory_range(), { .gcm_format = RSX_GCM_FORMAT_IGNORED }, false, false, false))
{
if (found->is_locked())
{

View File

@ -53,6 +53,7 @@ namespace rsx
u16 width;
u16 height;
u16 depth;
u16 mipmaps;
u16 pitch;
u16 slice_h;
u8 bpp;

View File

@ -618,25 +618,9 @@ namespace gl
height = utils::align(height, 4);
}
GLenum target;
GLenum internal_format = get_sized_internal_format(gcm_format);
auto format_class = rsx::classify_format(gcm_format);
switch (type)
{
case rsx::texture_dimension_extended::texture_dimension_1d:
target = GL_TEXTURE_1D;
break;
case rsx::texture_dimension_extended::texture_dimension_2d:
target = GL_TEXTURE_2D;
break;
case rsx::texture_dimension_extended::texture_dimension_3d:
target = GL_TEXTURE_3D;
break;
case rsx::texture_dimension_extended::texture_dimension_cubemap:
target = GL_TEXTURE_CUBE_MAP;
break;
}
const GLenum target = get_target(type);
const GLenum internal_format = get_sized_internal_format(gcm_format);
const auto format_class = rsx::classify_format(gcm_format);
return new gl::viewable_image(target, width, height, depth, mipmaps, internal_format, format_class);
}

View File

@ -15,6 +15,7 @@ namespace gl
class blitter;
extern GLenum get_sized_internal_format(u32);
extern GLenum get_target(rsx::texture_dimension_extended type);
extern void copy_typeless(texture*, const texture*, const coord3u&, const coord3u&);
extern blitter *g_hw_blitter;
@ -668,28 +669,68 @@ namespace gl
copy_transfer_regions_impl(cmd, dst->image(), region);
}
cached_texture_section* create_new_texture(gl::command_context&, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch,
cached_texture_section* create_new_texture(gl::command_context &cmd, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch,
u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, bool swizzled, rsx::texture_create_flags flags) override
{
auto image = gl::create_texture(gcm_format, width, height, depth, mipmaps, type);
const rsx::image_section_attributes_t search_desc = { .gcm_format = gcm_format, .width = width, .height = height, .depth = depth, .mipmaps = mipmaps };
const bool allow_dirty = (context != rsx::texture_upload_context::framebuffer_storage);
auto& cached = *find_cached_texture(rsx_range, search_desc, true, true, allow_dirty);
ensure(!cached.is_locked());
gl::viewable_image* image = nullptr;
if (cached.exists())
{
// Try and reuse this image data. It is very likely to match our needs
image = dynamic_cast<gl::viewable_image*>(cached.get_raw_texture());
ensure(image);
ensure(cached.is_managed());
if (cached.get_image_type() != type)
{
// Type mismatch, discard
cached.destroy();
image = nullptr;
}
else
{
cached.set_dimensions(width, height, depth, pitch);
cached.set_format(texture::format::rgba, texture::type::ubyte, true);
// Clear the image before use if it is not going to be uploaded wholly from CPU
if (context != rsx::texture_upload_context::shader_read)
{
if (image->format_class() == RSX_FORMAT_CLASS_COLOR)
{
g_hw_blitter->fast_clear_image(cmd, image, color4f{});
}
else
{
g_hw_blitter->fast_clear_image(cmd, image, 1.f, 0);
}
}
}
}
if (!image)
{
ensure(!cached.exists());
image = gl::create_texture(gcm_format, width, height, depth, mipmaps, type);
// Prepare section
cached.reset(rsx_range);
cached.set_image_type(type);
cached.set_gcm_format(gcm_format);
cached.create(width, height, depth, mipmaps, image, pitch, true);
}
cached.set_view_flags(flags);
cached.set_context(context);
cached.set_swizzled(swizzled);
cached.set_dirty(false);
const auto swizzle = get_component_mapping(gcm_format, flags);
image->set_native_component_layout(swizzle);
auto& cached = *find_cached_texture(rsx_range, gcm_format, true, true, width, height, depth, mipmaps);
ensure(!cached.is_locked());
// Prepare section
cached.reset(rsx_range);
cached.set_view_flags(flags);
cached.set_context(context);
cached.set_image_type(type);
cached.set_gcm_format(gcm_format);
cached.set_swizzled(swizzled);
cached.create(width, height, depth, mipmaps, image, pitch, true);
cached.set_dirty(false);
if (context != rsx::texture_upload_context::blit_engine_dst)
{
AUDIT(cached.get_memory_read_flags() != rsx::memory_read_flags::flush_always);
@ -737,7 +778,7 @@ namespace gl
cached_texture_section* create_nul_section(gl::command_context& /*cmd*/, const utils::address_range& rsx_range, bool /*memory_load*/) override
{
auto& cached = *find_cached_texture(rsx_range, RSX_GCM_FORMAT_IGNORED, true, false);
auto& cached = *find_cached_texture(rsx_range, { .gcm_format = RSX_GCM_FORMAT_IGNORED }, true, false, false);
ensure(!cached.is_locked());
// Prepare section

View File

@ -313,6 +313,12 @@ namespace vk
pack_unpack_swap_bytes = swap_bytes;
}
// Overwrites the pitch value stored for this section (rsx_pitch) with the given one.
// The Vulkan create_new_texture path calls this when it reuses an existing
// section's image instead of allocating a new one.
void set_rsx_pitch(u16 pitch)
{
// The section must not be locked when its pitch is changed.
// NOTE(review): ensure() is a project helper; presumably it aborts when the condition is false - confirm.
ensure(!is_locked());
rsx_pitch = pitch;
}
bool is_synchronized() const
{
return synchronized;
@ -761,11 +767,9 @@ namespace vk
cached_texture_section* create_new_texture(vk::command_buffer& cmd, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch,
u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, bool swizzled, rsx::texture_create_flags flags) override
{
const u16 section_depth = depth;
const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap;
const VkFormat vk_format = get_compatible_sampler_format(m_formats_support, gcm_format);
const VkImageAspectFlags aspect_flags = get_aspect_flags(vk_format);
const auto section_depth = depth;
// Define desirable attributes based on type
VkImageType image_type;
VkImageViewType image_view_type;
VkImageUsageFlags usage_flags = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
@ -801,30 +805,77 @@ namespace vk
fmt::throw_exception("Unreachable");
}
auto *image = new vk::viewable_image(*m_device, m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
image_type,
vk_format,
width, height, depth, mipmaps, layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, usage_flags, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0,
rsx::classify_format(gcm_format));
image->native_component_map = apply_component_mapping_flags(gcm_format, flags, rsx::default_remap_vector);
change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect_flags, 0, mipmaps, 0, layer });
cached_texture_section& region = *find_cached_texture(rsx_range, gcm_format, true, true, width, height, section_depth);
// Check what actually exists at that address
const rsx::image_section_attributes_t search_desc = { .gcm_format = gcm_format, .width = width, .height = height, .depth = section_depth, .mipmaps = mipmaps };
const bool allow_dirty = (context != rsx::texture_upload_context::framebuffer_storage);
cached_texture_section& region = *find_cached_texture(rsx_range, search_desc, true, true, allow_dirty);
ensure(!region.is_locked());
// New section, we must prepare it
region.reset(rsx_range);
region.set_context(context);
region.set_gcm_format(gcm_format);
region.set_image_type(type);
region.set_swizzled(swizzled);
vk::viewable_image* image = nullptr;
if (region.exists())
{
image = dynamic_cast<vk::viewable_image*>(region.get_raw_texture());
ensure(image);
ensure(region.is_managed());
region.create(width, height, section_depth, mipmaps, image, pitch, true, gcm_format);
if (region.get_image_type() != type || image->depth() != depth) // TODO
{
// Incompatible view/type
region.destroy();
image = nullptr;
}
else
{
// Reuse
region.set_rsx_pitch(pitch);
if (context != rsx::texture_upload_context::shader_read)
{
// Wipe memory
image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
VkImageSubresourceRange range{ image->aspect(), 0, image->mipmaps(), 0, image->layers() };
if (image->aspect() & VK_IMAGE_ASPECT_COLOR_BIT)
{
VkClearColorValue color = { {0.f, 0.f, 0.f, 1.f} };
vkCmdClearColorImage(cmd, image->value, image->current_layout, &color, 1, &range);
}
else
{
VkClearDepthStencilValue clear{ 1.f, 255 };
vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &range);
}
}
}
}
if (!image)
{
const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap;
const VkFormat vk_format = get_compatible_sampler_format(m_formats_support, gcm_format);
image = new vk::viewable_image(*m_device, m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
image_type,
vk_format,
width, height, depth, mipmaps, layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, usage_flags, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0,
rsx::classify_format(gcm_format));
// New section, we must prepare it
region.reset(rsx_range);
region.set_gcm_format(gcm_format);
region.set_image_type(type);
region.create(width, height, section_depth, mipmaps, image, pitch, true, gcm_format);
}
region.set_view_flags(flags);
region.set_context(context);
region.set_swizzled(swizzled);
region.set_dirty(false);
image->native_component_map = apply_component_mapping_flags(gcm_format, flags, rsx::default_remap_vector);
image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
// Its not necessary to lock blit dst textures as they are just reused as necessary
switch (context)
{
@ -850,7 +901,7 @@ namespace vk
cached_texture_section* create_nul_section(vk::command_buffer& cmd, const utils::address_range& rsx_range, bool memory_load) override
{
auto& region = *find_cached_texture(rsx_range, RSX_GCM_FORMAT_IGNORED, true, false);
auto& region = *find_cached_texture(rsx_range, { .gcm_format = RSX_GCM_FORMAT_IGNORED }, true, false, false);
ensure(!region.is_locked());
// Prepare section

View File

@ -146,6 +146,11 @@ namespace vk
return info.format;
}
// Returns the imageType field of this image's stored info structure.
// NOTE(review): info is presumably the VkImageCreateInfo the image was created with - confirm.
VkImageType image::type() const
{
return info.imageType;
}
VkImageAspectFlags image::aspect() const
{
return m_storage_aspect;

View File

@ -69,6 +69,7 @@ namespace vk
u32 layers() const;
u8 samples() const;
VkFormat format() const;
VkImageType type() const;
VkImageAspectFlags aspect() const;
rsx::format_class format_class() const;

View File

@ -465,6 +465,7 @@
<ClInclude Include="Emu\NP\rpcn_config.h" />
<ClInclude Include="Emu\RSX\Common\ShaderInterpreter.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache_helpers.h" />
<ClInclude Include="Emu\RSX\Common\texture_cache_types.h" />
<ClInclude Include="Emu\RSX\display.h" />
<ClInclude Include="Emu\RSX\GSFrameBase.h" />
<ClInclude Include="Emu\RSX\Overlays\overlay_fonts.h" />

View File

@ -1901,10 +1901,13 @@
<Filter>Emu\GPU\RSX</Filter>
</ClInclude>
<ClInclude Include="..\Utilities\dyn_lib.hpp">
<Filter>Header Files</Filter>
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\RSXDisAsm.h">
<Filter>Emu\GPU\RSX</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\texture_cache_types.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>