mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-23 03:02:53 +01:00
rsx/vk/gl: Enforce format matching for render target resources. Fall back to raw data copy if match fails
- Forces a bitcast of texture data if the input format cannot possibly be the same as the existing texture format - rsx: Other minor improvements to the texture cache: - Remove obsolete blit engine incompatibility warning. The texture will be re-uploaded if it is indeed incompatible - Implement warn_once and err_once to avoid spamming the log with systemic errors - Track mispredicted flushes - Reswizzle bitcasted texture data to native layout. TODO: Also needs reshuffle according to input remap vector
This commit is contained in:
parent
68b3229756
commit
20d4c09a1c
@ -353,8 +353,8 @@ namespace rsx
|
||||
|
||||
std::unordered_map<u32, framebuffer_memory_characteristics> m_cache_miss_statistics_table;
|
||||
|
||||
//Set when a hw blit engine incompatibility is detected
|
||||
bool blit_engine_incompatibility_warning_raised = false;
|
||||
//Map of messages to only emit once
|
||||
std::unordered_map<std::string, bool> m_once_only_messages_map;
|
||||
|
||||
//Set when a shader read-only texture data suddenly becomes contested, usually by fbo memory
|
||||
bool read_only_tex_invalidate = false;
|
||||
@ -371,6 +371,7 @@ namespace rsx
|
||||
//Other statistics
|
||||
std::atomic<u32> m_num_flush_requests = { 0 };
|
||||
std::atomic<u32> m_num_cache_misses = { 0 };
|
||||
std::atomic<u32> m_num_cache_mispredictions = { 0 };
|
||||
|
||||
/* Helpers */
|
||||
virtual void free_texture_section(section_storage_type&) = 0;
|
||||
@ -386,6 +387,7 @@ namespace rsx
|
||||
virtual image_view_type generate_cubemap_from_images(commandbuffer_type&, u32 gcm_format, u16 size, const std::array<image_resource_type, 6>& sources) = 0;
|
||||
virtual image_view_type generate_atlas_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy) = 0;
|
||||
virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0;
|
||||
virtual bool render_target_format_is_compatible(image_storage_type* tex, u32 gcm_format) = 0;
|
||||
|
||||
//Size in bytes of one cache block: 0x1000000 (1 << 24)
constexpr u32 get_block_size() const
{
	return 1u << 24;
}
|
||||
//Clears the low 24 bits of the address, leaving only the top 8 bits set
inline u32 get_block_address(u32 address) const
{
	constexpr u32 block_base_mask = 0xFF000000u;
	return address & block_base_mask;
}
|
||||
@ -395,6 +397,33 @@ namespace rsx
|
||||
m_cache_update_tag++;
|
||||
}
|
||||
|
||||
template <typename ...Args>
|
||||
void emit_once(bool error, const char* fmt, Args&&... params)
|
||||
{
|
||||
const std::string message = fmt::format(fmt, std::forward<Args>(params)...);
|
||||
if (m_once_only_messages_map.find(message) != m_once_only_messages_map.end())
|
||||
return;
|
||||
|
||||
if (error)
|
||||
logs::RSX.error(message.c_str());
|
||||
else
|
||||
logs::RSX.warning(message.c_str());
|
||||
|
||||
m_once_only_messages_map[message] = true;
|
||||
}
|
||||
|
||||
//Forwards to emit_once with the error flag set, so the message is logged at error severity
template <typename ...Args>
void err_once(const char* fmt, Args&&... params)
{
	constexpr bool as_error = true;
	emit_once(as_error, fmt, std::forward<Args>(params)...);
}
|
||||
|
||||
//Forwards to emit_once with the error flag cleared, so the message is logged at warning severity
template <typename ...Args>
void warn_once(const char* fmt, Args&&... params)
{
	constexpr bool as_error = false;
	emit_once(as_error, fmt, std::forward<Args>(params)...);
}
|
||||
|
||||
private:
|
||||
//Internal implementation methods and helpers
|
||||
|
||||
@ -1431,6 +1460,12 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
if (!requires_processing)
|
||||
{
|
||||
//Check if we need to do anything about the formats
|
||||
requires_processing = !render_target_format_is_compatible(texptr, format);
|
||||
}
|
||||
|
||||
if (requires_processing)
|
||||
{
|
||||
const auto w = rsx::apply_resolution_scale(internal_width, true);
|
||||
@ -1610,7 +1645,7 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
if ((!blit_engine_incompatibility_warning_raised && g_cfg.video.use_gpu_texture_scaling) || is_hw_blit_engine_compatible(format))
|
||||
if (is_hw_blit_engine_compatible(format))
|
||||
{
|
||||
//Find based on range instead
|
||||
auto overlapping_surfaces = find_texture_from_range(texaddr, tex_size);
|
||||
@ -1641,14 +1676,6 @@ namespace rsx
|
||||
break;
|
||||
}
|
||||
|
||||
if (!blit_engine_incompatibility_warning_raised && !is_hw_blit_engine_compatible(format))
|
||||
{
|
||||
LOG_ERROR(RSX, "Format 0x%X is not compatible with the hardware blit acceleration."
|
||||
" Consider turning off GPU texture scaling in the options to partially handle textures on your CPU.", format);
|
||||
blit_engine_incompatibility_warning_raised = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (surface->get_sampler_status() != rsx::texture_sampler_status::status_ready)
|
||||
set_up_remap_vector(*surface, tex.decoded_remap());
|
||||
|
||||
@ -2041,6 +2068,11 @@ namespace rsx
|
||||
cached_dest->reprotect(utils::protection::no);
|
||||
m_cache[get_block_address(cached_dest->get_section_base())].notify();
|
||||
}
|
||||
else if (cached_dest->is_synchronized())
|
||||
{
|
||||
//Prematurely read back
|
||||
m_num_cache_mispredictions++;
|
||||
}
|
||||
|
||||
cached_dest->touch();
|
||||
}
|
||||
@ -2100,6 +2132,7 @@ namespace rsx
|
||||
{
|
||||
m_num_flush_requests.store(0u);
|
||||
m_num_cache_misses.store(0u);
|
||||
m_num_cache_mispredictions.store(0u);
|
||||
}
|
||||
|
||||
virtual const u32 get_unreleased_textures_count() const
|
||||
@ -2117,6 +2150,11 @@ namespace rsx
|
||||
return m_num_flush_requests;
|
||||
}
|
||||
|
||||
//Returns the current value of the m_num_cache_mispredictions counter
virtual u32 get_num_cache_mispredictions() const
{
	const u32 mispredicted = m_num_cache_mispredictions.load();
	return mispredicted;
}
|
||||
|
||||
virtual f32 get_cache_miss_ratio() const
|
||||
{
|
||||
const auto num_flushes = m_num_flush_requests.load();
|
||||
|
@ -1376,13 +1376,14 @@ void GLGSRender::flip(int buffer)
|
||||
m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), "textures upload time: " + std::to_string(m_textures_upload_time) + "us");
|
||||
m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
|
||||
|
||||
auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
|
||||
auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
|
||||
auto num_flushes = m_gl_texture_cache.get_num_flush_requests();
|
||||
auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100);
|
||||
const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
|
||||
const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
|
||||
const auto num_flushes = m_gl_texture_cache.get_num_flush_requests();
|
||||
const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions();
|
||||
const auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100);
|
||||
m_text_printer.print_text(0, 108, m_frame->client_width(), m_frame->client_height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
|
||||
m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), "Texture memory: " + std::to_string(texture_memory_size) + "M");
|
||||
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), "Flush requests: " + std::to_string(num_flushes) + " (" + std::to_string(cache_miss_ratio) + "% hard faults)");
|
||||
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %d (%d%% hard faults, %d mispedictions)", num_flushes, cache_miss_ratio, num_mispredict));
|
||||
}
|
||||
|
||||
m_frame->flip(m_context);
|
||||
|
@ -654,10 +654,13 @@ namespace gl
|
||||
m_temporary_surfaces.resize(0);
|
||||
}
|
||||
|
||||
u32 create_temporary_subresource_impl(u32 src_id, GLenum sized_internal_fmt, GLenum dst_type, u16 x, u16 y, u16 width, u16 height, bool copy = true)
|
||||
u32 create_temporary_subresource_impl(u32 src_id, GLenum sized_internal_fmt, GLenum dst_type, u32 gcm_format, u16 x, u16 y, u16 width, u16 height, bool copy = true)
|
||||
{
|
||||
u32 dst_id = 0;
|
||||
|
||||
if (sized_internal_fmt == GL_NONE)
|
||||
sized_internal_fmt = gl::get_sized_internal_format(gcm_format);
|
||||
|
||||
GLenum ifmt;
|
||||
glBindTexture(GL_TEXTURE_2D, src_id);
|
||||
glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, (GLint*)&ifmt);
|
||||
@ -702,6 +705,13 @@ namespace gl
|
||||
}
|
||||
}
|
||||
|
||||
if (ifmt != sized_internal_fmt)
|
||||
{
|
||||
err_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, ifmt);
|
||||
//Apply base component map onto the new texture if a data cast has been done
|
||||
apply_component_mapping_flags(dst_type, gcm_format, rsx::texture_create_flags::default_component_order);
|
||||
}
|
||||
|
||||
return dst_id;
|
||||
}
|
||||
|
||||
@ -764,20 +774,18 @@ namespace gl
|
||||
|
||||
u32 create_temporary_subresource_view(void*&, u32* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) override
|
||||
{
|
||||
const GLenum ifmt = gl::get_sized_internal_format(gcm_format);
|
||||
return create_temporary_subresource_impl(*src, ifmt, GL_TEXTURE_2D, x, y, w, h);
|
||||
return create_temporary_subresource_impl(*src, GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h);
|
||||
}
|
||||
|
||||
u32 create_temporary_subresource_view(void*&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) override
|
||||
{
|
||||
if (auto as_rtt = dynamic_cast<gl::render_target*>(src))
|
||||
{
|
||||
return create_temporary_subresource_impl(src->id(), (GLenum)as_rtt->get_compatible_internal_format(), GL_TEXTURE_2D, x, y, w, h);
|
||||
return create_temporary_subresource_impl(src->id(), (GLenum)as_rtt->get_compatible_internal_format(), GL_TEXTURE_2D, gcm_format, x, y, w, h);
|
||||
}
|
||||
else
|
||||
{
|
||||
const GLenum ifmt = gl::get_sized_internal_format(gcm_format);
|
||||
return create_temporary_subresource_impl(src->id(), ifmt, GL_TEXTURE_2D, x, y, w, h);
|
||||
return create_temporary_subresource_impl(src->id(), GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h);
|
||||
}
|
||||
}
|
||||
|
||||
@ -820,8 +828,7 @@ namespace gl
|
||||
|
||||
u32 generate_atlas_from_images(void*&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy) override
|
||||
{
|
||||
const GLenum ifmt = gl::get_sized_internal_format(gcm_format);
|
||||
auto result = create_temporary_subresource_impl(sections_to_copy.front().src, ifmt, GL_TEXTURE_2D, 0, 0, width, height, false);
|
||||
auto result = create_temporary_subresource_impl(sections_to_copy.front().src, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, false);
|
||||
|
||||
for (const auto ®ion : sections_to_copy)
|
||||
{
|
||||
@ -970,6 +977,35 @@ namespace gl
|
||||
glTextureBarrierNV();
|
||||
}
|
||||
|
||||
//Checks the render target's compatible GL internal format against the formats accepted for gcm_format.
//Returns true on a match. GCM formats without an entry below are logged once and reported as
//incompatible. Throws if tex is not a gl::render_target, since that comparison is not implemented.
bool render_target_format_is_compatible(gl::texture* tex, u32 gcm_format) override
{
	const auto as_rtt = dynamic_cast<gl::render_target*>(tex);
	if (!as_rtt)
	{
		fmt::throw_exception("Format comparison for non-rendertargets is not implemented" HERE);
	}

	using native_format = gl::texture::internal_format;

	const auto ifmt = as_rtt->get_compatible_internal_format();

	//Both depth-stencil layouts are accepted for DEPTH24_D8 and for A8R8G8B8
	const bool holds_depth_stencil =
		(ifmt == native_format::depth24_stencil8 || ifmt == native_format::depth32f_stencil8);

	switch (gcm_format)
	{
	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
		return (ifmt == native_format::rgba16f);
	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
		return (ifmt == native_format::rgba32f);
	case CELL_GCM_TEXTURE_X32_FLOAT:
		return (ifmt == native_format::r32f);
	case CELL_GCM_TEXTURE_R5G6B5:
		return (ifmt == native_format::r5g6b5);
	case CELL_GCM_TEXTURE_DEPTH24_D8:
		return holds_depth_stencil;
	case CELL_GCM_TEXTURE_A8R8G8B8:
		return (ifmt == native_format::rgba8 || holds_depth_stencil);
	default:
		//TODO
		err_once("Format incompatibility detected, reporting failure to force data copy (GL_INTERNAL_FORMAT=0x%X, GCM_FORMAT=0x%X)", static_cast<u32>(ifmt), gcm_format);
		return false;
	}
}
|
||||
|
||||
public:
|
||||
|
||||
texture_cache() {}
|
||||
|
@ -3113,15 +3113,16 @@ void VKGSRender::flip(int buffer)
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us");
|
||||
|
||||
auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
|
||||
auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
|
||||
auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024);
|
||||
auto num_flushes = m_texture_cache.get_num_flush_requests();
|
||||
auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100);
|
||||
const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
|
||||
const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
|
||||
const auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024);
|
||||
const auto num_flushes = m_texture_cache.get_num_flush_requests();
|
||||
const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions();
|
||||
const auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100);
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 126, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Texture cache memory: " + std::to_string(texture_memory_size) + "M");
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), "Temporary texture memory: " + std::to_string(tmp_texture_memory_size) + "M");
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), "Flush requests: " + std::to_string(num_flushes) + " (" + std::to_string(cache_miss_ratio) + "% hard faults)");
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %d (%d%% hard faults, %d mispedictions)", num_flushes, cache_miss_ratio, num_mispredict));
|
||||
}
|
||||
|
||||
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, present_layout, subres);
|
||||
|
@ -597,8 +597,18 @@ namespace vk
|
||||
w, h, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, source->info.flags));
|
||||
|
||||
VkComponentMapping view_swizzle = source->native_component_map;
|
||||
if (dst_format != source->info.format)
|
||||
{
|
||||
//This is a data cast operation
|
||||
//Use native mapping for the new type
|
||||
//TODO: Also reapply the view swizzle
|
||||
const auto remap = get_component_mapping(gcm_format);
|
||||
view_swizzle = { remap[1], remap[2], remap[3], remap[0] };
|
||||
}
|
||||
|
||||
VkImageSubresourceRange view_range = { aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 };
|
||||
view.reset(new vk::image_view(*vk::get_current_renderer(), image->value, view_type, dst_format, source->native_component_map, view_range));
|
||||
view.reset(new vk::image_view(*vk::get_current_renderer(), image->value, view_type, dst_format, view_swizzle, view_range));
|
||||
|
||||
if (copy)
|
||||
{
|
||||
@ -983,6 +993,30 @@ namespace vk
|
||||
vk::insert_texture_barrier(cmd, tex);
|
||||
}
|
||||
|
||||
//Checks the image's VkFormat (tex->info.format) against the formats accepted for gcm_format.
//Returns true on a match. GCM formats without an entry below are logged once and reported as
//incompatible.
bool render_target_format_is_compatible(vk::image* tex, u32 gcm_format) override
{
	const auto native_format = tex->info.format;

	//Both depth-stencil layouts are accepted for DEPTH24_D8 and for A8R8G8B8
	const bool holds_depth_stencil =
		(native_format == VK_FORMAT_D24_UNORM_S8_UINT || native_format == VK_FORMAT_D32_SFLOAT_S8_UINT);

	switch (gcm_format)
	{
	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
		return (native_format == VK_FORMAT_R16G16B16A16_SFLOAT);
	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
		return (native_format == VK_FORMAT_R32G32B32A32_SFLOAT);
	case CELL_GCM_TEXTURE_X32_FLOAT:
		return (native_format == VK_FORMAT_R32_SFLOAT);
	case CELL_GCM_TEXTURE_R5G6B5:
		return (native_format == VK_FORMAT_R5G6B5_UNORM_PACK16);
	case CELL_GCM_TEXTURE_DEPTH24_D8:
		return holds_depth_stencil;
	case CELL_GCM_TEXTURE_A8R8G8B8:
		return (native_format == VK_FORMAT_B8G8R8A8_UNORM || holds_depth_stencil);
	default:
		//TODO
		err_once("Format incompatibility detected, reporting failure to force data copy (VK_FORMAT=0x%X, GCM_FORMAT=0x%X)", static_cast<u32>(native_format), gcm_format);
		return false;
	}
}
|
||||
|
||||
public:
|
||||
|
||||
struct vk_blit_op_result : public blit_op_result
|
||||
|
Loading…
Reference in New Issue
Block a user