1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 18:53:28 +01:00

rsx: fbo fixes 2.5

- Implement flush-always behaviour to partially fix readback from a currently bound fbo
  - Without this, only the first read is correct, as more draws are added the results become 'wrong'
  - Fixes WCB and cpublit behaviour
- Synchronize blit_dst surfaces to avoid data loss when gpu texture scaling is used
  - It's still faster in such cases to disable gpu texture scaling but some types cannot be disabled without force cpu blit (e.g. framebuffer transfers)
- Memory management tuning
  - rsx: on-demand texture cache rescanning for unprotected sections
  - rsx: Only framebuffer resources are upscaled
  - Do not resize regular blit engine resources
  - Lazy initialize readback buffer when using opengl
  -- These measures should help minimize vram usage
This commit is contained in:
kd-11 2018-02-03 11:37:42 +03:00
parent 02e571adbc
commit 89c548b5d3
11 changed files with 282 additions and 63 deletions

View File

@ -806,9 +806,17 @@ std::string FragmentProgramDecompiler::Decompile()
if (m_loop_count) AddFlowOp("break");
else LOG_ERROR(RSX, "BRK opcode found outside of a loop");
break;
case RSX_FP_OPCODE_CAL: LOG_ERROR(RSX, "Unimplemented SIP instruction: CAL"); break;
case RSX_FP_OPCODE_FENCT: forced_unit = FORCE_SCT; break;
case RSX_FP_OPCODE_FENCB: forced_unit = FORCE_SCB; break;
case RSX_FP_OPCODE_CAL:
LOG_ERROR(RSX, "Unimplemented SIP instruction: CAL");
break;
case RSX_FP_OPCODE_FENCT:
AddCode("//FENCT");
forced_unit = FORCE_SCT;
break;
case RSX_FP_OPCODE_FENCB:
AddCode("//FENCB");
forced_unit = FORCE_SCB;
break;
case RSX_FP_OPCODE_IFE:
AddCode("if($cond)");
if (src2.end_offset != src1.else_offset)
@ -849,7 +857,9 @@ std::string FragmentProgramDecompiler::Decompile()
m_code_level++;
}
break;
case RSX_FP_OPCODE_RET: AddFlowOp("return"); break;
case RSX_FP_OPCODE_RET:
AddFlowOp("return");
break;
default:
return false;

View File

@ -23,6 +23,12 @@ namespace rsx
status_ready = 1
};
//Readback policy attached to a cached section (stored per section as readback_behaviour)
enum memory_read_flags
{
//Section address is tracked in m_flush_always_cache; do_update() re-protects such sections when the cache tag changes
flush_always = 0,
//Default policy for a section; setting it removes the address from m_flush_always_cache
flush_once = 1
};
struct cached_texture_section : public rsx::buffered_section
{
u16 width;
@ -37,6 +43,7 @@ namespace rsx
u64 cache_tag = 0;
memory_read_flags readback_behaviour = memory_read_flags::flush_once;
rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order;
rsx::texture_upload_context context = rsx::texture_upload_context::shader_read;
rsx::texture_dimension_extended image_type = rsx::texture_dimension_extended::texture_dimension_2d;
@ -97,6 +104,11 @@ namespace rsx
gcm_format = format;
}
//Stores the readback policy for this section in readback_behaviour
//Only the local field is updated here; no other state is touched
void set_memory_read_flags(memory_read_flags flags)
{
readback_behaviour = flags;
}
u16 get_width() const
{
return width;
@ -127,6 +139,11 @@ namespace rsx
return gcm_format;
}
//Returns the readback policy currently stored in readback_behaviour
memory_read_flags get_memory_read_flags() const
{
return readback_behaviour;
}
rsx::texture_sampler_status get_sampler_status() const
{
return sampler_status;
@ -159,6 +176,12 @@ namespace rsx
valid_count++;
}
//Parameterless notify: increments valid_count by one
//The verify() checks that the counter was not negative before the increment
void notify()
{
verify(HERE), valid_count >= 0;
valid_count++;
}
void add(section_storage_type& section, u32 addr, u32 data_size)
{
data.push_back(std::move(section));
@ -277,6 +300,10 @@ namespace rsx
//Set when a shader read-only texture data suddenly becomes contested, usually by fbo memory
bool read_only_tex_invalidate = false;
//Store of all objects in a flush_always state. A lazy readback is attempted every draw call
std::unordered_map<u32, u32> m_flush_always_cache;
u64 m_flush_always_update_timestamp = 0;
//Memory usage
const s32 m_max_zombie_objects = 64; //Limit on how many texture objects to keep around for reuse after they are invalidated
std::atomic<s32> m_unreleased_texture_objects = { 0 }; //Number of invalidated objects not yet freed from memory
@ -697,8 +724,13 @@ namespace rsx
{
if (!confirm_dimensions || tex.matches(rsx_address, width, height, depth, mipmaps))
{
if (!tex.is_locked() && tex.get_context() == texture_upload_context::framebuffer_storage)
range_data.notify(rsx_address, rsx_size);
if (!tex.is_locked())
{
//Data is valid from cache pov but region has been unlocked and flushed
if (tex.get_context() == texture_upload_context::framebuffer_storage ||
tex.get_context() == texture_upload_context::blit_engine_dst)
range_data.notify();
}
return tex;
}
@ -803,6 +835,27 @@ namespace rsx
region.set_sampler_status(rsx::texture_sampler_status::status_uninitialized);
region.set_image_type(rsx::texture_dimension_extended::texture_dimension_2d);
update_cache_tag();
region.set_memory_read_flags(memory_read_flags::flush_always);
m_flush_always_cache[memory_address] = memory_size;
}
//Updates the readback policy of the cached section at memory_address
//- Any policy other than flush_always first drops the address from m_flush_always_cache
//- The section itself is only touched if it exists and is framebuffer storage
//- flush_always additionally records address -> size in m_flush_always_cache
void set_memory_read_flags(u32 memory_address, u32 memory_size, memory_read_flags flags)
{
	writer_lock lock(m_cache_mutex);

	const bool wants_flush_always = (flags == memory_read_flags::flush_always);
	if (!wants_flush_always)
	{
		//Stop tracking this address regardless of whether a matching section is found below
		m_flush_always_cache.erase(memory_address);
	}

	section_storage_type& target = find_cached_texture(memory_address, memory_size, false);
	const bool is_framebuffer_section = target.exists() && target.get_context() == texture_upload_context::framebuffer_storage;
	if (!is_framebuffer_section)
	{
		return;
	}

	if (wants_flush_always)
	{
		m_flush_always_cache[memory_address] = memory_size;
	}

	target.set_memory_read_flags(flags);
}
template <typename ...Args>
@ -1816,6 +1869,12 @@ namespace rsx
if (cached_dest)
{
if (!cached_dest->is_locked())
{
cached_dest->reprotect(utils::protection::no);
m_cache[get_block_address(cached_dest->get_section_base())].notify();
}
//Prep surface
auto channel_order = src_is_render_target ? rsx::texture_create_flags::native_component_order :
dst_is_argb8 ? rsx::texture_create_flags::default_component_order :
@ -1869,6 +1928,30 @@ namespace rsx
return true;
}
//Re-protects every section tracked in m_flush_always_cache
//Work is only done when the cache tag differs from the tag seen on the previous pass
void do_update()
{
	//No tracked sections, nothing to do
	if (!m_flush_always_cache.size())
		return;

	//Cache tag unchanged since the last pass
	if (m_cache_update_tag.load(std::memory_order_consume) == m_flush_always_update_timestamp)
		return;

	writer_lock lock(m_cache_mutex);

	for (const auto &entry : m_flush_always_cache)
	{
		auto& section = find_cached_texture(entry.first, entry.second);

		//Already set to no-access, leave it alone
		if (section.get_protection() == utils::protection::no)
			continue;

		auto &range = m_cache[get_block_address(entry.first)];
		section.reprotect(utils::protection::no);
		range.notify();
	}

	//Remember the tag this pass was performed against
	m_flush_always_update_timestamp = m_cache_update_tag.load(std::memory_order_consume);
}
virtual const u32 get_unreleased_textures_count() const
{
return m_unreleased_texture_objects;

View File

@ -1451,10 +1451,10 @@ void GLGSRender::do_local_task(bool /*idle*/)
{
m_frame->clear_wm_events();
std::lock_guard<std::mutex> lock(queue_guard);
if (!work_queue.empty())
{
std::lock_guard<std::mutex> lock(queue_guard);
work_queue.remove_if([](work_item &q) { return q.received; });
for (work_item& q : work_queue)
@ -1470,6 +1470,12 @@ void GLGSRender::do_local_task(bool /*idle*/)
q.cv.notify_one();
}
}
else if (!in_begin_end)
{
//This will re-engage locks and break the texture cache if another thread is waiting in access violation handler!
//Only call when there are no waiters
m_gl_texture_cache.do_update();
}
if (m_overlay_cleanup_requests.size())
{

View File

@ -35,8 +35,10 @@ namespace gl
switch (type)
{
case GL_DEBUG_TYPE_ERROR:
{
LOG_ERROR(RSX, "%s", message);
return;
}
default:
LOG_WARNING(RSX, "%s", message);
return;
@ -372,9 +374,9 @@ namespace gl
glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_MAX_LEVEL, m_max_level);
if (m_pixels)
if (m_pixels && m_generate_mipmap)
{
glTexParameteri((GLenum)m_parent->get_target(), GL_GENERATE_MIPMAP, m_generate_mipmap ? GL_TRUE : GL_FALSE);
glGenerateMipmap((GLenum)m_parent->get_target());
}
glTexParameteri((GLenum)m_parent->get_target(), GL_TEXTURE_WRAP_S, (GLint)m_wrap_s);

View File

@ -210,6 +210,7 @@ namespace gl
{
GLsync m_value = nullptr;
GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT;
bool signaled = false;
public:
@ -245,11 +246,16 @@ namespace gl
{
verify(HERE), m_value != nullptr;
if (signaled)
return true;
if (flags)
{
GLenum err = glClientWaitSync(m_value, flags, 0);
flags = 0;
return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
if (!(err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED))
return false;
}
else
{
@ -257,8 +263,12 @@ namespace gl
GLint tmp;
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
return (status == GL_SIGNALED);
if (status != GL_SIGNALED)
return false;
}
signaled = true;
return true;
}
@ -266,44 +276,49 @@ namespace gl
{
verify(HERE), m_value != nullptr;
GLenum err = GL_WAIT_FAILED;
bool done = false;
while (!done)
if (signaled == GL_FALSE)
{
if (flags)
{
err = glClientWaitSync(m_value, flags, 0);
flags = 0;
GLenum err = GL_WAIT_FAILED;
bool done = false;
switch (err)
while (!done)
{
if (flags)
{
default:
LOG_ERROR(RSX, "gl::fence sync returned unknown error 0x%X", err);
case GL_ALREADY_SIGNALED:
case GL_CONDITION_SATISFIED:
done = true;
break;
case GL_TIMEOUT_EXPIRED:
continue;
err = glClientWaitSync(m_value, flags, 0);
flags = 0;
switch (err)
{
default:
LOG_ERROR(RSX, "gl::fence sync returned unknown error 0x%X", err);
case GL_ALREADY_SIGNALED:
case GL_CONDITION_SATISFIED:
done = true;
break;
case GL_TIMEOUT_EXPIRED:
continue;
}
}
else
{
GLint status = GL_UNSIGNALED;
GLint tmp;
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
if (status == GL_SIGNALED)
break;
}
}
else
{
GLint status = GL_UNSIGNALED;
GLint tmp;
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
if (status == GL_SIGNALED)
break;
}
signaled = (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
}
glDeleteSync(m_value);
m_value = nullptr;
return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
return signaled;
}
};

View File

@ -337,6 +337,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
old_format_found = true;
}
m_gl_texture_cache.set_memory_read_flags(m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height);
}
@ -361,6 +362,15 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
if (std::get<0>(m_rtts.m_bound_depth_stencil))
{
if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer)
{
auto bpp = m_depth_surface_info.pitch / m_depth_surface_info.width;
auto old_format = (bpp == 2) ? gl::texture::format::depth : gl::texture::format::depth_stencil;
m_gl_texture_cache.set_memory_read_flags(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(old_format, m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height);
}
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
u8 texel_size = 2;

View File

@ -90,7 +90,7 @@ namespace gl
{
private:
fence m_fence;
u32 pbo_id = 0;
//u32 pbo_id = 0;
u32 pbo_size = 0;
u32 vram_texture = 0;
@ -173,8 +173,15 @@ namespace gl
void init_buffer()
{
const f32 resolution_scale = (context == rsx::texture_upload_context::framebuffer_storage? rsx::get_resolution_scale() : 1.f);
const u32 real_buffer_size = (resolution_scale <= 1.f) ? cpu_address_range : (u32)(resolution_scale * resolution_scale * cpu_address_range);
const u32 buffer_size = align(real_buffer_size, 4096);
if (pbo_id)
{
if (pbo_size >= buffer_size)
return;
glDeleteBuffers(1, &pbo_id);
pbo_id = 0;
pbo_size = 0;
@ -182,10 +189,6 @@ namespace gl
glGenBuffers(1, &pbo_id);
const f32 resolution_scale = rsx::get_resolution_scale();
const u32 real_buffer_size = (resolution_scale < 1.f)? cpu_address_range: (u32)(resolution_scale * resolution_scale * cpu_address_range);
const u32 buffer_size = align(real_buffer_size, 4096);
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
glBufferStorage(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_MAP_READ_BIT);
@ -193,15 +196,12 @@ namespace gl
}
public:
u32 pbo_id = 0;
void reset(u32 base, u32 size, bool flushable=false)
{
rsx::protection_policy policy = g_cfg.video.strict_rendering_mode ? rsx::protection_policy::protect_policy_full_range : rsx::protection_policy::protect_policy_conservative;
rsx::buffered_section::reset(base, size, policy);
if (flushable)
init_buffer();
flushed = false;
copied = false;
is_depth = false;
@ -252,6 +252,12 @@ namespace gl
real_pitch = 0;
}
//Makes sure this section has a readback buffer by calling init_buffer()
void make_flushable()
{
//Check left disabled; init_buffer() itself handles the case where pbo_id is already set
//verify(HERE), pbo_id == 0;
init_buffer();
}
void set_dimensions(u32 width, u32 height, u32 /*depth*/, u32 pitch)
{
this->width = width;
@ -300,8 +306,14 @@ namespace gl
return;
}
if (!pbo_id)
{
init_buffer();
}
u32 target_texture = vram_texture;
if (real_pitch != rsx_pitch || rsx::get_resolution_scale_percent() != 100)
if ((rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) ||
(real_pitch != rsx_pitch))
{
u32 real_width = width;
u32 real_height = height;
@ -392,7 +404,7 @@ namespace gl
}
}
if (!recovered && rsx::get_resolution_scale_percent() != 100)
if (!recovered && rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage)
{
LOG_ERROR(RSX, "Texture readback failed. Disable resolution scaling to get the 'Write Color Buffers' option to work properly");
}
@ -449,6 +461,13 @@ namespace gl
//throw if map failed since we'll segfault anyway
verify(HERE), data != nullptr;
bool require_manual_shuffle = false;
if (pack_unpack_swap_bytes)
{
if (type == gl::texture::type::sbyte || type == gl::texture::type::ubyte)
require_manual_shuffle = true;
}
if (real_pitch >= rsx_pitch || scaled_texture != 0)
{
memcpy(dst, data, cpu_address_range);
@ -461,12 +480,25 @@ namespace gl
rsx::scale_image_nearest(dst, const_cast<const void*>(data), width, height, rsx_pitch, real_pitch, pixel_size, samples_u, samples_v);
}
if (require_manual_shuffle)
{
//byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty
rsx::shuffle_texel_data_wzyx<u8>(dst, rsx_pitch, width, height);
}
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
return true;
}
//Clears the flushed and copied flags, then applies the requested protection via protect()
void reprotect(utils::protection prot)
{
flushed = false;
copied = false;
protect(prot);
}
void destroy()
{
if (!locked && pbo_id == 0 && vram_texture == 0 && m_fence.is_empty())
@ -777,13 +809,31 @@ namespace gl
cached.set_sampler_status(rsx::texture_sampler_status::status_uninitialized);
cached.set_image_type(type);
//Its not necessary to lock blit dst textures as they are just reused as necessary
if (context != rsx::texture_upload_context::blit_engine_dst || g_cfg.video.strict_rendering_mode)
if (context != rsx::texture_upload_context::blit_engine_dst)
{
cached.protect(utils::protection::ro);
update_cache_tag();
}
else
{
//TODO: More tests on byte order
//ARGB8+native+unswizzled is confirmed with Dark Souls II character preview
if (gcm_format == CELL_GCM_TEXTURE_A8R8G8B8)
{
bool bgra = (flags == rsx::texture_create_flags::native_component_order);
cached.set_format(bgra? gl::texture::format::bgra : gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, false);
}
else
{
cached.set_format(gl::texture::format::rgb, gl::texture::type::ushort_5_6_5, true);
}
cached.make_flushable();
cached.set_dimensions(width, height, depth, (rsx_size / height));
cached.protect(utils::protection::no);
no_access_range = cached.get_min_max(no_access_range);
}
update_cache_tag();
return &cached;
}

View File

@ -2032,6 +2032,12 @@ void VKGSRender::do_local_task(bool /*idle*/)
m_flush_requests.clear_pending_flag();
m_flush_requests.consumer_wait();
}
else if (!in_begin_end)
{
//This will re-engage locks and break the texture cache if another thread is waiting in access violation handler!
//Only call when there are no waiters
m_texture_cache.do_update();
}
if (m_last_flushable_cb > -1)
{
@ -2633,6 +2639,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (old_format == VK_FORMAT_UNDEFINED)
old_format = vk::get_compatible_surface_format(m_surface_info[i].color_format).first;
m_texture_cache.set_memory_read_flags(m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, m_surface_info[i].address, m_surface_info[i].pitch * m_surface_info[i].height,
*m_current_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
}
@ -2643,10 +2650,21 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
m_surface_info[i].color_format = color_fmt;
}
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
m_depth_surface_info.width = clip_width;
m_depth_surface_info.height = clip_height;
m_depth_surface_info.depth_format = depth_fmt;
//Process depth surface as well
{
if (m_depth_surface_info.pitch && g_cfg.video.write_depth_buffer)
{
auto old_format = vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, m_depth_surface_info.depth_format);
m_texture_cache.set_memory_read_flags(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(old_format, m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height,
*m_current_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
}
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
m_depth_surface_info.width = clip_width;
m_depth_surface_info.height = clip_height;
m_depth_surface_info.depth_format = depth_fmt;
}
//Bind created rtts as current fbo...
std::vector<u8> draw_buffers = rsx::utility::get_rtt_indexes(target);

View File

@ -168,8 +168,8 @@ namespace vk
cmd.begin();
}
const u16 internal_width = std::min(width, rsx::apply_resolution_scale(width, true));
const u16 internal_height = std::min(height, rsx::apply_resolution_scale(height, true));
const u16 internal_width = (context != rsx::texture_upload_context::framebuffer_storage? width : std::min(width, rsx::apply_resolution_scale(width, true)));
const u16 internal_height = (context != rsx::texture_upload_context::framebuffer_storage? height : std::min(height, rsx::apply_resolution_scale(height, true)));
VkImageAspectFlags aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
switch (vram_texture->info.format)
@ -332,6 +332,21 @@ namespace vk
return result;
}
//Stores the requested byte-swap setting in pack_unpack_swap_bytes
void set_unpack_swap_bytes(bool swap_bytes)
{
pack_unpack_swap_bytes = swap_bytes;
}
//Clears the flushed/synchronized flags and the sync timestamp, then applies the requested protection via protect()
void reprotect(utils::protection prot)
{
//Reset properties and protect again
flushed = false;
synchronized = false;
sync_timestamp = 0ull;
protect(prot);
}
bool is_synchronized() const
{
return synchronized;
@ -730,13 +745,20 @@ namespace vk
region.set_image_type(type);
//Its not necessary to lock blit dst textures as they are just reused as necessary
if (context != rsx::texture_upload_context::blit_engine_dst || g_cfg.video.strict_rendering_mode)
if (context != rsx::texture_upload_context::blit_engine_dst)
{
region.protect(utils::protection::ro);
update_cache_tag();
read_only_range = region.get_min_max(read_only_range);
}
else
{
//TODO: Confirm byte swap patterns
region.protect(utils::protection::no);
region.set_unpack_swap_bytes(true);
no_access_range = region.get_min_max(no_access_range);
}
read_only_range = region.get_min_max(read_only_range);
update_cache_tag();
return &region;
}

View File

@ -115,6 +115,7 @@ namespace rsx
else
locked_address_range = align(base + length, 4096) - locked_address_base;
verify(HERE), locked_address_range > 0;
protection = utils::protection::rw;
guard_policy = protect_policy;
locked = false;
@ -124,6 +125,7 @@ namespace rsx
{
if (prot == protection) return;
verify(HERE), locked_address_range > 0;
utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
protection = prot;
locked = prot != utils::protection::rw;

View File

@ -694,7 +694,8 @@ namespace rsx
if (convert_w == 0 || convert_h == 0)
{
LOG_ERROR(RSX, "NV3089_IMAGE_IN: Invalid dimensions or scaling factor. Request ignored");
LOG_ERROR(RSX, "NV3089_IMAGE_IN: Invalid dimensions or scaling factor. Request ignored (ds_dx=%d, dt_dy=%d)",
method_registers.blit_engine_ds_dx(), method_registers.blit_engine_dt_dy());
return;
}