rsx: Synchronization rewritten
- Do not do a full sync on a texture read barrier
- Avoid calling zcull sync in FIFO spin wait
- Do not flush memory to cache from the renderer side; this method is now obsolete
Commit 3b47e43380 (parent 23b52e1b1c), repository: https://github.com/RPCS3/rpcs3.git
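
Illustration (not part of the commit): a minimal, hypothetical sketch of how a backend's do_method() treats the two semaphore-release methods after this change. The enum values are placeholders; the real NV4097_* constants, argument handling and surrounding switch live in the GL/VK sources shown in the hunks below.

#include <cstdint>

using u32 = std::uint32_t;

// Placeholder method IDs; the real NV4097_* values come from the RSX method table.
enum : u32
{
    NV4097_TEXTURE_READ_SEMAPHORE_RELEASE   = 1,
    NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE = 2,
};

bool do_method_sketch(u32 cmd, u32 /*arg*/)
{
    switch (cmd)
    {
    case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
        // Texture barrier only; no full sync and no renderer-side memory flush
        return true;
    case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
        // The old flush_draw_buffers / sync_at_semaphore_release() path is gone;
        // the semaphore value itself is written by the generic rsx::thread handler
        return true;
    default:
        return false; // defer to the generic rsx::thread implementation
    }
}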
@@ -593,7 +593,6 @@ void GLGSRender::end()
     m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
     m_draw_calls++;
 
-    synchronize_buffers();
     rsx::thread::end();
 }
 
@@ -1100,7 +1099,6 @@ bool GLGSRender::do_method(u32 cmd, u32 arg)
         if (arg & 0x3) ctx |= rsx::framebuffer_creation_context::context_clear_depth;
 
         init_buffers((rsx::framebuffer_creation_context)ctx, true);
-        synchronize_buffers();
         clear_surface(arg);
     }
 
@@ -1113,10 +1111,16 @@ bool GLGSRender::do_method(u32 cmd, u32 arg)
         return true;
     }
     case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
-    case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
-        flush_draw_buffers = true;
+    {
+        // Texture barrier, seemingly not very useful
+        return true;
+    }
+    case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
+    {
+        //flush_draw_buffers = true;
         return true;
+    }
     }
 
     return false;
 }
@@ -1695,15 +1699,6 @@ work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data)
     return result;
 }
 
-void GLGSRender::synchronize_buffers()
-{
-    if (flush_draw_buffers)
-    {
-        write_buffers();
-        flush_draw_buffers = false;
-    }
-}
-
 bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
 {
     if (m_gl_texture_cache.blit(src, dst, interpolate, m_rtts))
@@ -325,7 +325,6 @@ private:
     shared_mutex queue_guard;
     std::list<work_item> work_queue;
 
-    bool flush_draw_buffers = false;
     std::thread::id m_thread_id;
 
     GLProgramBuffer m_prog_buffer;
@@ -369,10 +368,8 @@ private:
 
 public:
     void read_buffers();
-    void write_buffers();
     void set_viewport();
 
-    void synchronize_buffers();
     work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data);
 
     bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override;
@@ -179,9 +179,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
         return;
     }
 
-    //We are about to change buffers, flush any pending requests for the old buffers
-    synchronize_buffers();
-
     m_rtts_dirty = false;
     zcull_surface_active = false;
 
@@ -475,28 +472,28 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
     case rsx::surface_target::none: break;
 
     case rsx::surface_target::surface_a:
-        __glcheck draw_fbo.draw_buffer(draw_fbo.color[0]);
-        __glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
+        draw_fbo.draw_buffer(draw_fbo.color[0]);
+        draw_fbo.read_buffer(draw_fbo.color[0]);
         break;
 
     case rsx::surface_target::surface_b:
-        __glcheck draw_fbo.draw_buffer(draw_fbo.color[1]);
-        __glcheck draw_fbo.read_buffer(draw_fbo.color[1]);
+        draw_fbo.draw_buffer(draw_fbo.color[1]);
+        draw_fbo.read_buffer(draw_fbo.color[1]);
         break;
 
     case rsx::surface_target::surfaces_a_b:
-        __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
-        __glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
+        draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
+        draw_fbo.read_buffer(draw_fbo.color[0]);
         break;
 
     case rsx::surface_target::surfaces_a_b_c:
-        __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
-        __glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
+        draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
+        draw_fbo.read_buffer(draw_fbo.color[0]);
         break;
 
     case rsx::surface_target::surfaces_a_b_c_d:
-        __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
-        __glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
+        draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
+        draw_fbo.read_buffer(draw_fbo.color[0]);
         break;
     }
 
@@ -590,7 +587,7 @@ void GLGSRender::read_buffers()
         {
             if (!color_buffer.tile)
             {
-                __glcheck std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(color_buffer.ptr, color_format.format, color_format.type);
+                std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(color_buffer.ptr, color_format.format, color_format.type);
             }
             else
             {
@@ -599,7 +596,7 @@ void GLGSRender::read_buffers()
                 std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
                 color_buffer.read(buffer.get(), width, height, pitch);
 
-                __glcheck std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(buffer.get(), color_format.format, color_format.type);
+                std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(buffer.get(), color_format.format, color_format.type);
             }
         }
     }
@@ -654,8 +651,8 @@ void GLGSRender::read_buffers()
         int pixel_size = rsx::internals::get_pixel_size(rsx::method_registers.surface_depth_fmt());
         gl::buffer pbo_depth;
 
-        __glcheck pbo_depth.create(width * height * pixel_size);
-        __glcheck pbo_depth.map([&](GLubyte* pixels)
+        pbo_depth.create(width * height * pixel_size);
+        pbo_depth.map([&](GLubyte* pixels)
         {
             u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
 
@@ -679,42 +676,6 @@ void GLGSRender::read_buffers()
            }
        }, gl::buffer::access::write);
 
-        __glcheck std::get<1>(m_rtts.m_bound_depth_stencil)->copy_from(pbo_depth, depth_format.format, depth_format.type);
+        std::get<1>(m_rtts.m_bound_depth_stencil)->copy_from(pbo_depth, depth_format.format, depth_format.type);
     }
 }
-
-void GLGSRender::write_buffers()
-{
-    if (!draw_fbo)
-        return;
-
-    if (g_cfg.video.write_color_buffers)
-    {
-        auto write_color_buffers = [&](int index, int count)
-        {
-            for (int i = index; i < index + count; ++i)
-            {
-                if (m_surface_info[i].pitch == 0)
-                    continue;
-
-                /**Even tiles are loaded as whole textures during read_buffers from testing.
-                * Need further evaluation to determine correct behavior. Separate paths for both show no difference,
-                * but using the GPU to perform the caching is many times faster.
-                */
-
-                const u32 range = m_surface_info[i].pitch * m_surface_info[i].height;
-                m_gl_texture_cache.flush_memory_to_cache(m_surface_info[i].address, range, true, 0xFF);
-            }
-        };
-
-        write_color_buffers(0, 4);
-    }
-
-    if (g_cfg.video.write_depth_buffer)
-    {
-        if (m_depth_surface_info.pitch == 0) return;
-
-        const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height;
-        m_gl_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, range, true, 0xFF);
-    }
-}
@@ -587,7 +587,7 @@ namespace rsx
         }
         else if (zcull_ctrl->has_pending())
         {
-            zcull_ctrl->sync(this);
+            //zcull_ctrl->sync(this);
         }
         else
         {
@@ -1491,7 +1491,6 @@ void VKGSRender::end()
     std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
     m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - textures_end).count();
 
-    copy_render_targets_to_dma_location();
     m_draw_calls++;
 
     rsx::thread::end();
@@ -1638,8 +1637,6 @@ void VKGSRender::clear_surface(u32 mask)
 
     if (!framebuffer_status_valid) return;
 
-    copy_render_targets_to_dma_location();
-
     float depth_clear = 1.f;
     u32 stencil_clear = 0;
     u32 depth_stencil_mask = 0;
@@ -1793,53 +1790,6 @@ void VKGSRender::clear_surface(u32 mask)
     }
 }
 
-void VKGSRender::sync_at_semaphore_release()
-{
-    m_flush_draw_buffers = true;
-}
-
-void VKGSRender::copy_render_targets_to_dma_location()
-{
-    if (!m_flush_draw_buffers)
-        return;
-
-    if (!g_cfg.video.write_color_buffers && !g_cfg.video.write_depth_buffer)
-        return;
-
-    //TODO: Make this asynchronous. Should be similar to a glFlush() but in this case its similar to glFinish
-    //This is due to all the hard waits for fences
-    //TODO: Use a command buffer array to allow explicit draw command tracking
-
-    vk::enter_uninterruptible();
-
-    if (g_cfg.video.write_color_buffers)
-    {
-        for (u8 index = 0; index < rsx::limits::color_buffers_count; index++)
-        {
-            if (!m_surface_info[index].pitch)
-                continue;
-
-            m_texture_cache.flush_memory_to_cache(m_surface_info[index].address, m_surface_info[index].pitch * m_surface_info[index].height, true, 0xFF,
-                *m_current_command_buffer, m_swapchain->get_graphics_queue());
-        }
-    }
-
-    if (g_cfg.video.write_depth_buffer)
-    {
-        if (m_depth_surface_info.pitch)
-        {
-            m_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, true, 0xFF,
-                *m_current_command_buffer, m_swapchain->get_graphics_queue());
-        }
-    }
-
-    vk::leave_uninterruptible();
-
-    flush_command_queue();
-
-    m_flush_draw_buffers = false;
-}
-
 void VKGSRender::flush_command_queue(bool hard_sync)
 {
     close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence);
@@ -2192,9 +2142,11 @@ bool VKGSRender::do_method(u32 cmd, u32 arg)
         clear_surface(arg);
         return true;
     case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
+        // Texture barrier, seemingly not very useful
+        return true;
     case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
-        sync_at_semaphore_release();
-        return false; //call rsx::thread method implementation
+        //sync_at_semaphore_release();
+        return true;
     default:
         return false;
     }
@@ -2541,7 +2493,6 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
     if (m_draw_fbo && !m_rtts_dirty)
         return;
 
-    copy_render_targets_to_dma_location();
     m_rtts_dirty = false;
 
     u32 clip_width = rsx::method_registers.surface_clip_width();
@@ -354,7 +354,6 @@ private:
     s64 m_flip_time = 0;
 
     u8 m_draw_buffers_count = 0;
-    bool m_flush_draw_buffers = false;
 
     shared_mutex m_flush_queue_mutex;
    flush_request_task m_flush_requests;
@@ -380,9 +379,7 @@ private:
     void clear_surface(u32 mask);
     void close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
     void open_command_buffer();
-    void sync_at_semaphore_release();
     void prepare_rtts(rsx::framebuffer_creation_context context);
-    void copy_render_targets_to_dma_location();
 
     void flush_command_queue(bool hard_sync = false);
     void queue_swap_request();
@@ -160,6 +160,8 @@ namespace rsx
 
     void texture_read_semaphore_release(thread* rsx, u32 _reg, u32 arg)
     {
+        // Pipeline barrier seems to be equivalent to a SHADER_READ stage barrier
+
         const u32 index = method_registers.semaphore_offset_4097() >> 4;
         // lle-gcm likes to inject system reserved semaphores, presumably for system/vsh usage
         // Avoid calling render to avoid any havoc(flickering) they may cause from invalid flush/write
@@ -169,7 +171,6 @@ namespace rsx
            //
        }
 
-        rsx->sync();
         auto& sema = vm::_ref<RsxReports>(rsx->label_addr);
         sema.semaphore[index].val = arg;
         sema.semaphore[index].pad = 0;
@@ -178,6 +179,8 @@ namespace rsx
 
     void back_end_write_semaphore_release(thread* rsx, u32 _reg, u32 arg)
     {
+        // Full pipeline barrier
+
         const u32 index = method_registers.semaphore_offset_4097() >> 4;
         if (index > 63 && !rsx->do_method(NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE, arg))
         {
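
To make the first bullet of the commit message concrete, the following is a hypothetical, self-contained sketch of the two semaphore-release paths after this change. The types and the full_sync() call are illustrative stand-ins: the diff only confirms that rsx->sync() was dropped from the texture-read release, while the back-end write release is annotated as a full pipeline barrier and is assumed to keep synchronizing.

#include <cstdint>

using u32 = std::uint32_t;

struct semaphore_entry { u32 val; u32 pad; };

struct rsx_thread_sketch
{
    semaphore_entry semaphores[256];

    void full_sync()
    {
        // Stand-in for rsx->sync(): wait for pending ZCULL reports and transfers
    }
};

// Texture read barrier: only publish the value, no full sync any more.
void texture_read_semaphore_release_sketch(rsx_thread_sketch& rsx, u32 index, u32 arg)
{
    rsx.semaphores[index].val = arg;
    rsx.semaphores[index].pad = 0;
}

// Back-end write release: assumed to remain a full pipeline barrier.
void back_end_write_semaphore_release_sketch(rsx_thread_sketch& rsx, u32 index, u32 arg)
{
    rsx.full_sync();
    rsx.semaphores[index].val = arg;
    rsx.semaphores[index].pad = 0;
}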