diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index e65bf9f495..a61a8710cd 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2220,9 +2220,9 @@ namespace rsx //verify (HERE), async_tasks_pending.load() == 0; } - void thread::sync_hint(FIFO_hint /*hint*/, void* /*args*/) + void thread::sync_hint(FIFO_hint /*hint*/, void* args) { - zcull_ctrl->on_sync_hint(); + zcull_ctrl->on_sync_hint(args); } void thread::flush_fifo() @@ -2639,7 +2639,7 @@ namespace rsx end_occlusion_query(m_current_task); m_current_task->active = false; m_current_task->pending = true; - m_current_task->sync_tag = ++m_timer; + m_current_task->sync_tag = m_timer++; m_current_task->timestamp = m_tsc; m_pending_writes.push_back({}); @@ -2670,7 +2670,7 @@ namespace rsx m_current_task->active = false; m_current_task->pending = true; m_current_task->timestamp = m_tsc; - m_current_task->sync_tag = ++m_timer; + m_current_task->sync_tag = m_timer++; m_pending_writes.back().query = m_current_task; allocate_new_query(ptimer); @@ -2800,12 +2800,16 @@ namespace rsx void ZCULL_control::on_draw() { if (m_current_task) + { m_current_task->num_draws++; + m_current_task->sync_tag = m_timer++; + } } - void ZCULL_control::on_sync_hint() + void ZCULL_control::on_sync_hint(void* args) { - m_sync_tag = ++m_timer; + auto query = static_cast(args); + m_sync_tag = std::max(m_sync_tag, query->sync_tag); } void ZCULL_control::write(vm::addr_t sink, u64 timestamp, u32 type, u32 value) @@ -2974,7 +2978,7 @@ namespace rsx if (It->query->num_draws && It->query->sync_tag > m_sync_tag) { ptimer->sync_hint(FIFO_hint::hint_zcull_sync, It->query); - verify(HERE), It->query->sync_tag < m_sync_tag; + verify(HERE), It->query->sync_tag <= m_sync_tag; } break; @@ -2991,20 +2995,19 @@ namespace rsx // Schedule ahead m_next_tsc = m_tsc + min_zcull_tick_us; -#if 0 // Schedule a queue flush if needed - if (front.query && front.query->num_draws && front.query->sync_tag > m_sync_tag) + if (!g_cfg.video.relaxed_zcull_sync && + front.query && front.query->num_draws && front.query->sync_tag > m_sync_tag) { const auto elapsed = m_tsc - front.query->timestamp; if (elapsed > max_zcull_delay_us) { - ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast(front.query)); - verify(HERE), front.query->sync_tag < m_sync_tag; + ptimer->sync_hint(FIFO_hint::hint_zcull_sync, front.query); + verify(HERE), front.query->sync_tag <= m_sync_tag; } return; } -#endif } } @@ -3176,7 +3179,7 @@ namespace rsx if (UNLIKELY(query->sync_tag > m_sync_tag)) { ptimer->sync_hint(FIFO_hint::hint_zcull_sync, query); - verify(HERE), m_sync_tag > query->sync_tag; + verify(HERE), m_sync_tag >= query->sync_tag; } } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 9663111263..72960c60ca 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -375,9 +375,8 @@ namespace rsx struct ZCULL_control { // Delay before a report update operation is forced to retire - const u32 max_zcull_delay_us = 4000; - const u32 min_zcull_delay_us = 50; - const u32 min_zcull_tick_us = 500; + const u32 max_zcull_delay_us = 300; + const u32 min_zcull_tick_us = 100; // Number of occlusion query slots available. Real hardware actually has far fewer units before choking const u32 occlusion_query_count = 1024; @@ -437,7 +436,7 @@ namespace rsx void on_draw(); // Sync hint notification - void on_sync_hint(); + void on_sync_hint(void* args); // Check for pending writes bool has_pending() const { return !m_pending_writes.empty(); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 9abd349a11..e92df4ffa3 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1784,7 +1784,7 @@ void VKGSRender::end() auto &data = m_occlusion_map[m_active_query_info->driver_handle]; data.indices.push_back(occlusion_id); - data.command_buffer_to_wait = m_current_command_buffer; + data.set_sync_command_buffer(m_current_command_buffer); m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task; m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query); @@ -2232,7 +2232,9 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args) auto occlusion_info = static_cast(args); auto& data = m_occlusion_map[occlusion_info->driver_handle]; - if (data.command_buffer_to_wait != m_current_command_buffer || data.indices.empty()) + // NOTE: Currently, a special condition exists where the indices can be empty even with active draw count. + // This is caused by async compiler and should be removed when ubershaders are added in + if (!data.is_current(m_current_command_buffer) || data.indices.empty()) return; // Occlusion test result evaluation is coming up, avoid a hard sync @@ -3707,7 +3709,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info if (data.indices.empty()) return true; - if (data.command_buffer_to_wait == m_current_command_buffer) + if (data.is_current(m_current_command_buffer)) return false; u32 oldest = data.indices.front(); @@ -3722,7 +3724,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* if (query->num_draws) { - if (data.command_buffer_to_wait == m_current_command_buffer) + if (data.is_current(m_current_command_buffer)) { std::lock_guard lock(m_flush_queue_mutex); flush_command_queue(); @@ -3736,8 +3738,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* busy_wait(); } - // Allocation stack is FIFO and very long so no need to actually wait for fence signal - data.command_buffer_to_wait->flush(); + data.sync(); // Gather data for (const auto occlusion_id : data.indices) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index f3999249f5..4612071357 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -72,6 +72,7 @@ struct command_buffer_chunk: public vk::command_buffer std::atomic_bool pending = { false }; u64 eid_tag = 0; + u64 reset_id = 0; shared_mutex guard_mutex; command_buffer_chunk() = default; @@ -101,6 +102,7 @@ struct command_buffer_chunk: public vk::command_buffer if (pending) wait(FRAME_PRESENT_TIMEOUT); + ++reset_id; CHECK_RESULT(vkResetCommandBuffer(commands, 0)); } @@ -164,6 +166,27 @@ struct occlusion_data { rsx::simple_array indices; command_buffer_chunk* command_buffer_to_wait = nullptr; + u64 command_buffer_sync_id = 0; + + bool is_current(command_buffer_chunk* cmd) const + { + return (command_buffer_to_wait == cmd && command_buffer_sync_id == cmd->reset_id); + } + + void set_sync_command_buffer(command_buffer_chunk* cmd) + { + command_buffer_to_wait = cmd; + command_buffer_sync_id = cmd->reset_id; + } + + void sync() + { + if (command_buffer_to_wait->reset_id == command_buffer_sync_id) + { + // Allocation stack is FIFO and very long so no need to actually wait for fence signal + command_buffer_to_wait->flush(); + } + } }; enum frame_context_state : u32