
rsx: Zcull synchronization tuning

- Also fixes a bug where sync_hint would erroneously update the sync tag
  even for old lookups (e.g. a conditional render using an older query)
kd-11 2019-12-11 22:07:09 +03:00 committed by kd-11
parent fdb638436f
commit ed2bdb8e0c
4 changed files with 49 additions and 23 deletions
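
In short: each draw now stamps the active query from a monotonically increasing timer, and a sync hint advances the global sync tag only as far as the query it actually refers to, instead of unconditionally bumping the tag past the timer. A minimal standalone sketch of the before/after behaviour (zcull_model, query and the function names are illustrative, not rpcs3 code):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Illustrative model only -- names do not match rpcs3 internals.
struct query { uint64_t sync_tag = 0; };

struct zcull_model
{
    uint64_t m_timer = 0;    // advances once per draw
    uint64_t m_sync_tag = 0; // queries tagged <= this are covered by a sync

    void on_draw(query& q) { q.sync_tag = m_timer++; }

    // Old behaviour: every hint bumped the tag past the timer, claiming
    // progress even when the hint referred to an old query.
    void on_sync_hint_old(const query&) { m_sync_tag = ++m_timer; }

    // New behaviour: advance only as far as the query actually synchronized.
    void on_sync_hint_new(const query& q) { m_sync_tag = std::max(m_sync_tag, q.sync_tag); }
};

int main()
{
    zcull_model z;
    query old_q, new_q;
    z.on_draw(old_q); // old_q.sync_tag = 0
    z.on_draw(new_q); // new_q.sync_tag = 1

    // Conditional render performs a lookup against the *older* query:
    z.on_sync_hint_new(old_q);
    assert(z.m_sync_tag <= new_q.sync_tag); // new_q is still outstanding -- correct

    // on_sync_hint_old(old_q) would have set m_sync_tag to 3 here, silently
    // marking the still-pending new_q as synchronized.
}

This is also why the verify() checks in the hunks below relax from strict < to <=: after a hint, m_sync_tag is only guaranteed to have caught up to the query's tag, not to have moved past it.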

rpcs3/Emu/RSX/RSXThread.cpp

@@ -2220,9 +2220,9 @@ namespace rsx
         //verify (HERE), async_tasks_pending.load() == 0;
     }

-    void thread::sync_hint(FIFO_hint /*hint*/, void* /*args*/)
+    void thread::sync_hint(FIFO_hint /*hint*/, void* args)
     {
-        zcull_ctrl->on_sync_hint();
+        zcull_ctrl->on_sync_hint(args);
     }

     void thread::flush_fifo()
@@ -2639,7 +2639,7 @@ namespace rsx
         end_occlusion_query(m_current_task);
         m_current_task->active = false;
         m_current_task->pending = true;
-        m_current_task->sync_tag = ++m_timer;
+        m_current_task->sync_tag = m_timer++;
         m_current_task->timestamp = m_tsc;

         m_pending_writes.push_back({});
@@ -2670,7 +2670,7 @@ namespace rsx
             m_current_task->active = false;
             m_current_task->pending = true;
             m_current_task->timestamp = m_tsc;
-            m_current_task->sync_tag = ++m_timer;
+            m_current_task->sync_tag = m_timer++;

             m_pending_writes.back().query = m_current_task;
             allocate_new_query(ptimer);
@@ -2800,12 +2800,16 @@ namespace rsx
     void ZCULL_control::on_draw()
     {
         if (m_current_task)
+        {
             m_current_task->num_draws++;
+            m_current_task->sync_tag = m_timer++;
+        }
     }

-    void ZCULL_control::on_sync_hint()
+    void ZCULL_control::on_sync_hint(void* args)
     {
-        m_sync_tag = ++m_timer;
+        auto query = static_cast<occlusion_query_info*>(args);
+        m_sync_tag = std::max(m_sync_tag, query->sync_tag);
     }

     void ZCULL_control::write(vm::addr_t sink, u64 timestamp, u32 type, u32 value)
@@ -2974,7 +2978,7 @@ namespace rsx
                 if (It->query->num_draws && It->query->sync_tag > m_sync_tag)
                 {
                     ptimer->sync_hint(FIFO_hint::hint_zcull_sync, It->query);
-                    verify(HERE), It->query->sync_tag < m_sync_tag;
+                    verify(HERE), It->query->sync_tag <= m_sync_tag;
                 }

                 break;
@@ -2991,20 +2995,19 @@ namespace rsx
             // Schedule ahead
             m_next_tsc = m_tsc + min_zcull_tick_us;

-#if 0
             // Schedule a queue flush if needed
-            if (front.query && front.query->num_draws && front.query->sync_tag > m_sync_tag)
+            if (!g_cfg.video.relaxed_zcull_sync &&
+                front.query && front.query->num_draws && front.query->sync_tag > m_sync_tag)
             {
                 const auto elapsed = m_tsc - front.query->timestamp;
                 if (elapsed > max_zcull_delay_us)
                 {
-                    ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast<uintptr_t>(front.query));
-                    verify(HERE), front.query->sync_tag < m_sync_tag;
+                    ptimer->sync_hint(FIFO_hint::hint_zcull_sync, front.query);
+                    verify(HERE), front.query->sync_tag <= m_sync_tag;
                 }

                 return;
             }
-#endif
         }
     }
@@ -3176,7 +3179,7 @@ namespace rsx
         if (UNLIKELY(query->sync_tag > m_sync_tag))
         {
             ptimer->sync_hint(FIFO_hint::hint_zcull_sync, query);
-            verify(HERE), m_sync_tag > query->sync_tag;
+            verify(HERE), m_sync_tag >= query->sync_tag;
         }
     }

rpcs3/Emu/RSX/RSXThread.h

@@ -375,9 +375,8 @@ namespace rsx
     struct ZCULL_control
     {
         // Delay before a report update operation is forced to retire
-        const u32 max_zcull_delay_us = 4000;
-        const u32 min_zcull_delay_us = 50;
-        const u32 min_zcull_tick_us = 500;
+        const u32 max_zcull_delay_us = 300;
+        const u32 min_zcull_tick_us = 100;

         // Number of occlusion query slots available. Real hardware actually has far fewer units before choking
         const u32 occlusion_query_count = 1024;
@@ -437,7 +436,7 @@ namespace rsx
         void on_draw();

         // Sync hint notification
-        void on_sync_hint();
+        void on_sync_hint(void* args);

         // Check for pending writes
         bool has_pending() const { return !m_pending_writes.empty(); }
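
The retuned constants trade a little throughput for much lower report latency: a pending report is now forced to retire after 300us rather than 4000us, the poll tick drops from 500us to 100us, and the now-unused min_zcull_delay_us is removed. A rough sketch of how the two remaining constants interact (update_model and tick are illustrative names; the real logic is the update loop in the -2991 hunk above):

#include <cstdint>

// Illustrative reduction of the update-loop timing, not the real interface.
struct update_model
{
    static constexpr uint32_t max_zcull_delay_us = 300; // was 4000
    static constexpr uint32_t min_zcull_tick_us  = 100; // was 500

    uint64_t m_next_tsc = 0;

    // Returns true when a sync hint should be emitted for a report that
    // became pending at 'pending_since_us'.
    bool tick(uint64_t now_us, uint64_t pending_since_us)
    {
        if (now_us < m_next_tsc)
            return false; // rate limit: at most one poll per tick interval

        m_next_tsc = now_us + min_zcull_tick_us;

        // Reports older than the maximum delay are forced to retire.
        return (now_us - pending_since_us) > max_zcull_delay_us;
    }
};

int main()
{
    update_model m;
    return m.tick(/*now*/ 1000, /*pending since*/ 500) ? 0 : 1; // 500us > 300us -> hint
}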

rpcs3/Emu/RSX/VK/VKGSRender.cpp

@@ -1784,7 +1784,7 @@ void VKGSRender::end()
     auto &data = m_occlusion_map[m_active_query_info->driver_handle];
     data.indices.push_back(occlusion_id);
-    data.command_buffer_to_wait = m_current_command_buffer;
+    data.set_sync_command_buffer(m_current_command_buffer);

     m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task;
     m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query);
@@ -2232,7 +2232,9 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args)
     auto occlusion_info = static_cast<rsx::reports::occlusion_query_info*>(args);
     auto& data = m_occlusion_map[occlusion_info->driver_handle];

-    if (data.command_buffer_to_wait != m_current_command_buffer || data.indices.empty())
+    // NOTE: Currently, a special condition exists where the indices can be empty even with active draw count.
+    // This is caused by async compiler and should be removed when ubershaders are added in
+    if (!data.is_current(m_current_command_buffer) || data.indices.empty())
         return;

     // Occlusion test result evaluation is coming up, avoid a hard sync
@@ -3707,7 +3709,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info
     if (data.indices.empty())
         return true;

-    if (data.command_buffer_to_wait == m_current_command_buffer)
+    if (data.is_current(m_current_command_buffer))
         return false;

     u32 oldest = data.indices.front();
@@ -3722,7 +3724,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
     if (query->num_draws)
     {
-        if (data.command_buffer_to_wait == m_current_command_buffer)
+        if (data.is_current(m_current_command_buffer))
         {
             std::lock_guard lock(m_flush_queue_mutex);
             flush_command_queue();
@@ -3736,8 +3738,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
             busy_wait();
         }

-        // Allocation stack is FIFO and very long so no need to actually wait for fence signal
-        data.command_buffer_to_wait->flush();
+        data.sync();

         // Gather data
         for (const auto occlusion_id : data.indices)

rpcs3/Emu/RSX/VK/VKGSRender.h

@@ -72,6 +72,7 @@ struct command_buffer_chunk: public vk::command_buffer
     std::atomic_bool pending = { false };
     u64 eid_tag = 0;
+    u64 reset_id = 0;
     shared_mutex guard_mutex;

     command_buffer_chunk() = default;

@@ -101,6 +102,7 @@ struct command_buffer_chunk: public vk::command_buffer
         if (pending)
             wait(FRAME_PRESENT_TIMEOUT);

+        ++reset_id;
         CHECK_RESULT(vkResetCommandBuffer(commands, 0));
     }
@@ -164,6 +166,27 @@ struct occlusion_data
 {
     rsx::simple_array<u32> indices;
     command_buffer_chunk* command_buffer_to_wait = nullptr;
+    u64 command_buffer_sync_id = 0;
+
+    bool is_current(command_buffer_chunk* cmd) const
+    {
+        return (command_buffer_to_wait == cmd && command_buffer_sync_id == cmd->reset_id);
+    }
+
+    void set_sync_command_buffer(command_buffer_chunk* cmd)
+    {
+        command_buffer_to_wait = cmd;
+        command_buffer_sync_id = cmd->reset_id;
+    }
+
+    void sync()
+    {
+        if (command_buffer_to_wait->reset_id == command_buffer_sync_id)
+        {
+            // Allocation stack is FIFO and very long so no need to actually wait for fence signal
+            command_buffer_to_wait->flush();
+        }
+    }
 };

 enum frame_context_state : u32
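
The command_buffer_sync_id/reset_id pair added above is a generation counter: command buffer chunks are pooled and recycled, so comparing only the command_buffer_to_wait pointer (as the old code did) can match a chunk that has since been reset for unrelated work. A standalone sketch of the pattern (chunk and waiter are illustrative stand-ins for command_buffer_chunk and occlusion_data):

#include <cstdint>

// Standalone sketch of the generation-counter pattern; 'chunk' and 'waiter'
// stand in for command_buffer_chunk and occlusion_data.
struct chunk
{
    uint64_t reset_id = 0;       // bumped on every recycle, as in reset() above
    void reset() { ++reset_id; }
};

struct waiter
{
    chunk*   target  = nullptr;
    uint64_t sync_id = 0;

    // Mirrors set_sync_command_buffer(): record pointer + generation together.
    void watch(chunk* c) { target = c; sync_id = c->reset_id; }

    // Mirrors is_current(): a bare pointer compare would match a recycled
    // chunk; checking the generation rejects buffers reset since watch().
    bool is_current(chunk* c) const
    {
        return target == c && sync_id == c->reset_id;
    }
};

int main()
{
    chunk c;
    waiter w;
    w.watch(&c);

    c.reset(); // the pool hands the same chunk out for new work

    return w.is_current(&c) ? 1 : 0; // generation mismatch -> returns 0
}

sync() applies the same check in reverse: if the generation no longer matches, the recorded buffer was already recycled, which implies its work was flushed and completed long ago, so there is nothing left to wait for.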