mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 10:42:36 +01:00
rsx/vk: Redesign how conditional rendering hints work
- Pass a sync address to the backend
- Ignore the hint if the query is running in lazy mode
- Do not submit CBs too close to each other. Submits are expensive.
This commit is contained in:
parent
0244c4046e
commit
7fa521a046
@ -2569,7 +2569,7 @@ namespace rsx
|
||||
if (!result.queries.empty())
|
||||
{
|
||||
cond_render_ctrl.set_eval_sources(result.queries);
|
||||
sync_hint(FIFO_hint::hint_conditional_render_eval, cond_render_ctrl.eval_sources.front());
|
||||
sync_hint(FIFO_hint::hint_conditional_render_eval, reinterpret_cast<void*>(ref));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -356,6 +356,8 @@ namespace rsx
|
||||
struct frame_statistics_t
|
||||
{
|
||||
u32 draw_calls;
|
||||
u32 submit_count;
|
||||
|
||||
s64 setup_time;
|
||||
s64 vertex_upload_time;
|
||||
s64 textures_upload_time;
|
||||
@ -434,7 +436,6 @@ namespace rsx
|
||||
|
||||
s32 m_skip_frame_ctr = 0;
|
||||
bool skip_current_frame = false;
|
||||
frame_statistics_t stats{};
|
||||
|
||||
backend_configuration backend_config{};
|
||||
|
||||
@ -793,6 +794,9 @@ namespace rsx
|
||||
// Get RSX approximate load in %
|
||||
u32 get_load();
|
||||
|
||||
// Get stats object
|
||||
frame_statistics_t& get_stats()
{
	// Returned by mutable reference so callers can bump the counters in place
	return m_frame_stats;
}
|
||||
|
||||
// Returns true if the current thread is the active RSX thread
|
||||
inline bool is_current_thread() const
|
||||
{
|
||||
|
@ -3,18 +3,6 @@
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
static inline std::string_view location_tostring(u32 location)
|
||||
{
|
||||
ensure(location < 2);
|
||||
const char* location_names[] = {"CELL_GCM_LOCATION_LOCAL", "CELL_GCM_LOCATION_MAIN"};
|
||||
return location_names[location];
|
||||
}
|
||||
|
||||
// Classifies an address by comparing it against rsx::constants::local_mem_base:
// addresses at or above the base are CELL_GCM_LOCATION_LOCAL, anything below is CELL_GCM_LOCATION_MAIN.
static inline u32 classify_location(u32 address)
{
	if (address < rsx::constants::local_mem_base)
	{
		return CELL_GCM_LOCATION_MAIN;
	}

	return CELL_GCM_LOCATION_LOCAL;
}
|
||||
|
||||
namespace reports
|
||||
{
|
||||
ZCULL_control::ZCULL_control()
|
||||
@ -783,7 +771,7 @@ namespace rsx
|
||||
|
||||
void ZCULL_control::on_report_enqueued(vm::addr_t address)
|
||||
{
|
||||
const auto location = (address >= rsx::constants::local_mem_base) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN;
|
||||
const auto location = rsx::classify_location(address);
|
||||
std::scoped_lock lock(m_pages_mutex);
|
||||
|
||||
if (!m_pages_accessed[location]) [[ likely ]]
|
||||
@ -806,7 +794,7 @@ namespace rsx
|
||||
|
||||
void ZCULL_control::on_report_completed(vm::addr_t address)
|
||||
{
|
||||
const auto location = (address >= rsx::constants::local_mem_base) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN;
|
||||
const auto location = rsx::classify_location(address);
|
||||
if (!m_pages_accessed[location])
|
||||
{
|
||||
const auto page_address = static_cast<u32>(address) & ~0xfff;
|
||||
@ -820,7 +808,7 @@ namespace rsx
|
||||
ensure(page.has_refs());
|
||||
page.release();
|
||||
|
||||
if (!page.has_refs())
|
||||
if (!page.has_refs() && location != CELL_GCM_LOCATION_LOCAL)
|
||||
{
|
||||
if (page.prot != utils::protection::rw)
|
||||
{
|
||||
|
@ -14,6 +14,18 @@ namespace rsx
|
||||
{
|
||||
class thread;
|
||||
|
||||
static inline std::string_view location_tostring(u32 location)
|
||||
{
|
||||
ensure(location < 2);
|
||||
const char* location_names[] = { "CELL_GCM_LOCATION_LOCAL", "CELL_GCM_LOCATION_MAIN" };
|
||||
return location_names[location];
|
||||
}
|
||||
|
||||
// Classifies an address by comparing it against rsx::constants::local_mem_base:
// addresses at or above the base are CELL_GCM_LOCATION_LOCAL, anything below is CELL_GCM_LOCATION_MAIN.
static inline u32 classify_location(u32 address)
{
	if (address < rsx::constants::local_mem_base)
	{
		return CELL_GCM_LOCATION_MAIN;
	}

	return CELL_GCM_LOCATION_LOCAL;
}
|
||||
|
||||
namespace reports
|
||||
{
|
||||
struct occlusion_query_info
|
||||
@ -173,6 +185,9 @@ namespace rsx
|
||||
// Check paging issues
|
||||
bool on_access_violation(u32 address);
|
||||
|
||||
// Optimization check
|
||||
bool is_query_result_urgent(u32 address) const
{
	// Reads the m_pages_accessed entry for the location (local / main) that this address classifies to
	const auto location = rsx::classify_location(address);
	return m_pages_accessed[location];
}
|
||||
|
||||
// Backend methods (optional, will return everything as always visible by default)
|
||||
virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
|
||||
virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}
|
||||
|
@ -50,6 +50,8 @@ namespace vk
|
||||
|
||||
void queue_submit(const queue_submit_t& submit_info, VkBool32 flush)
|
||||
{
|
||||
rsx::get_current_renderer()->get_stats().submit_count++;
|
||||
|
||||
// Access to this method must be externally synchronized.
|
||||
// Offloader is guaranteed to never call this for async flushes.
|
||||
vk::descriptors::flush();
|
||||
|
@ -1577,18 +1577,11 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args)
|
||||
ensure(args);
|
||||
rsx::thread::sync_hint(hint, args);
|
||||
|
||||
// Occlusion queries not enabled, do nothing
|
||||
if (!(m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task))
|
||||
{
|
||||
// Occlusion queries not enabled, do nothing
|
||||
return;
|
||||
|
||||
// Check if the required report is synced to this CB
|
||||
auto occlusion_info = static_cast<rsx::reports::occlusion_query_info*>(args);
|
||||
auto& data = m_occlusion_map[occlusion_info->driver_handle];
|
||||
|
||||
// NOTE: Currently, a special condition exists where the indices can be empty even with active draw count.
|
||||
// This is caused by async compiler and should be removed when ubershaders are added in
|
||||
if (!data.is_current(m_current_command_buffer) || data.indices.empty())
|
||||
return;
|
||||
}
|
||||
|
||||
// Occlusion test result evaluation is coming up, avoid a hard sync
|
||||
switch (hint)
|
||||
@ -1597,15 +1590,45 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args)
|
||||
{
|
||||
// If a flush request is already enqueued, do nothing
|
||||
if (m_flush_requests.pending())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Schedule a sync on the next loop iteration
|
||||
m_flush_requests.post(false);
|
||||
m_flush_requests.remove_one();
|
||||
// If the result is not going to be read by CELL, do nothing
|
||||
const auto ref_addr = reinterpret_cast<u32>(args);
|
||||
if (!zcull_ctrl->is_query_result_urgent(ref_addr))
|
||||
{
|
||||
// No effect on CELL behaviour, it will be faster to handle this in RSX code
|
||||
return;
|
||||
}
|
||||
|
||||
// OK, cell will be accessing the results, probably.
|
||||
// Try to avoid flush spam, it is more costly to flush the CB than it is to just upload the vertex data
|
||||
// This is supposed to be an optimization afterall.
|
||||
const auto now = rsx::uclock();
|
||||
if ((now - m_last_cond_render_eval_hint) > 50)
|
||||
{
|
||||
// Schedule a sync on the next loop iteration
|
||||
m_flush_requests.post(false);
|
||||
m_flush_requests.remove_one();
|
||||
}
|
||||
|
||||
m_last_cond_render_eval_hint = now;
|
||||
break;
|
||||
}
|
||||
case rsx::FIFO_hint::hint_zcull_sync:
|
||||
{
|
||||
// Check if the required report is synced to this CB
|
||||
auto occlusion_info = static_cast<rsx::reports::occlusion_query_info*>(args);
|
||||
auto& data = m_occlusion_map[occlusion_info->driver_handle];
|
||||
|
||||
// NOTE: Currently, a special condition exists where the indices can be empty even with active draw count.
|
||||
// This is caused by async compiler and should be removed when ubershaders are added in
|
||||
if (!data.is_current(m_current_command_buffer) || data.indices.empty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Unavoidable hard sync coming up, flush immediately
|
||||
// This heavyweight hint should be used with caution
|
||||
std::lock_guard lock(m_flush_queue_mutex);
|
||||
|
@ -173,6 +173,8 @@ private:
|
||||
shared_mutex m_flush_queue_mutex;
|
||||
vk::flush_request_task m_flush_requests;
|
||||
|
||||
ullong m_last_cond_render_eval_hint = 0;
|
||||
|
||||
// Offloader thread deadlock recovery
|
||||
rsx::atomic_bitmask_t<flush_queue_state> m_queue_status;
|
||||
utils::address_range m_offloader_fault_range;
|
||||
|
@ -747,6 +747,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
|
||||
|
||||
if (g_cfg.video.overlay)
|
||||
{
|
||||
// TODO: Move this to native overlay! It is both faster and easier to manage
|
||||
if (!m_text_writer)
|
||||
{
|
||||
auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
|
||||
@ -756,13 +757,14 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
|
||||
|
||||
m_text_writer->set_scale(m_frame->client_device_pixel_ratio());
|
||||
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load()));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load()));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("submits: %20d", info.stats.submit_count));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time));
|
||||
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 126, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time));
|
||||
|
||||
const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
|
||||
const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
|
||||
|
Loading…
Reference in New Issue
Block a user