rsx/vk: Redesign how conditional rendering hints work

- Pass a sync address to the backend
- Ignore the hint if the query is running in lazy mode
- Do not submit CBs too close to each other. Submits are expensive
2024-11-22 18:53:28 +01:00 · 2022-05-14 17:43:43 +03:00 · 2022-05-14 17:43:43 +03:00 · 7fa521a046
commit 7fa521a046
parent 0244c4046e
8 changed files with 73 additions and 37 deletions
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@ -2569,7 +2569,7 @@ namespace rsx
 			if (!result.queries.empty())
 			{
 				cond_render_ctrl.set_eval_sources(result.queries);
-				sync_hint(FIFO_hint::hint_conditional_render_eval, cond_render_ctrl.eval_sources.front());
+				sync_hint(FIFO_hint::hint_conditional_render_eval, reinterpret_cast<void*>(ref));
 			}
 			else
 			{
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@ -356,6 +356,8 @@ namespace rsx
 	struct frame_statistics_t
 	{
 		u32 draw_calls;
+		u32 submit_count;
+
 		s64 setup_time;
 		s64 vertex_upload_time;
 		s64 textures_upload_time;
@ -434,7 +436,6 @@ namespace rsx

 		s32 m_skip_frame_ctr = 0;
 		bool skip_current_frame = false;
-		frame_statistics_t stats{};

 		backend_configuration backend_config{};

@ -793,6 +794,9 @@ namespace rsx
 		// Get RSX approximate load in %
 		u32 get_load();

+		// Get stats object
+		frame_statistics_t& get_stats() { return m_frame_stats; }
+
 		// Returns true if the current thread is the active RSX thread
 		inline bool is_current_thread() const
 		{
--- a/rpcs3/Emu/RSX/RSXZCULL.cpp
+++ b/rpcs3/Emu/RSX/RSXZCULL.cpp
@ -3,18 +3,6 @@

 namespace rsx
 {
-	static inline std::string_view location_tostring(u32 location)
-	{
-		ensure(location < 2);
-		const char* location_names[] = {"CELL_GCM_LOCATION_LOCAL", "CELL_GCM_LOCATION_MAIN"};
-		return location_names[location];
-	}
-
-	static inline u32 classify_location(u32 address)
-	{
-		return (address >= rsx::constants::local_mem_base) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN;
-	}
-
 	namespace reports
 	{
 		ZCULL_control::ZCULL_control()
@ -783,7 +771,7 @@ namespace rsx

 		void ZCULL_control::on_report_enqueued(vm::addr_t address)
 		{
-			const auto location = (address >= rsx::constants::local_mem_base) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN;
+			const auto location = rsx::classify_location(address);
 			std::scoped_lock lock(m_pages_mutex);

 			if (!m_pages_accessed[location]) [[ likely ]]
@ -806,7 +794,7 @@ namespace rsx

 		void ZCULL_control::on_report_completed(vm::addr_t address)
 		{
-			const auto location = (address >= rsx::constants::local_mem_base) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN;
+			const auto location = rsx::classify_location(address);
 			if (!m_pages_accessed[location])
 			{
 				const auto page_address = static_cast<u32>(address) & ~0xfff;
@ -820,7 +808,7 @@ namespace rsx
 					ensure(page.has_refs());
 					page.release();

-					if (!page.has_refs())
+					if (!page.has_refs() && location != CELL_GCM_LOCATION_LOCAL)
 					{
 						if (page.prot != utils::protection::rw)
 						{
--- a/rpcs3/Emu/RSX/RSXZCULL.h
+++ b/rpcs3/Emu/RSX/RSXZCULL.h
@ -14,6 +14,18 @@ namespace rsx
 {
 	class thread;

+	static inline std::string_view location_tostring(u32 location)
+	{
+		ensure(location < 2);
+		const char* location_names[] = { "CELL_GCM_LOCATION_LOCAL", "CELL_GCM_LOCATION_MAIN" };
+		return location_names[location];
+	}
+
+	static inline u32 classify_location(u32 address)
+	{
+		return (address >= rsx::constants::local_mem_base) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN;
+	}
+
 	namespace reports
 	{
 		struct occlusion_query_info
@ -173,6 +185,9 @@ namespace rsx
 			// Check paging issues
 			bool on_access_violation(u32 address);

+			// Optimization check
+			bool is_query_result_urgent(u32 address) const { return m_pages_accessed[rsx::classify_location(address)]; }
+
 			// Backend methods (optional, will return everything as always visible by default)
 			virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
 			virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}
--- a/rpcs3/Emu/RSX/VK/VKCommandStream.cpp
+++ b/rpcs3/Emu/RSX/VK/VKCommandStream.cpp
@ -50,6 +50,8 @@ namespace vk

 	void queue_submit(const queue_submit_t& submit_info, VkBool32 flush)
 	{
+		rsx::get_current_renderer()->get_stats().submit_count++;
+
 		// Access to this method must be externally synchronized.
 		// Offloader is guaranteed to never call this for async flushes.
 		vk::descriptors::flush();
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@ -1577,18 +1577,11 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args)
 	ensure(args);
 	rsx::thread::sync_hint(hint, args);

-	// Occlusion queries not enabled, do nothing
 	if (!(m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task))
+	{
+		// Occlusion queries not enabled, do nothing
 		return;
-
-	// Check if the required report is synced to this CB
-	auto occlusion_info = static_cast<rsx::reports::occlusion_query_info*>(args);
-	auto& data = m_occlusion_map[occlusion_info->driver_handle];
-
-	// NOTE: Currently, a special condition exists where the indices can be empty even with active draw count.
-	// This is caused by async compiler and should be removed when ubershaders are added in
-	if (!data.is_current(m_current_command_buffer) || data.indices.empty())
-		return;
+	}

 	// Occlusion test result evaluation is coming up, avoid a hard sync
 	switch (hint)
@ -1597,15 +1590,45 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args)
 	{
 		// If a flush request is already enqueued, do nothing
 		if (m_flush_requests.pending())
+		{
 			return;
+		}

+		// If the result is not going to be read by CELL, do nothing
+		const auto ref_addr = static_cast<u32>(reinterpret_cast<u64>(args)); // args is the sync address packed into a pointer by the caller; go through a 64-bit integer because a direct pointer -> u32 reinterpret_cast is ill-formed on 64-bit targets
+		if (!zcull_ctrl->is_query_result_urgent(ref_addr))
+		{
+			// No effect on CELL behaviour, it will be faster to handle this in RSX code
+			return;
+		}
+
+		// OK, cell will be accessing the results, probably.
+		// Try to avoid flush spam, it is more costly to flush the CB than it is to just upload the vertex data
+		// This is supposed to be an optimization after all.
+		const auto now = rsx::uclock();
+		if ((now - m_last_cond_render_eval_hint) > 50)
+		{
 			// Schedule a sync on the next loop iteration
 			m_flush_requests.post(false);
 			m_flush_requests.remove_one();
+		}
+
+		m_last_cond_render_eval_hint = now;
 		break;
 	}
 	case rsx::FIFO_hint::hint_zcull_sync:
 	{
+		// Check if the required report is synced to this CB
+		auto occlusion_info = static_cast<rsx::reports::occlusion_query_info*>(args);
+		auto& data = m_occlusion_map[occlusion_info->driver_handle];
+
+		// NOTE: Currently, a special condition exists where the indices can be empty even with active draw count.
+		// This is caused by async compiler and should be removed when ubershaders are added in
+		if (!data.is_current(m_current_command_buffer) || data.indices.empty())
+		{
+			return;
+		}
+
 		// Unavoidable hard sync coming up, flush immediately
 		// This heavyweight hint should be used with caution
 		std::lock_guard lock(m_flush_queue_mutex);
--- a/rpcs3/Emu/RSX/VK/VKGSRender.h
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.h
@ -173,6 +173,8 @@ private:
 	shared_mutex m_flush_queue_mutex;
 	vk::flush_request_task m_flush_requests;

+	ullong m_last_cond_render_eval_hint = 0;
+
 	// Offloader thread deadlock recovery
 	rsx::atomic_bitmask_t<flush_queue_state> m_queue_status;
 	utils::address_range m_offloader_fault_range;
--- a/rpcs3/Emu/RSX/VK/VKPresent.cpp
+++ b/rpcs3/Emu/RSX/VK/VKPresent.cpp
@ -747,6 +747,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)

 		if (g_cfg.video.overlay)
 		{
+			// TODO: Move this to native overlay! It is both faster and easier to manage
 			if (!m_text_writer)
 			{
 				auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
@ -758,11 +759,12 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)

 			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4,  0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load:                 %3d%%", get_load()));
 			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls));
-			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4,  36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time));
-			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4,  54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time));
-			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4,  72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time));
-			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4,  90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time));
-			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time));
+			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("submits: %20d", info.stats.submit_count));
+			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time));
+			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time));
+			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time));
+			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time));
+			m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 126, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time));

 			const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
 			const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);