rsx/vk: Use backend configuration to track options that can be modified on a per-device basis

2024-11-22 10:42:36 +01:00 · 2021-05-25 20:04:45 +03:00 · 2021-05-25 20:04:45 +03:00 · a736350859
commit a736350859
parent 156b092dc9
6 changed files with 68 additions and 45 deletions
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@ -585,6 +585,8 @@ namespace rsx
 		bool supports_hw_renormalization;    // Should be true on NV hardware which matches PS3 texture renormalization behaviour
 		bool supports_hw_a2one;              // Alpha to one
 		bool supports_hw_conditional_render; // Conditional render
+		bool supports_passthrough_dma;       // DMA passthrough
+		bool supports_asynchronous_compute;  // Async compute
 	};

 	struct sampled_image_descriptor_base;
@ -687,6 +689,7 @@ namespace rsx
 		GcmZcullInfo zculls[limits::zculls_count];

 		void capture_frame(const std::string &name);
+		const backend_configuration& get_backend_config() const { return backend_config; }

 	public:
 		std::shared_ptr<named_thread<class ppu_thread>> intr_thread;
--- a/rpcs3/Emu/RSX/VK/VKDMA.cpp
+++ b/rpcs3/Emu/RSX/VK/VKDMA.cpp
@ -4,6 +4,7 @@
 #include "vkutils/device.h"

 #include "Emu/Memory/vm.h"
+#include "Emu/RSX/RSXThread.h"
 #include "Utilities/mutex.h"

 #include "util/asm.hpp"
@ -255,30 +256,16 @@ namespace vk

 	void create_dma_block(std::unique_ptr<dma_block>& block, u32 base_address, usz expected_length)
 	{
-		const auto vendor = g_render_device->gpu().get_driver_vendor();
 		bool allow_host_buffers = false;
-
-#if defined(_WIN32)
-		if (g_cfg.video.vk.asynchronous_texture_streaming)
+		if (rsx::get_current_renderer()->get_backend_config().supports_passthrough_dma)
 		{
-			if (vendor == driver_vendor::NVIDIA)
-			{
-				allow_host_buffers = (vk::get_chip_family() != chip_class::NV_mobile_kepler) ?
+			allow_host_buffers =
+#if defined(_WIN32)
+				(vk::get_driver_vendor() == driver_vendor::NVIDIA) ?
 					test_host_pointer(base_address, expected_length) :
-					false;
-			}
-			else
-			{
-				allow_host_buffers = true;
-			}
-#elif defined(__linux__)
-			// Anything running on AMDGPU kernel driver will not work due to the check for fd-backed memory allocations
-			allow_host_buffers = (vendor != driver_vendor::AMD && vendor != driver_vendor::RADV);
-#else
-			// Anything running on AMDGPU kernel driver will not work due to the check for fd-backed memory allocations
-			// Intel chipsets woulf fail in most cases and DRM_IOCTL_i915_GEM_USERPTR unimplemented
-			allow_host_buffers = (vendor != driver_vendor::AMD && vendor != driver_vendor::RADV && vendor != driver_vendor::INTEL);
 #endif
+				true;
+
 			if (!allow_host_buffers)
 			{
 				rsx_log.trace("Requested DMA passthrough for block 0x%x->0x%x but this was not possible.",
@ -286,7 +273,7 @@ namespace vk
 			}
 		}

-		if (allow_host_buffers && g_render_device->get_external_memory_host_support())
+		if (allow_host_buffers)
 		{
 			block.reset(new dma_block_EXT());
 		}
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@ -392,25 +392,6 @@ VKGSRender::VKGSRender() : GSRender()
 	m_device = const_cast<vk::render_device*>(&m_swapchain->get_device());
 	vk::set_current_renderer(m_swapchain->get_device());

-	// Device-specific overrides
-	if (g_cfg.video.vk.asynchronous_texture_streaming)
-	{
-		if (m_device->get_graphics_queue() == m_device->get_transfer_queue())
-		{
-			rsx_log.error("Cannot run graphics and async transfer in the same queue. Async uploads are disabled. This is a limitation of your GPU");
-			g_cfg.video.vk.asynchronous_texture_streaming.set(false);
-		}
-
-		if (auto chip_family = vk::get_chip_family();
-			chip_family == vk::chip_class::NV_kepler ||
-			chip_family == vk::chip_class::NV_mobile_kepler || // TODO: Deprecate this classification, it just complicates things
-			chip_family == vk::chip_class::NV_maxwell)
-		{
-			rsx_log.error("Older NVIDIA cards do not meet requirements for asynchronous compute due to some driver fakery.");
-			g_cfg.video.vk.asynchronous_texture_streaming.set(false);
-		}
-	}
-
 	m_swapchain_dims.width = m_frame->client_width();
 	m_swapchain_dims.height = m_frame->client_height();

@ -522,8 +503,6 @@ VKGSRender::VKGSRender() : GSRender()

 	m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.91");

-	g_fxo->init<vk::async_scheduler_thread>();
-
 	open_command_buffer();

 	for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i)
@ -567,6 +546,56 @@ VKGSRender::VKGSRender() : GSRender()

 	// Relaxed query synchronization
 	backend_config.supports_hw_conditional_render = !!g_cfg.video.relaxed_zcull_sync;
+
+	// Async compute and related operations
+	if (g_cfg.video.vk.asynchronous_texture_streaming)
+	{
+		// Optimistic, enable async compute and passthrough DMA
+		backend_config.supports_passthrough_dma = m_device->get_external_memory_host_support();
+		backend_config.supports_asynchronous_compute = true;
+
+		if (m_device->get_graphics_queue() == m_device->get_transfer_queue())
+		{
+			rsx_log.error("Cannot run graphics and async transfer in the same queue. Async uploads are disabled. This is a limitation of your GPU");
+			backend_config.supports_asynchronous_compute = false;
+		}
+
+		switch (vk::get_driver_vendor()) // Per-vendor overrides of the optimistic defaults chosen above
+		{
+		case vk::driver_vendor::NVIDIA:
+			if (auto chip_family = vk::get_chip_family();
+				chip_family == vk::chip_class::NV_kepler ||
+				chip_family == vk::chip_class::NV_mobile_kepler || // TODO: Deprecate this classification, it just complicates things
+				chip_family == vk::chip_class::NV_maxwell)
+			{
+				rsx_log.error("Older NVIDIA cards do not meet requirements for asynchronous compute due to some driver fakery.");
+				backend_config.supports_asynchronous_compute = false;
+			}
+			break;
+#if !defined(_WIN32)
+			// Anything running on AMDGPU kernel driver will not work due to the check for fd-backed memory allocations (all non-Windows platforms, Linux included)
+		case vk::driver_vendor::RADV:
+		case vk::driver_vendor::AMD:
+#if !defined(__linux__)
+			// Intel chipsets would fail on BSD in most cases and DRM_IOCTL_i915_GEM_USERPTR unimplemented
+		case vk::driver_vendor::INTEL:
+#endif // Only the INTEL label is platform-specific; the body below is shared with RADV/AMD
+			if (backend_config.supports_passthrough_dma)
+			{
+				rsx_log.error("AMDGPU kernel driver on linux and INTEL driver on some platforms cannot support passthrough DMA buffers.");
+				backend_config.supports_passthrough_dma = false;
+			}
+			break;
+#endif
+		default: break;
+		}
+
+		if (backend_config.supports_asynchronous_compute)
+		{
+			// Run only if async compute can be used.
+			g_fxo->init<vk::async_scheduler_thread>();
+		}
+	}
 }

 VKGSRender::~VKGSRender()
@ -578,7 +607,10 @@ VKGSRender::~VKGSRender()
 	}

 	// Globals. TODO: Refactor lifetime management
-	g_fxo->get<vk::async_scheduler_thread>().kill();
+	if (backend_config.supports_asynchronous_compute)
+	{
+		g_fxo->get<vk::async_scheduler_thread>().kill();
+	}

 	//Wait for device to finish up with resources
 	vkDeviceWaitIdle(*m_device);
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@ -50,7 +50,6 @@ namespace vk
 	bool fence_reset_disabled();
 	bool emulate_conditional_rendering();
 	VkFlags get_heap_compatible_buffer_types();
-	driver_vendor get_driver_vendor();

 	// Sync helpers around vkQueueSubmit
 	void acquire_global_submit_lock();
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.h
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h
@ -934,7 +934,7 @@ namespace vk
 			}

 			rsx::flags32_t upload_command_flags = initialize_image_layout |
-				(g_cfg.video.vk.asynchronous_texture_streaming? upload_contents_async : upload_contents_inline);
+				(rsx::get_current_renderer()->get_backend_config().supports_asynchronous_compute ? upload_contents_async : upload_contents_inline);

 			vk::upload_image(cmd, image, subresource_layout, gcm_format, input_swizzled, mipmaps, image->aspect(),
 				*m_texture_upload_heap, upload_heap_align_default, upload_command_flags);
--- a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
@ -33,6 +33,8 @@ namespace vk
 		INTEL
 	};

+	driver_vendor get_driver_vendor();
+
 	struct chip_family_table
 	{
 		chip_class default_ = chip_class::unknown;