diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
index 4002252958..3524de5fc1 100644
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -585,6 +585,8 @@ namespace rsx
 		bool supports_hw_renormalization;    // Should be true on NV hardware which matches PS3 texture renormalization behaviour
 		bool supports_hw_a2one;              // Alpha to one
 		bool supports_hw_conditional_render; // Conditional render
+		bool supports_passthrough_dma;       // DMA passthrough
+		bool supports_asynchronous_compute;  // Async compute
 	};
 
 	struct sampled_image_descriptor_base;
@@ -687,6 +689,7 @@ namespace rsx
 		GcmZcullInfo zculls[limits::zculls_count];
 
 		void capture_frame(const std::string &name);
+		const backend_configuration& get_backend_config() const { return backend_config; }
 
 	public:
 		std::shared_ptr<named_thread<class ppu_thread>> intr_thread;
diff --git a/rpcs3/Emu/RSX/VK/VKDMA.cpp b/rpcs3/Emu/RSX/VK/VKDMA.cpp
index b3f0b6c0e9..14f31919fc 100644
--- a/rpcs3/Emu/RSX/VK/VKDMA.cpp
+++ b/rpcs3/Emu/RSX/VK/VKDMA.cpp
@@ -4,6 +4,7 @@
 #include "vkutils/device.h"
 
 #include "Emu/Memory/vm.h"
+#include "Emu/RSX/RSXThread.h"
 #include "Utilities/mutex.h"
 #include "util/asm.hpp"
 
@@ -255,30 +256,16 @@ namespace vk
 
 	void create_dma_block(std::unique_ptr<dma_block>& block, u32 base_address, usz expected_length)
 	{
-		const auto vendor = g_render_device->gpu().get_driver_vendor();
 		bool allow_host_buffers = false;
-
-#if defined(_WIN32)
-		if (g_cfg.video.vk.asynchronous_texture_streaming)
+		if (rsx::get_current_renderer()->get_backend_config().supports_passthrough_dma)
 		{
-			if (vendor == driver_vendor::NVIDIA)
-			{
-				allow_host_buffers = (vk::get_chip_family() != chip_class::NV_mobile_kepler) ?
+			allow_host_buffers =
+#if defined(_WIN32)
+				(vk::get_driver_vendor() == driver_vendor::NVIDIA) ?
 					test_host_pointer(base_address, expected_length) :
-					false;
-			}
-			else
-			{
-				allow_host_buffers = true;
-			}
-#elif defined(__linux__)
-		// Anything running on AMDGPU kernel driver will not work due to the check for fd-backed memory allocations
-		allow_host_buffers = (vendor != driver_vendor::AMD && vendor != driver_vendor::RADV);
-#else
-		// Anything running on AMDGPU kernel driver will not work due to the check for fd-backed memory allocations
-		// Intel chipsets woulf fail in most cases and DRM_IOCTL_i915_GEM_USERPTR unimplemented
-		allow_host_buffers = (vendor != driver_vendor::AMD && vendor != driver_vendor::RADV && vendor != driver_vendor::INTEL);
 #endif
+				true;
+
 			if (!allow_host_buffers)
 			{
 				rsx_log.trace("Requested DMA passthrough for block 0x%x->0x%x but this was not possible.",
@@ -286,7 +273,7 @@ namespace vk
 			}
 		}
 
-		if (allow_host_buffers && g_render_device->get_external_memory_host_support())
+		if (allow_host_buffers)
 		{
 			block.reset(new dma_block_EXT());
 		}
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index ccb4089bc8..da9d804de9 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -392,25 +392,6 @@ VKGSRender::VKGSRender() : GSRender()
 	m_device = const_cast<vk::render_device*>(&m_swapchain->get_device());
 	vk::set_current_renderer(m_swapchain->get_device());
 
-	// Device-specific overrides
-	if (g_cfg.video.vk.asynchronous_texture_streaming)
-	{
-		if (m_device->get_graphics_queue() == m_device->get_transfer_queue())
-		{
-			rsx_log.error("Cannot run graphics and async transfer in the same queue. Async uploads are disabled. This is a limitation of your GPU");
-			g_cfg.video.vk.asynchronous_texture_streaming.set(false);
-		}
-
-		if (auto chip_family = vk::get_chip_family();
-			chip_family == vk::chip_class::NV_kepler ||
-			chip_family == vk::chip_class::NV_mobile_kepler || // TODO: Deprecate this classification, it just complicates things
-			chip_family == vk::chip_class::NV_maxwell)
-		{
-			rsx_log.error("Older NVIDIA cards do not meet requirements for asynchronous compute due to some driver fakery.");
-			g_cfg.video.vk.asynchronous_texture_streaming.set(false);
-		}
-	}
-
 	m_swapchain_dims.width = m_frame->client_width();
 	m_swapchain_dims.height = m_frame->client_height();
 
@@ -522,8 +503,6 @@ VKGSRender::VKGSRender() : GSRender()
 
 	m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.91");
 
-	g_fxo->init<vk::AsyncTaskScheduler>();
-
 	open_command_buffer();
 
 	for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i)
@@ -567,6 +546,56 @@ VKGSRender::VKGSRender() : GSRender()
 
 	// Relaxed query synchronization
 	backend_config.supports_hw_conditional_render = !!g_cfg.video.relaxed_zcull_sync;
+
+	// Async compute and related operations
+	if (g_cfg.video.vk.asynchronous_texture_streaming)
+	{
+		// Optimistic, enable async compute and passthrough DMA
+		backend_config.supports_passthrough_dma = m_device->get_external_memory_host_support();
+		backend_config.supports_asynchronous_compute = true;
+
+		if (m_device->get_graphics_queue() == m_device->get_transfer_queue())
+		{
+			rsx_log.error("Cannot run graphics and async transfer in the same queue. Async uploads are disabled. This is a limitation of your GPU");
+			backend_config.supports_asynchronous_compute = false;
+		}
+
+		switch (vk::get_driver_vendor())
+		{
+		case vk::driver_vendor::NVIDIA:
+			if (auto chip_family = vk::get_chip_family();
+				chip_family == vk::chip_class::NV_kepler ||
+				chip_family == vk::chip_class::NV_mobile_kepler || // TODO: Deprecate this classification, it just complicates things
+				chip_family == vk::chip_class::NV_maxwell)
+			{
+				rsx_log.error("Older NVIDIA cards do not meet requirements for asynchronous compute due to some driver fakery.");
+				backend_config.supports_asynchronous_compute = false;
+			}
+			break;
+#if !defined(_WIN32)
+		// Anything running on AMDGPU kernel driver will not work due to the check for fd-backed memory allocations
+		case vk::driver_vendor::RADV:
+		case vk::driver_vendor::AMD:
+#if !defined(__linux__)
+		// Intel chipsets would fail on BSD in most cases and DRM_IOCTL_i915_GEM_USERPTR unimplemented
+		case vk::driver_vendor::INTEL:
+			if (backend_config.supports_passthrough_dma)
+			{
+				rsx_log.error("AMDGPU kernel driver on linux and INTEL driver on some platforms cannot support passthrough DMA buffers.");
+				backend_config.supports_passthrough_dma = false;
+			}
+#endif
+			break;
+#endif
+		default: break;
+		}
+
+		if (backend_config.supports_asynchronous_compute)
+		{
+			// Run only if async compute can be used.
+			g_fxo->init<vk::AsyncTaskScheduler>();
+		}
+	}
 }
 
 VKGSRender::~VKGSRender()
@@ -578,7 +607,10 @@ VKGSRender::~VKGSRender()
 	}
 
 	// Globals. TODO: Refactor lifetime management
-	g_fxo->get<vk::AsyncTaskScheduler>().kill();
+	if (backend_config.supports_asynchronous_compute)
+	{
+		g_fxo->get<vk::AsyncTaskScheduler>().kill();
+	}
 
 	//Wait for device to finish up with resources
 	vkDeviceWaitIdle(*m_device);
diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h
index 8b18342572..c211f5a001 100644
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@@ -50,7 +50,6 @@ namespace vk
 	bool fence_reset_disabled();
 	bool emulate_conditional_rendering();
 	VkFlags get_heap_compatible_buffer_types();
-	driver_vendor get_driver_vendor();
 
 	// Sync helpers around vkQueueSubmit
 	void acquire_global_submit_lock();
diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h
index 41b81467e2..903d709cac 100644
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.h
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h
@@ -934,7 +934,7 @@ namespace vk
 			}
 
 			rsx::flags32_t upload_command_flags = initialize_image_layout |
-				(g_cfg.video.vk.asynchronous_texture_streaming? upload_contents_async : upload_contents_inline);
+				(rsx::get_current_renderer()->get_backend_config().supports_asynchronous_compute ? upload_contents_async : upload_contents_inline);
 
 			vk::upload_image(cmd, image, subresource_layout, gcm_format, input_swizzled, mipmaps, image->aspect(),
 				*m_texture_upload_heap, upload_heap_align_default, upload_command_flags);
diff --git a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
index 1f158b7c69..9327ab45d3 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
@@ -33,6 +33,8 @@ namespace vk
 		INTEL
 	};
 
+	driver_vendor get_driver_vendor();
+
 	struct chip_family_table
 	{
 		chip_class default_ = chip_class::unknown;
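
Note on the overall shape of the patch: driver and vendor capability checks now run once in the VKGSRender constructor and are cached in backend_config, while VKDMA.cpp and the texture cache only read the cached flags through rsx::get_current_renderer()->get_backend_config(). The standalone C++ sketch below models that producer/consumer split with simplified stand-in types. Only backend_configuration, supports_passthrough_dma, supports_asynchronous_compute and get_backend_config() mirror names from the patch; renderer_model, its constructor parameters and main() are purely illustrative and not part of the RPCS3 code.

#include <cstdio>

// Simplified stand-in for the structure extended by this patch (illustrative only).
struct backend_configuration
{
	bool supports_passthrough_dma = false;      // DMA passthrough
	bool supports_asynchronous_compute = false; // Async compute
};

class renderer_model
{
	backend_configuration backend_config{};

public:
	// Capability probing happens once at construction, the way VKGSRender now
	// fills backend_config after inspecting the device and driver.
	renderer_model(bool external_memory_host, bool separate_transfer_queue)
	{
		backend_config.supports_passthrough_dma = external_memory_host;
		backend_config.supports_asynchronous_compute = separate_transfer_queue;
	}

	// Mirrors the accessor added to rsx::thread in RSXThread.h.
	const backend_configuration& get_backend_config() const { return backend_config; }
};

int main()
{
	// Consumers (the DMA helpers, the texture cache) only read the cached flags
	// instead of re-running vendor or chipset checks at use time.
	const renderer_model renderer(/*external_memory_host=*/true, /*separate_transfer_queue=*/false);
	const auto& config = renderer.get_backend_config();

	std::printf("passthrough DMA: %d, async compute: %d\n",
		config.supports_passthrough_dma, config.supports_asynchronous_compute);
	return 0;
}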