From c7140df5f802e92b3a958dd6bab52747f151f439 Mon Sep 17 00:00:00 2001 From: nastys <@a.a> Date: Tue, 18 Jan 2022 00:25:59 +0100 Subject: [PATCH] Initial support for Apple GPUs --- rpcs3/Emu/RSX/VK/VKCompute.cpp | 7 ++++++- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 5 +++++ rpcs3/Emu/RSX/VK/VKHelpers.cpp | 3 +++ rpcs3/Emu/RSX/VK/VKRenderTargets.h | 3 ++- rpcs3/Emu/RSX/VK/vkutils/chip_class.h | 6 ++++-- rpcs3/Emu/RSX/VK/vkutils/device.cpp | 7 +++++++ rpcs3/Emu/RSX/VK/vkutils/swapchain.hpp | 1 + rpcs3/Emu/system_config.h | 11 +++++++---- 8 files changed, 35 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKCompute.cpp b/rpcs3/Emu/RSX/VK/VKCompute.cpp index e3f3560cdd..a4dcb838f6 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.cpp +++ b/rpcs3/Emu/RSX/VK/VKCompute.cpp @@ -79,8 +79,8 @@ namespace vk case vk::driver_vendor::NVIDIA: // Warps are multiples of 32. Increasing kernel depth seems to hurt performance (Nier, Big Duck sample) unroll_loops = true; - optimal_group_size = 32; optimal_kernel_size = 1; + optimal_group_size = 32; break; case vk::driver_vendor::AMD: case vk::driver_vendor::RADV: @@ -89,6 +89,11 @@ namespace vk optimal_kernel_size = 1; optimal_group_size = 64; break; + case vk::driver_vendor::MVK: + unroll_loops = true; + optimal_kernel_size = 1; + optimal_group_size = 256; + break; } const auto& gpu = vk::g_render_device->gpu(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 24d9eada0f..ca39562243 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -603,6 +603,11 @@ VKGSRender::VKGSRender() : GSRender() } break; #endif + case vk::driver_vendor::MVK: + // Async compute crashes immediately on Apple GPUs + rsx_log.error("Apple GPUs are incompatible with the current implementation of asynchronous texture decoding."); + backend_config.supports_asynchronous_compute = false; + break; default: break; } diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index b798f4486e..69a91264d7 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -124,6 +124,9 @@ namespace vk case driver_vendor::ANV: // INTEL vulkan drivers are mostly OK, workarounds are applied when creating the device break; + case driver_vendor::MVK: + // Apple GPUs / moltenVK need more testing + break; default: rsx_log.warning("Unsupported device: %s", gpu_name); } diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index ba028523c3..01794ff317 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -115,6 +115,7 @@ namespace vk [[ fallthrough ]]; case driver_vendor::NVIDIA: case driver_vendor::INTEL: + case driver_vendor::MVK: break; } @@ -453,4 +454,4 @@ namespace vk void free_invalidated(vk::command_buffer& cmd, rsx::problem_severity memory_pressure); }; } -//h \ No newline at end of file +//h diff --git a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h index 2def2d58ac..f6229f7d89 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h +++ b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h @@ -20,7 +20,8 @@ namespace vk NV_pascal, NV_volta, NV_turing, - NV_ampere + NV_ampere, + MVK_apple }; enum class driver_vendor @@ -30,7 +31,8 @@ namespace vk NVIDIA, RADV, INTEL, - ANV + ANV, + MVK }; driver_vendor get_driver_vendor(); diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp index 4837e89966..991d951f9e 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp @@ -183,9 +183,16 @@ namespace vk driver_vendor physical_device::get_driver_vendor() const { +#ifdef __APPLE__ + // moltenVK currently returns DRIVER_ID_MOLTENVK (0). + // For now, assume the vendor is moltenVK on Apple devices. + return driver_vendor::MVK; +#endif + if (!driver_properties.driverID) { const auto gpu_name = get_name(); + if (gpu_name.find("Radeon") != umax) { return driver_vendor::AMD; diff --git a/rpcs3/Emu/RSX/VK/vkutils/swapchain.hpp b/rpcs3/Emu/RSX/VK/vkutils/swapchain.hpp index c948a09825..e6ebd01ee6 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/swapchain.hpp +++ b/rpcs3/Emu/RSX/VK/vkutils/swapchain.hpp @@ -522,6 +522,7 @@ namespace vk case driver_vendor::AMD: case driver_vendor::INTEL: case driver_vendor::RADV: + case driver_vendor::MVK: break; case driver_vendor::ANV: case driver_vendor::NVIDIA: diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index f313c0d452..955d8cbb4e 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -73,11 +73,10 @@ struct cfg_root : cfg::node cfg::uint64 tx_limit2_ns{this, "TSX Transaction Second Limit", 2000}; // In nanoseconds cfg::_int<10, 3000> clocks_scale{ this, "Clocks scale", 100 }; // Changing this from 100 (percentage) may affect game speed in unexpected ways - cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy", -#ifdef __linux__ - sleep_timers_accuracy_level::_as_host, true }; +#if defined (__linux__) || defined (__APPLE__) + cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true }; #else - sleep_timers_accuracy_level::_usleep, true }; + cfg::_enum sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_usleep, true }; #endif cfg::uint64 perf_report_threshold{this, "Performance Report Threshold", 500, true}; // In µs, 0.5ms = default, 0 = everything @@ -134,7 +133,11 @@ struct cfg_root : cfg::node cfg::_bool disable_vulkan_mem_allocator{ this, "Disable Vulkan Memory Allocator", false }; cfg::_bool full_rgb_range_output{ this, "Use full RGB output range", true, true }; // Video out dynamic range cfg::_bool strict_texture_flushing{ this, "Strict Texture Flushing", false }; +#ifdef __APPLE__ + cfg::_bool disable_native_float16{ this, "Disable native float16 support", true }; +#else cfg::_bool disable_native_float16{ this, "Disable native float16 support", false }; +#endif cfg::_bool multithreaded_rsx{ this, "Multithreaded RSX", false }; cfg::_bool relaxed_zcull_sync{ this, "Relaxed ZCULL Sync", false }; cfg::_bool enable_3d{ this, "Enable 3D", false };