From 57196f0504138ff5d1e7074e34d679012bd0bd4b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 9 Jun 2019 12:48:19 +0300 Subject: [PATCH] vk: Move frame present synchronization to the driver - Just use a semaphore and let the driver handle it instead of manual framepacing. We lose framepace control but drivers have matured in the past few years so it should work fine. --- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 181 ++++++++++++------------------ rpcs3/Emu/RSX/VK/VKGSRender.h | 17 ++- rpcs3/Emu/RSX/VK/VKHelpers.h | 32 ++++-- rpcs3/Emu/RSX/VK/VKTextureCache.h | 4 +- 4 files changed, 109 insertions(+), 125 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 9d0b48ebac..65e5e5a5ef 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -505,7 +505,8 @@ VKGSRender::VKGSRender() : GSRender() for (auto &ctx : frame_context_storage) { - vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_semaphore); + vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_wait_semaphore); + vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.acquire_signal_semaphore); ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast(sizes.size()), DESCRIPTOR_MAX_DRAW_CALLS, 1); } @@ -616,7 +617,8 @@ VKGSRender::~VKGSRender() //NOTE: aux_context uses descriptor pools borrowed from the main queues and any allocations will be automatically freed when pool is destroyed for (auto &ctx : frame_context_storage) { - vkDestroySemaphore((*m_device), ctx.present_semaphore, nullptr); + vkDestroySemaphore((*m_device), ctx.present_wait_semaphore, nullptr); + vkDestroySemaphore((*m_device), ctx.acquire_signal_semaphore, nullptr); ctx.descriptor_pool.destroy(); ctx.buffer_views_to_clean.clear(); @@ -826,15 +828,11 @@ void VKGSRender::check_heap_status(u32 flags) std::chrono::time_point submit_start = steady_clock::now(); frame_context_t *target_frame = nullptr; - u64 earliest_sync_time = 
UINT64_MAX; - for (s32 i = 0; i < VK_MAX_ASYNC_FRAMES; ++i) + if (!m_queued_frames.empty()) { - auto ctx = &frame_context_storage[i]; - if (ctx->swap_command_buffer) + if (m_current_frame != &m_aux_frame_context) { - if (ctx->last_frame_sync_time > m_last_heap_sync_time && - ctx->last_frame_sync_time < earliest_sync_time) - target_frame = ctx; + target_frame = m_queued_frames.front(); } } @@ -857,14 +855,8 @@ void VKGSRender::check_heap_status(u32 flags) } else { - target_frame->swap_command_buffer->poke(); - while (target_frame->swap_command_buffer->pending) - { - if (!target_frame->swap_command_buffer->poke()) - std::this_thread::yield(); - } - - process_swap_request(target_frame, true); + // Flush the frame context + frame_context_cleanup(target_frame, true); } std::chrono::time_point submit_end = steady_clock::now(); @@ -874,7 +866,7 @@ void VKGSRender::check_heap_status(u32 flags) void VKGSRender::check_present_status() { - if (!m_queued_frames.empty()) + while (!m_queued_frames.empty()) { auto ctx = m_queued_frames.front(); if (ctx->swap_command_buffer->pending) @@ -885,7 +877,7 @@ void VKGSRender::check_present_status() } } - process_swap_request(ctx, true); + frame_context_cleanup(ctx, true); } } @@ -1061,10 +1053,6 @@ void VKGSRender::begin() m_current_frame->flags &= ~frame_context_state::dirty; } - else - { - check_present_status(); - } } void VKGSRender::update_draw_state() @@ -2177,24 +2165,27 @@ void VKGSRender::clear_surface(u32 mask) void VKGSRender::flush_command_queue(bool hard_sync) { - close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence); + close_and_submit_command_buffer(m_current_command_buffer->submit_fence); if (hard_sync) { - // swap handler checks the pending flag, so call it here - process_swap_request(m_current_frame); - - //wait for the latest instruction to execute + // wait for the latest instruction to execute m_current_command_buffer->pending = true; m_current_command_buffer->reset(); - //Clear all command 
buffer statuses + // Clear all command buffer statuses for (auto &cb : m_primary_cb_list) { if (cb.pending) cb.poke(); } + // Drain present queue + while (!m_queued_frames.empty()) + { + check_present_status(); + } + m_flush_requests.clear_pending_flag(); } else @@ -2206,14 +2197,10 @@ void VKGSRender::flush_command_queue(bool hard_sync) m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT; m_current_command_buffer = &m_primary_cb_list[m_current_cb_index]; - // Soft sync if a present has not yet occured before consuming the wait event - for (auto &ctx : frame_context_storage) - { - if (ctx.swap_command_buffer == m_current_command_buffer) - process_swap_request(&ctx, true); - } - m_current_command_buffer->reset(); + + // Just in case a queued frame holds a ref to this cb, drain the present queue + check_present_status(); } open_command_buffer(); @@ -2276,7 +2263,7 @@ void VKGSRender::present(frame_context_t *ctx) if (!present_surface_dirty_flag) { - switch (VkResult error = m_swapchain->present(ctx->present_image)) + switch (VkResult error = m_swapchain->present(ctx->present_wait_semaphore, ctx->present_image)) { case VK_SUCCESS: break; @@ -2292,62 +2279,44 @@ void VKGSRender::present(frame_context_t *ctx) // Presentation image released; reset value ctx->present_image = UINT32_MAX; - - // Remove from queued list - while (!m_queued_frames.empty()) - { - auto frame = m_queued_frames.front(); - m_queued_frames.pop_front(); - - if (frame == ctx) - { - break; - } - } - - vk::advance_completed_frame_counter(); } void VKGSRender::queue_swap_request() { - // Buffer the swap request and return - if (m_current_frame->swap_command_buffer && - m_current_frame->swap_command_buffer->pending) - { - // Its probable that no actual drawing took place - process_swap_request(m_current_frame); - } - + verify(HERE), !m_current_frame->swap_command_buffer; m_current_frame->swap_command_buffer = m_current_command_buffer; if (m_swapchain->is_headless()) { 
m_swapchain->end_frame(*m_current_command_buffer, m_current_frame->present_image); - close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence); + close_and_submit_command_buffer(m_current_command_buffer->submit_fence); } else { - close_and_submit_command_buffer({ m_current_frame->present_semaphore }, - m_current_command_buffer->submit_fence, + close_and_submit_command_buffer(m_current_command_buffer->submit_fence, + m_current_frame->acquire_signal_semaphore, + m_current_frame->present_wait_semaphore, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT); } + // Set up a present request for this frame as well + present(m_current_frame); + m_current_frame->swap_command_buffer->pending = true; - //Grab next cb in line and make it usable + // Grab next cb in line and make it usable m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT; m_current_command_buffer = &m_primary_cb_list[m_current_cb_index]; m_current_command_buffer->reset(); - //Set up new pointers for the next frame + // Set up new pointers for the next frame advance_queued_frames(); open_command_buffer(); } -void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources) +void VKGSRender::frame_context_cleanup(frame_context_t *ctx, bool free_resources) { - if (!ctx->swap_command_buffer) - return; + verify(HERE), ctx->swap_command_buffer; if (ctx->swap_command_buffer->pending) { @@ -2362,9 +2331,6 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources) free_resources = true; } - //Always present - present(ctx); - if (free_resources) { if (g_cfg.video.overlay) @@ -2425,6 +2391,20 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources) } ctx->swap_command_buffer = nullptr; + + // Remove from queued list + while (!m_queued_frames.empty()) + { + auto frame = m_queued_frames.front(); + m_queued_frames.pop_front(); + + if (frame == ctx) + { + break; + } + } + + 
vk::advance_completed_frame_counter(); } void VKGSRender::do_local_task(rsx::FIFO_state state) @@ -2797,7 +2777,7 @@ void VKGSRender::write_buffers() { } -void VKGSRender::close_and_submit_command_buffer(const std::vector &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) +void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkPipelineStageFlags pipeline_stage_flags) { if (m_attrib_ring_info.dirty() || m_fragment_env_ring_info.dirty() || @@ -2823,12 +2803,16 @@ void VKGSRender::close_and_submit_command_buffer(const std::vector m_texture_upload_buffer_ring_info.sync(m_secondary_command_buffer); m_secondary_command_buffer.end(); - m_secondary_command_buffer.submit(m_swapchain->get_graphics_queue(), {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + + m_secondary_command_buffer.submit(m_swapchain->get_graphics_queue(), + VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); } m_current_command_buffer->end(); m_current_command_buffer->tag(); - m_current_command_buffer->submit(m_swapchain->get_graphics_queue(), semaphores, fence, pipeline_stage_flags); + + m_current_command_buffer->submit(m_swapchain->get_graphics_queue(), + wait_semaphore, signal_semaphore, fence, pipeline_stage_flags); } void VKGSRender::open_command_buffer() @@ -3065,19 +3049,14 @@ void VKGSRender::reinitialize_swapchain() const auto new_width = m_frame->client_width(); const auto new_height = m_frame->client_height(); - //Reject requests to acquire new swapchain if the window is minimized - //The NVIDIA driver will spam VK_ERROR_OUT_OF_DATE_KHR if you try to acquire an image from the swapchain and the window is minimized - //However, any attempt to actually renew the swapchain will crash the driver with VK_ERROR_DEVICE_LOST while the window is in this state + // Reject requests to acquire new swapchain if the window is minimized + // The NVIDIA driver will spam 
VK_ERROR_OUT_OF_DATE_KHR if you try to acquire an image from the swapchain and the window is minimized + // However, any attempt to actually renew the swapchain will crash the driver with VK_ERROR_DEVICE_LOST while the window is in this state if (new_width == 0 || new_height == 0) return; - /** - * Waiting for the commands to process does not work reliably as the fence can be signaled before swap images are released - * and there are no explicit methods to ensure that the presentation engine is not using the images at all. - */ - - //NOTE: This operation will create a hard sync point - close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence); + // NOTE: This operation will create a hard sync point + close_and_submit_command_buffer(m_current_command_buffer->submit_fence); m_current_command_buffer->pending = true; m_current_command_buffer->reset(); @@ -3087,13 +3066,11 @@ void VKGSRender::reinitialize_swapchain() continue; // Release present image by presenting it - ctx.swap_command_buffer->wait(FRAME_PRESENT_TIMEOUT); - ctx.swap_command_buffer = nullptr; - present(&ctx); + frame_context_cleanup(&ctx, true); } - // Remove any old refs to the old images as they are about to be destroyed - //m_framebuffers_to_clean; + // Drain all the queues + vkDeviceWaitIdle(*m_device); // Rebuild swapchain. 
Old swapchain destruction is handled by the init_swapchain call if (!m_swapchain->init(new_width, new_height)) @@ -3108,7 +3085,7 @@ void VKGSRender::reinitialize_swapchain() m_client_width = new_width; m_client_height = new_height; - //Prepare new swapchain images for use + // Prepare new swapchain images for use open_command_buffer(); for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i) @@ -3131,7 +3108,7 @@ void VKGSRender::reinitialize_swapchain() vkCreateFence((*m_device), &infos, nullptr, &resize_fence); //Flush the command buffer - close_and_submit_command_buffer({}, resize_fence); + close_and_submit_command_buffer(resize_fence); vk::wait_for_fence(resize_fence); vkDestroyFence((*m_device), resize_fence, nullptr); @@ -3167,12 +3144,11 @@ void VKGSRender::flip(int buffer, bool emu_flip) m_current_frame = &frame_context_storage[m_current_queue_index]; if (m_current_frame->swap_command_buffer) { - // Always present if pending swap is present. // Its possible this flip request is triggered by overlays and the flip queue is in undefined state - process_swap_request(m_current_frame, true); + frame_context_cleanup(m_current_frame, true); } - //swap aux storage and current frame; aux storage should always be ready for use at all times + // Swap aux storage and current frame; aux storage should always be ready for use at all times m_current_frame->swap_storage(m_aux_frame_context); m_current_frame->grab_resources(m_aux_frame_context); } @@ -3184,8 +3160,8 @@ void VKGSRender::flip(int buffer, bool emu_flip) LOG_ERROR(RSX, "Possible data corruption on frame context storage detected"); } - //There were no draws and back-to-back flips happened - process_swap_request(m_current_frame, true); + // There were no draws and back-to-back flips happened + frame_context_cleanup(m_current_frame, true); } if (present_surface_dirty_flag) @@ -3248,7 +3224,7 @@ void VKGSRender::flip(int buffer, bool emu_flip) verify(HERE), m_current_frame->swap_command_buffer == nullptr; u64 
timeout = m_swapchain->get_swap_image_count() <= VK_MAX_ASYNC_FRAMES? 0ull: 100000000ull; - while (VkResult status = m_swapchain->acquire_next_swapchain_image(m_current_frame->present_semaphore, timeout, &m_current_frame->present_image)) + while (VkResult status = m_swapchain->acquire_next_swapchain_image(m_current_frame->acquire_signal_semaphore, timeout, &m_current_frame->present_image)) { switch (status) { @@ -3263,18 +3239,7 @@ void VKGSRender::flip(int buffer, bool emu_flip) //Whatever returned from status, this is now a spin timeout = 0ull; - for (auto &ctx : frame_context_storage) - { - if (ctx.swap_command_buffer) - { - ctx.swap_command_buffer->poke(); - if (!ctx.swap_command_buffer->pending) - { - //Release in case there is competition for frame resources - process_swap_request(&ctx, true); - } - } - } + check_present_status(); continue; } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index fd47a51428..9d64e7abba 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -165,7 +165,8 @@ enum frame_context_state : u32 struct frame_context_t { - VkSemaphore present_semaphore = VK_NULL_HANDLE; + VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE; + VkSemaphore present_wait_semaphore = VK_NULL_HANDLE; VkDescriptorSet descriptor_set = VK_NULL_HANDLE; vk::descriptor_pool descriptor_pool; @@ -194,7 +195,8 @@ struct frame_context_t //Copy shareable information void grab_resources(frame_context_t &other) { - present_semaphore = other.present_semaphore; + present_wait_semaphore = other.present_wait_semaphore; + acquire_signal_semaphore = other.acquire_signal_semaphore; descriptor_set = other.descriptor_set; descriptor_pool = other.descriptor_pool; used_descriptors = other.used_descriptors; @@ -482,13 +484,18 @@ public: private: void clear_surface(u32 mask); - void close_and_submit_command_buffer(const std::vector &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags = 
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); - void open_command_buffer(); void prepare_rtts(rsx::framebuffer_creation_context context); + void open_command_buffer(); + void close_and_submit_command_buffer( + VkFence fence = VK_NULL_HANDLE, + VkSemaphore wait_semaphore = VK_NULL_HANDLE, + VkSemaphore signal_semaphore = VK_NULL_HANDLE, + VkPipelineStageFlags pipeline_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + void flush_command_queue(bool hard_sync = false); void queue_swap_request(); - void process_swap_request(frame_context_t *ctx, bool free_resources = false); + void frame_context_cleanup(frame_context_t *ctx, bool free_resources = false); void advance_queued_frames(); void present(frame_context_t *ctx); void reinitialize_swapchain(); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index b0e9cb32de..e37c3e62ff 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -947,7 +947,7 @@ namespace vk is_open = false; } - void submit(VkQueue queue, const std::vector &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) + void submit(VkQueue queue, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) { if (is_open) { @@ -955,19 +955,29 @@ namespace vk return; } - if (fence == VK_NULL_HANDLE) + if (!fence) { fence = m_submit_fence; is_pending = (fence != VK_NULL_HANDLE); } VkSubmitInfo infos = {}; + infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; infos.commandBufferCount = 1; infos.pCommandBuffers = &commands; infos.pWaitDstStageMask = &pipeline_stage_flags; - infos.pWaitSemaphores = semaphores.data(); - infos.waitSemaphoreCount = static_cast(semaphores.size()); - infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + + if (wait_semaphore) + { + infos.waitSemaphoreCount = 1; + infos.pWaitSemaphores = &wait_semaphore; + } + + if (signal_semaphore) + { + infos.signalSemaphoreCount = 1; + infos.pSignalSemaphores = &signal_semaphore; + } 
acquire_global_submit_lock(); CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence)); @@ -1637,7 +1647,7 @@ public: virtual VkImage& get_image(u32 index) = 0; virtual VkResult acquire_next_swapchain_image(VkSemaphore semaphore, u64 timeout, u32* result) = 0; virtual void end_frame(command_buffer& cmd, u32 index) = 0; - virtual VkResult present(u32 index) = 0; + virtual VkResult present(VkSemaphore semaphore, u32 index) = 0; virtual VkImageLayout get_optimal_present_layout() = 0; virtual bool supports_automatic_wm_reports() const @@ -1769,7 +1779,7 @@ public: dev.destroy(); } - VkResult present(u32 image) override + VkResult present(VkSemaphore /*semaphore*/, u32 image) override { auto& src = swapchain_images[image]; GdiFlush(); @@ -1826,7 +1836,7 @@ public: dev.destroy(); } - VkResult present(u32 index) override + VkResult present(VkSemaphore /*semaphore*/, u32 index) override { fmt::throw_exception("Native macOS swapchain is not implemented yet!"); } @@ -1914,7 +1924,7 @@ public: dev.destroy(); } - VkResult present(u32 index) override + VkResult present(VkSemaphore /*semaphore*/, u32 index) override { auto& src = swapchain_images[index]; if (pixmap) @@ -2216,7 +2226,7 @@ public: { } - VkResult present(u32 image) override + VkResult present(VkSemaphore semaphore, u32 image) override { VkPresentInfoKHR present = {}; present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; @@ -2224,6 +2234,8 @@ public: present.swapchainCount = 1; present.pSwapchains = &m_vk_swapchain; present.pImageIndices = &image; + present.waitSemaphoreCount = 1; + present.pWaitSemaphores = &semaphore; return queuePresentKHR(vk_present_queue, &present); } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 139a75c7ac..ff3bfab865 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -1100,7 +1100,7 @@ namespace vk info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; vkCreateFence(*m_device, &info, nullptr, &submit_fence); - 
cmd.submit(m_submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); + cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); vk::wait_for_fence(submit_fence, GENERAL_WAIT_TIMEOUT); vkDestroyFence(*m_device, submit_fence, nullptr); @@ -1111,7 +1111,7 @@ namespace vk else { // Auxilliary command queue with auto-restart capability - cmd.submit(m_submit_queue, {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); + cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); } verify(HERE), cmd.flags == 0;