From 5193c99973007ed57ae60c256d76205a6da6a2ea Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 7 Nov 2018 12:34:03 +0300 Subject: [PATCH] rsx: Enable dynamic FIFO preprocessing - Tries to detect when FIFO preprocessing is beneficial and only enables optimizations if the benefit outweighs the cost - Current threshold is at least 500 draw calls saved at over 2000 draw calls to justify the overhead - TODO: More tuning for other CPUs --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 3 - rpcs3/Emu/RSX/GL/GLGSRender.h | 1 - rpcs3/Emu/RSX/RSXFIFO.cpp | 312 ++++++++++++++++++++++---------- rpcs3/Emu/RSX/RSXFIFO.h | 47 ++++- rpcs3/Emu/RSX/RSXThread.cpp | 14 ++ rpcs3/Emu/RSX/RSXThread.h | 5 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 15 +- rpcs3/Emu/RSX/VK/VKGSRender.h | 3 - 8 files changed, 273 insertions(+), 127 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 59df76cbe4..504add31fd 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -636,7 +636,6 @@ void GLGSRender::end() std::chrono::time_point draw_end = steady_clock::now(); m_draw_time += (u32)std::chrono::duration_cast(draw_end - draw_start).count(); - m_draw_calls++; rsx::thread::end(); } @@ -1542,7 +1541,6 @@ void GLGSRender::flip(int buffer) if (!skip_frame) { - m_draw_calls = 0; m_begin_time = 0; m_draw_time = 0; m_vertex_upload_time = 0; @@ -1753,7 +1751,6 @@ void GLGSRender::flip(int buffer) //If we are skipping the next frame, do not reset perf counters if (skip_frame) return; - m_draw_calls = 0; m_begin_time = 0; m_draw_time = 0; m_vertex_upload_time = 0; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index e9cd604fe3..2b48c78a09 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -303,7 +303,6 @@ private: // Identity buffer used to fix broken gl_VertexID on ATI stack std::unique_ptr m_identity_index_buffer; - u32 m_draw_calls = 0; s64 m_begin_time = 0; s64 m_draw_time = 0; s64 m_vertex_upload_time = 0; diff --git a/rpcs3/Emu/RSX/RSXFIFO.cpp b/rpcs3/Emu/RSX/RSXFIFO.cpp index 9bcb7c2032..1c07f6b678 100644 --- a/rpcs3/Emu/RSX/RSXFIFO.cpp +++ b/rpcs3/Emu/RSX/RSXFIFO.cpp @@ -16,83 +16,6 @@ namespace rsx FIFO_control::FIFO_control(::rsx::thread* pctrl) { m_ctrl = pctrl->ctrl; - - const std::pair skippable_ranges[] = - { - // Texture configuration - { NV4097_SET_TEXTURE_OFFSET, 8 * 16 }, - { NV4097_SET_TEXTURE_CONTROL2, 16 }, - { NV4097_SET_TEXTURE_CONTROL3, 16 }, - { NV4097_SET_VERTEX_TEXTURE_OFFSET, 8 * 4 }, - // Surface configuration - { NV4097_SET_SURFACE_CLIP_HORIZONTAL, 1 }, - { NV4097_SET_SURFACE_CLIP_VERTICAL, 1 }, - { NV4097_SET_SURFACE_COLOR_AOFFSET, 1 }, - { NV4097_SET_SURFACE_COLOR_BOFFSET, 1 }, - { NV4097_SET_SURFACE_COLOR_COFFSET, 1 }, - { NV4097_SET_SURFACE_COLOR_DOFFSET, 1 }, - { NV4097_SET_SURFACE_ZETA_OFFSET, 1 }, - { NV4097_SET_CONTEXT_DMA_COLOR_A, 1 }, - { NV4097_SET_CONTEXT_DMA_COLOR_B, 1 }, - { NV4097_SET_CONTEXT_DMA_COLOR_C, 1 }, - { NV4097_SET_CONTEXT_DMA_COLOR_D, 1 }, - { NV4097_SET_CONTEXT_DMA_ZETA, 1 }, - { NV4097_SET_SURFACE_FORMAT, 1 }, - { NV4097_SET_SURFACE_PITCH_A, 1 }, - { NV4097_SET_SURFACE_PITCH_B, 1 }, - { NV4097_SET_SURFACE_PITCH_C, 1 }, - { NV4097_SET_SURFACE_PITCH_D, 1 }, - { NV4097_SET_SURFACE_PITCH_Z, 1 }, - // Program configuration - { NV4097_SET_TRANSFORM_PROGRAM_START, 1 }, - { NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, 1 }, - { NV4097_SET_TRANSFORM_PROGRAM, 512 }, - // Vertex - { NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 16 }, - { NV4097_SET_VERTEX_DATA_ARRAY_OFFSET, 16 }, - }; - - const std::pair ignorable_ranges[] = - { - // General - { NV4097_INVALIDATE_VERTEX_FILE, 3 }, // PSLight clears VERTEX_FILE[0-2] - { NV4097_INVALIDATE_VERTEX_CACHE_FILE, 1 }, - { NV4097_INVALIDATE_L2, 1 }, - { NV4097_INVALIDATE_ZCULL, 1 }, - // FIFO - { (FIFO_DISABLED_COMMAND >> 2), 1}, - { (FIFO_PACKET_BEGIN >> 2), 1 }, - { (FIFO_DRAW_BARRIER >> 2), 1 }, - // ROP - { NV4097_SET_ALPHA_FUNC, 1 }, - { NV4097_SET_ALPHA_REF, 1 }, - { NV4097_SET_ALPHA_TEST_ENABLE, 1 }, - { NV4097_SET_ANTI_ALIASING_CONTROL, 1 }, - // Program - { NV4097_SET_SHADER_PACKER, 1 }, - { NV4097_SET_SHADER_WINDOW, 1 }, - // Vertex data offsets - { NV4097_SET_VERTEX_DATA_BASE_OFFSET, 1 }, - { NV4097_SET_VERTEX_DATA_BASE_INDEX, 1 } - }; - - std::fill(m_register_properties.begin(), m_register_properties.end(), 0u); - - for (const auto &method : skippable_ranges) - { - for (int i = 0; i < method.second; ++i) - { - m_register_properties[method.first + i] = register_props::skip_on_match; - } - } - - for (const auto &method : ignorable_ranges) - { - for (int i = 0; i < method.second; ++i) - { - m_register_properties[method.first + i] |= register_props::always_ignore; - } - } } void FIFO_control::set_put(u32 put) @@ -133,22 +56,22 @@ namespace rsx m_memwatch_addr = 0; } - bool FIFO_control::has_next() const - { - return (m_remaining_commands > 0); - } - void FIFO_control::read_unsafe(register_pair& data) { // Fast read with no processing, only safe inside a PACKET_BEGIN+count block - //verify(HERE), m_remaining_commands; + if (m_remaining_commands) + { + m_command_reg += m_command_inc; + m_args_ptr += 4; + m_remaining_commands--; - m_command_reg += m_command_inc; - m_args_ptr += 4; - m_remaining_commands--; - - data.reg = m_command_reg; - data.value = vm::read32(m_args_ptr); + data.reg = m_command_reg; + data.value = vm::read32(m_args_ptr); + } + else + { + data.reg = FIFO_EMPTY; + } } void FIFO_control::read(register_pair& data) @@ -255,6 +178,164 @@ namespace rsx data = { cmd & 0xfffc, vm::read32(m_args_ptr), m_internal_get }; } } + + flattening_helper::flattening_helper() + { + const std::pair ignorable_ranges[] = + { + // General + { NV4097_INVALIDATE_VERTEX_FILE, 3 }, // PSLight clears VERTEX_FILE[0-2] + { NV4097_INVALIDATE_VERTEX_CACHE_FILE, 1 }, + { NV4097_INVALIDATE_L2, 1 }, + { NV4097_INVALIDATE_ZCULL, 1 } + }; + + std::fill(m_register_properties.begin(), m_register_properties.end(), 0u); + + for (const auto &method : ignorable_ranges) + { + for (int i = 0; i < method.second; ++i) + { + m_register_properties[method.first + i] |= register_props::always_ignore; + } + } + } + + void flattening_helper::evaluate_performance(u32 total_draw_count) + { + if (!enabled) + { + if (total_draw_count <= 2000) + { + // Low draw call pressure + fifo_hint = optimization_hint::load_low; + return; + } + + if (fifo_hint == optimization_hint::load_unoptimizable) + { + // Nope, wait for stats to change + return; + } + } + + if (enabled) + { + // Currently activated. Check if there is any benefit + if (num_collapsed < 500) + { + // Not worth it, disable + enabled = false; + fifo_hint = load_unoptimizable; + } + + u32 real_total = total_draw_count + num_collapsed; + if (real_total <= 2000) + { + // Low total number of draws submitted, no need to keep trying for now + enabled = false; + fifo_hint = load_low; + } + + num_collapsed = 0; + } + else + { + // Not enabled, check if we should try enabling + verify(HERE), total_draw_count > 2000; + if (fifo_hint != load_unoptimizable) + { + // If its set to unoptimizable, we already tried and it did not work + // If it resets to load low (usually after some kind of loading screen) we can try again + enabled = true; + } + } + } + + flatten_op flattening_helper::test(register_pair& command) + { + u32 flush_cmd = -1u; + switch (const u32 reg = (command.reg >> 2)) + { + case NV4097_SET_BEGIN_END: + { + begin_end_ctr ^= 1; + + if (command.value) + { + // This is a BEGIN call + if (LIKELY(!deferred_primitive)) + { + // New primitive block + deferred_primitive = command.value; + } + else if (deferred_primitive == command.value) + { + // Same primitive can be chanined; do nothing + command.reg = FIFO_DISABLED_COMMAND; + } + else + { + // Primitive command has changed! + // Flush + flush_cmd = command.value; + } + } + else if (deferred_primitive) + { + command.reg = FIFO_DRAW_BARRIER; + draw_count++; + } + else + { + fmt::throw_exception("Unreachable" HERE); + } + + break; + } + case NV4097_DRAW_ARRAYS: + case NV4097_DRAW_INDEX_ARRAY: + { + // TODO: Check type + break; + } + default: + { + if (UNLIKELY(draw_count)) + { + const auto props = m_register_properties[reg]; + if (UNLIKELY(props & register_props::always_ignore)) + { + // Always ignore + command.reg = FIFO_DISABLED_COMMAND; + } + else + { + // Flush + flush_cmd = (begin_end_ctr) ? deferred_primitive : 0u; + } + } + else + { + // Nothing to do + return NOTHING; + } + + break; + } + } + + if (flush_cmd != -1u) + { + num_collapsed += draw_count? (draw_count - 1) : 0; + draw_count = 0; + deferred_primitive = flush_cmd; + + return (begin_end_ctr == 1)? EMIT_BARRIER : EMIT_END; + } + + return NOTHING; + } } void thread::run_FIFO() @@ -382,13 +463,13 @@ namespace rsx performance_counters.state = FIFO_state::running; } - for (int i = 0; ; i++, fifo_ctrl->read_unsafe(command)) + for (int i = 0; command.reg != FIFO::FIFO_EMPTY; i++, fifo_ctrl->read_unsafe(command)) { - const u32 reg = command.reg >> 2; - const u32 value = command.value; - - if (capture_current_frame) + if (UNLIKELY(capture_current_frame)) { + const u32 reg = command.reg >> 2; + const u32 value = command.value; + frame_debug.command_queue.push_back(std::make_pair(reg, value)); if (!(reg == NV406E_SET_REFERENCE || reg == NV406E_SEMAPHORE_RELEASE || reg == NV406E_SEMAPHORE_ACQUIRE)) @@ -424,17 +505,50 @@ namespace rsx } } + if (UNLIKELY(m_flattener.is_enabled())) + { + switch(m_flattener.test(command)) + { + case FIFO::NOTHING: + { + break; + } + case FIFO::EMIT_END: + { + // Emit end command to close existing scope + //verify(HERE), in_begin_end; + methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0); + break; + } + case FIFO::EMIT_BARRIER: + { + //verify(HERE), in_begin_end; + methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0); + methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, m_flattener.get_primitive()); + break; + } + default: + { + fmt::throw_exception("Unreachable" HERE); + } + } + + if (command.reg == FIFO::FIFO_DISABLED_COMMAND) + { + // Optimized away + continue; + } + } + + const u32 reg = command.reg >> 2; + const u32 value = command.value; + method_registers.decode(reg, value); if (auto method = methods[reg]) { method(this, reg, value); } - - if (!fifo_ctrl->has_next()) - { - break; - } } } } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/RSXFIFO.h b/rpcs3/Emu/RSX/RSXFIFO.h index 43613d84a8..c8ff9ebb0a 100644 --- a/rpcs3/Emu/RSX/RSXFIFO.h +++ b/rpcs3/Emu/RSX/RSXFIFO.h @@ -35,6 +35,13 @@ namespace rsx FIFO_DRAW_BARRIER = 0xF1F8, }; + enum flatten_op : u32 + { + NOTHING = 0, + EMIT_END = 1, + EMIT_BARRIER = 2 + }; + struct register_pair { u32 reg; @@ -43,9 +50,8 @@ namespace rsx u32 reserved; }; - class FIFO_control + class flattening_helper { - private: enum register_props : u8 { none = 0, @@ -53,6 +59,35 @@ namespace rsx always_ignore = 2 }; + enum optimization_hint : u8 + { + unknown, + load_low, + load_unoptimizable + }; + + std::array m_register_properties; + u32 deferred_primitive = 0; + u32 draw_count = 0; + u32 begin_end_ctr = 0; + + bool enabled = false; + u32 num_collapsed = 0; + optimization_hint fifo_hint = unknown; + + public: + flattening_helper(); + ~flattening_helper() {} + + u32 get_primitive() const { return deferred_primitive; } + bool is_enabled() const { return enabled; } + + void evaluate_performance(u32 total_draw_count); + inline flatten_op test(register_pair& command); + }; + + class FIFO_control + { private: RsxDmaControl* m_ctrl = nullptr; u32 m_internal_get = 0; @@ -65,9 +100,6 @@ namespace rsx u32 m_remaining_commands = 0; u32 m_args_ptr = 0; - std::array m_register_properties; - bool has_deferred_draw = false; - public: FIFO_control(rsx::thread* pctrl); ~FIFO_control() {} @@ -77,11 +109,6 @@ namespace rsx void read(register_pair& data); inline void read_unsafe(register_pair& data); - inline bool has_next() const; - - public: - static bool is_blocking_cmd(u32 cmd); - static bool is_sync_cmd(u32 cmd); }; } } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index b2970ee56d..38faee79d4 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -467,6 +467,7 @@ namespace rsx capture::capture_draw_memory(this); in_begin_end = false; + m_draw_calls++; m_graphics_state |= rsx::pipeline_state::framebuffer_reads_dirty; ROP_sync_timestamp = get_system_time(); @@ -2238,6 +2239,19 @@ namespace rsx { async_flip_requested.clear(); + if (!g_cfg.video.disable_FIFO_reordering) + { + // Try to enable FIFO optimizations + // Only rarely useful for some games like RE4 + m_flattener.evaluate_performance(m_draw_calls); + } + + if (!skip_frame) + { + // Reset counter + m_draw_calls = 0; + } + if (g_cfg.video.frame_skip_enabled) { m_skip_frame_ctr++; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index c8e94e732f..927a78ff04 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -380,8 +380,8 @@ namespace rsx bool supports_native_ui = false; // FIFO - friend class FIFO::FIFO_control; std::unique_ptr fifo_ctrl; + FIFO::flattening_helper m_flattener; // Occlusion query bool zcull_surface_active = false; @@ -398,6 +398,9 @@ namespace rsx // Invalidated memory range address_range m_invalidated_memory_range; + // Draw call stats + u32 m_draw_calls = 0; + public: RsxDmaControl* ctrl = nullptr; atomic_t restore_point{ 0 }; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 15f0a4fa85..087dc4ad96 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1322,10 +1322,9 @@ void VKGSRender::end() std::chrono::time_point textures_start = steady_clock::now(); - auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); - //Clear any 'dirty' surfaces - possible is a recycled cache surface is used + // Clear any 'dirty' surfaces - possible is a recycled cache surface is used rsx::simple_array buffers_to_clear; - buffers_to_clear.reserve(4); + auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); //Check for memory clears if (ds && ds->dirty) @@ -1347,7 +1346,7 @@ void VKGSRender::end() } } - if (buffers_to_clear.size() > 0) + if (UNLIKELY(!buffers_to_clear.empty())) { begin_render_pass(); @@ -1361,7 +1360,7 @@ void VKGSRender::end() //Check for data casts if (ds && ds->old_contents) { - if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM) + if (UNLIKELY(ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)) { // TODO: Partial memory transfer auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0); @@ -1380,7 +1379,7 @@ void VKGSRender::end() { auto copy_rtt_contents = [&](vk::render_target* surface, bool is_depth) { - if (surface->info.format == surface->old_contents->info.format) + if (LIKELY(surface->info.format == surface->old_contents->info.format)) { const auto region = rsx::get_transferable_region(surface); const auto src_w = std::get<0>(region); @@ -1695,8 +1694,6 @@ void VKGSRender::end() m_current_command_buffer->num_draws++; m_rtts.on_write(); - m_draw_calls++; - rsx::thread::end(); } @@ -3092,7 +3089,6 @@ void VKGSRender::flip(int buffer) if (!skip_frame) { - m_draw_calls = 0; m_draw_time = 0; m_setup_time = 0; m_vertex_upload_time = 0; @@ -3406,7 +3402,6 @@ void VKGSRender::flip(int buffer) //Do not reset perf counters if we are skipping the next frame if (skip_frame) return; - m_draw_calls = 0; m_draw_time = 0; m_setup_time = 0; m_vertex_upload_time = 0; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 982b5e47c7..f5fa8b304e 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -372,9 +372,6 @@ private: VkViewport m_viewport{}; VkRect2D m_scissor{}; - // Draw call stats - u32 m_draw_calls = 0; - // Timers s64 m_setup_time = 0; s64 m_vertex_upload_time = 0;