1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-25 20:22:30 +01:00

rsx: Enable dynamic FIFO preprocessing

- Tries to detect when FIFO preprocessing is beneficial and only enables optimizations if the benefit outweighs the cost
- Current threshold: at least 500 draw calls must be saved out of more than 2000 submitted draw calls to justify the overhead
- TODO: More tuning for other CPUs
This commit is contained in:
kd-11 2018-11-07 12:34:03 +03:00 committed by kd-11
parent 7b065d7781
commit 5193c99973
8 changed files with 273 additions and 127 deletions

View File

@ -636,7 +636,6 @@ void GLGSRender::end()
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
m_draw_calls++;
rsx::thread::end();
}
@ -1542,7 +1541,6 @@ void GLGSRender::flip(int buffer)
if (!skip_frame)
{
m_draw_calls = 0;
m_begin_time = 0;
m_draw_time = 0;
m_vertex_upload_time = 0;
@ -1753,7 +1751,6 @@ void GLGSRender::flip(int buffer)
//If we are skipping the next frame, do not reset perf counters
if (skip_frame) return;
m_draw_calls = 0;
m_begin_time = 0;
m_draw_time = 0;
m_vertex_upload_time = 0;

View File

@ -303,7 +303,6 @@ private:
// Identity buffer used to fix broken gl_VertexID on ATI stack
std::unique_ptr<gl::buffer> m_identity_index_buffer;
u32 m_draw_calls = 0;
s64 m_begin_time = 0;
s64 m_draw_time = 0;
s64 m_vertex_upload_time = 0;

View File

@ -16,83 +16,6 @@ namespace rsx
FIFO_control::FIFO_control(::rsx::thread* pctrl)
{
m_ctrl = pctrl->ctrl;
const std::pair<u32, u32> skippable_ranges[] =
{
// Texture configuration
{ NV4097_SET_TEXTURE_OFFSET, 8 * 16 },
{ NV4097_SET_TEXTURE_CONTROL2, 16 },
{ NV4097_SET_TEXTURE_CONTROL3, 16 },
{ NV4097_SET_VERTEX_TEXTURE_OFFSET, 8 * 4 },
// Surface configuration
{ NV4097_SET_SURFACE_CLIP_HORIZONTAL, 1 },
{ NV4097_SET_SURFACE_CLIP_VERTICAL, 1 },
{ NV4097_SET_SURFACE_COLOR_AOFFSET, 1 },
{ NV4097_SET_SURFACE_COLOR_BOFFSET, 1 },
{ NV4097_SET_SURFACE_COLOR_COFFSET, 1 },
{ NV4097_SET_SURFACE_COLOR_DOFFSET, 1 },
{ NV4097_SET_SURFACE_ZETA_OFFSET, 1 },
{ NV4097_SET_CONTEXT_DMA_COLOR_A, 1 },
{ NV4097_SET_CONTEXT_DMA_COLOR_B, 1 },
{ NV4097_SET_CONTEXT_DMA_COLOR_C, 1 },
{ NV4097_SET_CONTEXT_DMA_COLOR_D, 1 },
{ NV4097_SET_CONTEXT_DMA_ZETA, 1 },
{ NV4097_SET_SURFACE_FORMAT, 1 },
{ NV4097_SET_SURFACE_PITCH_A, 1 },
{ NV4097_SET_SURFACE_PITCH_B, 1 },
{ NV4097_SET_SURFACE_PITCH_C, 1 },
{ NV4097_SET_SURFACE_PITCH_D, 1 },
{ NV4097_SET_SURFACE_PITCH_Z, 1 },
// Program configuration
{ NV4097_SET_TRANSFORM_PROGRAM_START, 1 },
{ NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, 1 },
{ NV4097_SET_TRANSFORM_PROGRAM, 512 },
// Vertex
{ NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 16 },
{ NV4097_SET_VERTEX_DATA_ARRAY_OFFSET, 16 },
};
const std::pair<u32, u32> ignorable_ranges[] =
{
// General
{ NV4097_INVALIDATE_VERTEX_FILE, 3 }, // PSLight clears VERTEX_FILE[0-2]
{ NV4097_INVALIDATE_VERTEX_CACHE_FILE, 1 },
{ NV4097_INVALIDATE_L2, 1 },
{ NV4097_INVALIDATE_ZCULL, 1 },
// FIFO
{ (FIFO_DISABLED_COMMAND >> 2), 1},
{ (FIFO_PACKET_BEGIN >> 2), 1 },
{ (FIFO_DRAW_BARRIER >> 2), 1 },
// ROP
{ NV4097_SET_ALPHA_FUNC, 1 },
{ NV4097_SET_ALPHA_REF, 1 },
{ NV4097_SET_ALPHA_TEST_ENABLE, 1 },
{ NV4097_SET_ANTI_ALIASING_CONTROL, 1 },
// Program
{ NV4097_SET_SHADER_PACKER, 1 },
{ NV4097_SET_SHADER_WINDOW, 1 },
// Vertex data offsets
{ NV4097_SET_VERTEX_DATA_BASE_OFFSET, 1 },
{ NV4097_SET_VERTEX_DATA_BASE_INDEX, 1 }
};
std::fill(m_register_properties.begin(), m_register_properties.end(), 0u);
for (const auto &method : skippable_ranges)
{
for (int i = 0; i < method.second; ++i)
{
m_register_properties[method.first + i] = register_props::skip_on_match;
}
}
for (const auto &method : ignorable_ranges)
{
for (int i = 0; i < method.second; ++i)
{
m_register_properties[method.first + i] |= register_props::always_ignore;
}
}
}
void FIFO_control::set_put(u32 put)
@ -133,22 +56,22 @@ namespace rsx
m_memwatch_addr = 0;
}
// Reports whether the remaining-command counter is still non-zero.
bool FIFO_control::has_next() const
{
	return m_remaining_commands != 0;
}
void FIFO_control::read_unsafe(register_pair& data)
{
// Fast read with no processing, only safe inside a PACKET_BEGIN+count block
//verify(HERE), m_remaining_commands;
if (m_remaining_commands)
{
m_command_reg += m_command_inc;
m_args_ptr += 4;
m_remaining_commands--;
m_command_reg += m_command_inc;
m_args_ptr += 4;
m_remaining_commands--;
data.reg = m_command_reg;
data.value = vm::read32(m_args_ptr);
data.reg = m_command_reg;
data.value = vm::read32(m_args_ptr);
}
else
{
data.reg = FIFO_EMPTY;
}
}
void FIFO_control::read(register_pair& data)
@ -255,6 +178,164 @@ namespace rsx
data = { cmd & 0xfffc, vm::read32(m_args_ptr), m_internal_get };
}
}
// Builds the per-register property table consulted by test().
// Every entry starts out as 0; the registers listed below are then tagged
// with register_props::always_ignore.
flattening_helper::flattening_helper()
{
	// Each entry is { first method register, number of consecutive registers }
	const std::pair<u32, u32> ignorable_ranges[] =
	{
		// General
		{ NV4097_INVALIDATE_VERTEX_FILE, 3 }, // PSLight clears VERTEX_FILE[0-2]
		{ NV4097_INVALIDATE_VERTEX_CACHE_FILE, 1 },
		{ NV4097_INVALIDATE_L2, 1 },
		{ NV4097_INVALIDATE_ZCULL, 1 }
	};

	m_register_properties.fill(0);

	for (const auto& range : ignorable_ranges)
	{
		// The range length is a u32, so the index is unsigned as well
		// (avoids a signed/unsigned comparison)
		for (u32 offset = 0; offset < range.second; ++offset)
		{
			m_register_properties[range.first + offset] |= register_props::always_ignore;
		}
	}
}
// Decides whether command flattening should be active, based on the draw
// count supplied by the caller and on how many draws were collapsed since
// the previous evaluation.
void flattening_helper::evaluate_performance(u32 total_draw_count)
{
	if (enabled)
	{
		// Flattening was active; check whether it actually paid off
		const bool too_few_collapsed = (num_collapsed < 500);
		const bool low_pressure = ((total_draw_count + num_collapsed) <= 2000);

		if (too_few_collapsed)
		{
			// Not worth the overhead, turn it off
			enabled = false;
			fifo_hint = load_unoptimizable;
		}

		if (low_pressure)
		{
			// Few draws were submitted in total; stop trying for now.
			// Evaluated last, so this hint wins over load_unoptimizable.
			enabled = false;
			fifo_hint = load_low;
		}

		num_collapsed = 0;
		return;
	}

	if (total_draw_count <= 2000)
	{
		// Low draw call pressure
		fifo_hint = optimization_hint::load_low;
		return;
	}

	if (fifo_hint == optimization_hint::load_unoptimizable)
	{
		// Already tried without success; wait until the hint drops back to load_low
		return;
	}

	// Inactive, under pressure and not marked unoptimizable: give it a try
	verify(HERE), total_draw_count > 2000;
	enabled = true;
}
// Inspects one FIFO command and updates the deferred-draw state.
// The command may be rewritten in place: command.reg can become
// FIFO_DISABLED_COMMAND or FIFO_DRAW_BARRIER.
// Returns NOTHING when no flush is needed. When a flush is needed it returns
// EMIT_BARRIER if begin_end_ctr is 1 at that point, otherwise EMIT_END.
flatten_op flattening_helper::test(register_pair& command)
{
// -1u is the sentinel for "no flush requested"
u32 flush_cmd = -1u;
switch (const u32 reg = (command.reg >> 2))
{
case NV4097_SET_BEGIN_END:
{
// Toggled on every SET_BEGIN_END command
begin_end_ctr ^= 1;
if (command.value)
{
// This is a BEGIN call
if (LIKELY(!deferred_primitive))
{
// New primitive block
deferred_primitive = command.value;
}
else if (deferred_primitive == command.value)
{
// Same primitive can be chained; do nothing
command.reg = FIFO_DISABLED_COMMAND;
}
else
{
// Primitive command has changed!
// Flush
flush_cmd = command.value;
}
}
else if (deferred_primitive)
{
// END while a primitive is deferred: replace it with a draw barrier and count the draw
command.reg = FIFO_DRAW_BARRIER;
draw_count++;
}
else
{
// END with no deferred primitive
fmt::throw_exception("Unreachable" HERE);
}
break;
}
case NV4097_DRAW_ARRAYS:
case NV4097_DRAW_INDEX_ARRAY:
{
// TODO: Check type
break;
}
default:
{
// Any other register only matters while at least one draw has been counted
if (UNLIKELY(draw_count))
{
const auto props = m_register_properties[reg];
if (UNLIKELY(props & register_props::always_ignore))
{
// Always ignore
command.reg = FIFO_DISABLED_COMMAND;
}
else
{
// Flush
// Keeps the deferred primitive if begin_end_ctr is set, otherwise clears it
flush_cmd = (begin_end_ctr) ? deferred_primitive : 0u;
}
}
else
{
// Nothing to do
return NOTHING;
}
break;
}
}
if (flush_cmd != -1u)
{
// The first draw of a batch is not a saving, so only draw_count - 1 is added
num_collapsed += draw_count? (draw_count - 1) : 0;
draw_count = 0;
deferred_primitive = flush_cmd;
return (begin_end_ctr == 1)? EMIT_BARRIER : EMIT_END;
}
return NOTHING;
}
}
void thread::run_FIFO()
@ -382,13 +463,13 @@ namespace rsx
performance_counters.state = FIFO_state::running;
}
for (int i = 0; ; i++, fifo_ctrl->read_unsafe(command))
for (int i = 0; command.reg != FIFO::FIFO_EMPTY; i++, fifo_ctrl->read_unsafe(command))
{
const u32 reg = command.reg >> 2;
const u32 value = command.value;
if (capture_current_frame)
if (UNLIKELY(capture_current_frame))
{
const u32 reg = command.reg >> 2;
const u32 value = command.value;
frame_debug.command_queue.push_back(std::make_pair(reg, value));
if (!(reg == NV406E_SET_REFERENCE || reg == NV406E_SEMAPHORE_RELEASE || reg == NV406E_SEMAPHORE_ACQUIRE))
@ -424,17 +505,50 @@ namespace rsx
}
}
if (UNLIKELY(m_flattener.is_enabled()))
{
switch(m_flattener.test(command))
{
case FIFO::NOTHING:
{
break;
}
case FIFO::EMIT_END:
{
// Emit end command to close existing scope
//verify(HERE), in_begin_end;
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0);
break;
}
case FIFO::EMIT_BARRIER:
{
//verify(HERE), in_begin_end;
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0);
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, m_flattener.get_primitive());
break;
}
default:
{
fmt::throw_exception("Unreachable" HERE);
}
}
if (command.reg == FIFO::FIFO_DISABLED_COMMAND)
{
// Optimized away
continue;
}
}
const u32 reg = command.reg >> 2;
const u32 value = command.value;
method_registers.decode(reg, value);
if (auto method = methods[reg])
{
method(this, reg, value);
}
if (!fifo_ctrl->has_next())
{
break;
}
}
}
}

View File

@ -35,6 +35,13 @@ namespace rsx
FIFO_DRAW_BARRIER = 0xF1F8,
};
// Result of flattening_helper::test(), telling the FIFO loop what to emit
// before it handles the current command
enum flatten_op : u32
{
// No extra command is emitted
NOTHING = 0,
// run_FIFO invokes NV4097_SET_BEGIN_END with value 0
EMIT_END = 1,
// run_FIFO invokes NV4097_SET_BEGIN_END with value 0, then again with the flattener's get_primitive()
EMIT_BARRIER = 2
};
struct register_pair
{
u32 reg;
@ -43,9 +50,8 @@ namespace rsx
u32 reserved;
};
class FIFO_control
class flattening_helper
{
private:
enum register_props : u8
{
none = 0,
@ -53,6 +59,35 @@ namespace rsx
always_ignore = 2
};
enum optimization_hint : u8
{
unknown,
load_low,
load_unoptimizable
};
std::array<u8, 0x10000 / 4> m_register_properties;
u32 deferred_primitive = 0;
u32 draw_count = 0;
u32 begin_end_ctr = 0;
bool enabled = false;
u32 num_collapsed = 0;
optimization_hint fifo_hint = unknown;
public:
flattening_helper();
~flattening_helper() {}
u32 get_primitive() const { return deferred_primitive; }
bool is_enabled() const { return enabled; }
void evaluate_performance(u32 total_draw_count);
inline flatten_op test(register_pair& command);
};
class FIFO_control
{
private:
RsxDmaControl* m_ctrl = nullptr;
u32 m_internal_get = 0;
@ -65,9 +100,6 @@ namespace rsx
u32 m_remaining_commands = 0;
u32 m_args_ptr = 0;
std::array<u8, 0x10000 / 4> m_register_properties;
bool has_deferred_draw = false;
public:
FIFO_control(rsx::thread* pctrl);
~FIFO_control() {}
@ -77,11 +109,6 @@ namespace rsx
void read(register_pair& data);
inline void read_unsafe(register_pair& data);
inline bool has_next() const;
public:
static bool is_blocking_cmd(u32 cmd);
static bool is_sync_cmd(u32 cmd);
};
}
}

View File

@ -467,6 +467,7 @@ namespace rsx
capture::capture_draw_memory(this);
in_begin_end = false;
m_draw_calls++;
m_graphics_state |= rsx::pipeline_state::framebuffer_reads_dirty;
ROP_sync_timestamp = get_system_time();
@ -2238,6 +2239,19 @@ namespace rsx
{
async_flip_requested.clear();
if (!g_cfg.video.disable_FIFO_reordering)
{
// Try to enable FIFO optimizations
// Only rarely useful for some games like RE4
m_flattener.evaluate_performance(m_draw_calls);
}
if (!skip_frame)
{
// Reset counter
m_draw_calls = 0;
}
if (g_cfg.video.frame_skip_enabled)
{
m_skip_frame_ctr++;

View File

@ -380,8 +380,8 @@ namespace rsx
bool supports_native_ui = false;
// FIFO
friend class FIFO::FIFO_control;
std::unique_ptr<FIFO::FIFO_control> fifo_ctrl;
FIFO::flattening_helper m_flattener;
// Occlusion query
bool zcull_surface_active = false;
@ -398,6 +398,9 @@ namespace rsx
// Invalidated memory range
address_range m_invalidated_memory_range;
// Draw call stats
u32 m_draw_calls = 0;
public:
RsxDmaControl* ctrl = nullptr;
atomic_t<u32> restore_point{ 0 };

View File

@ -1322,10 +1322,9 @@ void VKGSRender::end()
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
//Clear any 'dirty' surfaces - possible is a recycled cache surface is used
// Clear any 'dirty' surfaces - possible if a recycled cache surface is used
rsx::simple_array<VkClearAttachment> buffers_to_clear;
buffers_to_clear.reserve(4);
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
//Check for memory clears
if (ds && ds->dirty)
@ -1347,7 +1346,7 @@ void VKGSRender::end()
}
}
if (buffers_to_clear.size() > 0)
if (UNLIKELY(!buffers_to_clear.empty()))
{
begin_render_pass();
@ -1361,7 +1360,7 @@ void VKGSRender::end()
//Check for data casts
if (ds && ds->old_contents)
{
if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)
if (UNLIKELY(ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM))
{
// TODO: Partial memory transfer
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
@ -1380,7 +1379,7 @@ void VKGSRender::end()
{
auto copy_rtt_contents = [&](vk::render_target* surface, bool is_depth)
{
if (surface->info.format == surface->old_contents->info.format)
if (LIKELY(surface->info.format == surface->old_contents->info.format))
{
const auto region = rsx::get_transferable_region(surface);
const auto src_w = std::get<0>(region);
@ -1695,8 +1694,6 @@ void VKGSRender::end()
m_current_command_buffer->num_draws++;
m_rtts.on_write();
m_draw_calls++;
rsx::thread::end();
}
@ -3092,7 +3089,6 @@ void VKGSRender::flip(int buffer)
if (!skip_frame)
{
m_draw_calls = 0;
m_draw_time = 0;
m_setup_time = 0;
m_vertex_upload_time = 0;
@ -3406,7 +3402,6 @@ void VKGSRender::flip(int buffer)
//Do not reset perf counters if we are skipping the next frame
if (skip_frame) return;
m_draw_calls = 0;
m_draw_time = 0;
m_setup_time = 0;
m_vertex_upload_time = 0;

View File

@ -372,9 +372,6 @@ private:
VkViewport m_viewport{};
VkRect2D m_scissor{};
// Draw call stats
u32 m_draw_calls = 0;
// Timers
s64 m_setup_time = 0;
s64 m_vertex_upload_time = 0;