1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 02:32:36 +01:00

SPU: Implement execution wake-up delay

This commit is contained in:
Eladash 2020-06-23 16:41:16 +03:00 committed by Ivan
parent 149c593d89
commit cf0fcf5a2a
7 changed files with 83 additions and 64 deletions

View File

@ -2322,14 +2322,14 @@ thread_state thread_ctrl::state()
return static_cast<thread_state>(_this->m_sync & 3); return static_cast<thread_state>(_this->m_sync & 3);
} }
void thread_ctrl::_wait_for(u64 usec, [[maybe_unused]] bool alert /* true */) void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
{ {
auto _this = g_tls_this_thread; auto _this = g_tls_this_thread;
#ifdef __linux__ #ifdef __linux__
static thread_local struct linux_timer_handle_t static thread_local struct linux_timer_handle_t
{ {
// Allocate timer only if needed (i.e. someone calls _wait_for with alert and short period) // Allocate timer only if needed (i.e. someone calls wait_for with alert and short period)
const int m_timer = timerfd_create(CLOCK_MONOTONIC, 0); const int m_timer = timerfd_create(CLOCK_MONOTONIC, 0);
linux_timer_handle_t() noexcept linux_timer_handle_t() noexcept
@ -2383,6 +2383,58 @@ void thread_ctrl::_wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff}); list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
} }
// Block the calling thread until `usec` microseconds have elapsed on the steady clock.
// Large remainders are passed to wait_for() (alert argument false); once the remainder
// drops below the host quantum the function yields or busy-waits instead, re-checking
// the deadline after every step.
// usec: requested delay in microseconds; 0 returns immediately.
void thread_ctrl::wait_for_accurate(u64 usec)
{
	if (!usec)
	{
		return;
	}

	using namespace std::chrono_literals;

	// Absolute deadline; every iteration below re-measures against it
	const auto until = std::chrono::steady_clock::now() + 1us * usec;

	while (true)
	{
#ifdef __linux__
		// NOTE: Assumption that timer initialization has succeeded
		u64 host_min_quantum = usec <= 1000 ? 10 : 50;
#else
		// Host scheduler quantum for windows (worst case)
		// NOTE: On ps3 this function has very high accuracy
		constexpr u64 host_min_quantum = 500;
#endif
		if (usec >= host_min_quantum)
		{
#ifdef __linux__
			// Do not wait for the last quantum to avoid loss of accuracy
			wait_for(usec - ((usec % host_min_quantum) + host_min_quantum), false);
#else
			// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
			wait_for(usec - (usec % host_min_quantum), false);
#endif
		}
		// TODO: Determine best value for yield delay
		else if (usec >= host_min_quantum / 2)
		{
			std::this_thread::yield();
		}
		else
		{
			busy_wait(100);
		}

		const auto current = std::chrono::steady_clock::now();

		if (current >= until)
		{
			break;
		}

		// The clock's native tick is not guaranteed to be microseconds (it is commonly
		// nanoseconds), so convert explicitly; using the raw tick count here would
		// overstate the remaining time and make the next wait far too long.
		usec = std::chrono::duration_cast<std::chrono::microseconds>(until - current).count();
	}
}
std::string thread_ctrl::get_name_cached() std::string thread_ctrl::get_name_cached()
{ {
auto _this = thread_ctrl::g_tls_this_thread; auto _this = thread_ctrl::g_tls_this_thread;

View File

@ -201,9 +201,6 @@ class thread_ctrl final
// Target cpu core layout // Target cpu core layout
static atomic_t<native_core_arrangement> g_native_core_layout; static atomic_t<native_core_arrangement> g_native_core_layout;
// Internal waiting function, may throw. Infinite value is -1.
static void _wait_for(u64 usec, bool alert);
friend class thread_base; friend class thread_base;
// Optimized get_name() for logging // Optimized get_name() for logging
@ -263,16 +260,16 @@ public:
// Read current state, possibly executing some tasks // Read current state, possibly executing some tasks
static thread_state state(); static thread_state state();
// Wait once with timeout. May spuriously return false. // Wait once with timeout. Infinite value is -1.
static inline void wait_for(u64 usec, bool alert = true) static void wait_for(u64 usec, bool alert = true);
{
_wait_for(usec, alert); // Waiting with accurate timeout
} static void wait_for_accurate(u64 usec);
// Wait. // Wait.
static inline void wait() static inline void wait()
{ {
_wait_for(-1, true); wait_for(-1, true);
} }
// Wait for both thread sync var and provided atomic var // Wait for both thread sync var and provided atomic var

View File

@ -3849,6 +3849,12 @@ s64 spu_thread::get_ch_value(u32 ch)
} }
const s64 out = channel.pop_wait(*this); const s64 out = channel.pop_wait(*this);
if (state & cpu_flag::wait)
{
wakeup_delay();
}
static_cast<void>(test_stopped()); static_cast<void>(test_stopped());
return out; return out;
}; };
@ -4068,6 +4074,7 @@ s64 spu_thread::get_ch_value(u32 ch)
thread_ctrl::wait_on(state, old, 100); thread_ctrl::wait_on(state, old, 100);
} }
wakeup_delay();
check_state(); check_state();
return events.events & mask1; return events.events & mask1;
} }
@ -4114,6 +4121,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
} }
int_ctrl[2].set(SPU_INT2_STAT_MAILBOX_INT); int_ctrl[2].set(SPU_INT2_STAT_MAILBOX_INT);
wakeup_delay();
check_state(); check_state();
return true; return true;
} }
@ -4680,6 +4688,7 @@ bool spu_thread::stop_and_signal(u32 code)
thread_ctrl::wait_on(state, old); thread_ctrl::wait_on(state, old);
} }
wakeup_delay();
return true; return true;
} }
@ -5000,6 +5009,12 @@ bool spu_thread::capture_local_storage() const
return true; return true;
} }
// Apply the configured SPU wake-up delay to this thread, if its bit is set in the
// configured thread mask.
// div: divisor applied to the configured delay (aligned_div) for callers that do not
//      need the full delay.
void spu_thread::wakeup_delay(u32 div) const
{
	// Bit `index` of the mask decides whether this thread is delayed at all
	const auto thread_bit = 1u << index;

	if (!(g_cfg.core.spu_wakeup_delay_mask & thread_bit))
	{
		return;
	}

	const auto delay = utils::aligned_div(+g_cfg.core.spu_wakeup_delay, div);
	thread_ctrl::wait_for_accurate(delay);
}
spu_function_logger::spu_function_logger(spu_thread& spu, const char* func) spu_function_logger::spu_function_logger(spu_thread& spu, const char* func)
: spu(spu) : spu(spu)
{ {

View File

@ -872,6 +872,7 @@ public:
void fast_call(u32 ls_addr); void fast_call(u32 ls_addr);
bool capture_local_storage() const; bool capture_local_storage() const;
void wakeup_delay(u32 div = 1) const;
// Convert specified SPU LS address to a pointer of specified (possibly converted to BE) type // Convert specified SPU LS address to a pointer of specified (possibly converted to BE) type
template<typename T> template<typename T>

View File

@ -2780,59 +2780,10 @@ namespace rsx
return result; return result;
} }
void thread::fifo_wake_delay(u64 div) void thread::fifo_wake_delay(u32 div)
{ {
// TODO: Nanoseconds accuracy
u64 remaining = g_cfg.video.driver_wakeup_delay;
if (!remaining)
{
return;
}
// Some cases do not need full delay // Some cases do not need full delay
remaining = utils::aligned_div(remaining, div); thread_ctrl::wait_for_accurate(utils::aligned_div(+g_cfg.video.driver_wakeup_delay, div));
const u64 until = rsx::uclock() + remaining;
while (true)
{
#ifdef __linux__
// NOTE: Assumption that timer initialization has succeeded
u64 host_min_quantum = remaining <= 1000 ? 10 : 50;
#else
// Host scheduler quantum for windows (worst case)
// NOTE: On ps3 this function has very high accuracy
constexpr u64 host_min_quantum = 500;
#endif
if (remaining >= host_min_quantum)
{
#ifdef __linux__
// Do not wait for the last quantum to avoid loss of accuracy
thread_ctrl::wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum), false);
#else
// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
thread_ctrl::wait_for(remaining - (remaining % host_min_quantum), false);
#endif
}
// TODO: Determine best value for yield delay
else if (remaining >= host_min_quantum / 2)
{
std::this_thread::yield();
}
else
{
busy_wait(100);
}
const u64 current = rsx::uclock();
if (current >= until)
{
break;
}
remaining = until - current;
}
} }
u32 thread::get_fifo_cmd() const u32 thread::get_fifo_cmd() const

View File

@ -24,6 +24,7 @@
#include "Capture/rsx_trace.h" #include "Capture/rsx_trace.h"
#include "Capture/rsx_replay.h" #include "Capture/rsx_replay.h"
#include "Emu/system_config.h"
#include "Emu/Cell/lv2/sys_rsx.h" #include "Emu/Cell/lv2/sys_rsx.h"
#include "Emu/IdManager.h" #include "Emu/IdManager.h"
#include "Emu/system_config.h" #include "Emu/system_config.h"
@ -518,7 +519,7 @@ namespace rsx
const char* file = __builtin_FILE(), const char* file = __builtin_FILE(),
const char* func = __builtin_FUNCTION()); const char* func = __builtin_FUNCTION());
static void fifo_wake_delay(u64 div = 1); static void fifo_wake_delay(u32 div = 1);
u32 get_fifo_cmd() const; u32 get_fifo_cmd() const;
void dump_regs(std::string&) const override; void dump_regs(std::string&) const override;

View File

@ -87,6 +87,8 @@ struct cfg_root : cfg::node
cfg::uint64 tx_limit2_ns{this, "TSX Transaction Second Limit", 2000}; // In nanoseconds cfg::uint64 tx_limit2_ns{this, "TSX Transaction Second Limit", 2000}; // In nanoseconds
cfg::_int<10, 3000> clocks_scale{ this, "Clocks scale", 100 }; // Changing this from 100 (percentage) may affect game speed in unexpected ways cfg::_int<10, 3000> clocks_scale{ this, "Clocks scale", 100 }; // Changing this from 100 (percentage) may affect game speed in unexpected ways
// Delay applied by spu_thread::wakeup_delay() after an SPU thread wakes up; the value is
// passed to thread_ctrl::wait_for_accurate() as microseconds. Default 0.
cfg::uint<0, 3000> spu_wakeup_delay{ this, "SPU Wake-Up Delay", 0, true };
// Bitmask tested as (1u << index) against the SPU thread's index: the delay is applied only
// to threads whose bit is set. The default has all six low bits set.
cfg::uint<0, (1 << 6) - 1> spu_wakeup_delay_mask{ this, "SPU Wake-Up Delay Thread Mask", (1 << 6) - 1, true };
#if defined (__linux__) || defined (__APPLE__) #if defined (__linux__) || defined (__APPLE__)
cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true }; cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true };
#else #else
@ -168,7 +170,7 @@ struct cfg_root : cfg::node
cfg::_int<1, 1024> min_scalable_dimension{ this, "Minimum Scalable Dimension", 16 }; cfg::_int<1, 1024> min_scalable_dimension{ this, "Minimum Scalable Dimension", 16 };
cfg::_int<0, 16> shader_compiler_threads_count{ this, "Shader Compiler Threads", 0 }; cfg::_int<0, 16> shader_compiler_threads_count{ this, "Shader Compiler Threads", 0 };
cfg::_int<0, 30000000> driver_recovery_timeout{ this, "Driver Recovery Timeout", 1000000, true }; cfg::_int<0, 30000000> driver_recovery_timeout{ this, "Driver Recovery Timeout", 1000000, true };
cfg::_int<0, 16667> driver_wakeup_delay{ this, "Driver Wake-Up Delay", 1, true }; cfg::uint<0, 16667> driver_wakeup_delay{ this, "Driver Wake-Up Delay", 1, true };
cfg::_int<1, 1800> vblank_rate{ this, "Vblank Rate", 60, true }; // Changing this from 60 may affect game speed in unexpected ways cfg::_int<1, 1800> vblank_rate{ this, "Vblank Rate", 60, true }; // Changing this from 60 may affect game speed in unexpected ways
cfg::_bool vblank_ntsc{ this, "Vblank NTSC Fixup", false, true }; cfg::_bool vblank_ntsc{ this, "Vblank NTSC Fixup", false, true };
cfg::_bool decr_memory_layout{ this, "DECR memory layout", false}; // Force enable increased allowed main memory range as DECR console cfg::_bool decr_memory_layout{ this, "DECR memory layout", false}; // Force enable increased allowed main memory range as DECR console