mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 02:32:36 +01:00
SPU: Implement execution wake-up delay
This commit is contained in:
parent
149c593d89
commit
cf0fcf5a2a
@ -2322,14 +2322,14 @@ thread_state thread_ctrl::state()
|
||||
return static_cast<thread_state>(_this->m_sync & 3);
|
||||
}
|
||||
|
||||
void thread_ctrl::_wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
|
||||
void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
|
||||
{
|
||||
auto _this = g_tls_this_thread;
|
||||
|
||||
#ifdef __linux__
|
||||
static thread_local struct linux_timer_handle_t
|
||||
{
|
||||
// Allocate timer only if needed (i.e. someone calls _wait_for with alert and short period)
|
||||
// Allocate timer only if needed (i.e. someone calls wait_for with alert and short period)
|
||||
const int m_timer = timerfd_create(CLOCK_MONOTONIC, 0);
|
||||
|
||||
linux_timer_handle_t() noexcept
|
||||
@ -2383,6 +2383,58 @@ void thread_ctrl::_wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
|
||||
list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
|
||||
}
|
||||
|
||||
// Waiting with accurate timeout.
// Blocks the calling thread for roughly `usec` microseconds, re-reading std::chrono::steady_clock
// after every step. Large remainders are handed to wait_for() (alert = false), medium ones give up
// the time slice with yield(), and the final stretch spins through busy_wait().
// A zero `usec` returns immediately.
void thread_ctrl::wait_for_accurate(u64 usec)
{
	if (!usec)
	{
		return;
	}

	using namespace std::chrono_literals;

	const auto until = std::chrono::steady_clock::now() + 1us * usec;

	while (true)
	{
#ifdef __linux__
		// NOTE: Assumption that timer initialization has succeeded
		const u64 host_min_quantum = usec <= 1000 ? 10 : 50;
#else
		// Host scheduler quantum for windows (worst case)
		// NOTE: On ps3 this function has very high accuracy
		constexpr u64 host_min_quantum = 500;
#endif

		if (usec >= host_min_quantum)
		{
#ifdef __linux__
			// Do not wait for the last quantum to avoid loss of accuracy
			wait_for(usec - ((usec % host_min_quantum) + host_min_quantum), false);
#else
			// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
			wait_for(usec - (usec % host_min_quantum), false);
#endif
		}
		// TODO: Determine best value for yield delay
		else if (usec >= host_min_quantum / 2)
		{
			std::this_thread::yield();
		}
		else
		{
			busy_wait(100);
		}

		const auto current = std::chrono::steady_clock::now();

		if (current >= until)
		{
			break;
		}

		// The tick period of steady_clock is implementation-defined (commonly nanoseconds), so the
		// raw count() must not be stored into a microsecond variable. Convert explicitly; a remainder
		// below 1us truncates to 0 and is finished off by the busy_wait() branch above.
		usec = static_cast<u64>(std::chrono::duration_cast<std::chrono::microseconds>(until - current).count());
	}
}
|
||||
|
||||
std::string thread_ctrl::get_name_cached()
|
||||
{
|
||||
auto _this = thread_ctrl::g_tls_this_thread;
|
||||
|
@ -201,9 +201,6 @@ class thread_ctrl final
|
||||
// Target cpu core layout
|
||||
static atomic_t<native_core_arrangement> g_native_core_layout;
|
||||
|
||||
// Internal waiting function, may throw. Infinite value is -1.
|
||||
static void _wait_for(u64 usec, bool alert);
|
||||
|
||||
friend class thread_base;
|
||||
|
||||
// Optimized get_name() for logging
|
||||
@ -263,16 +260,16 @@ public:
|
||||
// Read current state, possibly executing some tasks
|
||||
static thread_state state();
|
||||
|
||||
// Wait once with timeout. May spuriously return false.
|
||||
static inline void wait_for(u64 usec, bool alert = true)
|
||||
{
|
||||
_wait_for(usec, alert);
|
||||
}
|
||||
// Wait once with timeout. Infinite value is -1.
|
||||
static void wait_for(u64 usec, bool alert = true);
|
||||
|
||||
// Waiting with accurate timeout
|
||||
static void wait_for_accurate(u64 usec);
|
||||
|
||||
// Wait.
|
||||
static inline void wait()
|
||||
{
|
||||
_wait_for(-1, true);
|
||||
wait_for(-1, true);
|
||||
}
|
||||
|
||||
// Wait for both thread sync var and provided atomic var
|
||||
|
@ -3849,6 +3849,12 @@ s64 spu_thread::get_ch_value(u32 ch)
|
||||
}
|
||||
|
||||
const s64 out = channel.pop_wait(*this);
|
||||
|
||||
if (state & cpu_flag::wait)
|
||||
{
|
||||
wakeup_delay();
|
||||
}
|
||||
|
||||
static_cast<void>(test_stopped());
|
||||
return out;
|
||||
};
|
||||
@ -4068,6 +4074,7 @@ s64 spu_thread::get_ch_value(u32 ch)
|
||||
thread_ctrl::wait_on(state, old, 100);
|
||||
}
|
||||
|
||||
wakeup_delay();
|
||||
check_state();
|
||||
return events.events & mask1;
|
||||
}
|
||||
@ -4114,6 +4121,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
|
||||
}
|
||||
|
||||
int_ctrl[2].set(SPU_INT2_STAT_MAILBOX_INT);
|
||||
wakeup_delay();
|
||||
check_state();
|
||||
return true;
|
||||
}
|
||||
@ -4680,6 +4688,7 @@ bool spu_thread::stop_and_signal(u32 code)
|
||||
thread_ctrl::wait_on(state, old);
|
||||
}
|
||||
|
||||
wakeup_delay();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -5000,6 +5009,12 @@ bool spu_thread::capture_local_storage() const
|
||||
return true;
|
||||
}
|
||||
|
||||
// Applies the configured SPU wake-up delay to the calling thread.
// Does nothing unless the bit for this thread's `index` is set in "SPU Wake-Up Delay Thread Mask".
// `div` scales the configured delay down through utils::aligned_div (rounding behaviour is defined
// by that helper - presumably rounds up, TODO confirm).
void spu_thread::wakeup_delay(u32 div) const
{
	const bool delay_enabled = (g_cfg.core.spu_wakeup_delay_mask & (1u << index)) != 0;

	if (!delay_enabled)
	{
		return;
	}

	// wait_for_accurate() takes microseconds and returns immediately for a zero delay
	thread_ctrl::wait_for_accurate(utils::aligned_div(+g_cfg.core.spu_wakeup_delay, div));
}
|
||||
|
||||
spu_function_logger::spu_function_logger(spu_thread& spu, const char* func)
|
||||
: spu(spu)
|
||||
{
|
||||
|
@ -872,6 +872,7 @@ public:
|
||||
void fast_call(u32 ls_addr);
|
||||
|
||||
bool capture_local_storage() const;
|
||||
void wakeup_delay(u32 div = 1) const;
|
||||
|
||||
// Convert specified SPU LS address to a pointer of specified (possibly converted to BE) type
|
||||
template<typename T>
|
||||
|
@ -2780,59 +2780,10 @@ namespace rsx
|
||||
return result;
|
||||
}
|
||||
|
||||
void thread::fifo_wake_delay(u64 div)
|
||||
void thread::fifo_wake_delay(u32 div)
|
||||
{
|
||||
// TODO: Nanoseconds accuracy
|
||||
u64 remaining = g_cfg.video.driver_wakeup_delay;
|
||||
|
||||
if (!remaining)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Some cases do not need full delay
|
||||
remaining = utils::aligned_div(remaining, div);
|
||||
const u64 until = rsx::uclock() + remaining;
|
||||
|
||||
while (true)
|
||||
{
|
||||
#ifdef __linux__
|
||||
// NOTE: Assumption that timer initialization has succeeded
|
||||
u64 host_min_quantum = remaining <= 1000 ? 10 : 50;
|
||||
#else
|
||||
// Host scheduler quantum for windows (worst case)
|
||||
// NOTE: On ps3 this function has very high accuracy
|
||||
constexpr u64 host_min_quantum = 500;
|
||||
#endif
|
||||
if (remaining >= host_min_quantum)
|
||||
{
|
||||
#ifdef __linux__
|
||||
// Do not wait for the last quantum to avoid loss of accuracy
|
||||
thread_ctrl::wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum), false);
|
||||
#else
|
||||
// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
|
||||
thread_ctrl::wait_for(remaining - (remaining % host_min_quantum), false);
|
||||
#endif
|
||||
}
|
||||
// TODO: Determine best value for yield delay
|
||||
else if (remaining >= host_min_quantum / 2)
|
||||
{
|
||||
std::this_thread::yield();
|
||||
}
|
||||
else
|
||||
{
|
||||
busy_wait(100);
|
||||
}
|
||||
|
||||
const u64 current = rsx::uclock();
|
||||
|
||||
if (current >= until)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
remaining = until - current;
|
||||
}
|
||||
thread_ctrl::wait_for_accurate(utils::aligned_div(+g_cfg.video.driver_wakeup_delay, div));
|
||||
}
|
||||
|
||||
u32 thread::get_fifo_cmd() const
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "Capture/rsx_trace.h"
|
||||
#include "Capture/rsx_replay.h"
|
||||
|
||||
#include "Emu/system_config.h"
|
||||
#include "Emu/Cell/lv2/sys_rsx.h"
|
||||
#include "Emu/IdManager.h"
|
||||
#include "Emu/system_config.h"
|
||||
@ -518,7 +519,7 @@ namespace rsx
|
||||
const char* file = __builtin_FILE(),
|
||||
const char* func = __builtin_FUNCTION());
|
||||
|
||||
static void fifo_wake_delay(u64 div = 1);
|
||||
static void fifo_wake_delay(u32 div = 1);
|
||||
u32 get_fifo_cmd() const;
|
||||
|
||||
void dump_regs(std::string&) const override;
|
||||
|
@ -87,6 +87,8 @@ struct cfg_root : cfg::node
|
||||
cfg::uint64 tx_limit2_ns{this, "TSX Transaction Second Limit", 2000}; // In nanoseconds
|
||||
|
||||
cfg::_int<10, 3000> clocks_scale{ this, "Clocks scale", 100 }; // Changing this from 100 (percentage) may affect game speed in unexpected ways
|
||||
cfg::uint<0, 3000> spu_wakeup_delay{ this, "SPU Wake-Up Delay", 0, true };
|
||||
cfg::uint<0, (1 << 6) - 1> spu_wakeup_delay_mask{ this, "SPU Wake-Up Delay Thread Mask", (1 << 6) - 1, true };
|
||||
#if defined (__linux__) || defined (__APPLE__)
|
||||
cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true };
|
||||
#else
|
||||
@ -168,7 +170,7 @@ struct cfg_root : cfg::node
|
||||
cfg::_int<1, 1024> min_scalable_dimension{ this, "Minimum Scalable Dimension", 16 };
|
||||
cfg::_int<0, 16> shader_compiler_threads_count{ this, "Shader Compiler Threads", 0 };
|
||||
cfg::_int<0, 30000000> driver_recovery_timeout{ this, "Driver Recovery Timeout", 1000000, true };
|
||||
cfg::_int<0, 16667> driver_wakeup_delay{ this, "Driver Wake-Up Delay", 1, true };
|
||||
cfg::uint<0, 16667> driver_wakeup_delay{ this, "Driver Wake-Up Delay", 1, true };
|
||||
cfg::_int<1, 1800> vblank_rate{ this, "Vblank Rate", 60, true }; // Changing this from 60 may affect game speed in unexpected ways
|
||||
cfg::_bool vblank_ntsc{ this, "Vblank NTSC Fixup", false, true };
|
||||
cfg::_bool decr_memory_layout{ this, "DECR memory layout", false}; // Force enable increased allowed main memory range as DECR console
|
||||
|
Loading…
Reference in New Issue
Block a user