mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 02:32:36 +01:00
SPU: Implement execution wake-up delay
This commit is contained in:
parent
149c593d89
commit
cf0fcf5a2a
@ -2322,14 +2322,14 @@ thread_state thread_ctrl::state()
|
|||||||
return static_cast<thread_state>(_this->m_sync & 3);
|
return static_cast<thread_state>(_this->m_sync & 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
void thread_ctrl::_wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
|
void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
|
||||||
{
|
{
|
||||||
auto _this = g_tls_this_thread;
|
auto _this = g_tls_this_thread;
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
static thread_local struct linux_timer_handle_t
|
static thread_local struct linux_timer_handle_t
|
||||||
{
|
{
|
||||||
// Allocate timer only if needed (i.e. someone calls _wait_for with alert and short period)
|
// Allocate timer only if needed (i.e. someone calls wait_for with alert and short period)
|
||||||
const int m_timer = timerfd_create(CLOCK_MONOTONIC, 0);
|
const int m_timer = timerfd_create(CLOCK_MONOTONIC, 0);
|
||||||
|
|
||||||
linux_timer_handle_t() noexcept
|
linux_timer_handle_t() noexcept
|
||||||
@ -2383,6 +2383,58 @@ void thread_ctrl::_wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
|
|||||||
list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
|
list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Block the calling thread for approximately `usec` microseconds, trading CPU time for accuracy.
//
// The deadline is fixed once, on entry, against std::chrono::steady_clock. The loop then
// repeatedly picks a waiting strategy based on how much time is left:
//   - at least one host quantum left: sleep through wait_for() (without alert) for most of it;
//   - at least half a quantum left:   yield the thread;
//   - otherwise:                      busy_wait(100) (presumably a short spin - see busy_wait).
// After every step the clock is re-read and the remaining time recomputed.
//
// usec: requested delay in microseconds; 0 returns immediately.
void thread_ctrl::wait_for_accurate(u64 usec)
{
	if (!usec)
	{
		return;
	}

	using namespace std::chrono_literals;

	const auto until = std::chrono::steady_clock::now() + 1us * usec;

	while (true)
	{
#ifdef __linux__
		// NOTE: Assumption that timer initialization has succeeded
		const u64 host_min_quantum = usec <= 1000 ? 10 : 50;
#else
		// Host scheduler quantum for windows (worst case)
		// NOTE: On ps3 this function has very high accuracy
		constexpr u64 host_min_quantum = 500;
#endif
		if (usec >= host_min_quantum)
		{
#ifdef __linux__
			// Do not wait for the last quantum to avoid loss of accuracy
			wait_for(usec - ((usec % host_min_quantum) + host_min_quantum), false);
#else
			// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
			wait_for(usec - (usec % host_min_quantum), false);
#endif
		}
		// TODO: Determine best value for yield delay
		else if (usec >= host_min_quantum / 2)
		{
			std::this_thread::yield();
		}
		else
		{
			busy_wait(100);
		}

		const auto current = std::chrono::steady_clock::now();

		if (current >= until)
		{
			break;
		}

		// The difference of two time points is expressed in the clock's native tick
		// (nanoseconds on common implementations), not microseconds. Convert explicitly,
		// otherwise the remaining time is overstated and the next wait_for() oversleeps.
		// Truncation to 0 is fine: it selects the busy-wait branch until the deadline passes.
		usec = static_cast<u64>(std::chrono::duration_cast<std::chrono::microseconds>(until - current).count());
	}
}
|
||||||
|
|
||||||
std::string thread_ctrl::get_name_cached()
|
std::string thread_ctrl::get_name_cached()
|
||||||
{
|
{
|
||||||
auto _this = thread_ctrl::g_tls_this_thread;
|
auto _this = thread_ctrl::g_tls_this_thread;
|
||||||
|
@ -201,9 +201,6 @@ class thread_ctrl final
|
|||||||
// Target cpu core layout
|
// Target cpu core layout
|
||||||
static atomic_t<native_core_arrangement> g_native_core_layout;
|
static atomic_t<native_core_arrangement> g_native_core_layout;
|
||||||
|
|
||||||
// Internal waiting function, may throw. Infinite value is -1.
|
|
||||||
static void _wait_for(u64 usec, bool alert);
|
|
||||||
|
|
||||||
friend class thread_base;
|
friend class thread_base;
|
||||||
|
|
||||||
// Optimized get_name() for logging
|
// Optimized get_name() for logging
|
||||||
@ -263,16 +260,16 @@ public:
|
|||||||
// Read current state, possibly executing some tasks
|
// Read current state, possibly executing some tasks
|
||||||
static thread_state state();
|
static thread_state state();
|
||||||
|
|
||||||
// Wait once with timeout. May spuriously return false.
|
// Wait once with timeout. Infinite value is -1.
|
||||||
static inline void wait_for(u64 usec, bool alert = true)
|
static void wait_for(u64 usec, bool alert = true);
|
||||||
{
|
|
||||||
_wait_for(usec, alert);
|
// Waiting with accurate timeout
|
||||||
}
|
static void wait_for_accurate(u64 usec);
|
||||||
|
|
||||||
// Wait.
|
// Wait.
|
||||||
static inline void wait()
|
static inline void wait()
|
||||||
{
|
{
|
||||||
_wait_for(-1, true);
|
wait_for(-1, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for both thread sync var and provided atomic var
|
// Wait for both thread sync var and provided atomic var
|
||||||
|
@ -3849,6 +3849,12 @@ s64 spu_thread::get_ch_value(u32 ch)
|
|||||||
}
|
}
|
||||||
|
|
||||||
const s64 out = channel.pop_wait(*this);
|
const s64 out = channel.pop_wait(*this);
|
||||||
|
|
||||||
|
if (state & cpu_flag::wait)
|
||||||
|
{
|
||||||
|
wakeup_delay();
|
||||||
|
}
|
||||||
|
|
||||||
static_cast<void>(test_stopped());
|
static_cast<void>(test_stopped());
|
||||||
return out;
|
return out;
|
||||||
};
|
};
|
||||||
@ -4068,6 +4074,7 @@ s64 spu_thread::get_ch_value(u32 ch)
|
|||||||
thread_ctrl::wait_on(state, old, 100);
|
thread_ctrl::wait_on(state, old, 100);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wakeup_delay();
|
||||||
check_state();
|
check_state();
|
||||||
return events.events & mask1;
|
return events.events & mask1;
|
||||||
}
|
}
|
||||||
@ -4114,6 +4121,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int_ctrl[2].set(SPU_INT2_STAT_MAILBOX_INT);
|
int_ctrl[2].set(SPU_INT2_STAT_MAILBOX_INT);
|
||||||
|
wakeup_delay();
|
||||||
check_state();
|
check_state();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -4680,6 +4688,7 @@ bool spu_thread::stop_and_signal(u32 code)
|
|||||||
thread_ctrl::wait_on(state, old);
|
thread_ctrl::wait_on(state, old);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wakeup_delay();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5000,6 +5009,12 @@ bool spu_thread::capture_local_storage() const
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply the configured SPU wake-up delay to the calling thread, if enabled for this SPU.
//
// The delay is applied only when the bit for this thread's `index` is set in the
// "SPU Wake-Up Delay Thread Mask" setting. The amount is the "SPU Wake-Up Delay" setting
// divided by `div` through utils::aligned_div, and is passed to
// thread_ctrl::wait_for_accurate(), which takes microseconds.
//
// div: divisor for call sites that do not need the full delay (default 1 in the declaration).
void spu_thread::wakeup_delay(u32 div) const
{
	const auto this_thread_bit = 1u << index;

	if (!(g_cfg.core.spu_wakeup_delay_mask & this_thread_bit))
	{
		// Delay is disabled for this SPU thread
		return;
	}

	const auto delay_us = utils::aligned_div(+g_cfg.core.spu_wakeup_delay, div);
	thread_ctrl::wait_for_accurate(delay_us);
}
|
||||||
|
|
||||||
spu_function_logger::spu_function_logger(spu_thread& spu, const char* func)
|
spu_function_logger::spu_function_logger(spu_thread& spu, const char* func)
|
||||||
: spu(spu)
|
: spu(spu)
|
||||||
{
|
{
|
||||||
|
@ -872,6 +872,7 @@ public:
|
|||||||
void fast_call(u32 ls_addr);
|
void fast_call(u32 ls_addr);
|
||||||
|
|
||||||
bool capture_local_storage() const;
|
bool capture_local_storage() const;
|
||||||
|
void wakeup_delay(u32 div = 1) const;
|
||||||
|
|
||||||
// Convert specified SPU LS address to a pointer of specified (possibly converted to BE) type
|
// Convert specified SPU LS address to a pointer of specified (possibly converted to BE) type
|
||||||
template<typename T>
|
template<typename T>
|
||||||
|
@ -2780,59 +2780,10 @@ namespace rsx
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void thread::fifo_wake_delay(u64 div)
|
void thread::fifo_wake_delay(u32 div)
|
||||||
{
|
{
|
||||||
// TODO: Nanoseconds accuracy
|
|
||||||
u64 remaining = g_cfg.video.driver_wakeup_delay;
|
|
||||||
|
|
||||||
if (!remaining)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Some cases do not need full delay
|
// Some cases do not need full delay
|
||||||
remaining = utils::aligned_div(remaining, div);
|
thread_ctrl::wait_for_accurate(utils::aligned_div(+g_cfg.video.driver_wakeup_delay, div));
|
||||||
const u64 until = rsx::uclock() + remaining;
|
|
||||||
|
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
#ifdef __linux__
|
|
||||||
// NOTE: Assumption that timer initialization has succeeded
|
|
||||||
u64 host_min_quantum = remaining <= 1000 ? 10 : 50;
|
|
||||||
#else
|
|
||||||
// Host scheduler quantum for windows (worst case)
|
|
||||||
// NOTE: On ps3 this function has very high accuracy
|
|
||||||
constexpr u64 host_min_quantum = 500;
|
|
||||||
#endif
|
|
||||||
if (remaining >= host_min_quantum)
|
|
||||||
{
|
|
||||||
#ifdef __linux__
|
|
||||||
// Do not wait for the last quantum to avoid loss of accuracy
|
|
||||||
thread_ctrl::wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum), false);
|
|
||||||
#else
|
|
||||||
// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
|
|
||||||
thread_ctrl::wait_for(remaining - (remaining % host_min_quantum), false);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
// TODO: Determine best value for yield delay
|
|
||||||
else if (remaining >= host_min_quantum / 2)
|
|
||||||
{
|
|
||||||
std::this_thread::yield();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
busy_wait(100);
|
|
||||||
}
|
|
||||||
|
|
||||||
const u64 current = rsx::uclock();
|
|
||||||
|
|
||||||
if (current >= until)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
remaining = until - current;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 thread::get_fifo_cmd() const
|
u32 thread::get_fifo_cmd() const
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
#include "Capture/rsx_trace.h"
|
#include "Capture/rsx_trace.h"
|
||||||
#include "Capture/rsx_replay.h"
|
#include "Capture/rsx_replay.h"
|
||||||
|
|
||||||
|
#include "Emu/system_config.h"
|
||||||
#include "Emu/Cell/lv2/sys_rsx.h"
|
#include "Emu/Cell/lv2/sys_rsx.h"
|
||||||
#include "Emu/IdManager.h"
|
#include "Emu/IdManager.h"
|
||||||
#include "Emu/system_config.h"
|
#include "Emu/system_config.h"
|
||||||
@ -518,7 +519,7 @@ namespace rsx
|
|||||||
const char* file = __builtin_FILE(),
|
const char* file = __builtin_FILE(),
|
||||||
const char* func = __builtin_FUNCTION());
|
const char* func = __builtin_FUNCTION());
|
||||||
|
|
||||||
static void fifo_wake_delay(u64 div = 1);
|
static void fifo_wake_delay(u32 div = 1);
|
||||||
u32 get_fifo_cmd() const;
|
u32 get_fifo_cmd() const;
|
||||||
|
|
||||||
void dump_regs(std::string&) const override;
|
void dump_regs(std::string&) const override;
|
||||||
|
@ -87,6 +87,8 @@ struct cfg_root : cfg::node
|
|||||||
cfg::uint64 tx_limit2_ns{this, "TSX Transaction Second Limit", 2000}; // In nanoseconds
|
cfg::uint64 tx_limit2_ns{this, "TSX Transaction Second Limit", 2000}; // In nanoseconds
|
||||||
|
|
||||||
cfg::_int<10, 3000> clocks_scale{ this, "Clocks scale", 100 }; // Changing this from 100 (percentage) may affect game speed in unexpected ways
|
cfg::_int<10, 3000> clocks_scale{ this, "Clocks scale", 100 }; // Changing this from 100 (percentage) may affect game speed in unexpected ways
|
||||||
|
cfg::uint<0, 3000> spu_wakeup_delay{ this, "SPU Wake-Up Delay", 0, true };
|
||||||
|
cfg::uint<0, (1 << 6) - 1> spu_wakeup_delay_mask{ this, "SPU Wake-Up Delay Thread Mask", (1 << 6) - 1, true };
|
||||||
#if defined (__linux__) || defined (__APPLE__)
|
#if defined (__linux__) || defined (__APPLE__)
|
||||||
cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true };
|
cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy", sleep_timers_accuracy_level::_as_host, true };
|
||||||
#else
|
#else
|
||||||
@ -168,7 +170,7 @@ struct cfg_root : cfg::node
|
|||||||
cfg::_int<1, 1024> min_scalable_dimension{ this, "Minimum Scalable Dimension", 16 };
|
cfg::_int<1, 1024> min_scalable_dimension{ this, "Minimum Scalable Dimension", 16 };
|
||||||
cfg::_int<0, 16> shader_compiler_threads_count{ this, "Shader Compiler Threads", 0 };
|
cfg::_int<0, 16> shader_compiler_threads_count{ this, "Shader Compiler Threads", 0 };
|
||||||
cfg::_int<0, 30000000> driver_recovery_timeout{ this, "Driver Recovery Timeout", 1000000, true };
|
cfg::_int<0, 30000000> driver_recovery_timeout{ this, "Driver Recovery Timeout", 1000000, true };
|
||||||
cfg::_int<0, 16667> driver_wakeup_delay{ this, "Driver Wake-Up Delay", 1, true };
|
cfg::uint<0, 16667> driver_wakeup_delay{ this, "Driver Wake-Up Delay", 1, true };
|
||||||
cfg::_int<1, 1800> vblank_rate{ this, "Vblank Rate", 60, true }; // Changing this from 60 may affect game speed in unexpected ways
|
cfg::_int<1, 1800> vblank_rate{ this, "Vblank Rate", 60, true }; // Changing this from 60 may affect game speed in unexpected ways
|
||||||
cfg::_bool vblank_ntsc{ this, "Vblank NTSC Fixup", false, true };
|
cfg::_bool vblank_ntsc{ this, "Vblank NTSC Fixup", false, true };
|
||||||
cfg::_bool decr_memory_layout{ this, "DECR memory layout", false}; // Force enable increased allowed main memory range as DECR console
|
cfg::_bool decr_memory_layout{ this, "DECR memory layout", false}; // Force enable increased allowed main memory range as DECR console
|
||||||
|
Loading…
Reference in New Issue
Block a user