1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 02:32:36 +01:00

Use Linux timers for sleeps up to 1ms (#6697)

* Use Linux timers for sleeps up to 1ms (v3)
The current sleep timer implementation basically offers two variants: either
wait the specified time exactly with a condition variable (host setting), or
combine that wait with a thread-yielding busy loop afterwards (usleep timer).

While the second one is very precise, it consumes CPU cycles for each wait call
below 50us. Games like Bomberman Ultra spam 30us waits, so the emulator hogs
low-power CPUs. Switching to host mode reduces CPU consumption but adds a
~50us penalty to each wait call, thus extending all sleeps by a factor of
more than two.

This change tries to improve the system timer on Linux by using native Linux
timers (timerfd) for short wait calls of up to 1ms. This has two effects:

- Host wait setting has much less wait overhead
- usleep wait setting produces lower CPU overhead
This commit is contained in:
plappermaul 2019-10-09 19:03:34 +02:00 committed by Ivan
parent 6b1e1e4020
commit 925f2ce02f
3 changed files with 48 additions and 8 deletions

View File

@ -38,6 +38,9 @@
#include <sys/resource.h>
#include <time.h>
#endif
#ifdef __linux__
#include <sys/timerfd.h>
#endif
#include "sync.h"
#include "Log.h"
@ -1719,6 +1722,14 @@ void thread_base::initialize(bool(*wait_cb)(const void*))
#elif !defined(_WIN32)
pthread_setname_np(pthread_self(), m_name.get().substr(0, 15).c_str());
#endif
#ifdef __linux__
m_timer = timerfd_create(CLOCK_MONOTONIC, 0);
if (m_timer == -1)
{
LOG_ERROR(GENERAL, "Linux timer allocation failed, use wait_unlock() only");
}
#endif
}
void thread_base::notify_abort() noexcept
@ -1734,6 +1745,13 @@ bool thread_base::finalize(int) noexcept
// Report pending errors
error_code::error_report(0, 0, 0, 0);
#ifdef __linux__
if (m_timer != -1)
{
close(m_timer);
}
#endif
#ifdef _WIN32
ULONG64 cycles{};
QueryThreadCycleTime(GetCurrentThread(), &cycles);
@ -1781,6 +1799,23 @@ void thread_ctrl::_wait_for(u64 usec, bool alert /* true */)
{
auto _this = g_tls_this_thread;
#ifdef __linux__
if (!alert && _this->m_timer != -1 && usec > 0 && usec <= 1000)
{
struct itimerspec timeout;
u64 missed;
u64 nsec = usec * 1000ull;
timeout.it_value.tv_nsec = (nsec % 1000000000ull);
timeout.it_value.tv_sec = nsec / 1000000000ull;
timeout.it_interval.tv_sec = 0;
timeout.it_interval.tv_nsec = 0;
timerfd_settime(_this->m_timer, 0, &timeout, NULL);
read(_this->m_timer, &missed, sizeof(missed));
return;
}
#endif
std::unique_lock lock(_this->m_mutex, std::defer_lock);
while (true)

View File

@ -118,6 +118,11 @@ class thread_base
using native_entry = void*(*)(void* arg);
#endif
#ifdef __linux__
// Linux thread timer
int m_timer = -1;
#endif
// Thread handle (platform-specific)
atomic_t<std::uintptr_t> m_thread{0};

View File

@ -241,14 +241,6 @@ public:
// Now scale the result
usec = (std::min<u64>(usec, max_usec) * g_cfg.core.clocks_scale) / 100;
#ifdef __linux__
// TODO: Confirm whether Apple or any BSD can benefit from this as well
constexpr u32 host_min_quantum = 50;
#else
// Host scheduler quantum for windows (worst case)
// NOTE: On ps3 this function has very high accuracy
constexpr u32 host_min_quantum = 500;
#endif
extern u64 get_system_time();
u64 passed = 0;
@ -258,6 +250,14 @@ public:
while (usec >= passed)
{
remaining = usec - passed;
#ifdef __linux__
// NOTE: Assumption that timer initialization has succeeded
u64 host_min_quantum = is_usleep && remaining <= 1000 ? 16 : 50;
#else
// Host scheduler quantum for windows (worst case)
// NOTE: On ps3 this function has very high accuracy
constexpr u64 host_min_quantum = 500;
#endif
if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers))
{