diff --git a/Utilities/cond.cpp b/Utilities/cond.cpp
index e26891d85f..a502210ff0 100644
--- a/Utilities/cond.cpp
+++ b/Utilities/cond.cpp
@@ -293,3 +293,106 @@ void cond_one::imp_notify() noexcept
 	futex(&m_value, FUTEX_WAKE_PRIVATE, 1);
 #endif
 }
+
+bool cond_x16::imp_wait(u32 _new, u32 slot, u64 _timeout) noexcept
+{
+	const u32 wait_bit = c_wait << slot;
+	const u32 lock_bit = c_lock << slot;
+
+	const bool is_inf = _timeout > cond_variable::max_timeout;
+
+#ifdef _WIN32
+	LARGE_INTEGER timeout;
+	timeout.QuadPart = _timeout * -10;
+
+	if (HRESULT rc = _timeout ? NtWaitForKeyedEvent(nullptr, &m_cvx16, false, is_inf ? nullptr : &timeout) : WAIT_TIMEOUT)
+	{
+		verify(HERE), rc == WAIT_TIMEOUT;
+
+		// Retire
+		const bool signaled = this->retire(slot);
+
+		while (signaled)
+		{
+			timeout.QuadPart = 0;
+
+			if (HRESULT rc2 = NtWaitForKeyedEvent(nullptr, &m_cvx16, false, &timeout))
+			{
+				verify(HERE), rc2 == WAIT_TIMEOUT;
+				SwitchToThread();
+				continue;
+			}
+
+			return true;
+		}
+
+		return false;
+	}
+
+	if (!this->retire(slot))
+	{
+		// Stolen notification: restore balance
+		NtReleaseKeyedEvent(nullptr, &m_cvx16, false, nullptr);
+	}
+#else
+	timespec timeout;
+	timeout.tv_sec = _timeout / 1000000;
+	timeout.tv_nsec = (_timeout % 1000000) * 1000;
+
+	for (u32 value = _new; ((value >> slot) & c_sig) != c_sig; value = m_cvx16)
+	{
+		const int err = futex(&m_cvx16, FUTEX_WAIT_PRIVATE, value, is_inf ? nullptr : &timeout) == 0
+			? 0
+			: errno;
+
+		// Normal or timeout wakeup
+		if (!err || (!is_inf && err == ETIMEDOUT))
+		{
+			return this->retire(slot);
+		}
+
+		// Not a wakeup
+		verify(HERE), err == EAGAIN;
+	}
+
+	// Convert c_sig to c_lock
+	m_cvx16 &= ~wait_bit;
+#endif
+
+	return true;
+}
+
+void cond_x16::imp_notify() noexcept
+{
+	auto [old, ok] = m_cvx16.fetch_op([](u32& v)
+	{
+		const u32 lock_mask = v >> 16;
+		const u32 wait_mask = v & 0xffff;
+
+		if (const u32 sig_mask = lock_mask ^ wait_mask)
+		{
+			v |= sig_mask | sig_mask << 16;
+			return true;
+		}
+
+		return false;
+	});
+
+	// Determine if some waiters need a syscall notification
+	const u32 wait_mask = old & (~old >> 16);
+
+	if (UNLIKELY(!ok || !wait_mask))
+	{
+		return;
+	}
+
+#ifdef _WIN32
+	for (u32 i = 0; i < 16; i++)
+	{
+		if ((wait_mask >> i) & 1)
+			NtReleaseKeyedEvent(nullptr, &m_cvx16, false, nullptr);
+	}
+#else
+	futex(&m_cvx16, FUTEX_WAKE_PRIVATE, INT_MAX);
+#endif
+}
diff --git a/Utilities/cond.h b/Utilities/cond.h
index a45a3afd92..b17442dc1a 100644
--- a/Utilities/cond.h
+++ b/Utilities/cond.h
@@ -3,6 +3,7 @@
 #include "types.h"
 #include "Atomic.h"
 #include
+#include "asm.h"
 
 // Lightweight condition variable
 class cond_variable
@@ -179,3 +180,137 @@ public:
 		imp_notify();
 	}
 };
+
+// Packed version of cond_one, supports up to 16 readers.
+class cond_x16
+{
+	// For information, shouldn't modify
+	enum : u32
+	{
+		c_wait = 1,
+		c_lock = 1 << 16,
+		c_sig = 1 << 16 | 1,
+	};
+
+	// Split in 16-bit parts for convenient bit combining
+	atomic_t<u32> m_cvx16{0};
+
+	// Effectively unused, only counts readers
+	atomic_t<u32> m_total{0};
+
+	class lock_x16
+	{
+		cond_x16* m_this;
+		u32 m_slot;
+
+		friend class cond_x16;
+
+	public:
+		lock_x16(cond_x16* _this) noexcept
+			: m_this(_this)
+		{
+			// Spin if the number of readers exceeds 16
+			while (UNLIKELY(m_this->m_total++ >= 16))
+				m_this->m_total--;
+
+			// Lock and remember obtained slot index
+			m_slot = m_this->m_cvx16.atomic_op([](u32& cvx16)
+			{
+				// Combine used bits and invert to find least significant bit unused
+				const u32 slot = utils::cnttz32(~((cvx16 & 0xffff) | (cvx16 >> 16)), true);
+
+				// Set lock bit
+				cvx16 |= c_lock << slot;
+
+				AUDIT(slot < 16);
+				return slot;
+			});
+		}
+
+		lock_x16(const lock_x16&) = delete;
+
+		lock_x16& operator=(const lock_x16&) = delete;
+
+		~lock_x16()
+		{
+			// Clear the slot
+			m_this->m_cvx16 &= ~((c_wait | c_lock) << m_slot);
+			m_this->m_total -= 1;
+		}
+
+		bool wait(u64 usec_timeout = -1) const noexcept
+		{
+			return m_this->wait(*this, usec_timeout);
+		}
+	};
+
+	bool imp_wait(u32 _new, u32 slot, u64 _timeout) noexcept;
+	void imp_notify() noexcept;
+
+	bool retire(u32 slot) noexcept
+	{
+		const u32 wait_bit = c_wait << slot;
+		const u32 lock_bit = c_lock << slot;
+
+		return m_cvx16.atomic_op([=](u32& cvx16)
+		{
+			if (cvx16 & lock_bit)
+			{
+				cvx16 &= ~wait_bit;
+				return true;
+			}
+
+			cvx16 |= lock_bit;
+			cvx16 &= ~wait_bit;
+			return false;
+		});
+	}
+
+public:
+	constexpr cond_x16() = default;
+
+	lock_x16 lock_one() noexcept
+	{
+		return lock_x16(this);
+	}
+
+	bool wait(lock_x16 const& lock, u64 usec_timeout = -1) noexcept
+	{
+		AUDIT(lock.m_this == this);
+
+		const u32 wait_bit = c_wait << lock.m_slot;
+		const u32 lock_bit = c_lock << lock.m_slot;
+
+		// Change state from c_lock to c_wait
+		const u32 new_ = m_cvx16.atomic_op([=](u32& cvx16)
+		{
+			if (cvx16 & wait_bit)
+			{
+				cvx16 &= ~wait_bit;
+			}
+			else
+			{
+				cvx16 |= wait_bit;
+				cvx16 &= ~lock_bit;
+			}
+
+			return cvx16;
+		});
+
+		if (new_ & lock_bit)
+		{
+			// Already signaled, return without waiting
+			return true;
+		}
+
+		return imp_wait(new_, lock.m_slot, usec_timeout);
+	}
+
+	void notify_all() noexcept
+	{
+		if (LIKELY(!m_cvx16))
+			return;
+
+		imp_notify();
+	}
+};
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 10fc5b9d6e..afba170867 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -22,7 +22,6 @@
 #include
 #include
 #include
-#include <shared_mutex>
 
 const bool s_use_ssse3 =
 #ifdef _MSC_VER
@@ -1710,9 +1709,7 @@ s64 spu_thread::get_ch_value(u32 ch)
 			fmt::throw_exception("Not supported: event mask 0x%x" HERE, mask1);
 		}
 
-		std::shared_lock pseudo_lock(vm::reservation_notifier(raddr, 128), std::try_to_lock);
-
-		verify(HERE), pseudo_lock;
+		const auto pseudo_lock = vm::reservation_notifier(raddr, 128).lock_one();
 
 		while (res = get_events(), !res)
 		{
@@ -1721,7 +1718,7 @@ s64 spu_thread::get_ch_value(u32 ch)
 				return -1;
 			}
 
-			pseudo_lock.mutex()->wait(100);
+			pseudo_lock.wait(100);
 		}
 
 		return res;
diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h
index 170de102da..0e1ab0332d 100644
--- a/rpcs3/Emu/Memory/vm.h
+++ b/rpcs3/Emu/Memory/vm.h
@@ -7,7 +7,7 @@
 
 class shared_mutex;
 class cpu_thread;
-class notifier;
+class cond_x16;
 
 namespace vm
 {
@@ -105,9 +105,9 @@
 	}
 
 	// Get reservation sync variable
-	inline notifier& reservation_notifier(u32 addr, u32 size)
+	inline cond_x16& reservation_notifier(u32 addr, u32 size)
 	{
-		return *reinterpret_cast<notifier*>(g_reservations2 + addr / 128 * 8);
+		return *reinterpret_cast<cond_x16*>(g_reservations2 + addr / 128 * 8);
 	}
 
 	void reservation_lock_internal(atomic_t<u64>&);
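
Not part of the patch: a minimal usage sketch of the new cond_x16 primitive, mirroring the pattern the diff introduces in SPUThread.cpp. Each reader claims one of the 16 slots with lock_one() (the slot is released when the lock object goes out of scope), re-checks its predicate in a loop, and waits with a microsecond timeout; the writer updates shared state and then calls notify_all(). The g_ready flag, the reader/writer functions, and the assumption that the rpcs3 utility headers are available on the include path are illustrative only.

// Illustrative sketch, assuming it is compiled inside the rpcs3 source tree.
#include "Utilities/cond.h"

#include <atomic>
#include <thread>
#include <cstdio>

static cond_x16 g_cond;                  // packed condition variable, up to 16 readers
static std::atomic<bool> g_ready{false}; // hypothetical predicate guarded by g_cond

static void reader()
{
	// Claim one of the 16 slots; it is freed when pseudo_lock is destroyed
	const auto pseudo_lock = g_cond.lock_one();

	while (!g_ready)
	{
		// Sleep for up to 100 microseconds, or until notify_all() signals this slot
		pseudo_lock.wait(100);
	}

	std::printf("reader woke up\n");
}

static void writer()
{
	g_ready = true;      // publish the state first
	g_cond.notify_all(); // then wake every waiting slot
}

int main()
{
	std::thread r(reader);
	std::thread w(writer);
	r.join();
	w.join();
}

The timed wait matters here: as in the SPU reservation loop, the reader re-checks its predicate on every wakeup, so a notification that races with the check costs at most one timeout period rather than a hang.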