diff --git a/Utilities/cond.cpp b/Utilities/cond.cpp
index 39c240775f..2e1a900a05 100644
--- a/Utilities/cond.cpp
+++ b/Utilities/cond.cpp
@@ -196,46 +196,51 @@ void unique_cond::imp_notify() noexcept
 	balanced_awaken(m_value, 1);
 }
 
-bool cond_x16::imp_wait(u32 slot, u64 _timeout) noexcept
+bool shared_cond::imp_wait(u32 slot, u64 _timeout) noexcept
 {
-	const u32 wait_bit = c_wait << slot;
-	const u32 lock_bit = c_lock << slot;
+	if (slot >= 32)
+	{
+		// Invalid argument, assume notified
+		return true;
+	}
+
+	const u64 wait_bit = c_wait << slot;
+	const u64 lock_bit = c_lock << slot;
 
 	// Change state from c_lock to c_wait
-	const u32 old_ = m_cvx16.fetch_op([=](u32& cvx16)
+	const u64 old_ = m_cvx32.fetch_op([=](u64& cvx32)
 	{
-		if (cvx16 & wait_bit)
+		if (cvx32 & wait_bit)
 		{
-			// c_sig -> c_lock
-			cvx16 &= ~wait_bit;
+			// c_lock -> c_wait
+			cvx32 &= ~(lock_bit & ~wait_bit);
 		}
 		else
 		{
-			cvx16 |= wait_bit;
-			cvx16 &= ~lock_bit;
+			// c_sig -> c_lock
+			cvx32 |= lock_bit;
 		}
 	});
 
-	if (old_ & wait_bit)
+	if ((old_ & wait_bit) == 0)
 	{
 		// Already signaled, return without waiting
 		return true;
 	}
 
-	return balanced_wait_until(m_cvx16, _timeout, [&](u32& cvx16, auto... ret) -> int
+	return balanced_wait_until(m_cvx32, _timeout, [&](u64& cvx32, auto... ret) -> int
 	{
-		if (cvx16 & lock_bit)
+		if ((cvx32 & wait_bit) == 0)
 		{
 			// c_sig -> c_lock
-			cvx16 &= ~wait_bit;
+			cvx32 |= lock_bit;
 			return +1;
 		}
 
 		if constexpr (sizeof...(ret))
 		{
 			// Retire
-			cvx16 |= lock_bit;
-			cvx16 &= ~wait_bit;
+			cvx32 |= lock_bit;
 			return -1;
 		}
 
@@ -243,16 +248,14 @@ bool cond_x16::imp_wait(u32 slot, u64 _timeout) noexcept
 	});
 }
 
-void cond_x16::imp_notify() noexcept
+void shared_cond::imp_notify() noexcept
 {
-	auto [old, ok] = m_cvx16.fetch_op([](u32& v)
+	auto [old, ok] = m_cvx32.fetch_op([](u64& cvx32)
 	{
-		const u32 lock_mask = v >> 16;
-		const u32 wait_mask = v & 0xffff;
-
-		if (const u32 sig_mask = lock_mask ^ wait_mask)
+		if (const u64 sig_mask = cvx32 & 0xffffffff)
 		{
-			v |= sig_mask | sig_mask << 16;
+			cvx32 &= 0xffffffffull << 32;
+			cvx32 |= sig_mask << 32;
 			return true;
 		}
 
@@ -260,14 +263,14 @@ void cond_x16::imp_notify() noexcept
 	});
 
 	// Determine if some waiters need a syscall notification
-	const u32 wait_mask = old & (~old >> 16);
+	const u64 wait_mask = old & (~old >> 32);
 
 	if (UNLIKELY(!ok || !wait_mask))
 	{
 		return;
 	}
 
-	balanced_awaken(m_cvx16, utils::popcnt32(wait_mask));
+	balanced_awaken(m_cvx32, utils::popcnt32(wait_mask));
 }
 
 bool lf_queue_base::wait(u64 _timeout)
diff --git a/Utilities/cond.h b/Utilities/cond.h
index 0cadedac12..dc716fab88 100644
--- a/Utilities/cond.h
+++ b/Utilities/cond.h
@@ -176,61 +176,60 @@ public:
 	}
 };
 
-// Packed version of cond_one, supports up to 16 readers.
-class cond_x16
+// Condition variable fused with a pseudo-mutex supporting only reader locks (up to 32 readers).
+class shared_cond
 {
 	// For information, shouldn't modify
-	enum : u32
+	enum : u64
 	{
+		// Wait bit is aligned for compatibility with 32-bit futex.
 		c_wait = 1,
-		c_lock = 1 << 16,
-		c_sig = 1 << 16 | 1,
+		c_sig = 1ull << 32,
+		c_lock = 1ull << 32 | 1,
 	};
 
-	// Split in 16-bit parts for convenient bit combining
-	atomic_t<u32> m_cvx16{0};
+	// Split in 32-bit parts for convenient bit combining
+	atomic_t<u64> m_cvx32{0};
 
-	// Effectively unused, only counts readers
-	atomic_t<u32> m_total{0};
-
-	class lock_x16
+	class shared_lock
 	{
-		cond_x16* m_this;
+		shared_cond* m_this;
 		u32 m_slot;
 
-		friend class cond_x16;
+		friend class shared_cond;
 
 	public:
-		lock_x16(cond_x16* _this) noexcept
+		shared_lock(shared_cond* _this) noexcept
 			: m_this(_this)
 		{
-			// Spin if the number of readers exceeds 16
-			while (UNLIKELY(m_this->m_total++ >= 16))
-				m_this->m_total--;
-
 			// Lock and remember obtained slot index
-			m_slot = m_this->m_cvx16.atomic_op([](u32& cvx16)
+			m_slot = m_this->m_cvx32.atomic_op([](u64& cvx32)
 			{
 				// Combine used bits and invert to find least significant bit unused
-				const u32 slot = utils::cnttz32(~((cvx16 & 0xffff) | (cvx16 >> 16)), true);
+				const u32 slot = utils::cnttz32(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true);
 
-				// Set lock bit
-				cvx16 |= c_lock << slot;
-
-				AUDIT(slot < 16);
+				// Set lock bits (does nothing if all slots are used)
+				const u64 bit = (1ull << slot) & 0xffffffff;
+				cvx32 |= bit | (bit << 32);
 				return slot;
 			});
 		}
 
-		lock_x16(const lock_x16&) = delete;
+		shared_lock(const shared_lock&) = delete;
 
-		lock_x16& operator=(const lock_x16&) = delete;
+		shared_lock& operator=(const shared_lock&) = delete;
 
-		~lock_x16()
+		~shared_lock()
 		{
-			// Clear the slot
-			m_this->m_cvx16 &= ~((c_wait | c_lock) << m_slot);
-			m_this->m_total -= 1;
+			// Clear the slot (does nothing if all slots are used)
+			const u64 bit = (1ull << m_slot) & 0xffffffff;
+			m_this->m_cvx32 &= ~(bit | (bit << 32));
+		}
+
+		explicit operator bool() const noexcept
+		{
+			// Check success
+			return m_slot < 32;
 		}
 
 		bool wait(u64 usec_timeout = -1) const noexcept
@@ -243,14 +242,20 @@ class cond_x16
 	void imp_notify() noexcept;
 
 public:
-	constexpr cond_x16() = default;
+	constexpr shared_cond() = default;
 
-	lock_x16 lock_one() noexcept
+	shared_lock try_shared_lock() noexcept
 	{
-		return lock_x16(this);
+		return shared_lock(this);
 	}
 
-	bool wait(lock_x16 const& lock, u64 usec_timeout = -1) noexcept
+	u32 count() const noexcept
+	{
+		const u64 cvx32 = m_cvx32;
+		return utils::popcnt32(cvx32 | (cvx32 >> 32));
+	}
+
+	bool wait(shared_lock const& lock, u64 usec_timeout = -1) noexcept
 	{
 		AUDIT(lock.m_this == this);
 		return imp_wait(lock.m_slot, usec_timeout);
@@ -258,7 +263,7 @@ public:
 
 	void notify_all() noexcept
 	{
-		if (LIKELY(!m_cvx16))
+		if (LIKELY(!m_cvx32))
 			return;
 
 		imp_notify();
diff --git a/Utilities/sync.h b/Utilities/sync.h
index 859dc2b845..e9c89f9180 100644
--- a/Utilities/sync.h
+++ b/Utilities/sync.h
@@ -233,9 +233,16 @@ bool balanced_wait_until(atomic_t<T>& var, u64 usec_timeout, Pred&& pred)
 	timeout.tv_sec  = usec_timeout / 1000000;
 	timeout.tv_nsec = (usec_timeout % 1000000) * 1000;
 
+	char* ptr = reinterpret_cast<char*>(&var);
+
+	if constexpr (sizeof(T) == 8)
+	{
+		ptr += 4 * IS_BE_MACHINE;
+	}
+
 	while (!test_pred(value))
 	{
-		if (futex(&var, FUTEX_WAIT_PRIVATE, static_cast<u32>(value), is_inf ? nullptr : &timeout) == 0)
+		if (futex(ptr, FUTEX_WAIT_PRIVATE, static_cast<u32>(value), is_inf ? nullptr : &timeout) == 0)
 		{
 			if (!test_pred(value, nullptr))
 			{
@@ -284,9 +291,16 @@ void balanced_awaken(atomic_t<T>& var, u32 weight)
 		NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
 	}
 #else
+	char* ptr = reinterpret_cast<char*>(&var);
+
+	if constexpr (sizeof(T) == 8)
+	{
+		ptr += 4 * IS_BE_MACHINE;
+	}
+
 	if (All || weight)
 	{
-		futex(&var, FUTEX_WAKE_PRIVATE, All ? INT_MAX : std::min<u32>(INT_MAX, weight));
+		futex(ptr, FUTEX_WAKE_PRIVATE, All ? INT_MAX : std::min<u32>(INT_MAX, weight));
 	}
 
 	return;
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index e34d5a26b6..2fbc731501 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -2401,7 +2401,12 @@ s64 spu_thread::get_ch_value(u32 ch)
 				fmt::throw_exception("Not supported: event mask 0x%x" HERE, mask1);
 			}
 
-			const auto pseudo_lock = vm::reservation_notifier(raddr, 128).lock_one();
+			const auto pseudo_lock = vm::reservation_notifier(raddr, 128).try_shared_lock();
+
+			if (!pseudo_lock)
+			{
+				fmt::throw_exception("Unexpected: reservation notifier lock failed");
+			}
 
 			while (res = get_events(), !res)
 			{
diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h
index 0042cbfec1..2f58ca60e9 100644
--- a/rpcs3/Emu/Memory/vm.h
+++ b/rpcs3/Emu/Memory/vm.h
@@ -7,7 +7,7 @@
 
 class shared_mutex;
 class cpu_thread;
-class cond_x16;
+class shared_cond;
 
 namespace vm
 {
@@ -102,9 +102,9 @@ namespace vm
 	}
 
 	// Get reservation sync variable
-	inline cond_x16& reservation_notifier(u32 addr, u32 size)
+	inline shared_cond& reservation_notifier(u32 addr, u32 size)
 	{
-		return *reinterpret_cast<cond_x16*>(g_reservations2 + addr / 128 * 8);
+		return *reinterpret_cast<shared_cond*>(g_reservations2 + addr / 128 * 8);
 	}
 
 	void reservation_lock_internal(atomic_t<u64>&);