diff --git a/Utilities/cond.cpp b/Utilities/cond.cpp
index fba6eee3ea..d5fdc82e1c 100644
--- a/Utilities/cond.cpp
+++ b/Utilities/cond.cpp
@@ -2,11 +2,7 @@
 #include "sync.h"
 #include "lockless.h"
 
-#include
-
-#ifndef _WIN32
-#include
-#endif
+#include
 
 // use constants, increase signal space
@@ -60,251 +56,3 @@ void cond_variable::imp_wake(u32 _count) noexcept
 		m_value.notify_one();
 	}
 }
-
-bool shared_cond::imp_wait(u32 slot, u64 _timeout) noexcept
-{
-	if (slot >= 32)
-	{
-		// Invalid argument, assume notified
-		return true;
-	}
-
-	const u64 wait_bit = c_wait << slot;
-	const u64 lock_bit = c_lock << slot;
-
-	// Change state from c_lock to c_wait
-	const u64 old_ = m_cvx32.fetch_op([=](u64& cvx32)
-	{
-		if (cvx32 & wait_bit)
-		{
-			// c_lock -> c_wait
-			cvx32 &= ~(lock_bit & ~wait_bit);
-		}
-		else
-		{
-			// c_sig -> c_lock
-			cvx32 |= lock_bit;
-		}
-	});
-
-	if ((old_ & wait_bit) == 0)
-	{
-		// Already signaled, return without waiting
-		return true;
-	}
-
-	return balanced_wait_until(m_cvx32, _timeout, [&](u64& cvx32, auto... ret) -> int
-	{
-		if ((cvx32 & wait_bit) == 0)
-		{
-			// c_sig -> c_lock
-			cvx32 |= lock_bit;
-			return +1;
-		}
-
-		if constexpr (sizeof...(ret))
-		{
-			// Retire
-			cvx32 |= lock_bit;
-			return -1;
-		}
-
-		return 0;
-	});
-}
-
-void shared_cond::imp_notify() noexcept
-{
-	auto [old, ok] = m_cvx32.fetch_op([](u64& cvx32)
-	{
-		if (const u64 sig_mask = cvx32 & 0xffffffff)
-		{
-			cvx32 &= 0xffffffffull << 32;
-			cvx32 |= sig_mask << 32;
-			return true;
-		}
-
-		return false;
-	});
-
-	// Determine if some waiters need a syscall notification
-	const u64 wait_mask = old & (~old >> 32);
-
-	if (UNLIKELY(!ok || !wait_mask))
-	{
-		return;
-	}
-
-	balanced_awaken(m_cvx32, utils::popcnt32(wait_mask));
-}
-
-void shared_cond::wait_all() noexcept
-{
-	// Try to acquire waiting state without locking but only if there are other locks
-	const auto [old_, result] = m_cvx32.fetch_op([](u64& cvx32) -> u64
-	{
-		// Check waiting alone
-		if ((cvx32 & 0xffffffff) == 0)
-		{
-			return 0;
-		}
-
-		// Combine used bits and invert to find least significant bit unused
-		const u32 slot = utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true);
-
-		// Set waiting bit (does nothing if all slots are used)
-		cvx32 |= (1ull << slot) & 0xffffffff;
-		return 1ull << slot;
-	});
-
-	if (!result)
-	{
-		return;
-	}
-
-	if (result > 0xffffffffu)
-	{
-		// All slots are used, fallback to spin wait
-		while (m_cvx32 & 0xffffffff)
-		{
-			busy_wait();
-		}
-
-		return;
-	}
-
-	const u64 wait_bit = result;
-	const u64 lock_bit = wait_bit | (wait_bit << 32);
-
-	balanced_wait_until(m_cvx32, -1, [&](u64& cvx32, auto... ret) -> int
-	{
-		if ((cvx32 & wait_bit) == 0)
-		{
-			// Remove signal and unlock at once
-			cvx32 &= ~lock_bit;
-			return +1;
-		}
-
-		if constexpr (sizeof...(ret))
-		{
-			cvx32 &= ~lock_bit;
-			return -1;
-		}
-
-		return 0;
-	});
-}
-
-bool shared_cond::wait_all(shared_cond::shared_lock& lock) noexcept
-{
-	AUDIT(lock.m_this == this);
-
-	if (lock.m_slot >= 32)
-	{
-		// Invalid argument, assume notified
-		return true;
-	}
-
-	const u64 wait_bit = c_wait << lock.m_slot;
-	const u64 lock_bit = c_lock << lock.m_slot;
-
-	// Try to acquire waiting state only if there are other locks
-	const auto [old_, not_alone] = m_cvx32.fetch_op([&](u64& cvx32)
-	{
-		// Check locking alone
-		if (((cvx32 >> 32) & cvx32) == (lock_bit >> 32))
-		{
-			return false;
-		}
-
-		// c_lock -> c_wait, c_sig -> unlock
-		cvx32 &= ~(lock_bit & ~wait_bit);
-		return true;
-	});
-
-	if (!not_alone)
-	{
-		return false;
-	}
-	else
-	{
-		// Set invalid slot to acknowledge unlocking
-		lock.m_slot = 33;
-	}
-
-	if ((old_ & wait_bit) == 0)
-	{
-		// Already signaled, return without waiting
-		return true;
-	}
-
-	balanced_wait_until(m_cvx32, -1, [&](u64& cvx32, auto... ret) -> int
-	{
-		if ((cvx32 & wait_bit) == 0)
-		{
-			// Remove signal and unlock at once
-			cvx32 &= ~lock_bit;
-			return +1;
-		}
-
-		if constexpr (sizeof...(ret))
-		{
-			cvx32 &= ~lock_bit;
-			return -1;
-		}
-
-		return 0;
-	});
-
-	return true;
-}
-
-bool shared_cond::notify_all(shared_cond::shared_lock& lock) noexcept
-{
-	AUDIT(lock.m_this == this);
-
-	if (lock.m_slot >= 32)
-	{
-		// Invalid argument
-		return false;
-	}
-
-	const u64 slot_mask = c_sig << lock.m_slot;
-
-	auto [old, ok] = m_cvx32.fetch_op([&](u64& cvx32)
-	{
-		if (((cvx32 << 32) & cvx32) != slot_mask)
-		{
-			return false;
-		}
-
-		if (const u64 sig_mask = cvx32 & 0xffffffff)
-		{
-			cvx32 &= (0xffffffffull << 32) & ~slot_mask;
-			cvx32 |= (sig_mask << 32) & ~slot_mask;
-			return true;
-		}
-
-		return false;
-	});
-
-	if (!ok)
-	{
-		// Not an exclusive reader
-		return false;
-	}
-
-	// Set invalid slot to acknowledge unlocking
-	lock.m_slot = 34;
-
-	// Determine if some waiters need a syscall notification
-	const u64 wait_mask = old & (~old >> 32);
-
-	if (UNLIKELY(!wait_mask))
-	{
-		return true;
-	}
-
-	balanced_awaken(m_cvx32, utils::popcnt32(wait_mask));
-	return true;
-}
diff --git a/Utilities/cond.h b/Utilities/cond.h
index 1e6d2f7b62..82e3959145 100644
--- a/Utilities/cond.h
+++ b/Utilities/cond.h
@@ -94,110 +94,3 @@ public:
 
 	static constexpr u64 max_timeout = UINT64_MAX / 1000;
 };
-
-// Condition variable fused with a pseudo-mutex supporting only reader locks (up to 32 readers).
-class shared_cond
-{
-	// For information, shouldn't modify
-	enum : u64
-	{
-		// Wait bit is aligned for compatibility with 32-bit futex.
-		c_wait = 1,
-		c_sig = 1ull << 32,
-		c_lock = 1ull << 32 | 1,
-	};
-
-	// Split in 32-bit parts for convenient bit combining
-	atomic_t<u64> m_cvx32{0};
-
-	class shared_lock
-	{
-		shared_cond* m_this;
-		u32 m_slot;
-
-		friend class shared_cond;
-
-	public:
-		shared_lock(shared_cond* _this) noexcept
-			: m_this(_this)
-		{
-			// Lock and remember obtained slot index
-			m_slot = m_this->m_cvx32.atomic_op([](u64& cvx32)
-			{
-				// Combine used bits and invert to find least significant bit unused
-				const u32 slot = static_cast<u32>(utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true));
-
-				// Set lock bits (does nothing if all slots are used)
-				const u64 bit = (1ull << slot) & 0xffffffff;
-				cvx32 |= bit | (bit << 32);
-				return slot;
-			});
-		}
-
-		shared_lock(const shared_lock&) = delete;
-
-		shared_lock(shared_lock&& rhs)
-			: m_this(rhs.m_this)
-			, m_slot(rhs.m_slot)
-		{
-			rhs.m_slot = 32;
-		}
-
-		shared_lock& operator=(const shared_lock&) = delete;
-
-		~shared_lock()
-		{
-			// Clear the slot (does nothing if all slots are used)
-			const u64 bit = (1ull << m_slot) & 0xffffffff;
-			m_this->m_cvx32 &= ~(bit | (bit << 32));
-		}
-
-		explicit operator bool() const noexcept
-		{
-			// Check success
-			return m_slot < 32;
-		}
-
-		bool wait(u64 usec_timeout = -1) const noexcept
-		{
-			return m_this->wait(*this, usec_timeout);
-		}
-	};
-
-	bool imp_wait(u32 slot, u64 _timeout) noexcept;
-	void imp_notify() noexcept;
-
-public:
-	constexpr shared_cond() = default;
-
-	shared_lock try_shared_lock() noexcept
-	{
-		return shared_lock(this);
-	}
-
-	u32 count() const noexcept
-	{
-		const u64 cvx32 = m_cvx32;
-		return utils::popcnt32(static_cast<u32>(cvx32 | (cvx32 >> 32)));
-	}
-
-	bool wait(shared_lock const& lock, u64 usec_timeout = -1) noexcept
-	{
-		AUDIT(lock.m_this == this);
-		return imp_wait(lock.m_slot, usec_timeout);
-	}
-
-	void wait_all() noexcept;
-
-	bool wait_all(shared_lock& lock) noexcept;
-
-	void notify_all() noexcept
-	{
-		if (LIKELY(!m_cvx32))
-			return;
-
-		imp_notify();
-	}
-
-	bool notify_all(shared_lock& lock) noexcept;
-};
diff --git a/Utilities/sync.h b/Utilities/sync.h
index d4fedeabea..caed15337a 100644
--- a/Utilities/sync.h
+++ b/Utilities/sync.h
@@ -149,160 +149,3 @@ inline int futex(volatile void* uaddr, int futex_op, uint val, const timespec* t
 	return g_futex(uaddr, futex_op, val, timeout, mask);
 #endif
 }
-
-template <typename T, typename Pred>
-bool balanced_wait_until(atomic_t<T>& var, u64 usec_timeout, Pred&& pred)
-{
-	static_assert(sizeof(T) == 4 || sizeof(T) == 8);
-
-	const bool is_inf = usec_timeout > u64{UINT32_MAX / 1000} * 1000000;
-
-	// Optional second argument indicates that the predicate should try to retire
-	auto test_pred = [&](T& _new, auto... args)
-	{
-		T old = var.load();
-
-		while (true)
-		{
-			_new = old;
-
-			// Zero indicates failure without modifying the value
-			// Negative indicates failure but modifies the value
-			auto ret = std::invoke(std::forward<Pred>(pred), _new, args...);
-
-			if (LIKELY(!ret || var.compare_exchange(old, _new)))
-			{
-				return ret > 0;
-			}
-		}
-	};
-
-	T value;
-
-#ifdef _WIN32
-	if (OptWaitOnAddress)
-	{
-		while (!test_pred(value))
-		{
-			if (OptWaitOnAddress(&var, &value, sizeof(T), is_inf ? INFINITE : usec_timeout / 1000))
-			{
-				if (!test_pred(value, nullptr))
-				{
-					return false;
-				}
-
-				break;
-			}
-
-			if (GetLastError() == ERROR_TIMEOUT)
-			{
-				// Retire
-				return test_pred(value, nullptr);
-			}
-		}
-
-		return true;
-	}
-
-	LARGE_INTEGER timeout;
-	timeout.QuadPart = usec_timeout * -10;
-
-	if (!usec_timeout || NtWaitForKeyedEvent(nullptr, &var, false, is_inf ? nullptr : &timeout))
-	{
-		// Timed out: retire
-		if (!test_pred(value, nullptr))
-		{
-			return false;
-		}
-
-		// Signaled in the last moment: restore balance
-		NtWaitForKeyedEvent(nullptr, &var, false, nullptr);
-		return true;
-	}
-
-	if (!test_pred(value, nullptr))
-	{
-		// Stolen notification: restore balance
-		NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
-		return false;
-	}
-
-	return true;
-#else
-	struct timespec timeout;
-	timeout.tv_sec = usec_timeout / 1000000;
-	timeout.tv_nsec = (usec_timeout % 1000000) * 1000;
-
-	char* ptr = reinterpret_cast<char*>(&var);
-
-	if constexpr (sizeof(T) == 8)
-	{
-		ptr += 4 * IS_BE_MACHINE;
-	}
-
-	while (!test_pred(value))
-	{
-		if (futex(ptr, FUTEX_WAIT_PRIVATE, static_cast<u32>(value), is_inf ? nullptr : &timeout) == 0)
-		{
-			if (!test_pred(value, nullptr))
-			{
-				return false;
-			}
-
-			break;
-		}
-
-		switch (errno)
-		{
-		case EAGAIN: break;
-		case ETIMEDOUT: return test_pred(value, nullptr);
-		default: verify("Unknown futex error" HERE), 0;
-		}
-	}
-
-	return true;
-#endif
-}
-
-template <bool All = false, typename T>
-void balanced_awaken(atomic_t<T>& var, u32 weight)
-{
-	static_assert(sizeof(T) == 4 || sizeof(T) == 8);
-
-#ifdef _WIN32
-	if (OptWaitOnAddress)
-	{
-		if (All || weight > 3)
-		{
-			OptWakeByAddressAll(&var);
-			return;
-		}
-
-		for (u32 i = 0; i < weight; i++)
-		{
-			OptWakeByAddressSingle(&var);
-		}
-
-		return;
-	}
-
-	for (u32 i = 0; i < weight; i++)
-	{
-		NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
-	}
-#else
-	char* ptr = reinterpret_cast<char*>(&var);
-
-	if constexpr (sizeof(T) == 8)
-	{
-		ptr += 4 * IS_BE_MACHINE;
-	}
-
-	if (All || weight)
-	{
-		futex(ptr, FUTEX_WAKE_PRIVATE, All ? INT_MAX : std::min<u32>(INT_MAX, weight));
-	}
-
-	return;
-#endif
-}
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 78edcd2e5c..ffc1e04611 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -2385,13 +2385,6 @@ s64 spu_thread::get_ch_value(u32 ch)
 			fmt::throw_exception("Not supported: event mask 0x%x" HERE, mask1);
 		}
 
-		const auto pseudo_lock = vm::reservation_notifier(raddr, 128).try_shared_lock();
-
-		if (!pseudo_lock)
-		{
-			fmt::throw_exception("Unexpected: reservation notifier lock failed");
-		}
-
 		while (res = get_events(), !res)
 		{
 			state += cpu_flag::wait;
@@ -2401,7 +2394,7 @@ s64 spu_thread::get_ch_value(u32 ch)
 				return -1;
 			}
 
-			pseudo_lock.wait(100);
+			vm::reservation_notifier(raddr, 128).wait(rtime, atomic_wait_timeout{30000});
 		}
 
 		check_state();
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 314a2d580c..56d6544fc9 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -17,8 +17,6 @@
 #include
 #include
 
-static_assert(sizeof(shared_cond) == 8, "Unexpected size of shared_cond");
-
 namespace vm
 {
 	static u8* memory_reserve_4GiB(std::uintptr_t _addr = 0)
@@ -50,9 +48,6 @@ namespace vm
 	// Reservation stats (compressed x16)
 	u8* const g_reservations = memory_reserve_4GiB((std::uintptr_t)g_stat_addr);
 
-	// Reservation sync variables
-	u8* const g_reservations2 = g_reservations + 0x10000000;
-
 	// Memory locations
 	std::vector<std::shared_ptr<block_t>> g_locations;
 
@@ -634,11 +629,9 @@ namespace vm
 			if (addr == 0x10000)
 			{
 				utils::memory_commit(g_reservations, 0x1000);
-				utils::memory_commit(g_reservations2, 0x1000);
 			}
 
 			utils::memory_commit(g_reservations + addr / 16, size / 16);
-			utils::memory_commit(g_reservations2 + addr / 16, size / 16);
 		}
 		else
 		{
@@ -646,12 +639,10 @@ namespace vm
 			for (u32 i = 0; i < 6; i++)
 			{
 				utils::memory_commit(g_reservations + addr / 16 + i * 0x10000, 0x4000);
-				utils::memory_commit(g_reservations2 + addr / 16 + i * 0x10000, 0x4000);
 			}
 
 			// End of the address space
 			utils::memory_commit(g_reservations + 0xfff0000, 0x10000);
-			utils::memory_commit(g_reservations2 + 0xfff0000, 0x10000);
 		}
 
 		if (flags & 0x100)
diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h
index 381fd6139c..c200964f63 100644
--- a/rpcs3/Emu/Memory/vm.h
+++ b/rpcs3/Emu/Memory/vm.h
@@ -14,7 +14,6 @@ namespace vm
 	extern u8* const g_exec_addr;
 	extern u8* const g_stat_addr;
 	extern u8* const g_reservations;
-	extern u8* const g_reservations2;
 
 	struct writer_lock;
diff --git a/rpcs3/Emu/Memory/vm_reservation.h b/rpcs3/Emu/Memory/vm_reservation.h
index e3ee7ea6b1..b90f3d1c48 100644
--- a/rpcs3/Emu/Memory/vm_reservation.h
+++ b/rpcs3/Emu/Memory/vm_reservation.h
@@ -21,9 +21,9 @@ namespace vm
 	}
 
 	// Get reservation sync variable
-	inline shared_cond& reservation_notifier(u32 addr, u32 size)
+	inline atomic_t<u64>& reservation_notifier(u32 addr, u32 size)
 	{
-		return *reinterpret_cast<shared_cond*>(g_reservations2 + addr / 128 * 8);
+		return reinterpret_cast<atomic_t<u64>*>(g_reservations)[addr / 128];
 	}
 
 	void reservation_lock_internal(atomic_t<u64>&);
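
As the SPUThread.cpp and vm_reservation.h hunks show, the patch drops the shared_cond pseudo-lock and instead parks the SPU thread directly on the 64-bit reservation word: the waiter samples rtime and blocks until the stored value changes (vm::reservation_notifier(raddr, 128).wait(rtime, atomic_wait_timeout{30000})), while the producer side bumps the counter and notifies it. Below is a minimal, self-contained sketch of that wait-on-value pattern using standard C++20 std::atomic wait/notify rather than the emulator's atomic_t (which, as seen above, additionally accepts a timeout); the names rtime, waiter and writer are illustrative only, not rpcs3 code.

// Minimal illustration (C++20): block on a 64-bit counter until a writer
// publishes a new value and wakes the waiters.
#include <atomic>
#include <chrono>
#include <cstdint>
#include <iostream>
#include <thread>

std::atomic<std::uint64_t> rtime{0}; // stand-in for one reservation "time" word

void waiter()
{
	const std::uint64_t observed = rtime.load();

	// std::atomic::wait blocks while the stored value equals 'observed';
	// loop because wake-ups may be spurious.
	while (rtime.load() == observed)
	{
		rtime.wait(observed);
	}

	std::cout << "woken, rtime = " << rtime.load() << '\n';
}

void writer()
{
	std::this_thread::sleep_for(std::chrono::milliseconds(10));

	rtime.fetch_add(128); // publish a new value (any change suffices)
	rtime.notify_all();   // wake every thread parked on this word
}

int main()
{
	std::thread a(waiter), b(writer);
	a.join();
	b.join();
}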