
Remove shared_cond and simplify reservation waiting

Use atomic wait for reservations
Clean up some obsolete code
Nekotekina 2019-09-09 11:29:14 +03:00
parent d13ff285d1
commit 0a96497e13
7 changed files with 4 additions and 537 deletions
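
In short, a waiter no longer takes a reader slot in a shared_cond tied to the reservation address; it sleeps directly on the 64-bit reservation counter with an atomic wait. A minimal sketch of the waiter side, condensed from the spu_thread::get_ch_value hunk below (the helper name is illustrative and not part of the commit):

// Illustrative sketch (not verbatim repository code): wait until the 64-bit
// reservation counter for 'raddr' moves away from the snapshot 'rtime', or
// until the timeout passed to atomic_t::wait expires.
static void wait_for_reservation_change(u32 raddr, u64 rtime)
{
    // reservation_notifier() now returns the counter itself (atomic_t<u64>&),
    // so waiting is a plain atomic wait keyed on its current value.
    vm::reservation_notifier(raddr, 128).wait(rtime, atomic_wait_timeout{30000});
}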

View File

@@ -2,11 +2,7 @@
#include "sync.h"
#include "lockless.h"
#include <limits.h>
#ifndef _WIN32
#include <thread>
#endif
#include <climits>
// use constants, increase signal space
@@ -60,251 +56,3 @@ void cond_variable::imp_wake(u32 _count) noexcept
m_value.notify_one();
}
}
bool shared_cond::imp_wait(u32 slot, u64 _timeout) noexcept
{
if (slot >= 32)
{
// Invalid argument, assume notified
return true;
}
const u64 wait_bit = c_wait << slot;
const u64 lock_bit = c_lock << slot;
// Change state from c_lock to c_wait
const u64 old_ = m_cvx32.fetch_op([=](u64& cvx32)
{
if (cvx32 & wait_bit)
{
// c_lock -> c_wait
cvx32 &= ~(lock_bit & ~wait_bit);
}
else
{
// c_sig -> c_lock
cvx32 |= lock_bit;
}
});
if ((old_ & wait_bit) == 0)
{
// Already signaled, return without waiting
return true;
}
return balanced_wait_until(m_cvx32, _timeout, [&](u64& cvx32, auto... ret) -> int
{
if ((cvx32 & wait_bit) == 0)
{
// c_sig -> c_lock
cvx32 |= lock_bit;
return +1;
}
if constexpr (sizeof...(ret))
{
// Retire
cvx32 |= lock_bit;
return -1;
}
return 0;
});
}
void shared_cond::imp_notify() noexcept
{
auto [old, ok] = m_cvx32.fetch_op([](u64& cvx32)
{
if (const u64 sig_mask = cvx32 & 0xffffffff)
{
cvx32 &= 0xffffffffull << 32;
cvx32 |= sig_mask << 32;
return true;
}
return false;
});
// Determine if some waiters need a syscall notification
const u64 wait_mask = old & (~old >> 32);
if (UNLIKELY(!ok || !wait_mask))
{
return;
}
balanced_awaken<true>(m_cvx32, utils::popcnt32(wait_mask));
}
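// Editorial note (illustrative, not repository code): each slot occupies one bit
// in the low half and one in the high half of m_cvx32, so per slot the pair
// (low, high) reads: c_wait = (1, 0), c_lock = (1, 1), c_sig = (0, 1).
// Hence old & (~old >> 32) keeps exactly the slots whose low bit is set and
// whose high bit is clear, i.e. the c_wait slots that need a syscall wake-up.
// Quick self-check with slot 0 waiting, slot 1 locked and slot 2 signaled:
constexpr u64 sample = (1ull << 0) | (1ull << 1) | (1ull << 33) | (1ull << 34);
static_assert((sample & (~sample >> 32)) == 1, "only the c_wait slot is selected");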
void shared_cond::wait_all() noexcept
{
// Try to acquire waiting state without locking but only if there are other locks
const auto [old_, result] = m_cvx32.fetch_op([](u64& cvx32) -> u64
{
// Check waiting alone
if ((cvx32 & 0xffffffff) == 0)
{
return 0;
}
// Combine used bits and invert to find least significant bit unused
const u32 slot = utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true);
// Set waiting bit (does nothing if all slots are used)
cvx32 |= (1ull << slot) & 0xffffffff;
return 1ull << slot;
});
if (!result)
{
return;
}
if (result > 0xffffffffu)
{
// All slots are used, fallback to spin wait
while (m_cvx32 & 0xffffffff)
{
busy_wait();
}
return;
}
const u64 wait_bit = result;
const u64 lock_bit = wait_bit | (wait_bit << 32);
balanced_wait_until(m_cvx32, -1, [&](u64& cvx32, auto... ret) -> int
{
if ((cvx32 & wait_bit) == 0)
{
// Remove signal and unlock at once
cvx32 &= ~lock_bit;
return +1;
}
if constexpr (sizeof...(ret))
{
cvx32 &= ~lock_bit;
return -1;
}
return 0;
});
}
bool shared_cond::wait_all(shared_cond::shared_lock& lock) noexcept
{
AUDIT(lock.m_this == this);
if (lock.m_slot >= 32)
{
// Invalid argument, assume notified
return true;
}
const u64 wait_bit = c_wait << lock.m_slot;
const u64 lock_bit = c_lock << lock.m_slot;
// Try to acquire waiting state only if there are other locks
const auto [old_, not_alone] = m_cvx32.fetch_op([&](u64& cvx32)
{
// Check locking alone
if (((cvx32 >> 32) & cvx32) == (lock_bit >> 32))
{
return false;
}
// c_lock -> c_wait, c_sig -> unlock
cvx32 &= ~(lock_bit & ~wait_bit);
return true;
});
if (!not_alone)
{
return false;
}
else
{
// Set invalid slot to acknowledge unlocking
lock.m_slot = 33;
}
if ((old_ & wait_bit) == 0)
{
// Already signaled, return without waiting
return true;
}
balanced_wait_until(m_cvx32, -1, [&](u64& cvx32, auto... ret) -> int
{
if ((cvx32 & wait_bit) == 0)
{
// Remove signal and unlock at once
cvx32 &= ~lock_bit;
return +1;
}
if constexpr (sizeof...(ret))
{
cvx32 &= ~lock_bit;
return -1;
}
return 0;
});
return true;
}
bool shared_cond::notify_all(shared_cond::shared_lock& lock) noexcept
{
AUDIT(lock.m_this == this);
if (lock.m_slot >= 32)
{
// Invalid argument
return false;
}
const u64 slot_mask = c_sig << lock.m_slot;
auto [old, ok] = m_cvx32.fetch_op([&](u64& cvx32)
{
if (((cvx32 << 32) & cvx32) != slot_mask)
{
return false;
}
if (const u64 sig_mask = cvx32 & 0xffffffff)
{
cvx32 &= (0xffffffffull << 32) & ~slot_mask;
cvx32 |= (sig_mask << 32) & ~slot_mask;
return true;
}
return false;
});
if (!ok)
{
// Not an exclusive reader
return false;
}
// Set invalid slot to acknowledge unlocking
lock.m_slot = 34;
// Determine if some waiters need a syscall notification
const u64 wait_mask = old & (~old >> 32);
if (UNLIKELY(!wait_mask))
{
return true;
}
balanced_awaken<true>(m_cvx32, utils::popcnt32(wait_mask));
return true;
}

View File

@@ -94,110 +94,3 @@ public:
static constexpr u64 max_timeout = UINT64_MAX / 1000;
};
// Condition variable fused with a pseudo-mutex supporting only reader locks (up to 32 readers).
class shared_cond
{
// For information, shouldn't modify
enum : u64
{
// Wait bit is aligned for compatibility with 32-bit futex.
c_wait = 1,
c_sig = 1ull << 32,
c_lock = 1ull << 32 | 1,
};
// Split in 32-bit parts for convenient bit combining
atomic_t<u64> m_cvx32{0};
class shared_lock
{
shared_cond* m_this;
u32 m_slot;
friend class shared_cond;
public:
shared_lock(shared_cond* _this) noexcept
: m_this(_this)
{
// Lock and remember obtained slot index
m_slot = m_this->m_cvx32.atomic_op([](u64& cvx32)
{
// Combine used bits and invert to find least significant bit unused
const u32 slot = static_cast<u32>(utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true));
// Set lock bits (does nothing if all slots are used)
const u64 bit = (1ull << slot) & 0xffffffff;
cvx32 |= bit | (bit << 32);
return slot;
});
}
shared_lock(const shared_lock&) = delete;
shared_lock(shared_lock&& rhs)
: m_this(rhs.m_this)
, m_slot(rhs.m_slot)
{
rhs.m_slot = 32;
}
shared_lock& operator=(const shared_lock&) = delete;
~shared_lock()
{
// Clear the slot (does nothing if all slots are used)
const u64 bit = (1ull << m_slot) & 0xffffffff;
m_this->m_cvx32 &= ~(bit | (bit << 32));
}
explicit operator bool() const noexcept
{
// Check success
return m_slot < 32;
}
bool wait(u64 usec_timeout = -1) const noexcept
{
return m_this->wait(*this, usec_timeout);
}
};
bool imp_wait(u32 slot, u64 _timeout) noexcept;
void imp_notify() noexcept;
public:
constexpr shared_cond() = default;
shared_lock try_shared_lock() noexcept
{
return shared_lock(this);
}
u32 count() const noexcept
{
const u64 cvx32 = m_cvx32;
return utils::popcnt32(static_cast<u32>(cvx32 | (cvx32 >> 32)));
}
bool wait(shared_lock const& lock, u64 usec_timeout = -1) noexcept
{
AUDIT(lock.m_this == this);
return imp_wait(lock.m_slot, usec_timeout);
}
void wait_all() noexcept;
bool wait_all(shared_lock& lock) noexcept;
void notify_all() noexcept
{
if (LIKELY(!m_cvx32))
return;
imp_notify();
}
bool notify_all(shared_lock& lock) noexcept;
};
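
For reference, the interface being removed was used roughly as follows. This is an illustrative sketch based on the class above and on the spu_thread hunk further down, not code taken from the repository:

// Illustrative sketch of the removed shared_cond interface (not repository code).
static void shared_cond_usage_example(shared_cond& cond)
{
    // Reader side: occupy one of the 32 reader slots, then wait for a notification.
    if (const auto lock = cond.try_shared_lock())
    {
        // Wait up to 100 microseconds (usec_timeout) for a notification on this slot.
        lock.wait(100);
    }

    // Notifier side: signal every occupied slot and wake any that are blocked.
    cond.notify_all();
}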

View File

@@ -149,160 +149,3 @@ inline int futex(volatile void* uaddr, int futex_op, uint val, const timespec* t
return g_futex(uaddr, futex_op, val, timeout, mask);
#endif
}
template <typename T, typename Pred>
bool balanced_wait_until(atomic_t<T>& var, u64 usec_timeout, Pred&& pred)
{
static_assert(sizeof(T) == 4 || sizeof(T) == 8);
const bool is_inf = usec_timeout > u64{UINT32_MAX / 1000} * 1000000;
// Optional second argument indicates that the predicate should try to retire
auto test_pred = [&](T& _new, auto... args)
{
T old = var.load();
while (true)
{
_new = old;
// Zero indicates failure without modifying the value
// Negative indicates failure but modifies the value
auto ret = std::invoke(std::forward<Pred>(pred), _new, args...);
if (LIKELY(!ret || var.compare_exchange(old, _new)))
{
return ret > 0;
}
}
};
T value;
#ifdef _WIN32
if (OptWaitOnAddress)
{
while (!test_pred(value))
{
if (OptWaitOnAddress(&var, &value, sizeof(T), is_inf ? INFINITE : usec_timeout / 1000))
{
if (!test_pred(value, nullptr))
{
return false;
}
break;
}
if (GetLastError() == ERROR_TIMEOUT)
{
// Retire
return test_pred(value, nullptr);
}
}
return true;
}
LARGE_INTEGER timeout;
timeout.QuadPart = usec_timeout * -10;
if (!usec_timeout || NtWaitForKeyedEvent(nullptr, &var, false, is_inf ? nullptr : &timeout))
{
// Timed out: retire
if (!test_pred(value, nullptr))
{
return false;
}
// Signaled in the last moment: restore balance
NtWaitForKeyedEvent(nullptr, &var, false, nullptr);
return true;
}
if (!test_pred(value, nullptr))
{
// Stolen notification: restore balance
NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
return false;
}
return true;
#else
struct timespec timeout;
timeout.tv_sec = usec_timeout / 1000000;
timeout.tv_nsec = (usec_timeout % 1000000) * 1000;
char* ptr = reinterpret_cast<char*>(&var);
if constexpr (sizeof(T) == 8)
{
ptr += 4 * IS_BE_MACHINE;
}
while (!test_pred(value))
{
if (futex(ptr, FUTEX_WAIT_PRIVATE, static_cast<u32>(value), is_inf ? nullptr : &timeout) == 0)
{
if (!test_pred(value, nullptr))
{
return false;
}
break;
}
switch (errno)
{
case EAGAIN: break;
case ETIMEDOUT: return test_pred(value, nullptr);
default: verify("Unknown futex error" HERE), 0;
}
}
return true;
#endif
}
template <bool All = false, typename T>
void balanced_awaken(atomic_t<T>& var, u32 weight)
{
static_assert(sizeof(T) == 4 || sizeof(T) == 8);
#ifdef _WIN32
if (OptWaitOnAddress)
{
if (All || weight > 3)
{
OptWakeByAddressAll(&var);
return;
}
for (u32 i = 0; i < weight; i++)
{
OptWakeByAddressSingle(&var);
}
return;
}
for (u32 i = 0; i < weight; i++)
{
NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
}
#else
char* ptr = reinterpret_cast<char*>(&var);
if constexpr (sizeof(T) == 8)
{
ptr += 4 * IS_BE_MACHINE;
}
if (All || weight)
{
futex(ptr, FUTEX_WAKE_PRIVATE, All ? INT_MAX : std::min<u32>(INT_MAX, weight));
}
return;
#endif
}
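
The predicate protocol of balanced_wait_until, removed above together with balanced_awaken, is compact enough to show with a sketch. The atomic flag and helper below are illustrative, not repository code:

// Illustrative sketch (not repository code). The predicate gets the current value
// by reference and may rewrite it; it returns > 0 to commit the change and stop
// waiting, 0 to keep waiting without writing anything back, and < 0 to write the
// change back but report failure. The trailing parameter pack is non-empty only
// on the final "retire" call made after a timeout.
static bool wait_for_flag(atomic_t<u32>& flag)
{
    return balanced_wait_until(flag, 1000 /*usec*/, [](u32& value, auto... retire) -> int
    {
        if (value != 0)
        {
            value = 0; // consume the signal
            return +1; // committed, stop waiting
        }

        if constexpr (sizeof...(retire))
        {
            return -1; // timed out, give up
        }

        return 0; // keep waiting
    });
}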

View File

@@ -2385,13 +2385,6 @@ s64 spu_thread::get_ch_value(u32 ch)
fmt::throw_exception("Not supported: event mask 0x%x" HERE, mask1);
}
const auto pseudo_lock = vm::reservation_notifier(raddr, 128).try_shared_lock();
if (!pseudo_lock)
{
fmt::throw_exception("Unexpected: reservation notifier lock failed");
}
while (res = get_events(), !res)
{
state += cpu_flag::wait;
@@ -2401,7 +2394,7 @@ s64 spu_thread::get_ch_value(u32 ch)
return -1;
}
pseudo_lock.wait(100);
vm::reservation_notifier(raddr, 128).wait(rtime, atomic_wait_timeout{30000});
}
check_state();

View File

@@ -17,8 +17,6 @@
#include <thread>
#include <deque>
static_assert(sizeof(shared_cond) == 8, "Unexpected size of shared_cond");
namespace vm
{
static u8* memory_reserve_4GiB(std::uintptr_t _addr = 0)
@@ -50,9 +48,6 @@ namespace vm
// Reservation stats (compressed x16)
u8* const g_reservations = memory_reserve_4GiB((std::uintptr_t)g_stat_addr);
// Reservation sync variables
u8* const g_reservations2 = g_reservations + 0x10000000;
// Memory locations
std::vector<std::shared_ptr<block_t>> g_locations;
@@ -634,11 +629,9 @@ namespace vm
if (addr == 0x10000)
{
utils::memory_commit(g_reservations, 0x1000);
utils::memory_commit(g_reservations2, 0x1000);
}
utils::memory_commit(g_reservations + addr / 16, size / 16);
utils::memory_commit(g_reservations2 + addr / 16, size / 16);
}
else
{
@@ -646,12 +639,10 @@ namespace vm
for (u32 i = 0; i < 6; i++)
{
utils::memory_commit(g_reservations + addr / 16 + i * 0x10000, 0x4000);
utils::memory_commit(g_reservations2 + addr / 16 + i * 0x10000, 0x4000);
}
// End of the address space
utils::memory_commit(g_reservations + 0xfff0000, 0x10000);
utils::memory_commit(g_reservations2 + 0xfff0000, 0x10000);
}
if (flags & 0x100)

View File

@@ -14,7 +14,6 @@ namespace vm
extern u8* const g_exec_addr;
extern u8* const g_stat_addr;
extern u8* const g_reservations;
extern u8* const g_reservations2;
struct writer_lock;

View File

@@ -21,9 +21,9 @@ namespace vm
}
// Get reservation sync variable
inline shared_cond& reservation_notifier(u32 addr, u32 size)
inline atomic_t<u64>& reservation_notifier(u32 addr, u32 size)
{
return *reinterpret_cast<shared_cond*>(g_reservations2 + addr / 128 * 8);
return reinterpret_cast<atomic_t<u64>*>(g_reservations)[addr / 128];
}
void reservation_lock_internal(atomic_t<u64>&);
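
With the getter now returning the reservation counter itself, the writer side presumably pairs the counter bump with an atomic notify on the same word. A minimal sketch under that assumption; the helper name and the notify_all() call are not shown in this diff:

// Sketch under stated assumptions (not code from this commit): after a store
// bumps the 64-bit reservation counter, wake any threads blocked in
// atomic_t::wait on that counter.
static void publish_reservation_update(u32 addr)
{
    auto& res = vm::reservation_notifier(addr, 128);
    // ... the store path increments 'res' elsewhere ...
    res.notify_all(); // assumption: atomic_t exposes notify_all(), like the notify_one() seen in cond_variable::imp_wake above
}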