Rewrite cpu_thread::suspend_all

It is now a higher-order function: callers pass the operation to execute, and only one thread does the hard work of pausing the other threads.
2024-11-24 19:52:37 +01:00 · 2020-10-09 20:33:12 +03:00 · 2020-10-09 20:33:12 +03:00 · 050c3e1d6b
commit 050c3e1d6b
parent 6d83c9cc0e
10 changed files with 299 additions and 415 deletions
--- a/Utilities/JIT.cpp
+++ b/Utilities/JIT.cpp
@ -201,7 +201,7 @@ asmjit::JitRuntime& asmjit::get_global_runtime()
 	return g_rt;
 }

-void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than)
+asmjit::Label asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than)
 {
 	Label fall = c.newLabel();
 	Label begin = c.newLabel();
@ -234,7 +234,10 @@ void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fall
 	c.jae(fallback);
 	c.align(kAlignCode, 16);
 	c.bind(begin);
-	c.xbegin(fall);
+	return fall;
+
+	// xbegin must be issued manually by the caller, which allows adding more checks before entering the transaction
+	//c.xbegin(fall);
 }

 void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code)
--- a/Utilities/JIT.h
+++ b/Utilities/JIT.h
@ -56,7 +56,7 @@ namespace asmjit
 	asmjit::JitRuntime& get_global_runtime();

 	// Emit xbegin and adjacent loop, return label at xbegin
-	void build_transaction_enter(X86Assembler& c, Label fallback, const X86Gp& ctr, uint less_than);
+	[[nodiscard]] asmjit::Label build_transaction_enter(X86Assembler& c, Label fallback, const X86Gp& ctr, uint less_than);

 	// Emit xabort
 	void build_transaction_abort(X86Assembler& c, unsigned char code);
--- a/Utilities/mutex.cpp
+++ b/Utilities/mutex.cpp
@ -39,84 +39,6 @@ void shared_mutex::imp_unlock_shared(u32 old)
 	}
 }

-void shared_mutex::imp_lock_low(u32 val)
-{
-	verify("shared_mutex underflow" HERE), val < c_err;
-
-	for (int i = 0; i < 10; i++)
-	{
-		busy_wait();
-
-		if (try_lock_low())
-		{
-			return;
-		}
-	}
-
-	// Acquire writer lock and downgrade
-	const u32 old = m_value.fetch_add(c_one);
-
-	if (old == 0)
-	{
-		lock_downgrade();
-		return;
-	}
-
-	verify("shared_mutex overflow" HERE), (old % c_sig) + c_one < c_sig;
-	imp_wait();
-	lock_downgrade();
-}
-
-void shared_mutex::imp_unlock_low(u32 old)
-{
-	verify("shared_mutex underflow" HERE), old - 1 < c_err;
-
-	// Check reader count, notify the writer if necessary
-	if ((old - 1) % c_vip == 0)
-	{
-		imp_signal();
-	}
-}
-
-void shared_mutex::imp_lock_vip(u32 val)
-{
-	verify("shared_mutex underflow" HERE), val < c_err;
-
-	for (int i = 0; i < 10; i++)
-	{
-		busy_wait();
-
-		if (try_lock_vip())
-		{
-			return;
-		}
-	}
-
-	// Acquire writer lock and downgrade
-	const u32 old = m_value.fetch_add(c_one);
-
-	if (old == 0)
-	{
-		lock_downgrade_to_vip();
-		return;
-	}
-
-	verify("shared_mutex overflow" HERE), (old % c_sig) + c_one < c_sig;
-	imp_wait();
-	lock_downgrade_to_vip();
-}
-
-void shared_mutex::imp_unlock_vip(u32 old)
-{
-	verify("shared_mutex underflow" HERE), old - 1 < c_err;
-
-	// Check reader count, notify the writer if necessary
-	if ((old - 1) % c_one / c_vip == 0)
-	{
-		imp_signal();
-	}
-}
-
 void shared_mutex::imp_wait()
 {
 	while (true)
@ -241,18 +163,3 @@ void shared_mutex::imp_lock_unlock()
 	imp_wait();
 	unlock();
 }
-
-bool shared_mutex::downgrade_unique_vip_lock_to_low_or_unlock()
-{
-	return m_value.atomic_op([](u32& value)
-	{
-		if (value % c_one / c_vip == 1)
-		{
-			value -= c_vip - 1;
-			return true;
-		}
-
-		value -= c_vip;
-		return false;
-	});
-}
--- a/Utilities/mutex.h
+++ b/Utilities/mutex.h
@ -12,17 +12,12 @@ class shared_mutex final
 		c_one = 1u << 14, // Fixed-point 1.0 value (one writer, max_readers = c_one - 1)
 		c_sig = 1u << 30,
 		c_err = 1u << 31,
-		c_vip = 1u << 7,
 	};

 	atomic_t<u32> m_value{};

 	void imp_lock_shared(u32 val);
 	void imp_unlock_shared(u32 old);
-	void imp_lock_low(u32 val);
-	void imp_unlock_low(u32 old);
-	void imp_lock_vip(u32 val);
-	void imp_unlock_vip(u32 old);
 	void imp_wait();
 	void imp_signal();
 	void imp_lock(u32 val);
@ -88,64 +83,6 @@ public:
 		}
 	}

-	bool try_lock_low()
-	{
-		const u32 value = m_value.load();
-
-		// Conditional increment
-		return value < c_vip - 1 && m_value.compare_and_swap_test(value, value + 1);
-	}
-
-	void lock_low()
-	{
-		const u32 value = m_value.load();
-
-		if (value >= c_vip - 1 || !m_value.compare_and_swap_test(value, value + 1)) [[unlikely]]
-		{
-			imp_lock_low(value);
-		}
-	}
-
-	void unlock_low()
-	{
-		// Unconditional decrement (can result in broken state)
-		const u32 value = m_value.fetch_sub(1);
-
-		if (value >= c_one) [[unlikely]]
-		{
-			imp_unlock_low(value);
-		}
-	}
-
-	bool try_lock_vip()
-	{
-		const u32 value = m_value.load();
-
-		// Conditional increment
-		return (value < c_one - 1 || value & (c_one - c_vip)) && (value % c_vip) == 0 && m_value.compare_and_swap_test(value, value + c_vip);
-	}
-
-	void lock_vip()
-	{
-		const u32 value = m_value.load();
-
-		if ((value >= c_one - 1 && !(value & (c_one - c_vip))) || (value % c_vip) || !m_value.compare_and_swap_test(value, value + c_vip)) [[unlikely]]
-		{
-			imp_lock_vip(value);
-		}
-	}
-
-	void unlock_vip()
-	{
-		// Unconditional decrement (can result in broken state)
-		const u32 value = m_value.fetch_sub(c_vip);
-
-		if (value >= c_one) [[unlikely]]
-		{
-			imp_unlock_vip(value);
-		}
-	}
-
 	bool try_lock()
 	{
 		return m_value.compare_and_swap_test(0, c_one);
@ -214,12 +151,6 @@ public:
 		m_value -= c_one - 1;
 	}

-	void lock_downgrade_to_vip()
-	{
-		// Convert to vip lock (can result in broken state)
-		m_value -= c_one - c_vip;
-	}
-
 	// Optimized wait for lockability without locking, relaxed
 	void lock_unlock()
 	{
@ -240,12 +171,9 @@ public:
 	{
 		return m_value.load() < c_one - 1;
 	}
-
-	// Special purpose logic
-	bool downgrade_unique_vip_lock_to_low_or_unlock();
 };

-// Simplified shared (reader) lock implementation. Mutually incompatible with low_lock and vip_lock.
+// Simplified shared (reader) lock implementation.
 class reader_lock final
 {
 	shared_mutex& m_mutex;
@ -283,47 +211,3 @@ public:
 		m_upgraded ? m_mutex.unlock() : m_mutex.unlock_shared();
 	}
 };
-
-// Special shared (reader) lock, mutually exclusive with vip locks. Mutually incompatible with normal shared (reader) lock.
-class low_lock final
-{
-	shared_mutex& m_mutex;
-
-public:
-	low_lock(const low_lock&) = delete;
-
-	low_lock& operator=(const low_lock&) = delete;
-
-	explicit low_lock(shared_mutex& mutex)
-		: m_mutex(mutex)
-	{
-		m_mutex.lock_low();
-	}
-
-	~low_lock()
-	{
-		m_mutex.unlock_low();
-	}
-};
-
-// Special shared (reader) lock, mutually exclusive with low locks. Mutually incompatible with normal shared (reader) lock.
-class vip_lock final
-{
-	shared_mutex& m_mutex;
-
-public:
-	vip_lock(const vip_lock&) = delete;
-
-	vip_lock& operator=(const vip_lock&) = delete;
-
-	explicit vip_lock(shared_mutex& mutex)
-		: m_mutex(mutex)
-	{
-		m_mutex.lock_vip();
-	}
-
-	~vip_lock()
-	{
-		m_mutex.unlock_vip();
-	}
-};
--- a/rpcs3/Emu/CPU/CPUThread.cpp
+++ b/rpcs3/Emu/CPU/CPUThread.cpp
@ -15,6 +15,7 @@

 DECLARE(cpu_thread::g_threads_created){0};
 DECLARE(cpu_thread::g_threads_deleted){0};
+DECLARE(cpu_thread::g_suspend_counter){0};

 LOG_CHANNEL(profiler);
 LOG_CHANNEL(sys_log, "SYS");
@ -245,6 +246,9 @@ struct cpu_counter
 	// For synchronizing suspend_all operation
 	alignas(64) shared_mutex cpu_suspend_lock;

+	// Workload linked list
+	alignas(64) atomic_t<cpu_thread::suspend_work*> cpu_suspend_work{};
+
 	// Semaphore for global thread array (global counter)
 	alignas(64) atomic_t<u32> cpu_array_sema{0};

@ -306,7 +310,7 @@ struct cpu_counter
 };

 template <typename F>
-void for_all_cpu(F&& func) noexcept
+void for_all_cpu(F func) noexcept
 {
 	auto ctr = g_fxo->get<cpu_counter>();

@ -475,6 +479,7 @@ bool cpu_thread::check_state() noexcept

 	bool cpu_sleep_called = false;
 	bool escape, retval;
+	u64 susp_ctr = -1;

 	while (true)
 	{
@ -483,6 +488,16 @@ bool cpu_thread::check_state() noexcept
 		{
 			bool store = false;

+			// Easy way to obtain suspend counter
+			if (flags & cpu_flag::pause && !(flags & cpu_flag::wait))
+			{
+				susp_ctr = g_suspend_counter;
+			}
+			else
+			{
+				susp_ctr = -1;
+			}
+
 			if (flags & cpu_flag::signal)
 			{
 				flags -= cpu_flag::signal;
@ -559,8 +574,22 @@ bool cpu_thread::check_state() noexcept
 				continue;
 			}

-			// If only cpu_flag::pause was set, notification won't arrive
-			g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
+			// If only cpu_flag::pause was set, wait on suspend counter instead
+			if (state0 & cpu_flag::pause)
+			{
+				// Hard way
+				if (susp_ctr == umax)
+				{
+					g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
+					continue;
+				}
+
+				// Wait for current suspend_all operation
+				while (busy_wait(), g_suspend_counter == susp_ctr)
+				{
+					g_suspend_counter.wait(susp_ctr);
+				}
+			}
 		}
 	}
 }
@ -641,69 +670,114 @@ std::string cpu_thread::dump_misc() const
 	return fmt::format("Type: %s\n" "State: %s\n", typeid(*this).name(), state.load());
 }

-cpu_thread::suspend_all::suspend_all(cpu_thread* _this) noexcept
-	: m_this(_this)
+void cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
 {
-	if (m_this)
-	{
-		m_this->state += cpu_flag::wait;
-	}
+	// Can't allow pre-set wait bit (it'd be a problem)
+	verify(HERE), !_this || !(_this->state & cpu_flag::wait);

-	g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_vip();
+	// Value must be reliable because cpu_flag::wait hasn't been observed only (but not if pause is set)
+	const u64 susp_ctr = g_suspend_counter;

-	for_all_cpu([](cpu_thread* cpu)
+	// Try to push workload
+	auto& queue = g_fxo->get<cpu_counter>()->cpu_suspend_work;
+
+	do
 	{
-		// Should be atomic
-		if (!(cpu->state & cpu_flag::pause))
+		// Load current head
+		next = queue.load();
+
+		if (!_this && next)
 		{
-			cpu->state += cpu_flag::pause;
+			// If _this == nullptr, it only works if this is the first workload pushed
+			g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
+			continue;
 		}
-	});
+	}
+	while (!queue.compare_and_swap_test(next, this));

-	busy_wait(500);
-
-	while (true)
+	if (!next)
 	{
-		bool ok = true;
+		// First thread to push the work to the workload list pauses all threads and processes it
+		std::lock_guard lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);

 		for_all_cpu([&](cpu_thread* cpu)
 		{
-			if (!(cpu->state & cpu_flag::wait))
+			if (!(cpu->state & cpu_flag::pause) && cpu != _this)
 			{
-				ok = false;
+				cpu->state += cpu_flag::pause;
 			}
 		});

-		if (ok) [[likely]]
+		busy_wait(500);
+
+		while (true)
 		{
-			break;
+			bool ok = true;
+
+			for_all_cpu([&](cpu_thread* cpu)
+			{
+				if (!(cpu->state & cpu_flag::wait) && cpu != _this)
+				{
+					ok = false;
+				}
+			});
+
+			if (ok) [[likely]]
+			{
+				break;
+			}
 		}

-		busy_wait(500);
-	}
-}
+		// Extract queue and reverse element order (FILO to FIFO) (TODO: maybe leave order as is?)
+		auto* head = queue.exchange(nullptr);
+
+		if (auto* prev = head->next)
+		{
+			head->next = nullptr;
+
+			do
+			{
+				auto* pre2 = prev->next;
+				prev->next = head;
+
+				head = std::exchange(prev, pre2);
+			}
+			while (prev);
+		}
+
+		// Execute all stored workload
+		for (; head; head = head->next)
+		{
+			head->exec(head->func_ptr, head->res_buf);
+		}
+
+		// Finalization
+		g_suspend_counter++;

-cpu_thread::suspend_all::~suspend_all()
-{
-	// Make sure the latest thread does the cleanup and notifies others
-	if (g_fxo->get<cpu_counter>()->cpu_suspend_lock.downgrade_unique_vip_lock_to_low_or_unlock())
-	{
 		for_all_cpu([&](cpu_thread* cpu)
 		{
-			cpu->state -= cpu_flag::pause;
+			if (cpu != _this)
+			{
+				cpu->state -= cpu_flag::pause;
+			}
 		});
-
-		g_fxo->get<cpu_counter>()->cpu_suspend_lock.unlock_low();
 	}
 	else
 	{
-		g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
+		// Seems safe to set pause on self because wait flag hasn't been observed yet
+		_this->state += cpu_flag::pause + cpu_flag::wait;
+
+		// Subscribe for notification broadcast
+		while (busy_wait(), g_suspend_counter == susp_ctr)
+		{
+			g_suspend_counter.wait(susp_ctr);
+		}
+
+		_this->check_state();
+		return;
 	}

-	if (m_this)
-	{
-		m_this->check_state();
-	}
+	g_suspend_counter.notify_all();
 }

 void cpu_thread::stop_all() noexcept
@ -716,7 +790,7 @@ void cpu_thread::stop_all() noexcept
 	}
 	else
 	{
-		::vip_lock lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);
+		std::lock_guard lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);

 		for_all_cpu([](cpu_thread* cpu)
 		{
--- a/rpcs3/Emu/CPU/CPUThread.h
+++ b/rpcs3/Emu/CPU/CPUThread.h
@ -88,7 +88,7 @@ private:

 public:
 	// Thread stats for external observation
-	static atomic_t<u64> g_threads_created, g_threads_deleted;
+	static atomic_t<u64> g_threads_created, g_threads_deleted, g_suspend_counter;

 	// Get thread name (as assigned to named_thread)
 	std::string get_name() const;
@ -123,18 +123,50 @@ public:
 	// Callback for cpu_flag::ret
 	virtual void cpu_return() {}

-	// Thread locker
-	class suspend_all
+	// For internal use
+	struct suspend_work
 	{
-		cpu_thread* m_this;
+		void* func_ptr;
+		void* res_buf;

-	public:
-		suspend_all(cpu_thread* _this) noexcept;
-		suspend_all(const suspend_all&) = delete;
-		suspend_all& operator=(const suspend_all&) = delete;
-		~suspend_all();
+		// Type-erased op executor
+		void (*exec)(void* func, void* res);
+
+		// Next object in the linked list
+		suspend_work* next;
+
+		// Internal method
+		void push(cpu_thread* _this) noexcept;
 	};

+	// Suspend all threads and execute op (op may be executed by a thread other than the caller!)
+	template <typename F>
+	static auto suspend_all(cpu_thread* _this, F op)
+	{
+		if constexpr (std::is_void_v<std::invoke_result_t<F>>)
+		{
+			suspend_work work{&op, nullptr, [](void* func, void*)
+			{
+				(*static_cast<F*>(func))();
+			}};
+
+			work.push(_this);
+			return;
+		}
+		else
+		{
+			std::invoke_result_t<F> result;
+
+			suspend_work work{&op, &result, [](void* func, void* res_buf)
+			{
+				*static_cast<std::invoke_result_t<F>*>(res_buf) = (*static_cast<F*>(func))();
+			}};
+
+			work.push(_this);
+			return result;
+		}
+	}
+
 	// Stop all threads with cpu_flag::dbg_global_stop
 	static void stop_all() noexcept;

--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@ -1275,7 +1275,8 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 	}

 	// Begin transaction
-	build_transaction_enter(c, fall, x86::r12, 4);
+	Label tx0 = build_transaction_enter(c, fall, x86::r12, 4);
+	c.xbegin(tx0);
 	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
 	c.test(x86::eax, vm::rsrv_unique_lock);
 	c.jnz(skip);
@ -1336,7 +1337,6 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 	c.bind(fall);
 	c.sar(x86::eax, 24);
 	c.js(fail);
-	c.lock().bts(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::wait));

 	// Touch memory if transaction failed without RETRY flag on the first attempt
 	c.cmp(x86::r12, 1);
@ -1361,7 +1361,14 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 	c.cmp(x86::rax, x86::r13);
 	c.jne(fail2);

-	build_transaction_enter(c, fall2, x86::r12, 666);
+	Label tx1 = build_transaction_enter(c, fall2, x86::r12, 666);
+	c.bt(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
+	c.jc(fail3);
+	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
+	c.and_(x86::rax, -128);
+	c.cmp(x86::rax, x86::r13);
+	c.jne(fail2);
+	c.xbegin(tx1);

 	if (s_tsx_avx)
 	{
@ -1535,30 +1542,18 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
 				default: break;
 				}

-				cpu_thread::suspend_all cpu_lock(&ppu);
-
-				// Obtain unique lock
-				while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
+				return cpu_thread::suspend_all(&ppu, [&]
 				{
-					busy_wait(100);
-
-					// Give up if reservation has been updated
-					if ((res & -128) != rtime)
+					if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref<spu_rdata_t>(addr & -128)))
 					{
-						res -= 1;
-						return false;
+						data.release(reg_value);
+						res += 127;
+						return true;
 					}
-				}

-				if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref<spu_rdata_t>(addr & -128)))
-				{
-					data.release(reg_value);
-					res += 63;
-					return true;
-				}
-
-				res -= (vm::rsrv_unique_lock + 1);
-				return false;
+					res -= 1;
+					return false;
+				});
 			}

 			while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@ -376,7 +376,8 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const
 	}

 	// Begin transaction
-	build_transaction_enter(c, fall, x86::r12, 4);
+	Label tx0 = build_transaction_enter(c, fall, x86::r12, 4);
+	c.xbegin(tx0);
 	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
 	c.test(x86::eax, vm::rsrv_unique_lock);
 	c.jnz(skip);
@ -450,7 +451,6 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const
 	c.bind(fall);
 	c.sar(x86::eax, 24);
 	c.js(fail);
-	c.lock().bts(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::wait));

 	// Touch memory if transaction failed without RETRY flag on the first attempt
 	c.cmp(x86::r12, 1);
@ -471,11 +471,14 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const
 	c.lock().xadd(x86::qword_ptr(x86::rbx), x86::rax);
 	c.test(x86::eax, vm::rsrv_unique_lock);
 	c.jnz(fail3);
+	c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
+	c.jc(fail3);
 	c.and_(x86::rax, -128);
 	c.cmp(x86::rax, x86::r13);
 	c.jne(fail2);

-	build_transaction_enter(c, fall2, x86::r12, 666);
+	Label tx1 = build_transaction_enter(c, fall2, x86::r12, 666);
+	c.xbegin(tx1);

 	if (s_tsx_avx)
 	{
@ -648,7 +651,8 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
 	}

 	// Begin transaction
-	build_transaction_enter(c, fall, x86::r12, 8);
+	Label tx0 = build_transaction_enter(c, fall, x86::r12, 8);
+	c.xbegin(tx0);
 	c.test(x86::dword_ptr(x86::rbx), vm::rsrv_unique_lock);
 	c.jnz(skip);

@ -683,7 +687,6 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
 	//c.jmp(fall);

 	c.bind(fall);
-	c.lock().bts(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::wait));

 	// Touch memory if transaction failed without RETRY flag on the first attempt
 	c.cmp(x86::r12, 1);
@ -703,7 +706,12 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
 	c.test(x86::eax, vm::rsrv_unique_lock);
 	c.jnz(fall2);

-	build_transaction_enter(c, fall2, x86::r12, 666);
+	Label tx1 = build_transaction_enter(c, fall2, x86::r12, 666);
+
+	// Check pause flag
+	c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
+	c.jc(fall2);
+	c.xbegin(tx1);

 	if (s_tsx_avx)
 	{
@ -1848,38 +1856,26 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)

 				if (render) render->pause();

-				cpu_thread::suspend_all cpu_lock(this);
-
-				// Obtain unique lock
-				while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
+				const bool ok = cpu_thread::suspend_all(this, [&]()
 				{
-					busy_wait(100);
-
-					// Give up if reservation has been updated
-					if ((res & -128) != rtime)
+					if ((res & -128) == rtime)
 					{
-						res -= 1;
-						if (render) render->unpause();
-						return false;
+						auto& data = vm::_ref<spu_rdata_t>(addr);
+
+						if (cmp_rdata(rdata, data))
+						{
+							mov_rdata(data, to_write);
+							res += 127;
+							return true;
+						}
 					}
-				}

-				if ((res & -128) == rtime)
-				{
-					auto& data = vm::_ref<spu_rdata_t>(addr);
+					res -= 1;
+					return false;
+				});

-					if (cmp_rdata(rdata, data))
-					{
-						mov_rdata(data, to_write);
-						res += 63;
-						if (render) render->unpause();
-						return true;
-					}
-				}
-
-				res -= (vm::rsrv_unique_lock | 1);
 				if (render) render->unpause();
-				return false;
+				return ok;
 			}
 			case 1: return true;
 			case 0: return false;
@ -1973,15 +1969,11 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)

 		if (result == 0)
 		{
-			cpu_thread::suspend_all cpu_lock(cpu);
-
-			while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
+			cpu_thread::suspend_all(cpu, [&]
 			{
-				busy_wait(100);
-			}
-
-			mov_rdata(vm::_ref<spu_rdata_t>(addr), *static_cast<const spu_rdata_t*>(to_write));
-			vm::reservation_acquire(addr, 128) += 63;
+				mov_rdata(vm::_ref<spu_rdata_t>(addr), *static_cast<const spu_rdata_t*>(to_write));
+				vm::reservation_acquire(addr, 128) += 127;
+			});
 		}

 		if (render) render->unpause();
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@ -497,33 +497,25 @@ namespace vm

 	void reservation_op_internal(u32 addr, std::function<bool()> func)
 	{
-		const auto _cpu = get_current_cpu_thread();
-
-		// Acknowledge contender if necessary (TODO: check)
-		_cpu->state += cpu_flag::wait;
-
+		const bool ok = cpu_thread::suspend_all(get_current_cpu_thread(), [&]
 		{
-			cpu_thread::suspend_all cpu_lock(_cpu);
-
-			// Wait to acquire unique lock
-			while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
-			{
-				busy_wait(100);
-			}
-
 			if (func())
 			{
 				// Success, release all locks if necessary
-				vm::reservation_acquire(addr, 128) += 63;
+				vm::reservation_acquire(addr, 128) += 127;
+				return true;
 			}
 			else
 			{
-				// Fake update (TODO)
-				vm::reservation_acquire(addr, 128) += 63;
+				vm::reservation_acquire(addr, 128) -= 1;
+				return false;
 			}
-		}
+		});

-		vm::reservation_notifier(addr, 128).notify_all();
+		if (ok)
+		{
+			vm::reservation_notifier(addr, 128).notify_all();
+		}
 	}

 	void reservation_escape_internal()
--- a/rpcs3/rpcs3qt/cheat_manager.cpp
+++ b/rpcs3/rpcs3qt/cheat_manager.cpp
@ -319,35 +319,36 @@ std::vector<u32> cheat_engine::search(const T value, const std::vector<u32>& to_
 	if (Emu.IsStopped())
 		return {};

-	cpu_thread::suspend_all cpu_lock(nullptr);
-
-	if (!to_filter.empty())
+	cpu_thread::suspend_all(nullptr, [&]
 	{
-		for (const auto& off : to_filter)
+		if (!to_filter.empty())
 		{
-			if (vm::check_addr(off, sizeof(T)))
+			for (const auto& off : to_filter)
 			{
-				if (*vm::get_super_ptr<T>(off) == value_swapped)
-					results.push_back(off);
-			}
-		}
-	}
-	else
-	{
-		// Looks through mapped memory
-		for (u32 page_start = 0x10000; page_start < 0xF0000000; page_start += 4096)
-		{
-			if (vm::check_addr(page_start))
-			{
-				// Assumes the values are aligned
-				for (u32 index = 0; index < 4096; index += sizeof(T))
+				if (vm::check_addr(off, sizeof(T)))
 				{
-					if (*vm::get_super_ptr<T>(page_start + index) == value_swapped)
-						results.push_back(page_start + index);
+					if (*vm::get_super_ptr<T>(off) == value_swapped)
+						results.push_back(off);
 				}
 			}
 		}
-	}
+		else
+		{
+			// Looks through mapped memory
+			for (u32 page_start = 0x10000; page_start < 0xF0000000; page_start += 4096)
+			{
+				if (vm::check_addr(page_start))
+				{
+					// Assumes the values are aligned
+					for (u32 index = 0; index < 4096; index += sizeof(T))
+					{
+						if (*vm::get_super_ptr<T>(page_start + index) == value_swapped)
+							results.push_back(page_start + index);
+					}
+				}
+			}
+		}
+	});

 	return results;
 }
@ -361,19 +362,17 @@ T cheat_engine::get_value(const u32 offset, bool& success)
 		return 0;
 	}

-	cpu_thread::suspend_all cpu_lock(nullptr);
-
-	if (!vm::check_addr(offset, sizeof(T)))
+	return cpu_thread::suspend_all(nullptr, [&]() -> T
 	{
-		success = false;
-		return 0;
-	}
+		if (!vm::check_addr(offset, sizeof(T)))
+		{
+			success = false;
+			return 0;
+		}

-	success = true;
-
-	T ret_value = *vm::get_super_ptr<T>(offset);
-
-	return ret_value;
+		success = true;
+		return *vm::get_super_ptr<T>(offset);
+	});
 }

 template <typename T>
@ -382,55 +381,61 @@ bool cheat_engine::set_value(const u32 offset, const T value)
 	if (Emu.IsStopped())
 		return false;

-	cpu_thread::suspend_all cpu_lock(nullptr);
-
 	if (!vm::check_addr(offset, sizeof(T)))
 	{
 		return false;
 	}

-	*vm::get_super_ptr<T>(offset) = value;
-
-	const bool exec_code_at_start = vm::check_addr(offset, 1, vm::page_executable);
-	const bool exec_code_at_end = [&]()
+	return cpu_thread::suspend_all(nullptr, [&]
 	{
-		if constexpr (sizeof(T) == 1)
+		if (!vm::check_addr(offset, sizeof(T)))
 		{
-			return exec_code_at_start;
-		}
-		else
-		{
-			return vm::check_addr(offset + sizeof(T) - 1, 1, vm::page_executable);
-		}
-	}();
-
-	if (exec_code_at_end || exec_code_at_start)
-	{
-		extern void ppu_register_function_at(u32, u32, ppu_function_t);
-
-		u32 addr = offset, size = sizeof(T);
-
-		if (exec_code_at_end && exec_code_at_start)
-		{
-			size = align<u32>(addr + size, 4) - (addr & -4);
-			addr &= -4;
-		}
-		else if (exec_code_at_end)
-		{
-			size -= align<u32>(size - 4096 + (addr & 4095), 4);
-			addr = align<u32>(addr, 4096);
-		}
-		else if (exec_code_at_start)
-		{
-			size = align<u32>(4096 - (addr & 4095), 4);
-			addr &= -4;
+			return false;
 		}

-		// Reinitialize executable code
-		ppu_register_function_at(addr, size, nullptr);
-	}
+		*vm::get_super_ptr<T>(offset) = value;

-	return true;
+		const bool exec_code_at_start = vm::check_addr(offset, 1, vm::page_executable);
+		const bool exec_code_at_end = [&]()
+		{
+			if constexpr (sizeof(T) == 1)
+			{
+				return exec_code_at_start;
+			}
+			else
+			{
+				return vm::check_addr(offset + sizeof(T) - 1, 1, vm::page_executable);
+			}
+		}();
+
+		if (exec_code_at_end || exec_code_at_start)
+		{
+			extern void ppu_register_function_at(u32, u32, ppu_function_t);
+
+			u32 addr = offset, size = sizeof(T);
+
+			if (exec_code_at_end && exec_code_at_start)
+			{
+				size = align<u32>(addr + size, 4) - (addr & -4);
+				addr &= -4;
+			}
+			else if (exec_code_at_end)
+			{
+				size -= align<u32>(size - 4096 + (addr & 4095), 4);
+				addr = align<u32>(addr, 4096);
+			}
+			else if (exec_code_at_start)
+			{
+				size = align<u32>(4096 - (addr & 4095), 4);
+				addr &= -4;
+			}
+
+			// Reinitialize executable code
+			ppu_register_function_at(addr, size, nullptr);
+		}
+
+		return true;
+	});
 }

 bool cheat_engine::is_addr_safe(const u32 offset)