diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index 9cf80dbb0b..419a02b45c 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -190,18 +190,34 @@ asmjit::JitRuntime& asmjit::get_global_runtime() return g_rt; } -asmjit::Label asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback) +void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than) { Label fall = c.newLabel(); Label begin = c.newLabel(); c.jmp(begin); c.bind(fall); - c.test(x86::eax, _XABORT_RETRY); - c.jz(fallback); + + if (less_than < 65) + { + c.add(ctr, 1); + c.test(x86::eax, _XABORT_RETRY); + c.jz(fallback); + } + else + { + // Count an attempt without RETRY flag as 65 normal attempts and continue + c.not_(x86::eax); + c.and_(x86::eax, _XABORT_RETRY); + c.shl(x86::eax, 5); + c.add(x86::eax, 1); // eax = RETRY ? 1 : 65 + c.add(ctr, x86::rax); + } + + c.cmp(ctr, less_than); + c.jae(fallback); c.align(kAlignCode, 16); c.bind(begin); c.xbegin(fall); - return begin; } void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code) diff --git a/Utilities/JIT.h b/Utilities/JIT.h index d3028ce47e..ac658c7cbb 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -43,7 +43,7 @@ namespace asmjit asmjit::JitRuntime& get_global_runtime(); // Emit xbegin and adjacent loop, return label at xbegin - Label build_transaction_enter(X86Assembler& c, Label fallback); + void build_transaction_enter(X86Assembler& c, Label fallback, const X86Gp& ctr, uint less_than); // Emit xabort void build_transaction_abort(X86Assembler& c, unsigned char code); diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index fe29ef58dc..b07bbe2323 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -3,6 +3,7 @@ #include "Emu/System.h" #include "Emu/IdManager.h" #include "Emu/Cell/SPUThread.h" +#include "Emu/Cell/PPUThread.h" #include "Emu/Cell/RawSPUThread.h" #include "Emu/Cell/lv2/sys_mmapper.h" #include "Emu/Cell/lv2/sys_event.h" @@ -1101,6 +1102,11 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) try { + if (cpu) + { + vm::temporary_unlock(*cpu); + } + handled = rsx::g_access_violation_handler(addr, is_writing); } catch (const std::exception& e) @@ -1109,7 +1115,6 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) if (cpu) { - vm::temporary_unlock(*cpu); cpu->state += cpu_flag::dbg_pause; if (cpu->test_stopped()) @@ -1131,6 +1136,10 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) return true; } + + if (cpu && cpu->test_stopped()) + { + } } auto code = (const u8*)RIP(context); diff --git a/Utilities/cond.cpp b/Utilities/cond.cpp index 718ce5cb30..8a829cd282 100644 --- a/Utilities/cond.cpp +++ b/Utilities/cond.cpp @@ -273,6 +273,177 @@ void shared_cond::imp_notify() noexcept balanced_awaken(m_cvx32, utils::popcnt32(wait_mask)); } +void shared_cond::wait_all() noexcept +{ + // Try to acquire waiting state without locking but only if there are other locks + const auto [old_, result] = m_cvx32.fetch_op([](u64& cvx32) -> u64 + { + // Check waiting alone + if ((cvx32 & 0xffffffff) == 0) + { + return 0; + } + + // Combine used bits and invert to find least significant bit unused + const u32 slot = utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true); + + // Set waiting bit (does nothing if all slots are used) + cvx32 |= (1ull << slot) & 0xffffffff; + return 1ull << slot; + }); + + if (!result) + { + return; + } + + if (result > 
0xffffffffu) + { + // All slots are used, fallback to spin wait + while (m_cvx32 & 0xffffffff) + { + busy_wait(); + } + + return; + } + + const u64 wait_bit = result; + const u64 lock_bit = wait_bit | (wait_bit << 32); + + balanced_wait_until(m_cvx32, -1, [&](u64& cvx32, auto... ret) -> int + { + if ((cvx32 & wait_bit) == 0) + { + // Remove signal and unlock at once + cvx32 &= ~lock_bit; + return +1; + } + + if constexpr (sizeof...(ret)) + { + cvx32 &= ~lock_bit; + return -1; + } + + return 0; + }); +} + +bool shared_cond::wait_all(shared_cond::shared_lock& lock) noexcept +{ + AUDIT(lock.m_this == this); + + if (lock.m_slot >= 32) + { + // Invalid argument, assume notified + return true; + } + + const u64 wait_bit = c_wait << lock.m_slot; + const u64 lock_bit = c_lock << lock.m_slot; + + // Try to acquire waiting state only if there are other locks + const auto [old_, not_alone] = m_cvx32.fetch_op([&](u64& cvx32) + { + // Check locking alone + if (((cvx32 >> 32) & cvx32) == (lock_bit >> 32)) + { + return false; + } + + // c_lock -> c_wait, c_sig -> unlock + cvx32 &= ~(lock_bit & ~wait_bit); + return true; + }); + + if (!not_alone) + { + return false; + } + else + { + // Set invalid slot to acknowledge unlocking + lock.m_slot = 33; + } + + if ((old_ & wait_bit) == 0) + { + // Already signaled, return without waiting + return true; + } + + balanced_wait_until(m_cvx32, -1, [&](u64& cvx32, auto... ret) -> int + { + if ((cvx32 & wait_bit) == 0) + { + // Remove signal and unlock at once + cvx32 &= ~lock_bit; + return +1; + } + + if constexpr (sizeof...(ret)) + { + cvx32 &= ~lock_bit; + return -1; + } + + return 0; + }); + + return true; +} + +bool shared_cond::notify_all(shared_cond::shared_lock& lock) noexcept +{ + AUDIT(lock.m_this == this); + + if (lock.m_slot >= 32) + { + // Invalid argument + return false; + } + + const u64 slot_mask = c_sig << lock.m_slot; + + auto [old, ok] = m_cvx32.fetch_op([&](u64& cvx32) + { + if (((cvx32 << 32) & cvx32) != slot_mask) + { + return false; + } + + if (const u64 sig_mask = cvx32 & 0xffffffff) + { + cvx32 &= (0xffffffffull << 32) & ~slot_mask; + cvx32 |= (sig_mask << 32) & ~slot_mask; + return true; + } + + return false; + }); + + if (!ok) + { + // Not an exclusive reader + return false; + } + + // Set invalid slot to acknowledge unlocking + lock.m_slot = 34; + + // Determine if some waiters need a syscall notification + const u64 wait_mask = old & (~old >> 32); + + if (UNLIKELY(!wait_mask)) + { + return true; + } + + balanced_awaken(m_cvx32, utils::popcnt32(wait_mask)); + return true; +} + bool lf_queue_base::wait(u64 _timeout) { auto _old = m_head.compare_and_swap(0, 1); diff --git a/Utilities/cond.h b/Utilities/cond.h index dc716fab88..d76bf81d8e 100644 --- a/Utilities/cond.h +++ b/Utilities/cond.h @@ -206,7 +206,7 @@ class shared_cond m_slot = m_this->m_cvx32.atomic_op([](u64& cvx32) { // Combine used bits and invert to find least significant bit unused - const u32 slot = utils::cnttz32(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true); + const u32 slot = utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true); // Set lock bits (does nothing if all slots are used) const u64 bit = (1ull << slot) & 0xffffffff; @@ -217,6 +217,13 @@ class shared_cond shared_lock(const shared_lock&) = delete; + shared_lock(shared_lock&& rhs) + : m_this(rhs.m_this) + , m_slot(rhs.m_slot) + { + rhs.m_slot = 32; + } + shared_lock& operator=(const shared_lock&) = delete; ~shared_lock() @@ -261,6 +268,10 @@ public: return imp_wait(lock.m_slot, usec_timeout); } + void 
wait_all() noexcept; + + bool wait_all(shared_lock& lock) noexcept; + void notify_all() noexcept { if (LIKELY(!m_cvx32)) @@ -268,4 +279,6 @@ public: imp_notify(); } + + bool notify_all(shared_lock& lock) noexcept; }; diff --git a/llvm b/llvm index b860b5e8f4..99b5284463 160000 --- a/llvm +++ b/llvm @@ -1 +1 @@ -Subproject commit b860b5e8f4ee90d6eb567d83ce8ed1a3e71e496f +Subproject commit 99b5284463025849c59067e79a3c08899049757e diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index 150a197f78..75130fe545 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -19,10 +19,13 @@ void fmt_class_string::format(std::string& out, u64 arg) { case cpu_flag::stop: return "STOP"; case cpu_flag::exit: return "EXIT"; + case cpu_flag::wait: return "w"; + case cpu_flag::pause: return "p"; case cpu_flag::suspend: return "s"; case cpu_flag::ret: return "ret"; case cpu_flag::signal: return "sig"; case cpu_flag::memory: return "mem"; + case cpu_flag::jit_return: return "JIT"; case cpu_flag::dbg_global_pause: return "G-PAUSE"; case cpu_flag::dbg_global_stop: return "G-EXIT"; case cpu_flag::dbg_pause: return "PAUSE"; @@ -42,10 +45,43 @@ void fmt_class_string>::format(std::string& out, u64 arg) thread_local cpu_thread* g_tls_current_cpu_thread = nullptr; +// For coordination and notification +alignas(64) shared_cond g_cpu_array_lock; + +// For cpu_flag::pause bit setting/removing +alignas(64) shared_mutex g_cpu_pause_lock; + +// For cpu_flag::pause +alignas(64) atomic_t g_cpu_pause_ctr{0}; + +// Semaphore for global thread array (global counter) +alignas(64) atomic_t g_cpu_array_sema{0}; + +// Semaphore subdivision for each array slot (64 x N in total) +atomic_t g_cpu_array_bits[6]{}; + +// All registered threads +atomic_t g_cpu_array[sizeof(g_cpu_array_bits) * 8]{}; + +template +void for_all_cpu(F&& func) noexcept +{ + for (u32 i = 0; i < ::size32(g_cpu_array_bits); i++) + { + for (u64 bits = g_cpu_array_bits[i]; bits; bits &= bits - 1) + { + const u64 index = i * 64 + utils::cnttz64(bits, true); + + if (cpu_thread* cpu = g_cpu_array[index].load()) + { + func(cpu); + } + } + } +} + void cpu_thread::operator()() { - state -= cpu_flag::exit; - g_tls_current_cpu_thread = this; if (g_cfg.core.thread_scheduler_enabled) @@ -58,6 +94,48 @@ void cpu_thread::operator()() thread_ctrl::set_native_priority(-1); } + // Register thread in g_cpu_array + if (!g_cpu_array_sema.try_inc(sizeof(g_cpu_array_bits) * 8)) + { + LOG_FATAL(GENERAL, "Too many threads"); + Emu.Pause(); + return; + } + + u64 array_slot = -1; + + for (u32 i = 0;; i = (i + 1) % ::size32(g_cpu_array_bits)) + { + if (LIKELY(~g_cpu_array_bits[i])) + { + const u64 found = g_cpu_array_bits[i].atomic_op([](u64& bits) -> u64 + { + // Find empty array slot and set its bit + if (LIKELY(~bits)) + { + const u64 bit = utils::cnttz64(~bits, true); + bits |= 1ull << bit; + return bit; + } + + return 64; + }); + + if (LIKELY(found < 64)) + { + // Fixup + array_slot = i * 64 + found; + break; + } + } + } + + // Register and wait if necessary + verify("g_cpu_array[...] -> this" HERE), g_cpu_array[array_slot].exchange(this) == nullptr; + + state += cpu_flag::wait; + g_cpu_array_lock.wait_all(); + // Check thread status while (!(state & (cpu_flag::exit + cpu_flag::dbg_global_stop))) { @@ -86,6 +164,13 @@ void cpu_thread::operator()() thread_ctrl::wait(); } + + // Unregister and wait if necessary + state += cpu_flag::wait; + verify("g_cpu_array[...] 
-> null" HERE), g_cpu_array[array_slot].exchange(nullptr) == this; + g_cpu_array_bits[array_slot / 64] &= ~(1ull << (array_slot % 64)); + g_cpu_array_sema--; + g_cpu_array_lock.wait_all(); } void cpu_thread::on_abort() @@ -105,7 +190,7 @@ cpu_thread::cpu_thread(u32 id) g_threads_created++; } -bool cpu_thread::check_state() +bool cpu_thread::check_state() noexcept { #ifdef WITH_GDB_DEBUGGER if (state & cpu_flag::dbg_pause) @@ -117,6 +202,11 @@ bool cpu_thread::check_state() bool cpu_sleep_called = false; bool cpu_flag_memory = false; + if (!(state & cpu_flag::wait)) + { + state += cpu_flag::wait; + } + while (true) { if (state & cpu_flag::memory) @@ -131,8 +221,9 @@ bool cpu_thread::check_state() state -= cpu_flag::memory; } - if (state & cpu_flag::exit + cpu_flag::jit_return + cpu_flag::dbg_global_stop) + if (state & (cpu_flag::exit + cpu_flag::jit_return + cpu_flag::dbg_global_stop)) { + state += cpu_flag::wait; return true; } @@ -141,7 +232,24 @@ bool cpu_thread::check_state() cpu_sleep_called = false; } - if (!is_paused()) + const auto [state0, escape] = state.fetch_op([&](bs_t& flags) + { + // Check pause flags which hold thread inside check_state + if (flags & (cpu_flag::pause + cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause)) + { + return false; + } + + // Atomically clean wait flag and escape + if (!(flags & (cpu_flag::exit + cpu_flag::jit_return + cpu_flag::dbg_global_stop + cpu_flag::ret + cpu_flag::stop))) + { + flags -= cpu_flag::wait; + } + + return true; + }); + + if (escape) { if (cpu_flag_memory) { @@ -150,14 +258,43 @@ bool cpu_thread::check_state() break; } - else if (!cpu_sleep_called && state & cpu_flag::suspend) + else if (!cpu_sleep_called && state0 & cpu_flag::suspend) { cpu_sleep(); cpu_sleep_called = true; continue; } - thread_ctrl::wait(); + if (state & cpu_flag::wait) + { + // Spin wait once for a bit before resorting to thread_ctrl::wait + for (u32 i = 0; i < 10; i++) + { + if (state0 & (cpu_flag::pause + cpu_flag::suspend)) + { + busy_wait(500); + } + else + { + break; + } + } + + if (!(state0 & (cpu_flag::pause + cpu_flag::suspend))) + { + continue; + } + } + + if (state0 & (cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause)) + { + thread_ctrl::wait(); + } + else + { + // If only cpu_flag::pause was set, notification won't arrive + g_cpu_array_lock.wait_all(); + } } const auto state_ = state.load(); @@ -196,3 +333,90 @@ std::string cpu_thread::dump() const { return fmt::format("Type: %s\n" "State: %s\n", typeid(*this).name(), state.load()); } + +cpu_thread::suspend_all::suspend_all(cpu_thread* _this) noexcept + : m_lock(g_cpu_array_lock.try_shared_lock()) + , m_this(_this) +{ + // TODO + if (!m_lock) + { + LOG_FATAL(GENERAL, "g_cpu_array_lock: too many concurrent accesses"); + Emu.Pause(); + return; + } + + if (m_this) + { + m_this->state += cpu_flag::wait; + } + + g_cpu_pause_ctr++; + + reader_lock lock(g_cpu_pause_lock); + + for_all_cpu([](cpu_thread* cpu) + { + cpu->state += cpu_flag::pause; + }); + + busy_wait(500); + + while (true) + { + bool ok = true; + + for_all_cpu([&](cpu_thread* cpu) + { + if (!(cpu->state & cpu_flag::wait)) + { + ok = false; + } + }); + + if (LIKELY(ok)) + { + break; + } + + busy_wait(500); + } +} + +cpu_thread::suspend_all::~suspend_all() +{ + // Make sure the latest thread does the cleanup and notifies others + u64 pause_ctr = 0; + + while ((pause_ctr = g_cpu_pause_ctr), !g_cpu_array_lock.wait_all(m_lock)) + { + if (pause_ctr) + { + std::lock_guard lock(g_cpu_pause_lock); + + // Detect 
possible unfortunate reordering of flag clearing after suspend_all's reader lock + if (g_cpu_pause_ctr != pause_ctr) + { + continue; + } + + for_all_cpu([&](cpu_thread* cpu) + { + if (g_cpu_pause_ctr == pause_ctr) + { + cpu->state -= cpu_flag::pause; + } + }); + } + + if (g_cpu_array_lock.notify_all(m_lock)) + { + break; + } + } + + if (m_this) + { + m_this->check_state(); + } +} diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 7eb3fdf633..a1f3af46e9 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -2,12 +2,15 @@ #include "../Utilities/Thread.h" #include "../Utilities/bit_set.h" +#include "../Utilities/cond.h" // Thread state flags enum class cpu_flag : u32 { stop, // Thread not running (HLE, initial state) exit, // Irreversible exit + wait, // Indicates waiting state, set by the thread itself + pause, // Thread suspended by suspend_all technique suspend, // Thread suspended ret, // Callback return requested signal, // Thread received a signal (HLE) @@ -39,15 +42,15 @@ public: const u32 id; // Public thread state - atomic_bs_t state{+cpu_flag::stop}; + atomic_bs_t state{cpu_flag::stop + cpu_flag::wait}; // Process thread state, return true if the checker must return - bool check_state(); + bool check_state() noexcept; // Process thread state (pause) [[nodiscard]] bool test_stopped() { - if (UNLIKELY(state)) + if (state) { if (check_state()) { @@ -99,6 +102,20 @@ public: // Callback for vm::temporary_unlock virtual void cpu_unmem() {} + + // Thread locker + class suspend_all + { + decltype(std::declval().try_shared_lock()) m_lock; + + cpu_thread* m_this; + + public: + suspend_all(cpu_thread* _this) noexcept; + suspend_all(const suspend_all&) = delete; + suspend_all& operator=(const suspend_all&) = delete; + ~suspend_all(); + }; }; inline cpu_thread* get_current_cpu_thread() noexcept diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 48c455f822..cd8a9b7d38 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1064,11 +1064,12 @@ const auto ppu_stwcx_tx = build_function_asmtest_stopped()) + { + _spu->pc += 4; + spu_runtime::g_escape(_spu); + } } void spu_recompiler::STOP(spu_opcode_t op) @@ -1407,7 +1413,7 @@ void spu_recompiler::MFSPR(spu_opcode_t op) c->movdqa(SPU_OFF_128(gpr, op.rt), vr); } -static s64 spu_rdch(spu_thread* _spu, u32 ch) +static u32 spu_rdch(spu_thread* _spu, u32 ch) { const s64 result = _spu->get_ch_value(ch); @@ -1416,7 +1422,13 @@ static s64 spu_rdch(spu_thread* _spu, u32 ch) spu_runtime::g_escape(_spu); } - return result; + if (_spu->test_stopped()) + { + _spu->pc += 4; + spu_runtime::g_escape(_spu); + } + + return static_cast(result & 0xffffffff); } void spu_recompiler::RDCH(spu_opcode_t op) @@ -2319,14 +2331,26 @@ static void spu_wrch(spu_thread* _spu, u32 ch, u32 value) { spu_runtime::g_escape(_spu); } + + if (_spu->test_stopped()) + { + _spu->pc += 4; + spu_runtime::g_escape(_spu); + } } -static void spu_wrch_mfc(spu_thread* _spu, spu_function_t _ret) +static void spu_wrch_mfc(spu_thread* _spu) { if (!_spu->process_mfc_cmd()) { spu_runtime::g_escape(_spu); } + + if (_spu->test_stopped()) + { + _spu->pc += 4; + spu_runtime::g_escape(_spu); + } } void spu_recompiler::WRCH(spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index cd3cfa6301..6c90bb449f 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -167,6 +167,13 @@ bool spu_interpreter::RDCH(spu_thread& spu, spu_opcode_t op) } 
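The pause/wait handshake that check_state() and suspend_all implement above reduces to a small protocol: every thread raises cpu_flag::wait at each safe point, and suspend_all raises cpu_flag::pause on all registered threads, then waits until every wait bit is visible before touching shared state. A self-contained sketch of that protocol, using std::atomic stand-ins for bs_t<cpu_flag> (toy_thread, WAIT and PAUSE are illustrative names, not rpcs3 API):

#include <atomic>
#include <thread>
#include <vector>

enum : unsigned { WAIT = 1, PAUSE = 2 }; // stand-ins for cpu_flag::wait/pause

struct toy_thread
{
	std::atomic<unsigned> state{0};

	// Analogue of check_state(): park while paused, then atomically drop
	// WAIT only while no pause request is pending (mirrors the fetch_op
	// added to check_state above)
	void checkpoint()
	{
		state.fetch_or(WAIT);

		for (unsigned s = state.load();;)
		{
			if (s & PAUSE)
			{
				std::this_thread::yield(); // the real code parks on shared_cond
				s = state.load();
				continue;
			}

			if (state.compare_exchange_weak(s, s & ~WAIT))
			{
				return;
			}
		}
	}
};

// Analogue of cpu_thread::suspend_all: request a pause, then wait until
// every thread is provably sitting inside checkpoint()
void suspend_all_sketch(const std::vector<toy_thread*>& threads)
{
	for (auto* t : threads)
		t->state.fetch_or(PAUSE);

	for (auto* t : threads)
		while (!(t->state.load() & WAIT))
			std::this_thread::yield();

	// Exclusive section: no toy_thread executes between checkpoints here,
	// because WAIT cannot be cleared while PAUSE is set

	for (auto* t : threads)
		t->state.fetch_and(~PAUSE);
}

The real implementation additionally registers threads in g_cpu_array so suspend_all can enumerate them, and sleeps on g_cpu_array_lock (shared_cond) instead of spinning.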
spu.gpr[op.rt] = v128::from32r(static_cast(result)); + + if (spu.state) + { + spu.pc += 4; + return false; + } + return true; } @@ -414,7 +421,18 @@ bool spu_interpreter::MTSPR(spu_thread& spu, spu_opcode_t op) bool spu_interpreter::WRCH(spu_thread& spu, spu_opcode_t op) { - return spu.set_ch_value(op.ra, spu.gpr[op.rt]._u32[3]); + if (!spu.set_ch_value(op.ra, spu.gpr[op.rt]._u32[3])) + { + return false; + } + + if (spu.state) + { + spu.pc += 4; + return false; + } + + return true; } bool spu_interpreter::BIZ(spu_thread& spu, spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index ff6dc304e4..41b8267d6c 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -5125,34 +5125,30 @@ public: call("spu_unknown", &exec_unk, m_thread, m_ir->getInt32(op_unk.opcode)); } - static bool exec_stop(spu_thread* _spu, u32 code) + static void exec_stop(spu_thread* _spu, u32 code) { - return _spu->stop_and_signal(code); + if (!_spu->stop_and_signal(code)) + { + spu_runtime::g_escape(_spu); + } + + if (_spu->test_stopped()) + { + _spu->pc += 4; + spu_runtime::g_escape(_spu); + } } void STOP(spu_opcode_t op) // { if (m_interp_magn) { - const auto succ = call("spu_syscall", &exec_stop, m_thread, m_ir->CreateAnd(m_interp_op, m_ir->getInt32(0x3fff))); - const auto next = llvm::BasicBlock::Create(m_context, "", m_function); - const auto stop = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(succ, next, stop); - m_ir->SetInsertPoint(stop); - m_ir->CreateRetVoid(); - m_ir->SetInsertPoint(next); + call("spu_syscall", &exec_stop, m_thread, m_ir->CreateAnd(m_interp_op, m_ir->getInt32(0x3fff))); return; } update_pc(); - const auto succ = call("spu_syscall", &exec_stop, m_thread, m_ir->getInt32(op.opcode & 0x3fff)); - const auto next = llvm::BasicBlock::Create(m_context, "", m_function); - const auto stop = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(succ, next, stop); - m_ir->SetInsertPoint(stop); - m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true); - m_ir->CreateBr(next); - m_ir->SetInsertPoint(next); + call("spu_syscall", &exec_stop, m_thread, m_ir->getInt32(op.opcode & 0x3fff)); if (g_cfg.core.spu_block_size == spu_block_size_type::safe) { @@ -5167,28 +5163,35 @@ public: { if (m_interp_magn) { - const auto succ = call("spu_syscall", &exec_stop, m_thread, m_ir->getInt32(0x3fff)); - const auto next = llvm::BasicBlock::Create(m_context, "", m_function); - const auto stop = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(succ, next, stop); - m_ir->SetInsertPoint(stop); - m_ir->CreateRetVoid(); - m_ir->SetInsertPoint(next); + call("spu_syscall", &exec_stop, m_thread, m_ir->getInt32(0x3fff)); return; } STOP(spu_opcode_t{0x3fff}); } - static s64 exec_rdch(spu_thread* _spu, u32 ch) + static u32 exec_rdch(spu_thread* _spu, u32 ch) { - return _spu->get_ch_value(ch); + const s64 result = _spu->get_ch_value(ch); + + if (result < 0) + { + spu_runtime::g_escape(_spu); + } + + if (_spu->test_stopped()) + { + _spu->pc += 4; + spu_runtime::g_escape(_spu); + } + + return static_cast(result & 0xffffffff); } - static s64 exec_read_in_mbox(spu_thread* _spu) + static u32 exec_read_in_mbox(spu_thread* _spu) { // TODO - return _spu->get_ch_value(SPU_RdInMbox); + return exec_rdch(_spu, SPU_RdInMbox); } static u32 exec_read_dec(spu_thread* _spu) @@ -5203,7 +5206,7 @@ public: return res; } - static s64 exec_read_events(spu_thread* _spu) + static u32 
exec_read_events(spu_thread* _spu) { if (const u32 events = _spu->get_events()) { @@ -5211,7 +5214,7 @@ public: } // TODO - return _spu->get_ch_value(SPU_RdEventStat); + return exec_rdch(_spu, SPU_RdEventStat); } llvm::Value* get_rdch(spu_opcode_t op, u32 off, bool atomic) @@ -5234,20 +5237,17 @@ public: const auto _cur = m_ir->GetInsertBlock(); const auto done = llvm::BasicBlock::Create(m_context, "", m_function); const auto wait = llvm::BasicBlock::Create(m_context, "", m_function); - const auto stop = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(m_ir->CreateICmpSLT(val0, m_ir->getInt64(0)), done, wait); + const auto cond = m_ir->CreateICmpSLT(val0, m_ir->getInt64(0)); + val0 = m_ir->CreateTrunc(val0, get_type()); + m_ir->CreateCondBr(cond, done, wait); m_ir->SetInsertPoint(wait); const auto val1 = call("spu_read_channel", &exec_rdch, m_thread, m_ir->getInt32(op.ra)); - m_ir->CreateCondBr(m_ir->CreateICmpSLT(val1, m_ir->getInt64(0)), stop, done); - m_ir->SetInsertPoint(stop); - m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true); m_ir->CreateBr(done); m_ir->SetInsertPoint(done); - const auto rval = m_ir->CreatePHI(get_type(), 2); + const auto rval = m_ir->CreatePHI(get_type(), 2); rval->addIncoming(val0, _cur); rval->addIncoming(val1, wait); - rval->addIncoming(m_ir->getInt64(0), stop); - return m_ir->CreateTrunc(rval, get_type()); + return rval; } void RDCH(spu_opcode_t op) // @@ -5257,13 +5257,6 @@ public: if (m_interp_magn) { res.value = call("spu_read_channel", &exec_rdch, m_thread, get_imm(op.ra).value); - const auto next = llvm::BasicBlock::Create(m_context, "", m_function); - const auto stop = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(m_ir->CreateICmpSLT(res.value, m_ir->getInt64(0)), stop, next); - m_ir->SetInsertPoint(stop); - m_ir->CreateRetVoid(); - m_ir->SetInsertPoint(next); - res.value = m_ir->CreateTrunc(res.value, get_type()); set_vr(op.rt, insert(splat(0), 3, res)); return; } @@ -5279,14 +5272,6 @@ public: { update_pc(); res.value = call("spu_read_in_mbox", &exec_read_in_mbox, m_thread); - const auto next = llvm::BasicBlock::Create(m_context, "", m_function); - const auto stop = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(m_ir->CreateICmpSLT(res.value, m_ir->getInt64(0)), stop, next); - m_ir->SetInsertPoint(stop); - m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true); - m_ir->CreateBr(next); - m_ir->SetInsertPoint(next); - res.value = m_ir->CreateTrunc(res.value, get_type()); break; } case MFC_RdTagStat: @@ -5333,14 +5318,6 @@ public: { update_pc(); res.value = call("spu_read_events", &exec_read_events, m_thread); - const auto next = llvm::BasicBlock::Create(m_context, "", m_function); - const auto stop = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(m_ir->CreateICmpSLT(res.value, m_ir->getInt64(0)), stop, next); - m_ir->SetInsertPoint(stop); - m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true); - m_ir->CreateBr(next); - m_ir->SetInsertPoint(next); - res.value = m_ir->CreateTrunc(res.value, get_type()); break; } case SPU_RdMachStat: @@ -5353,14 +5330,6 @@ public: { update_pc(); res.value = call("spu_read_channel", &exec_rdch, m_thread, m_ir->getInt32(op.ra)); - const auto next = llvm::BasicBlock::Create(m_context, "", m_function); - const auto stop = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(m_ir->CreateICmpSLT(res.value, m_ir->getInt64(0)), stop, next); - m_ir->SetInsertPoint(stop); - 
m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true); - m_ir->CreateBr(next); - m_ir->SetInsertPoint(next); - res.value = m_ir->CreateTrunc(res.value, get_type()); break; } } @@ -5471,14 +5440,18 @@ public: set_vr(op.rt, insert(splat(0), 3, res)); } - static bool exec_wrch(spu_thread* _spu, u32 ch, u32 value) + static void exec_wrch(spu_thread* _spu, u32 ch, u32 value) { - return _spu->set_ch_value(ch, value); - } + if (!_spu->set_ch_value(ch, value)) + { + spu_runtime::g_escape(_spu); + } - static void exec_mfc(spu_thread* _spu) - { - return _spu->do_mfc(); + if (_spu->test_stopped()) + { + _spu->pc += 4; + spu_runtime::g_escape(_spu); + } } static void exec_list_unstall(spu_thread* _spu, u32 tag) @@ -5491,12 +5464,21 @@ public: } } - return exec_mfc(_spu); + _spu->do_mfc(); } - static bool exec_mfc_cmd(spu_thread* _spu) + static void exec_mfc_cmd(spu_thread* _spu) { - return _spu->process_mfc_cmd(); + if (!_spu->process_mfc_cmd()) + { + spu_runtime::g_escape(_spu); + } + + if (_spu->test_stopped()) + { + _spu->pc += 4; + spu_runtime::g_escape(_spu); + } } void WRCH(spu_opcode_t op) // @@ -5505,13 +5487,7 @@ public: if (m_interp_magn) { - const auto succ = call("spu_write_channel", &exec_wrch, m_thread, get_imm(op.ra).value, val.value); - const auto next = llvm::BasicBlock::Create(m_context, "", m_function); - const auto stop = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(succ, next, stop); - m_ir->SetInsertPoint(stop); - m_ir->CreateRetVoid(); - m_ir->SetInsertPoint(next); + call("spu_write_channel", &exec_wrch, m_thread, get_imm(op.ra).value, val.value); return; } @@ -5922,14 +5898,7 @@ public: } update_pc(); - const auto succ = call("spu_write_channel", &exec_wrch, m_thread, m_ir->getInt32(op.ra), val.value); - const auto next = llvm::BasicBlock::Create(m_context, "", m_function); - const auto stop = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(succ, next, stop); - m_ir->SetInsertPoint(stop); - m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true); - m_ir->CreateBr(next); - m_ir->SetInsertPoint(next); + call("spu_write_channel", &exec_wrch, m_thread, m_ir->getInt32(op.ra), val.value); } void LNOP(spu_opcode_t op) // diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 2fbc731501..df632ed807 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -29,36 +29,39 @@ static const bool s_tsx_avx = utils::has_avx(); // For special case static const bool s_tsx_haswell = utils::has_rtm() && !utils::has_mpx(); -#ifdef _MSC_VER -bool operator ==(const u128& lhs, const u128& rhs) +static FORCE_INLINE bool cmp_rdata(const decltype(spu_thread::rdata)& lhs, const decltype(spu_thread::rdata)& rhs) { - return lhs.lo == rhs.lo && lhs.hi == rhs.hi; + const v128 a = (lhs[0] ^ rhs[0]) | (lhs[1] ^ rhs[1]); + const v128 b = (lhs[2] ^ rhs[2]) | (lhs[3] ^ rhs[3]); + const v128 c = (lhs[4] ^ rhs[4]) | (lhs[5] ^ rhs[5]); + const v128 d = (lhs[6] ^ rhs[6]) | (lhs[7] ^ rhs[7]); + const v128 r = (a | b) | (c | d); + return !(r._u64[0] | r._u64[1]); } -#endif -static FORCE_INLINE void mov_rdata(u128* const dst, const u128* const src) +static FORCE_INLINE void mov_rdata(decltype(spu_thread::rdata)& dst, const decltype(spu_thread::rdata)& src) { { - const u128 data0 = src[0]; - const u128 data1 = src[1]; - const u128 data2 = src[2]; + const v128 data0 = src[0]; + const v128 data1 = src[1]; + const v128 data2 = src[2]; dst[0] = data0; dst[1] = data1; dst[2] = data2; } { - const u128 data0 = src[3]; - 
const u128 data1 = src[4]; - const u128 data2 = src[5]; + const v128 data0 = src[3]; + const v128 data1 = src[4]; + const v128 data2 = src[5]; dst[3] = data0; dst[4] = data1; dst[5] = data2; } { - const u128 data0 = src[6]; - const u128 data1 = src[7]; + const v128 data0 = src[6]; + const v128 data1 = src[7]; dst[6] = data0; dst[7] = data1; } @@ -182,13 +185,15 @@ namespace spu } } -const auto spu_putllc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const auto spu_putllc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; Label fall = c.newLabel(); Label fail = c.newLabel(); Label _ret = c.newLabel(); + Label skip = c.newLabel(); + Label next = c.newLabel(); if (utils::has_avx() && !s_tsx_avx) { @@ -197,8 +202,6 @@ const auto spu_putllc_tx = build_function_asm(cpu_flag::wait)); + + // Touch memory if transaction failed without RETRY flag on the first attempt + c.cmp(x86::r12, 1); + c.jne(next); + c.xor_(x86::rbp, 0xf80); + c.lock().add(x86::dword_ptr(x86::rbp), 0); + c.xor_(x86::rbp, 0xf80); Label fall2 = c.newLabel(); - Label next2 = c.newLabel(); + Label fail2 = c.newLabel(); // Lightened transaction: only compare and swap data - Label retry = build_transaction_enter(c, fall2); + c.bind(next); + build_transaction_enter(c, fall2, x86::r12, 666); if (s_tsx_avx) { @@ -379,7 +393,7 @@ const auto spu_putllc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const auto spu_getll_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; @@ -558,10 +508,9 @@ const auto spu_getll_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const auto spu_getll_inexact = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; @@ -691,7 +620,6 @@ const auto spu_getll_fast = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const auto spu_putlluc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; Label fall = c.newLabel(); Label _ret = c.newLabel(); + Label skip = c.newLabel(); + Label next = c.newLabel(); if (utils::has_avx() && !s_tsx_avx) { @@ -884,7 +809,9 @@ const auto spu_putlluc_tx = build_function_asm(cpu_flag::wait)); + + // Touch memory if transaction failed without RETRY flag on the first attempt + c.cmp(x86::r12, 1); + c.jne(next); + c.xor_(x86::rbp, 0xf80); + c.lock().add(x86::dword_ptr(x86::rbp), 0); + c.xor_(x86::rbp, 0xf80); Label fall2 = c.newLabel(); // Lightened transaction - Label retry = build_transaction_enter(c, fall2); + c.bind(next); + build_transaction_enter(c, fall2, x86::r12, 666); if (s_tsx_avx) { @@ -944,57 +881,12 @@ const auto spu_putlluc_tx = build_function_asm(dst) = *reinterpret_cast(src); + *reinterpret_cast(dst) = *reinterpret_cast(src); dst += 16; src += 16; @@ -1501,7 +1393,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) while (size >= 128) { - mov_rdata(reinterpret_cast(dst), reinterpret_cast(src)); + mov_rdata(*reinterpret_cast(dst), *reinterpret_cast(src)); dst += 128; src += 128; @@ -1510,7 +1402,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) while (size) { - *reinterpret_cast(dst) = *reinterpret_cast(src); + *reinterpret_cast(dst) = *reinterpret_cast(src); dst += 16; src += 16; @@ -1556,7 +1448,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) { while (size >= 128) { - mov_rdata(reinterpret_cast(dst), reinterpret_cast(src)); + mov_rdata(*reinterpret_cast(dst), *reinterpret_cast(src)); dst += 128; src += 128; @@ -1565,7 +1457,7 @@ 
void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) while (size) { - *reinterpret_cast(dst) = *reinterpret_cast(src); + *reinterpret_cast(dst) = *reinterpret_cast(src); dst += 16; src += 16; @@ -1690,7 +1582,7 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args) if (raddr && addr == raddr) { // Last check for event before we clear the reservation - if ((vm::reservation_acquire(addr, 128) & -128) != rtime || rdata != vm::_ref(addr)) + if ((vm::reservation_acquire(addr, 128) & -128) != rtime || !cmp_rdata(rdata, vm::_ref(addr))) { ch_event_stat |= SPU_EVENT_LR; } @@ -1703,11 +1595,31 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args) // Store unconditionally if (LIKELY(g_use_rtm)) { - const u64 count = spu_putlluc_tx(addr, to_write.data()); + const u32 result = spu_putlluc_tx(addr, to_write.data(), this); - if (count >= 10) + if (result == 2) { - LOG_ERROR(SPU, "%s took too long: %u", args.cmd, count); + cpu_thread::suspend_all cpu_lock(this); + + // Try to obtain bit 7 (+64) + if (!atomic_storage::bts(vm::reservation_acquire(addr, 128).raw(), 6)) + { + auto& data = vm::_ref(addr); + mov_rdata(data, to_write); + + // Keep checking written data against a rogue transaction sneak in + while (std::atomic_thread_fence(std::memory_order_seq_cst), !cmp_rdata(data, to_write)) + { + mov_rdata(data, to_write); + } + + vm::reservation_acquire(addr, 128) += 63; + } + else + { + // Give up if another PUTLLUC command took precedence + vm::reservation_acquire(addr, 128) -= 1; + } } } else @@ -1722,12 +1634,12 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args) // Full lock (heavyweight) // TODO: vm::check_addr vm::writer_lock lock(addr); - mov_rdata(data.data(), to_write.data()); + mov_rdata(data, to_write); res.release(res.load() + 127); } else { - mov_rdata(data.data(), to_write.data()); + mov_rdata(data, to_write); res.release(res.load() + 127); } } @@ -1847,6 +1759,8 @@ bool spu_thread::process_mfc_cmd() // Stall infinitely if MFC queue is full while (UNLIKELY(mfc_size >= 16)) { + state += cpu_flag::wait; + if (is_stopped()) { return false; @@ -1873,8 +1787,10 @@ bool spu_thread::process_mfc_cmd() { rtime = vm::reservation_acquire(addr, 128) & -128; - while (rdata == data && (vm::reservation_acquire(addr, 128)) == rtime) + while (cmp_rdata(rdata, data) && (vm::reservation_acquire(addr, 128)) == rtime) { + state += cpu_flag::wait; + if (is_stopped()) { break; @@ -1882,15 +1798,40 @@ bool spu_thread::process_mfc_cmd() thread_ctrl::wait_for(100); } + + if (test_stopped()) + { + return false; + } } - if (LIKELY(g_use_rtm)) + if (LIKELY(g_use_rtm && !g_cfg.core.spu_accurate_getllar && raddr != addr)) { - const u64 count = g_cfg.core.spu_accurate_getllar ? 
spu_getll_tx(addr, dst.data(), &ntime) : spu_getll_fast(addr, dst.data(), &ntime); + // TODO: maybe always start from a transaction + ntime = spu_getll_inexact(addr, dst.data()); + } + else if (g_use_rtm) + { + ntime = spu_getll_tx(addr, dst.data()); - if (count >= 10) + if (ntime == 1) { - LOG_ERROR(SPU, "%s took too long: %u", ch_mfc_cmd.cmd, count); + if (!g_cfg.core.spu_accurate_getllar) + { + ntime = spu_getll_inexact(addr, dst.data()); + } + else + { + cpu_thread::suspend_all cpu_lock(this); + + while (vm::reservation_acquire(addr, 128) & 127) + { + busy_wait(100); + } + + ntime = vm::reservation_acquire(addr, 128); + mov_rdata(dst, data); + } } } else @@ -1907,37 +1848,37 @@ bool spu_thread::process_mfc_cmd() vm::writer_lock lock(addr); ntime = old_time; - mov_rdata(dst.data(), data.data()); + mov_rdata(dst, data); res.release(old_time); } else { ntime = old_time; - mov_rdata(dst.data(), data.data()); + mov_rdata(dst, data); res.release(old_time); } } - if (const u32 _addr = raddr) + if (raddr && raddr != addr) { // Last check for event before we replace the reservation with a new one - if ((vm::reservation_acquire(_addr, 128) & -128) != rtime || rdata != vm::_ref(_addr)) + if ((vm::reservation_acquire(raddr, 128) & -128) != rtime || !cmp_rdata(rdata, vm::_ref(raddr))) + { + ch_event_stat |= SPU_EVENT_LR; + } + } + else if (raddr == addr) + { + // Lost previous reservation on polling + if (ntime != rtime || !cmp_rdata(rdata, dst)) { ch_event_stat |= SPU_EVENT_LR; - - if (_addr == addr) - { - // Lost current reservation - raddr = 0; - ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS); - return true; - } } } raddr = addr; rtime = ntime; - mov_rdata(rdata.data(), dst.data()); + mov_rdata(rdata, dst); ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS); return true; @@ -1949,29 +1890,39 @@ bool spu_thread::process_mfc_cmd() const u32 addr = ch_mfc_cmd.eal & -128u; u32 result = 0; - if (raddr == addr && rtime == (vm::reservation_acquire(raddr, 128) & -128)) + if (raddr == addr) { const auto& to_write = _ref(ch_mfc_cmd.lsa & 0x3ff80); if (LIKELY(g_use_rtm)) { - u64 count = spu_putllc_tx(addr, rtime, rdata.data(), to_write.data()); + result = spu_putllc_tx(addr, rtime, rdata.data(), to_write.data()); - if ((count >> 63) == 0) + if (result == 2) { - result = 1; - } - else - { - count = ~count; - } + result = 0; - if (count >= 10) - { - LOG_ERROR(SPU, "%s took too long: %u (r=%u)", ch_mfc_cmd.cmd, count, result); + cpu_thread::suspend_all cpu_lock(this); + + // Give up if other PUTLLC/PUTLLUC commands are in progress + if (!vm::reservation_acquire(addr, 128).try_dec(rtime + 1)) + { + auto& data = vm::_ref(addr); + + if ((vm::reservation_acquire(addr, 128) & -128) == rtime && cmp_rdata(rdata, data)) + { + mov_rdata(data, to_write); + vm::reservation_acquire(addr, 128) += 127; + result = 1; + } + else + { + vm::reservation_acquire(addr, 128) -= 1; + } + } } } - else if (auto& data = vm::_ref(addr); rdata == data) + else if (auto& data = vm::_ref(addr); rtime == (vm::reservation_acquire(raddr, 128) & -128) && cmp_rdata(rdata, data)) { auto& res = vm::reservation_lock(raddr, 128); const u64 old_time = res.load() & -128; @@ -1984,9 +1935,9 @@ bool spu_thread::process_mfc_cmd() // TODO: vm::check_addr vm::writer_lock lock(addr); - if (rdata == data) + if (cmp_rdata(rdata, data)) { - mov_rdata(data.data(), to_write.data()); + mov_rdata(data, to_write); res.release(old_time + 128); result = 1; } @@ -2012,7 +1963,7 @@ bool spu_thread::process_mfc_cmd() if (raddr) { // Last check for event before we clear the 
reservation - if (raddr == addr || rtime != (vm::reservation_acquire(raddr, 128) & -128) || rdata != vm::_ref(raddr)) + if (raddr == addr || rtime != (vm::reservation_acquire(raddr, 128) & -128) || !cmp_rdata(rdata, vm::_ref(raddr))) { ch_event_stat |= SPU_EVENT_LR; } @@ -2164,7 +2115,7 @@ u32 spu_thread::get_events(bool waiting) } // Check reservation status and set SPU_EVENT_LR if lost - if (raddr && ((vm::reservation_acquire(raddr, sizeof(rdata)) & -128) != rtime || rdata != vm::_ref(raddr))) + if (raddr && ((vm::reservation_acquire(raddr, sizeof(rdata)) & -128) != rtime || !cmp_rdata(rdata, vm::_ref(raddr)))) { ch_event_stat |= SPU_EVENT_LR; raddr = 0; @@ -2256,6 +2207,11 @@ s64 spu_thread::get_ch_value(u32 ch) auto read_channel = [&](spu_channel& channel) -> s64 { + if (channel.get_count() == 0) + { + state += cpu_flag::wait; + } + for (int i = 0; i < 10 && channel.get_count() == 0; i++) { busy_wait(); @@ -2273,6 +2229,7 @@ s64 spu_thread::get_ch_value(u32 ch) thread_ctrl::wait(); } + check_state(); return out; }; @@ -2284,6 +2241,11 @@ s64 spu_thread::get_ch_value(u32 ch) } case SPU_RdInMbox: { + if (ch_in_mbox.get_count() == 0) + { + state += cpu_flag::wait; + } + while (true) { for (int i = 0; i < 10 && ch_in_mbox.get_count() == 0; i++) @@ -2300,6 +2262,7 @@ s64 spu_thread::get_ch_value(u32 ch) int_ctrl[2].set(SPU_INT2_STAT_SPU_MAILBOX_THRESHOLD_INT); } + check_state(); return out; } @@ -2410,6 +2373,8 @@ s64 spu_thread::get_ch_value(u32 ch) while (res = get_events(), !res) { + state += cpu_flag::wait; + if (is_stopped()) { return -1; @@ -2418,11 +2383,14 @@ s64 spu_thread::get_ch_value(u32 ch) pseudo_lock.wait(100); } + check_state(); return res; } while (res = get_events(true), !res) { + state += cpu_flag::wait; + if (is_stopped()) { return -1; @@ -2431,6 +2399,7 @@ s64 spu_thread::get_ch_value(u32 ch) thread_ctrl::wait_for(100); } + check_state(); return res; } @@ -2463,6 +2432,8 @@ bool spu_thread::set_ch_value(u32 ch, u32 value) { while (!ch_out_intr_mbox.try_push(value)) { + state += cpu_flag::wait; + if (is_stopped()) { return false; @@ -2472,9 +2443,12 @@ bool spu_thread::set_ch_value(u32 ch, u32 value) } int_ctrl[2].set(SPU_INT2_STAT_MAILBOX_INT); + check_state(); return true; } + state += cpu_flag::wait; + const u32 code = value >> 24; { if (code < 64) @@ -2609,6 +2583,8 @@ bool spu_thread::set_ch_value(u32 ch, u32 value) { while (!ch_out_mbox.try_push(value)) { + state += cpu_flag::wait; + if (is_stopped()) { return false; @@ -2617,6 +2593,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value) thread_ctrl::wait(); } + check_state(); return true; } @@ -2770,6 +2747,7 @@ bool spu_thread::stop_and_signal(u32 code) if (offset >= RAW_SPU_BASE_ADDR) { + state += cpu_flag::wait; status.atomic_op([code](u32& status) { status = (status & 0xffff) | (code << 16); @@ -2779,6 +2757,7 @@ bool spu_thread::stop_and_signal(u32 code) int_ctrl[2].set(SPU_INT2_STAT_SPU_STOP_AND_SIGNAL_INT); state += cpu_flag::stop; + check_state(); return true; } @@ -2808,6 +2787,8 @@ bool spu_thread::stop_and_signal(u32 code) // HACK: wait for executable code while (!_ref(pc)) { + state += cpu_flag::wait; + if (is_stopped()) { return false; @@ -2816,12 +2797,15 @@ bool spu_thread::stop_and_signal(u32 code) thread_ctrl::wait_for(1000); } + check_state(); return false; } case 0x001: { + state += cpu_flag::wait; thread_ctrl::wait_for(1000); // hack + check_state(); return true; } @@ -2857,6 +2841,8 @@ bool spu_thread::stop_and_signal(u32 code) std::shared_ptr queue; + state += cpu_flag::wait; + while (true) { 
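All of the channel paths above follow one discipline: raise cpu_flag::wait before anything that can block, and run check_state() once a value is in hand, so the wait flag is dropped and any pending pause is serviced before control returns to generated code. A condensed sketch of that shape, assuming the spu_channel/thread_ctrl API used above (read_channel_sketch is an illustrative name):

s64 read_channel_sketch(spu_thread& spu, spu_channel& ch)
{
	if (ch.get_count() == 0)
	{
		// Safe point: suspend_all may now treat this thread as parked
		spu.state += cpu_flag::wait;
	}

	u32 out = 0;

	while (!ch.try_pop(out))
	{
		if (spu.is_stopped())
		{
			return -1; // caller escapes without advancing pc, so RDCH restarts
		}

		thread_ctrl::wait();
	}

	// Drop cpu_flag::wait and honour any pause/suspend request before
	// handing the value back to compiled code
	spu.check_state();
	return out;
}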
queue.reset(); @@ -2897,6 +2883,7 @@ bool spu_thread::stop_and_signal(u32 code) if (!queue) { + check_state(); return ch_in_mbox.set_values(1, CELL_EINVAL), true; // TODO: check error value } @@ -2927,6 +2914,7 @@ bool spu_thread::stop_and_signal(u32 code) const auto data3 = static_cast(std::get<3>(event)); ch_in_mbox.set_values(4, CELL_OK, data1, data2, data3); queue->events.pop_front(); + check_state(); return true; } } @@ -2972,6 +2960,7 @@ bool spu_thread::stop_and_signal(u32 code) } } + check_state(); return true; } @@ -3045,6 +3034,8 @@ bool spu_thread::stop_and_signal(u32 code) { /* ===== sys_spu_thread_group_exit ===== */ + state += cpu_flag::wait; + u32 value = 0; if (!ch_out_mbox.try_pop(value)) @@ -3069,6 +3060,7 @@ bool spu_thread::stop_and_signal(u32 code) group->join_state = SYS_SPU_THREAD_GROUP_JOIN_GROUP_EXIT; state += cpu_flag::stop; + check_state(); return true; } @@ -3076,6 +3068,8 @@ bool spu_thread::stop_and_signal(u32 code) { /* ===== sys_spu_thread_exit ===== */ + state += cpu_flag::wait; + if (!ch_out_mbox.get_count()) { fmt::throw_exception("sys_spu_thread_exit(): Out_MBox is empty" HERE); @@ -3084,6 +3078,7 @@ bool spu_thread::stop_and_signal(u32 code) LOG_TRACE(SPU, "sys_spu_thread_exit(status=0x%x)", ch_out_mbox.get_value()); status |= SPU_STATUS_STOPPED_BY_STOP; state += cpu_flag::stop; + check_state(); return true; } } diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 177ed87d9b..b79075a2f5 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -529,7 +529,7 @@ public: // Reservation Data u64 rtime = 0; - std::array rdata{}; + std::array rdata{}; u32 raddr = 0; u32 srr0; diff --git a/rpcs3/Emu/Cell/lv2/sys_net.cpp b/rpcs3/Emu/Cell/lv2/sys_net.cpp index 8d08b6aa10..ea3d87d302 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_net.cpp @@ -357,6 +357,11 @@ s32 sys_net_bnet_accept(ppu_thread& ppu, s32 s, vm::ptr addr, } } + if (ppu.is_stopped()) + { + return 0; + } + auto newsock = std::make_shared(native_socket); result = idm::import_existing(newsock); @@ -975,6 +980,11 @@ s32 sys_net_bnet_recvfrom(ppu_thread& ppu, s32 s, vm::ptr buf, u32 len, s3 } } + if (ppu.is_stopped()) + { + return 0; + } + // TODO if (addr) { @@ -1796,6 +1806,11 @@ s32 sys_net_bnet_select(ppu_thread& ppu, s32 nfds, vm::ptr readf } } + if (ppu.is_stopped()) + { + return 0; + } + if (readfds) *readfds = rread; if (writefds) diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index 0e4fed623e..1a6dd0a623 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -172,6 +172,8 @@ namespace vm void temporary_unlock(cpu_thread& cpu) noexcept { + cpu.state += cpu_flag::wait; + if (g_tls_locked && g_tls_locked->compare_and_swap_test(&cpu, nullptr)) { cpu.cpu_unmem(); diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 71d0d66f77..909120786a 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -936,11 +936,18 @@ void Emulator::Load(const std::string& title_id, bool add_only, bool force_globa // Set RTM usage g_use_rtm = utils::has_rtm() && ((utils::has_mpx() && g_cfg.core.enable_TSX == tsx_usage::enabled) || g_cfg.core.enable_TSX == tsx_usage::forced); + if (g_use_rtm && !utils::has_mpx()) { LOG_WARNING(GENERAL, "TSX forced by User"); } + if (g_use_rtm && g_cfg.core.preferred_spu_threads) + { + g_cfg.core.preferred_spu_threads.set(0); + LOG_ERROR(GENERAL, "Preferred SPU Threads forcefully disabled - not compatible with TSX in this version."); + } + // Load patches from different locations 
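	// Note: utils::has_mpx() acts as a proxy for "not a Haswell/Broadwell-era
	// TSX implementation" (see s_tsx_haswell above); without MPX, TSX is only
	// used when the user explicitly selects "forced".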
 	fxm::check_unlocked<patch_engine>()->append(fs::get_config_dir() + "data/" + m_title_id + "/patch.yml");
diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h
index a6c939b1c8..cdf38b1027 100644
--- a/rpcs3/Emu/System.h
+++ b/rpcs3/Emu/System.h
@@ -385,7 +385,6 @@ struct cfg_root : cfg::node
 	cfg::_enum<spu_block_size_type> spu_block_size{this, "SPU Block Size", spu_block_size_type::safe};
 	cfg::_bool spu_accurate_getllar{this, "Accurate GETLLAR", false};
 	cfg::_bool spu_accurate_putlluc{this, "Accurate PUTLLUC", false};
-	cfg::_bool spu_accurate_putllc{this, "Accurate PUTLLC", false};
 	cfg::_bool spu_verification{this, "SPU Verification", true}; // Should be enabled
 	cfg::_bool spu_cache{this, "SPU Cache", true};
 	cfg::_enum<tsx_usage> enable_TSX{this, "Enable TSX", tsx_usage::enabled}; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully
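Taken together, the suspend_all scope is what stands in for the removed "Accurate PUTLLC" option: when a transaction keeps failing, the thread pauses every other cpu_thread and performs the store outside any transaction. A minimal usage sketch of the RAII helper (names from CPUThread.h; the comments restate the constructor/destructor behaviour shown in CPUThread.cpp, while with_all_threads_paused itself is illustrative):

void with_all_threads_paused(cpu_thread* self)
{
	// Constructor: flags every registered thread with cpu_flag::pause and
	// spins until each one reports cpu_flag::wait, i.e. sits in check_state()
	cpu_thread::suspend_all lock(self);

	// Exclusive section: no guest thread is executing, so emulated memory
	// and reservations may be modified non-atomically, as in the PUTLLC/
	// PUTLLUC fallback paths above

	// Destructor: clears cpu_flag::pause under g_cpu_pause_lock and wakes
	// the parked threads via g_cpu_array_lock; 'self' then re-runs
	// check_state() on its own state
}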