From 0d88f037ffa245a4bea57d2dc1f8c5fc513b54c4 Mon Sep 17 00:00:00 2001 From: Eladash Date: Tue, 11 Jun 2019 22:45:11 +0300 Subject: [PATCH] Add new accuracy control for PUTLLUC accuracy setting (non-TSX) With the option enabled, GET commands are blocked until the current PUTLLC/PUTLLUC executor on that address finishes Additional improvements: - Minor race fix in sys_ppu_thread_exit (wait until the writer finishes) - Max number of PPU threads bumped to 8 --- rpcs3/Emu/Cell/SPUThread.cpp | 22 +++++++++++----------- rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp | 3 +++ rpcs3/Emu/Memory/vm.cpp | 21 +++++++++------------ rpcs3/Emu/System.h | 2 +- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 0939e3e0c8..78edcd2e5c 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1315,7 +1315,12 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) u8* dst = vm::_ptr(eal); u8* src = vm::_ptr(offset + lsa); - if (UNLIKELY(!is_get && !g_use_rtm)) + if (is_get) + { + std::swap(dst, src); + } + + if (UNLIKELY(!g_use_rtm && (!is_get || g_cfg.core.spu_accurate_putlluc))) { switch (u32 size = args.size) { @@ -1323,28 +1328,28 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) { auto& res = vm::reservation_lock(eal, 1); *reinterpret_cast(dst) = *reinterpret_cast(src); - res.release(res.load() + 127); + res.release(res.load() - 1); break; } case 2: { auto& res = vm::reservation_lock(eal, 2); *reinterpret_cast(dst) = *reinterpret_cast(src); - res.release(res.load() + 127); + res.release(res.load() - 1); break; } case 4: { auto& res = vm::reservation_lock(eal, 4); *reinterpret_cast(dst) = *reinterpret_cast(src); - res.release(res.load() + 127); + res.release(res.load() - 1); break; } case 8: { auto& res = vm::reservation_lock(eal, 8); *reinterpret_cast(dst) = *reinterpret_cast(src); - res.release(res.load() + 127); + res.release(res.load() - 1); break; } default: @@ -1363,7 
+1368,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) size -= 16; } - res.release(res.load() + 127); + res.release(res.load() - 1); break; } @@ -1395,11 +1400,6 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) return; } - if (is_get) - { - std::swap(dst, src); - } - switch (u32 size = args.size) { case 1: diff --git a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp index 14f2954afd..cb65b87820 100644 --- a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp @@ -15,6 +15,9 @@ void _sys_ppu_thread_exit(ppu_thread& ppu, u64 errorcode) { vm::temporary_unlock(ppu); + // Need to wait until the current writer finish + if (ppu.state & cpu_flag::memory) vm::g_mutex.lock_unlock(); + sys_ppu_thread.trace("_sys_ppu_thread_exit(errorcode=0x%llx)", errorcode); ppu.state += cpu_flag::exit; diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index d5f3025aad..73dde1bc1d 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -62,7 +62,7 @@ namespace vm // Memory mutex acknowledgement thread_local atomic_t* g_tls_locked = nullptr; - // Currently locked address + // Currently locked cache line atomic_t g_addr_lock = 0; // Memory mutex: passive locks @@ -71,13 +71,15 @@ namespace vm static void _register_lock(cpu_thread* _cpu) { - for (u32 i = 0;; i = (i + 1) % g_locks.size()) + for (u32 i = 0, max = g_cfg.core.ppu_threads;;) { if (!g_locks[i] && g_locks[i].compare_and_swap_test(nullptr, _cpu)) { g_tls_locked = g_locks.data() + i; return; } + + if (++i == max) i = 0; } } @@ -165,7 +167,7 @@ namespace vm void cleanup_unlock(cpu_thread& cpu) noexcept { - for (u32 i = 0; i < g_locks.size(); i++) + for (u32 i = 0, max = g_cfg.core.ppu_threads; i < max; i++) { if (g_locks[i] == &cpu) { @@ -247,9 +249,9 @@ namespace vm if (addr) { - for (auto& lock : g_locks) + for (auto lock = g_locks.cbegin(), end = lock + g_cfg.core.ppu_threads; lock != end; lock++) { - if (cpu_thread* ptr = 
lock) + if (cpu_thread* ptr = *lock) { ptr->state.test_and_set(cpu_flag::memory); } @@ -279,15 +281,10 @@ namespace vm } } - for (auto& lock : g_locks) + for (auto lock = g_locks.cbegin(), end = lock + g_cfg.core.ppu_threads; lock != end; lock++) { - while (cpu_thread* ptr = lock) + while (*lock) { - if (ptr->is_stopped()) - { - break; - } - _mm_pause(); } } diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 1f028283cf..a1b917e0b3 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -388,7 +388,7 @@ struct cfg_root : cfg::node node_core(cfg::node* _this) : cfg::node(_this, "Core") {} cfg::_enum ppu_decoder{this, "PPU Decoder", ppu_decoder_type::llvm}; - cfg::_int<1, 4> ppu_threads{this, "PPU Threads", 2}; // Amount of PPU threads running simultaneously (must be 2) + cfg::_int<1, 8> ppu_threads{this, "PPU Threads", 2}; // Amount of PPU threads running simultaneously (must be 2) cfg::_bool ppu_debug{this, "PPU Debug"}; cfg::_bool llvm_logs{this, "Save LLVM logs"}; cfg::string llvm_cpu{this, "Use LLVM CPU"};