From ec59f4d37efa20f305a871856a829359de12ae14 Mon Sep 17 00:00:00 2001 From: Elad Ashkenazi <18193363+elad335@users.noreply.github.com> Date: Fri, 9 Feb 2024 19:45:07 +0200 Subject: [PATCH] PPU/LV2: Implement HW threads switching delay when signaling higher priority threads On a real PS3 (it seems), when a thread with a higher priority than the caller is signaled and there is available space on the running queue for the other hardware thread to start, the signaled thread is prioritized: the caller's hardware thread switches instantly to the new thread's code while signaling the other hardware thread to execute the caller's code. This results in a delay to the caller after such a thread is signaled. --- rpcs3/Emu/CPU/CPUThread.cpp | 8 +++++++ rpcs3/Emu/Cell/PPUThread.h | 1 + rpcs3/Emu/Cell/lv2/lv2.cpp | 44 ++++++++++++++++++++++++++++++----- rpcs3/Emu/Cell/lv2/sys_sync.h | 13 ++++++++++- 4 files changed, 59 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index 806bd7e54c..af195ed770 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -964,6 +964,14 @@ bool cpu_thread::check_state() noexcept } else if (auto ppu = try_get()) { + if (u32 usec = ppu->hw_sleep_time) + { + thread_ctrl::wait_for_accurate(usec); + ppu->hw_sleep_time = 0; + ppu->raddr = 0; // Also lose reservation if there is any (reservation is unsaved on hw thread switch) + continue; + } + if (ppu->raddr && ppu->rtime == vm::reservation_acquire(ppu->raddr)) { // Same diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 72b99eae61..8c46475a68 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -276,6 +276,7 @@ public: const u32 stack_addr; // Stack address atomic_t joiner; // Joining thread or status + u32 hw_sleep_time = 0; // Very specific delay for hardware threads switching, see lv2_obj::awake_unlocked for more details lf_fifo, 127> cmd_queue; // Command queue for asynchronous operations. 
diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 1cb800c802..e82efc1c88 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1662,12 +1662,16 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio) // Yield changed the queue before bool changed_queue = prio == yield_cmd; + s32 lowest_new_priority = smax; + const bool has_free_hw_thread_space = count_non_sleeping_threads().onproc_count < g_cfg.core.ppu_threads + 0u; + if (cpu && prio != yield_cmd) { // Emplace current thread if (emplace_thread(cpu)) { changed_queue = true; + lowest_new_priority = std::min(static_cast(cpu)->prio.load().prio, lowest_new_priority); } } else for (const auto _cpu : g_to_awake) @@ -1676,13 +1680,15 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio) if (emplace_thread(_cpu)) { changed_queue = true; + lowest_new_priority = std::min(static_cast(_cpu)->prio.load().prio, lowest_new_priority); } } auto target = +g_ppu; + usz i = 0; // Suspend threads if necessary - for (usz i = 0, thread_count = g_cfg.core.ppu_threads; target; target = target->next_ppu, i++) + for (usz thread_count = g_cfg.core.ppu_threads; target; target = target->next_ppu, i++) { if (i >= thread_count && cpu_flag::suspend - target->state) { @@ -1709,6 +1715,27 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio) } } + // In real PS3 (it seems), when a thread with a higher priority than the caller is signaled and - + // - that there is available space on the running queue for the other hardware thread to start + // It prioritizes signaled thread - caller's hardware thread switches instantly to the new thread code + // While signaling to the other hardware thread to execute the caller's code. 
+ // Resulting in a delay to the caller after such thread is signaled + + if (current_ppu && changed_queue && has_free_hw_thread_space) + { + if (current_ppu->prio.load().prio > lowest_new_priority) + { + if (!current_ppu->state.test_and_set(cpu_flag::yield) || current_ppu->hw_sleep_time != 0) + { + current_ppu->hw_sleep_time += 35; // Seems like 35us after extensive testing + } + else + { + current_ppu->hw_sleep_time = 30000; // In addition to another flag's use (TODO: Refactor and clean this) + } + } + } + return changed_queue; } @@ -1920,19 +1947,24 @@ bool lv2_obj::is_scheduler_ready() return g_to_sleep.empty(); } -bool lv2_obj::has_ppus_in_running_state() +ppu_non_sleeping_count_t lv2_obj::count_non_sleeping_threads() { + ppu_non_sleeping_count_t total{}; + auto target = atomic_storage::load(g_ppu); - for (usz i = 0, thread_count = g_cfg.core.ppu_threads; target; target = atomic_storage::load(target->next_ppu), i++) + for (usz thread_count = g_cfg.core.ppu_threads; target; target = atomic_storage::load(target->next_ppu)) { - if (i >= thread_count) + if (total.onproc_count == thread_count) { - return true; + total.has_running = true; + break; } + + total.onproc_count++; } - return false; + return total; } void lv2_obj::set_yield_frequency(u64 freq, u64 max_allowed_tsc) diff --git a/rpcs3/Emu/Cell/lv2/sys_sync.h b/rpcs3/Emu/Cell/lv2/sys_sync.h index bd09d26a61..ad3235c81b 100644 --- a/rpcs3/Emu/Cell/lv2/sys_sync.h +++ b/rpcs3/Emu/Cell/lv2/sys_sync.h @@ -60,6 +60,12 @@ enum enum ppu_thread_status : u32; +struct ppu_non_sleeping_count_t +{ + bool has_running; // no actual count for optimization sake + u32 onproc_count; +}; + namespace vm { extern u8 g_reservations[65536 / 128 * 64]; @@ -280,7 +286,12 @@ public: static bool is_scheduler_ready(); // Must be called under IDM lock - static bool has_ppus_in_running_state(); + static ppu_non_sleeping_count_t count_non_sleeping_threads(); + + static inline bool has_ppus_in_running_state() noexcept + { + return 
count_non_sleeping_threads().has_running != 0; + } static void set_yield_frequency(u64 freq, u64 max_allowed_tsx);