diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index 806bd7e54c..af195ed770 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -964,6 +964,14 @@ bool cpu_thread::check_state() noexcept } else if (auto ppu = try_get()) { + if (u32 usec = ppu->hw_sleep_time) + { + thread_ctrl::wait_for_accurate(usec); + ppu->hw_sleep_time = 0; + ppu->raddr = 0; // Drop the reservation as well, if there is one (a reservation is not preserved across a hardware thread switch) + continue; // Delay served; move on to the next iteration of the enclosing loop + } + if (ppu->raddr && ppu->rtime == vm::reservation_acquire(ppu->raddr)) { // Same diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 72b99eae61..8c46475a68 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -276,6 +276,7 @@ public: const u32 stack_addr; // Stack address atomic_t joiner; // Joining thread or status + u32 hw_sleep_time = 0; // Pending delay, in microseconds, emulating a hardware thread switch: accumulated in lv2_obj::awake_unlocked (see there for the rationale), then waited out and reset to 0 in cpu_thread::check_state lf_fifo, 127> cmd_queue; // Command queue for asynchronous operations. 
diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 1cb800c802..e82efc1c88 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1662,12 +1662,16 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio) // Yield changed the queue before bool changed_queue = prio == yield_cmd; + s32 lowest_new_priority = smax; // Numerically smallest priority value among the threads emplaced by this call (stays smax if none) + const bool has_free_hw_thread_space = count_non_sleeping_threads().onproc_count < g_cfg.core.ppu_threads + 0u; // Sampled before any thread is emplaced below + if (cpu && prio != yield_cmd) { // Emplace current thread if (emplace_thread(cpu)) { changed_queue = true; + lowest_new_priority = std::min(static_cast<ppu_thread*>(cpu)->prio.load().prio, lowest_new_priority); } } else for (const auto _cpu : g_to_awake) @@ -1676,13 +1680,15 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio) if (emplace_thread(_cpu)) { changed_queue = true; + lowest_new_priority = std::min(static_cast<ppu_thread*>(_cpu)->prio.load().prio, lowest_new_priority); } } auto target = +g_ppu; + usz i = 0; // Suspend threads if necessary - for (usz i = 0, thread_count = g_cfg.core.ppu_threads; target; target = target->next_ppu, i++) + for (usz thread_count = g_cfg.core.ppu_threads; target; target = target->next_ppu, i++) { if (i >= thread_count && cpu_flag::suspend - target->state) { @@ -1709,6 +1715,27 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio) } } + // On a real PS3 (it seems), when a thread with a higher priority than the caller is signaled + // while there is still room on the running queue for another hardware thread to start, + // the signaled thread is prioritized: the caller's hardware thread switches instantly to the new thread's code + // and the other hardware thread is signaled to execute the caller's code. 
+ // This results in a delay for the caller after such a thread is signaled + if (current_ppu && changed_queue && has_free_hw_thread_space) + { + if (current_ppu->prio.load().prio > lowest_new_priority) + { + if (!current_ppu->state.test_and_set(cpu_flag::yield) || current_ppu->hw_sleep_time != 0) + { + current_ppu->hw_sleep_time += 35; // Empirical value: seems to be about 35us after extensive testing + } + else + { + current_ppu->hw_sleep_time = 30000; // Reached only when the first condition is false and no delay is pending; original note: in addition to another flag's use (TODO: Refactor and clean this) + } + } + } + return changed_queue; } @@ -1920,19 +1947,24 @@ bool lv2_obj::is_scheduler_ready() return g_to_sleep.empty(); } -bool lv2_obj::has_ppus_in_running_state() +ppu_non_sleeping_count_t lv2_obj::count_non_sleeping_threads() { + ppu_non_sleeping_count_t total{}; + auto target = atomic_storage::load(g_ppu); - for (usz i = 0, thread_count = g_cfg.core.ppu_threads; target; target = atomic_storage::load(target->next_ppu), i++) + for (usz thread_count = g_cfg.core.ppu_threads; target; target = atomic_storage::load(target->next_ppu)) { - if (i >= thread_count) + if (total.onproc_count == thread_count) { - return true; + total.has_running = true; + break; } + + total.onproc_count++; } - return false; + return total; } void lv2_obj::set_yield_frequency(u64 freq, u64 max_allowed_tsc) diff --git a/rpcs3/Emu/Cell/lv2/sys_sync.h b/rpcs3/Emu/Cell/lv2/sys_sync.h index bd09d26a61..ad3235c81b 100644 --- a/rpcs3/Emu/Cell/lv2/sys_sync.h +++ b/rpcs3/Emu/Cell/lv2/sys_sync.h @@ -60,6 +60,12 @@ enum enum ppu_thread_status : u32; +struct ppu_non_sleeping_count_t +{ + bool has_running = false; // True when the g_ppu list holds more entries than g_cfg.core.ppu_threads; a flag rather than an exact count so the scan can stop early + u32 onproc_count = 0; // Entries counted from the head of the g_ppu list, at most g_cfg.core.ppu_threads +}; + namespace vm { extern u8 g_reservations[65536 / 128 * 64]; @@ -280,7 +286,12 @@ public: static bool is_scheduler_ready(); // Must be called under IDM lock - static bool has_ppus_in_running_state(); + static ppu_non_sleeping_count_t count_non_sleeping_threads(); + + static inline bool has_ppus_in_running_state() noexcept + { + return 
count_non_sleeping_threads().has_running; + } + static void set_yield_frequency(u64 freq, u64 max_allowed_tsx);