1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 18:53:28 +01:00

PPU/LV2: Implement HW threads switching delay when signaling higher priority threads

On a real PS3 (it seems), when a thread with a higher priority than the caller is signaled and there is available space on the running queue for the other hardware thread to start, the signaled thread is prioritized: the caller's hardware thread switches instantly to the new thread's code while signaling the other hardware thread to execute the caller's code.
This results in a delay for the caller after such a thread is signaled.
This commit is contained in:
Elad Ashkenazi 2024-02-09 19:45:07 +02:00 committed by Elad.Ash
parent 2c03878c3b
commit ec59f4d37e
4 changed files with 59 additions and 7 deletions

View File

@ -964,6 +964,14 @@ bool cpu_thread::check_state() noexcept
}
else if (auto ppu = try_get<ppu_thread>())
{
if (u32 usec = ppu->hw_sleep_time)
{
thread_ctrl::wait_for_accurate(usec);
ppu->hw_sleep_time = 0;
ppu->raddr = 0; // Also lose reservation if there is any (reservation is unsaved on hw thread switch)
continue;
}
if (ppu->raddr && ppu->rtime == vm::reservation_acquire(ppu->raddr))
{
// Same

View File

@ -276,6 +276,7 @@ public:
const u32 stack_addr; // Stack address
atomic_t<ppu_join_status> joiner; // Joining thread or status
u32 hw_sleep_time = 0; // Very specific delay for hardware threads switching, see lv2_obj::awake_unlocked for more details
lf_fifo<atomic_t<cmd64>, 127> cmd_queue; // Command queue for asynchronous operations.

View File

@ -1662,12 +1662,16 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
// Yield changed the queue before
bool changed_queue = prio == yield_cmd;
s32 lowest_new_priority = smax;
const bool has_free_hw_thread_space = count_non_sleeping_threads().onproc_count < g_cfg.core.ppu_threads + 0u;
if (cpu && prio != yield_cmd)
{
// Emplace current thread
if (emplace_thread(cpu))
{
changed_queue = true;
lowest_new_priority = std::min<s32>(static_cast<ppu_thread*>(cpu)->prio.load().prio, lowest_new_priority);
}
}
else for (const auto _cpu : g_to_awake)
@ -1676,13 +1680,15 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
if (emplace_thread(_cpu))
{
changed_queue = true;
lowest_new_priority = std::min<s32>(static_cast<ppu_thread*>(_cpu)->prio.load().prio, lowest_new_priority);
}
}
auto target = +g_ppu;
usz i = 0;
// Suspend threads if necessary
for (usz i = 0, thread_count = g_cfg.core.ppu_threads; target; target = target->next_ppu, i++)
for (usz thread_count = g_cfg.core.ppu_threads; target; target = target->next_ppu, i++)
{
if (i >= thread_count && cpu_flag::suspend - target->state)
{
@ -1709,6 +1715,27 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
}
}
// On a real PS3 (it seems), when a thread with a higher priority than the caller is signaled and
// there is available space on the running queue for the other hardware thread to start,
// the signaled thread is prioritized: the caller's hardware thread switches instantly to the new thread's code
// while signaling the other hardware thread to execute the caller's code.
// This results in a delay for the caller after such a thread is signaled.
if (current_ppu && changed_queue && has_free_hw_thread_space)
{
if (current_ppu->prio.load().prio > lowest_new_priority)
{
if (!current_ppu->state.test_and_set(cpu_flag::yield) || current_ppu->hw_sleep_time != 0)
{
current_ppu->hw_sleep_time += 35; // Seems like 35us after extensive testing
}
else
{
current_ppu->hw_sleep_time = 30000; // In addition to another flag's use (TODO: Refactor and clean this)
}
}
}
return changed_queue;
}
@ -1920,19 +1947,24 @@ bool lv2_obj::is_scheduler_ready()
return g_to_sleep.empty();
}
bool lv2_obj::has_ppus_in_running_state()
// Walks the PPU thread list that starts at g_ppu (following next_ppu) and returns a
// ppu_non_sleeping_count_t describing it:
//  - onproc_count is incremented once per visited thread, up to g_cfg.core.ppu_threads;
//  - has_running is set when the list still has a thread after that limit is reached.
// NOTE(review): this is a rendered diff hunk without +/- markers, so lines from the replaced
// has_ppus_in_running_state() body (its signature, the `i`-indexed loop header and check,
// `return true` / `return false`) appear interleaved with the new implementation below.
// Confirm against the actual source file before relying on this listing.
ppu_non_sleeping_count_t lv2_obj::count_non_sleeping_threads()
{
ppu_non_sleeping_count_t total{};
auto target = atomic_storage<ppu_thread*>::load(g_ppu);
for (usz i = 0, thread_count = g_cfg.core.ppu_threads; target; target = atomic_storage<ppu_thread*>::load(target->next_ppu), i++)
for (usz thread_count = g_cfg.core.ppu_threads; target; target = atomic_storage<ppu_thread*>::load(target->next_ppu))
{
if (i >= thread_count)
// Limit reached while the list still has an entry: flag it and stop without counting further
if (total.onproc_count == thread_count)
{
return true;
}
total.has_running = true;
break;
}
return false;
total.onproc_count++;
}
return total;
}
void lv2_obj::set_yield_frequency(u64 freq, u64 max_allowed_tsc)

View File

@ -60,6 +60,12 @@ enum
enum ppu_thread_status : u32;
// Result of lv2_obj::count_non_sleeping_threads().
// Members carry default initializers so that a default-initialized instance is
// well-defined (all-zero), not only a value-initialized one (`T x{}`).
struct ppu_non_sleeping_count_t
{
	// True when the thread list extends past the counted entries.
	// Only a flag: no actual count is kept, for optimization sake.
	bool has_running = false;

	// Number of threads counted from the head of the list
	// (count_non_sleeping_threads() stops counting at g_cfg.core.ppu_threads).
	u32 onproc_count = 0;
};
namespace vm
{
extern u8 g_reservations[65536 / 128 * 64];
@ -280,7 +286,12 @@ public:
static bool is_scheduler_ready();
// Must be called under IDM lock
static bool has_ppus_in_running_state();
static ppu_non_sleeping_count_t count_non_sleeping_threads();
static inline bool has_ppus_in_running_state() noexcept
{
return count_non_sleeping_threads().has_running != 0;
}
static void set_yield_frequency(u64 freq, u64 max_allowed_tsx);