mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 02:32:36 +01:00
PPU LLVM: Re-add multi-threaded overlay module compilation
This commit is contained in:
parent
d399bd5b6a
commit
b4fc43d787
@ -694,7 +694,7 @@ class named_thread_group final
|
||||
{
|
||||
using Thread = named_thread<Context>;
|
||||
|
||||
const u32 m_count;
|
||||
u32 m_count = 0;
|
||||
|
||||
Thread* m_threads;
|
||||
|
||||
@ -705,7 +705,7 @@ class named_thread_group final
|
||||
|
||||
public:
|
||||
// Lambda constructor, also the implicit deduction guide candidate
|
||||
named_thread_group(std::string_view name, u32 count, const Context& f)
|
||||
named_thread_group(std::string_view name, u32 count, Context&& f) noexcept
|
||||
: m_count(count)
|
||||
, m_threads(nullptr)
|
||||
{
|
||||
@ -717,14 +717,60 @@ public:
|
||||
init_threads();
|
||||
|
||||
// Create all threads
|
||||
for (u32 i = 0; i < m_count; i++)
|
||||
for (u32 i = 0; i < m_count - 1; i++)
|
||||
{
|
||||
new (static_cast<void*>(m_threads + i)) Thread(std::string(name) + std::to_string(i + 1), f);
|
||||
// Copy the context
|
||||
new (static_cast<void*>(m_threads + i)) Thread(std::string(name) + std::to_string(i + 1), static_cast<const Context&>(f));
|
||||
}
|
||||
|
||||
// Move the context (if movable)
|
||||
new (static_cast<void*>(m_threads + m_count - 1)) Thread(std::string(name) + std::to_string(m_count - 1), std::forward<Context>(f));
|
||||
}
|
||||
|
||||
// Constructor with a function performed before adding more threads
|
||||
template <typename CheckAndPrepare>
|
||||
named_thread_group(std::string_view name, u32 count, Context&& f, CheckAndPrepare&& check) noexcept
|
||||
: m_count(count)
|
||||
, m_threads(nullptr)
|
||||
{
|
||||
if (count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
init_threads();
|
||||
m_count = 0;
|
||||
|
||||
// Create all threads
|
||||
for (u32 i = 0; i < count - 1; i++)
|
||||
{
|
||||
// Copy the context
|
||||
std::remove_cvref_t<Context> context(static_cast<const Context&>(f));
|
||||
|
||||
// Perform the check and additional preparations for each context
|
||||
if (!std::invoke(std::forward<CheckAndPrepare>(check), i, context))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
m_count++;
|
||||
new (static_cast<void*>(m_threads + i)) Thread(std::string(name) + std::to_string(i + 1), std::move(context));
|
||||
}
|
||||
|
||||
// Move the context (if movable)
|
||||
std::remove_cvref_t<Context> context(std::forward<Context>(f));
|
||||
|
||||
if (!std::invoke(std::forward<CheckAndPrepare>(check), m_count - 1, context))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
m_count++;
|
||||
new (static_cast<void*>(m_threads + m_count - 1)) Thread(std::string(name) + std::to_string(m_count - 1), std::move(context));
|
||||
}
|
||||
|
||||
// Default constructor
|
||||
named_thread_group(std::string_view name, u32 count)
|
||||
named_thread_group(std::string_view name, u32 count) noexcept
|
||||
: m_count(count)
|
||||
, m_threads(nullptr)
|
||||
{
|
||||
@ -791,10 +837,10 @@ public:
|
||||
return m_count;
|
||||
}
|
||||
|
||||
~named_thread_group()
|
||||
~named_thread_group() noexcept
|
||||
{
|
||||
// Destroy all threads (it should join them)
|
||||
for (u32 i = 0; i < m_count; i++)
|
||||
for (u32 i = m_count - 1; i < m_count; i--)
|
||||
{
|
||||
std::launder(m_threads + i)->~Thread();
|
||||
}
|
||||
|
@ -189,7 +189,7 @@ struct main_ppu_module : public ppu_module
|
||||
{
|
||||
u32 elf_entry{};
|
||||
u32 seg0_code_end{};
|
||||
std::basic_string<u32> applied_pathes;
|
||||
std::basic_string<u32> applied_patches;
|
||||
};
|
||||
|
||||
// Aux
|
||||
|
@ -2460,7 +2460,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
|
||||
|
||||
_main.elf_entry = static_cast<u32>(elf.header.e_entry);
|
||||
_main.seg0_code_end = end;
|
||||
_main.applied_pathes = applied;
|
||||
_main.applied_patches = applied;
|
||||
|
||||
if (!virtual_load)
|
||||
{
|
||||
@ -2987,13 +2987,23 @@ std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_ex
|
||||
}
|
||||
|
||||
ovlm->entry = static_cast<u32>(elf.header.e_entry);
|
||||
ovlm->seg0_code_end = end;
|
||||
ovlm->applied_patches = std::move(applied);
|
||||
|
||||
const bool is_being_used_in_emulation = (vm::base(ovlm->segs[0].addr) == ovlm->segs[0].ptr);
|
||||
|
||||
if (!is_being_used_in_emulation)
|
||||
{
|
||||
// Postpone to later
|
||||
return {std::move(ovlm), {}};
|
||||
}
|
||||
|
||||
const auto cpu = cpu_thread::get_current();
|
||||
|
||||
// Analyse executable (TODO)
|
||||
if (!ovlm->analyse(0, ovlm->entry, end, applied, !cpu ? std::function<bool()>() : [cpu, is_being_used_in_emulation = (vm::base(ovlm->segs[0].addr) == ovlm->segs[0].ptr)]()
|
||||
if (!ovlm->analyse(0, ovlm->entry, end, ovlm->applied_patches, !cpu ? std::function<bool()>() : [cpu]()
|
||||
{
|
||||
return is_being_used_in_emulation && cpu->state & cpu_flag::exit;
|
||||
return !!(cpu->state & cpu_flag::exit);
|
||||
}))
|
||||
{
|
||||
return {nullptr, CellError{CELL_CANCEL + 0u}};
|
||||
|
@ -3418,6 +3418,19 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
|
||||
return ppu_store_reservation<u64>(ppu, addr, reg_value);
|
||||
}
|
||||
|
||||
struct jit_core_allocator
|
||||
{
|
||||
const s32 thread_count = g_cfg.core.llvm_threads ? std::min<s32>(g_cfg.core.llvm_threads, limit()) : limit();
|
||||
|
||||
// Initialize global semaphore with the max number of threads
|
||||
::semaphore<0x7fffffff> sem{std::max<s32>(thread_count, 1)};
|
||||
|
||||
static s32 limit()
|
||||
{
|
||||
return static_cast<s32>(utils::get_thread_count());
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef LLVM_AVAILABLE
|
||||
namespace
|
||||
{
|
||||
@ -3771,7 +3784,6 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||
atomic_t<usz> fnext = 0;
|
||||
|
||||
lf_queue<file_info> possible_exec_file_paths;
|
||||
shared_mutex ovl_mtx;
|
||||
|
||||
named_thread_group workers("SPRX Worker ", std::min<u32>(utils::get_thread_count(), ::size32(file_queue)), [&]
|
||||
{
|
||||
@ -3854,15 +3866,18 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||
break;
|
||||
}
|
||||
|
||||
obj.clear(), src.close(); // Clear decrypted file and elf object memory
|
||||
|
||||
// Participate in thread execution limitation (takes a long time)
|
||||
if (std::lock_guard lock(g_fxo->get<jit_core_allocator>().sem); !ovlm->analyse(0, ovlm->entry, ovlm->seg0_code_end, ovlm->applied_patches, []()
|
||||
{
|
||||
// Does not really require this lock, this is done for performance reasons.
|
||||
// Seems like too many created threads is hard for Windows to manage efficiently with many CPU threads.
|
||||
std::lock_guard lock(ovl_mtx);
|
||||
ppu_initialize(*ovlm, false, file_size);
|
||||
return Emu.IsStopped();
|
||||
}))
|
||||
{
|
||||
// Emulation stopped
|
||||
break;
|
||||
}
|
||||
|
||||
obj.clear(), src.close(); // Clear decrypted file and elf object memory
|
||||
ppu_initialize(*ovlm, false, file_size);
|
||||
ppu_finalize(*ovlm);
|
||||
break;
|
||||
}
|
||||
@ -3910,7 +3925,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||
|
||||
ppu_log.notice("Trying to load as executable: %s", path);
|
||||
|
||||
// Load MSELF, SPRX or SELF
|
||||
// Load SELF
|
||||
fs::file src{path};
|
||||
|
||||
if (!src)
|
||||
@ -3952,7 +3967,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||
break;
|
||||
}
|
||||
|
||||
if (!_main.analyse(0, _main.elf_entry, _main.seg0_code_end, _main.applied_pathes, [](){ return Emu.IsStopped(); }))
|
||||
if (!_main.analyse(0, _main.elf_entry, _main.seg0_code_end, _main.applied_patches, [](){ return Emu.IsStopped(); }))
|
||||
{
|
||||
g_fxo->get<spu_cache>() = std::move(current_cache);
|
||||
break;
|
||||
@ -4004,7 +4019,7 @@ extern void ppu_initialize()
|
||||
scoped_progress_dialog progr = "Analyzing PPU Executable...";
|
||||
|
||||
// Analyse executable
|
||||
if (!_main.analyse(0, _main.elf_entry, _main.seg0_code_end, _main.applied_pathes, [](){ return Emu.IsStopped(); }))
|
||||
if (!_main.analyse(0, _main.elf_entry, _main.seg0_code_end, _main.applied_patches, [](){ return Emu.IsStopped(); }))
|
||||
{
|
||||
return;
|
||||
}
|
||||
@ -4238,19 +4253,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
|
||||
progr.emplace("Loading PPU modules...");
|
||||
}
|
||||
|
||||
struct jit_core_allocator
|
||||
{
|
||||
const s32 thread_count = g_cfg.core.llvm_threads ? std::min<s32>(g_cfg.core.llvm_threads, limit()) : limit();
|
||||
|
||||
// Initialize global semaphore with the max number of threads
|
||||
::semaphore<0x7fffffff> sem{std::max<s32>(thread_count, 1)};
|
||||
|
||||
static s32 limit()
|
||||
{
|
||||
return static_cast<s32>(utils::get_thread_count());
|
||||
}
|
||||
};
|
||||
|
||||
// Permanently loaded compiled PPU modules (name -> data)
|
||||
jit_module& jit_mod = g_fxo->get<jit_module_manager>().get(cache_path + "_" + std::to_string(std::bit_cast<usz>(info.segs[0].ptr)));
|
||||
|
||||
@ -4606,13 +4608,11 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
|
||||
g_progr_fknown_bits += file_size;
|
||||
}
|
||||
|
||||
// Create worker threads for compilation
|
||||
if (!workload.empty())
|
||||
{
|
||||
*progr = "Compiling PPU modules...";
|
||||
}
|
||||
|
||||
// Create worker threads for compilation (TODO: how many threads)
|
||||
{
|
||||
u32 thread_count = rpcs3::utils::get_max_threads();
|
||||
|
||||
if (workload.size() < thread_count)
|
||||
@ -4625,20 +4625,53 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
|
||||
atomic_t<u64> index = 0;
|
||||
};
|
||||
|
||||
// Prevent watchdog thread from terminating
|
||||
g_watchdog_hold_ctr++;
|
||||
struct thread_op
|
||||
{
|
||||
atomic_t<u32>& work_cv;
|
||||
std::vector<std::pair<std::string, ppu_module>>& workload;
|
||||
const std::string& cache_path;
|
||||
const cpu_thread* cpu;
|
||||
|
||||
named_thread_group threads(fmt::format("PPUW.%u.", ++g_fxo->get<thread_index_allocator>().index), thread_count, [&]()
|
||||
std::unique_lock<decltype(jit_core_allocator::sem)> core_lock;
|
||||
|
||||
// Binds the shared work state and remembers the semaphore without acquiring it.
thread_op(atomic_t<u32>& work_cv, std::vector<std::pair<std::string, ppu_module>>& workload
	, const cpu_thread* cpu, const std::string& cache_path, decltype(jit_core_allocator::sem)& sem) noexcept
	: work_cv(work_cv)
	, workload(workload)
	, cache_path(cache_path)
	, cpu(cpu)
	, core_lock(sem, std::defer_lock) // Save mutex (deferred: not locked here)
{
}
|
||||
|
||||
thread_op(const thread_op& other) noexcept
|
||||
: work_cv(other.work_cv)
|
||||
, workload(other.workload)
|
||||
, cache_path(other.cache_path)
|
||||
, cpu(other.cpu)
|
||||
{
|
||||
if (auto mtx = other.core_lock.mutex())
|
||||
{
|
||||
// Save mutex
|
||||
core_lock = std::unique_lock{*mtx, std::defer_lock};
|
||||
}
|
||||
}
|
||||
|
||||
thread_op(thread_op&& other) noexcept = default;
|
||||
|
||||
void operator()()
|
||||
{
|
||||
// Set low priority
|
||||
thread_ctrl::scoped_priority low_prio(-1);
|
||||
|
||||
#ifdef __APPLE__
|
||||
#ifdef __APPLE__
|
||||
pthread_jit_write_protect_np(false);
|
||||
#endif
|
||||
#endif
|
||||
for (u32 i = work_cv++; i < workload.size(); i = work_cv++, g_progr_pdone++)
|
||||
{
|
||||
if (Emu.IsStopped())
|
||||
if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@ -4646,14 +4679,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
|
||||
// Keep allocating workload
|
||||
const auto& [obj_name, part] = std::as_const(workload)[i];
|
||||
|
||||
// Allocate "core"
|
||||
std::lock_guard jlock(g_fxo->get<jit_core_allocator>().sem);
|
||||
|
||||
if (Emu.IsStopped())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
ppu_log.warning("LLVM: Compiling module %s%s", cache_path, obj_name);
|
||||
|
||||
// Use another JIT instance
|
||||
@ -4662,12 +4687,31 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
|
||||
|
||||
ppu_log.success("LLVM: Compiled module %s", obj_name);
|
||||
}
|
||||
|
||||
core_lock.unlock();
|
||||
}
|
||||
};
|
||||
|
||||
// Prevent watchdog thread from terminating
|
||||
g_watchdog_hold_ctr++;
|
||||
|
||||
named_thread_group threads(fmt::format("PPUW.%u.", ++g_fxo->get<thread_index_allocator>().index), thread_count
|
||||
, thread_op(work_cv, workload, cpu, cache_path, g_fxo->get<jit_core_allocator>().sem)
|
||||
, [&](u32 /*thread_index*/, thread_op& op)
|
||||
{
|
||||
// Allocate "core"
|
||||
op.core_lock.lock();
|
||||
|
||||
// Second check before creating another thread
|
||||
return work_cv < workload.size() && (cpu ? !cpu->state.all_of(cpu_flag::exit) : !Emu.IsStopped());
|
||||
});
|
||||
|
||||
threads.join();
|
||||
|
||||
g_watchdog_hold_ctr--;
|
||||
}
|
||||
|
||||
{
|
||||
if (!is_being_used_in_emulation || (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped()))
|
||||
{
|
||||
return compiled_new;
|
||||
|
@ -8,6 +8,8 @@ struct lv2_overlay final : lv2_obj, ppu_module
|
||||
static const u32 id_base = 0x25000000;
|
||||
|
||||
u32 entry;
|
||||
u32 seg0_code_end{};
|
||||
std::basic_string<u32> applied_patches;
|
||||
|
||||
lv2_overlay() = default;
|
||||
lv2_overlay(utils::serial&){}
|
||||
|
@ -1480,7 +1480,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch,
|
||||
{
|
||||
if (auto& _main = *ensure(g_fxo->try_get<main_ppu_module>()); !_main.path.empty())
|
||||
{
|
||||
if (!_main.analyse(0, _main.elf_entry, _main.seg0_code_end, _main.applied_pathes, [](){ return Emu.IsStopped(); }))
|
||||
if (!_main.analyse(0, _main.elf_entry, _main.seg0_code_end, _main.applied_patches, [](){ return Emu.IsStopped(); }))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user