From 0eb6bf6a678cee1fe7fd4eeba98bc1ffc05d7b2d Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sun, 26 Feb 2017 18:56:31 +0300 Subject: [PATCH] LLVM: splitting and caching --- Utilities/JIT.cpp | 127 ++++++++++----- Utilities/JIT.h | 23 ++- rpcs3/Emu/Cell/PPUAnalyser.cpp | 6 +- rpcs3/Emu/Cell/PPUAnalyser.h | 7 + rpcs3/Emu/Cell/PPUModule.cpp | 52 +++--- rpcs3/Emu/Cell/PPUModule.h | 6 +- rpcs3/Emu/Cell/PPUThread.cpp | 261 +++++++++++++++++++++++-------- rpcs3/Emu/Cell/PPUTranslator.cpp | 14 +- rpcs3/Emu/Cell/lv2/sys_prx.cpp | 7 +- rpcs3/Emu/Cell/lv2/sys_prx.h | 3 +- rpcs3/Emu/System.cpp | 4 +- 11 files changed, 362 insertions(+), 148 deletions(-) diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index 994dae2a3c..8b2f95efde 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -68,16 +68,16 @@ static u8* s_unwind_info; static u64 s_unwind_size; #ifdef _WIN32 -static std::vector s_unwind; // Custom .pdata section replacement +static std::vector> s_unwind; // .pdata #endif // Helper class struct MemoryManager final : llvm::RTDyldMemoryManager { - std::unordered_map table; + std::unordered_map& m_link; - MemoryManager(std::unordered_map&& table) - : table(std::move(table)) + MemoryManager(std::unordered_map& table) + : m_link(table) { } @@ -95,9 +95,9 @@ struct MemoryManager final : llvm::RTDyldMemoryManager return addr; } - const auto found = table.find(name); + const auto found = m_link.find(name); - if (found != table.end()) + if (found != m_link.end()) { return found->second; } @@ -131,7 +131,7 @@ struct MemoryManager final : llvm::RTDyldMemoryManager s_code_addr = (u8*)m_next; s_code_size = size; - LOG_SUCCESS(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), m_next, size, align); + LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), m_next, size, align); return (u8*)std::exchange(m_next, (void*)next); } @@ -161,21 +161,21 @@ struct MemoryManager final : llvm::RTDyldMemoryManager return nullptr; } - LOG_SUCCESS(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), m_next, size, align, is_ro ? "ro" : "rw"); + LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), m_next, size, align, is_ro ? "ro" : "rw"); return (u8*)std::exchange(m_next, (void*)next); } virtual bool finalizeMemory(std::string* = nullptr) override { // TODO: make only read-only sections read-only -#ifdef _WIN32 - DWORD op; - VirtualProtect(s_memory, (u64)m_next - (u64)s_memory, PAGE_READONLY, &op); - VirtualProtect(s_code_addr, s_code_size, PAGE_EXECUTE_READ, &op); -#else - ::mprotect(s_memory, (u64)m_next - (u64)s_memory, PROT_READ); - ::mprotect(s_code_addr, s_code_size, PROT_READ | PROT_EXEC); -#endif +//#ifdef _WIN32 +// DWORD op; +// VirtualProtect(s_memory, (u64)m_next - (u64)s_memory, PAGE_READONLY, &op); +// VirtualProtect(s_code_addr, s_code_size, PAGE_EXECUTE_READ, &op); +//#else +// ::mprotect(s_memory, (u64)m_next - (u64)s_memory, PROT_READ); +// ::mprotect(s_code_addr, s_code_size, PROT_READ | PROT_EXEC); +//#endif return false; } @@ -197,11 +197,16 @@ struct MemoryManager final : llvm::RTDyldMemoryManager ~MemoryManager() { #ifdef _WIN32 - if (!RtlDeleteFunctionTable(s_unwind.data())) + for (auto&& unwind : s_unwind) { - LOG_FATAL(GENERAL, "RtlDeleteFunctionTable(%p) failed! Error %u", s_unwind_info, GetLastError()); + if (!RtlDeleteFunctionTable(unwind.data())) + { + LOG_FATAL(GENERAL, "RtlDeleteFunctionTable() failed! Error %u", GetLastError()); + } } + s_unwind.clear(); + if (!VirtualFree(s_memory, 0, MEM_DECOMMIT)) { LOG_FATAL(GENERAL, "VirtualFree(%p) failed! Error %u", s_memory, GetLastError()); @@ -223,36 +228,44 @@ private: // Helper class struct EventListener final : llvm::JITEventListener { + std::string path; + virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override { - const llvm::StringRef elf = obj.getData(); - fs::file(fs::get_config_dir() + "LLVM.obj", fs::rewrite) - .write(elf.data(), elf.size()); + if (!path.empty()) + { + const llvm::StringRef elf = obj.getData(); + fs::file(path, fs::rewrite).write(elf.data(), elf.size()); + } } }; static EventListener s_listener; -jit_compiler::jit_compiler(std::unique_ptr&& _module, std::unordered_map&& table) +jit_compiler::jit_compiler(std::unordered_map init_linkage_info) + : m_link(std::move(init_linkage_info)) { verify(HERE), s_memory; - std::string result; - - const auto module_ptr = _module.get(); - // Initialization llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); LLVMLinkInMCJIT(); - const auto _cpu = llvm::sys::getHostCPUName(); + m_cpu = llvm::sys::getHostCPUName(); - m_engine.reset(llvm::EngineBuilder(std::move(_module)) + if (m_cpu == "skylake") + { + m_cpu = "haswell"; + } + + std::string result; + + m_engine.reset(llvm::EngineBuilder(std::make_unique("", g_llvm_ctx)) .setErrorStr(&result) - .setMCJITMemoryManager(std::make_unique(std::move(table))) + .setMCJITMemoryManager(std::make_unique(m_link)) .setOptLevel(llvm::CodeGenOpt::Aggressive) .setCodeModel((u64)s_memory <= 0x60000000 ? llvm::CodeModel::Small : llvm::CodeModel::Large) // TODO - .setMCPU(_cpu == "skylake" ? "haswell" : _cpu) + .setMCPU(m_cpu) .create()); if (!m_engine) @@ -262,8 +275,45 @@ jit_compiler::jit_compiler(std::unique_ptr&& _module, std::unorder m_engine->setProcessAllSections(true); // ??? m_engine->RegisterJITEventListener(&s_listener); +} + +void jit_compiler::load(std::unique_ptr module, std::unique_ptr object) +{ + s_listener.path.clear(); + + auto* module_ptr = module.get(); + + m_engine->addModule(std::move(module)); + m_engine->addObjectFile(std::move(object)); m_engine->finalizeObject(); + m_map.clear(); + + for (auto& func : module_ptr->functions()) + { + const std::string& name = func.getName(); + + if (!m_link.count(name)) + { + // Register compiled function + m_map[name] = m_engine->getFunctionAddress(name); + } + } + + init(); +} + +void jit_compiler::make(std::unique_ptr module, std::string path) +{ + s_listener.path = std::move(path); + + auto* module_ptr = module.get(); + + m_engine->addModule(std::move(module)); + m_engine->finalizeObject(); + + m_map.clear(); + for (auto& func : module_ptr->functions()) { if (!func.empty()) @@ -278,6 +328,11 @@ jit_compiler::jit_compiler(std::unique_ptr&& _module, std::unorder func.deleteBody(); } + init(); +} + +void jit_compiler::init() +{ #ifdef _WIN32 // Register .xdata UNWIND_INFO (.pdata section is empty for some reason) std::set func_set; @@ -290,8 +345,8 @@ jit_compiler::jit_compiler(std::unique_ptr&& _module, std::unorder const u64 base = (u64)s_memory; const u8* bits = s_unwind_info; - s_unwind.clear(); - s_unwind.reserve(m_map.size()); + std::vector unwind; + unwind.reserve(m_map.size()); for (const u64 addr : func_set) { @@ -304,7 +359,7 @@ jit_compiler::jit_compiler(std::unique_ptr&& _module, std::unorder uw.BeginAddress = static_cast(addr - base); uw.EndAddress = static_cast(next - base); uw.UnwindData = static_cast((u64)bits - base); - s_unwind.emplace_back(uw); + unwind.emplace_back(uw); // Parse .xdata UNWIND_INFO record const u8 flags = *bits++; // Version and flags @@ -327,14 +382,16 @@ jit_compiler::jit_compiler(std::unique_ptr&& _module, std::unorder { LOG_ERROR(GENERAL, "LLVM: .xdata analysis failed! (%p != %p)", s_unwind_info + s_unwind_size, bits); } - else if (!RtlAddFunctionTable(s_unwind.data(), (DWORD)s_unwind.size(), base)) + else if (!RtlAddFunctionTable(unwind.data(), (DWORD)unwind.size(), base)) { LOG_ERROR(GENERAL, "RtlAddFunctionTable(%p) failed! Error %u", s_unwind_info, GetLastError()); } else { - LOG_SUCCESS(GENERAL, "LLVM: UNWIND_INFO registered (%p, size=0x%llx)", s_unwind_info, s_unwind_size); + LOG_NOTICE(GENERAL, "LLVM: UNWIND_INFO registered (%p, size=0x%llx)", s_unwind_info, s_unwind_size); } + + s_unwind.emplace_back(std::move(unwind)); #endif } diff --git a/Utilities/JIT.h b/Utilities/JIT.h index d24a7f0ed0..f9e7c2f499 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -31,10 +31,25 @@ class jit_compiler final // Compiled functions std::unordered_map m_map; + // Linkage cache + std::unordered_map m_link; + + // Arch + std::string m_cpu; + + // Internal + void init(); + public: - jit_compiler(std::unique_ptr&&, std::unordered_map&&); + jit_compiler(std::unordered_map); ~jit_compiler(); + // Compile module + void make(std::unique_ptr, std::string); + + // Load object + void load(std::unique_ptr, std::unique_ptr); + // Get compiled function address std::uintptr_t get(const std::string& name) const { @@ -47,6 +62,12 @@ public: return 0; } + + // Get CPU info + const std::string& cpu() const + { + return m_cpu; + } }; #endif diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp index 5f8e9c2abb..659e920b3f 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.cpp +++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp @@ -1136,9 +1136,11 @@ std::vector ppu_analyse(const std::vector>& se // Convert map to vector (destructive) std::vector result; - for (auto&& func : funcs) + for (auto&& pair : funcs) { - result.emplace_back(std::move(func.second)); + auto& func = pair.second; + LOG_TRACE(PPU, "Function __0x%x (size=0x%x, toc=0x%x, attr %#x)", func.addr, func.size, func.toc, func.attr); + result.emplace_back(std::move(func)); } LOG_NOTICE(PPU, "Function analysis: %zu functions (%zu enqueued)", result.size(), func_queue.size()); diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index 2a3516c33b..c5fb3a3bee 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -35,6 +35,13 @@ struct ppu_function std::set called_from; // Set of called functions }; +// PPU Module Information +struct ppu_module +{ + std::string name; + std::vector funcs; +}; + // Aux struct ppu_pattern { diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index 7ed9ddc6fa..c3fc2702c5 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -117,6 +117,8 @@ cfg::set_entry g_cfg_load_libs(cfg::root.core, "Load libraries"); extern std::string ppu_get_function_name(const std::string& module, u32 fnid); extern std::string ppu_get_variable_name(const std::string& module, u32 vnid); extern void ppu_register_range(u32 addr, u32 size); +extern void ppu_initialize(const ppu_module& info); +extern void ppu_initialize(); extern void sys_initialize_tls(ppu_thread&, u64, u32, u32, u32); @@ -735,7 +737,7 @@ static void ppu_load_imports(const std::shared_ptr& link, u32 } } -std::shared_ptr ppu_load_prx(const ppu_prx_object& elf) +std::shared_ptr ppu_load_prx(const ppu_prx_object& elf, const std::string& name) { std::vector> segments; std::vector> sections; @@ -931,7 +933,7 @@ std::shared_ptr ppu_load_prx(const ppu_prx_object& elf) prx->start.set(prx->specials[0xbc9a0086]); prx->stop.set(prx->specials[0xab779874]); prx->exit.set(prx->specials[0x3ab9a95e]); - + prx->name = name; return prx; } @@ -953,9 +955,6 @@ void ppu_load_exec(const ppu_exec_object& elf) // Section info (optional) std::vector> sections; - // Functions - std::vector exec_set; - // TLS information u32 tls_vaddr = 0; u32 tls_fsize = 0; @@ -1114,7 +1113,7 @@ void ppu_load_exec(const ppu_exec_object& elf) } // Initialize process - std::vector start_funcs; + std::vector> loaded_modules; // Load modules const std::string& lle_dir = vfs::get("/dev_flash/sys/external"); @@ -1125,7 +1124,7 @@ void ppu_load_exec(const ppu_exec_object& elf) if (obj == elf_error::ok) { - start_funcs.push_back(ppu_load_prx(obj)->start.addr()); + loaded_modules.push_back(ppu_load_prx(obj, "liblv2.sprx")); } else { @@ -1142,16 +1141,7 @@ void ppu_load_exec(const ppu_exec_object& elf) { LOG_WARNING(LOADER, "Loading library: %s", name); - const auto prx = ppu_load_prx(obj); - - // Register start function - if (prx->start) - { - start_funcs.push_back(prx->start.addr()); - } - - // Add functions - exec_set.insert(exec_set.end(), prx->funcs.begin(), prx->funcs.end()); + auto prx = ppu_load_prx(obj, name); if (prx->funcs.empty()) { @@ -1162,6 +1152,8 @@ void ppu_load_exec(const ppu_exec_object& elf) // TODO: fix arguments ppu_validate(lle_dir + '/' + name, prx->funcs, prx->funcs[0].addr); } + + loaded_modules.emplace_back(std::move(prx)); } else { @@ -1292,18 +1284,15 @@ void ppu_load_exec(const ppu_exec_object& elf) } } - // Analyse executable - std::vector main_funcs = ppu_analyse(segments, sections, 0); + { + // Analyse executable + std::vector main_funcs = ppu_analyse(segments, sections, 0); - ppu_validate(vfs::get(Emu.GetPath()), main_funcs, 0); + ppu_validate(vfs::get(Emu.GetPath()), main_funcs, 0); - // Append - exec_set.insert(exec_set.cend(), - std::make_move_iterator(main_funcs.begin()), - std::make_move_iterator(main_funcs.end())); - - // Share function list - fxm::make>(std::move(exec_set)); + // Share function list + fxm::make>(std::move(main_funcs)); + } // Set SDK version g_ps3_sdk_version = sdk_version; @@ -1344,13 +1333,18 @@ void ppu_load_exec(const ppu_exec_object& elf) } // Run start functions - for (u32 func : start_funcs) + for (const auto& prx : loaded_modules) { + if (!prx->start) + { + continue; + } + // Reset arguments, run module entry point function ppu->cmd_list ({ { ppu_cmd::set_args, 2 }, u64{0}, u64{0}, - { ppu_cmd::lle_call, func }, + { ppu_cmd::lle_call, prx->start.addr() }, }); } diff --git a/rpcs3/Emu/Cell/PPUModule.h b/rpcs3/Emu/Cell/PPUModule.h index 3cf893ef4b..527e460039 100644 --- a/rpcs3/Emu/Cell/PPUModule.h +++ b/rpcs3/Emu/Cell/PPUModule.h @@ -4,6 +4,8 @@ #include "PPUCallback.h" #include "ErrorCodes.h" +#include + // Generate FNID or VNID for given name extern u32 ppu_generate_id(const char* name); @@ -41,8 +43,8 @@ public: task_stack on_load; task_stack on_unload; - std::unordered_map functions; - std::unordered_map variables; + std::map functions; + std::map variables; public: ppu_static_module(const char* name); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 6eec05e5ca..583e578f21 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1,6 +1,7 @@ #include "stdafx.h" #include "Utilities/Config.h" #include "Utilities/VirtualMemory.h" +#include "Crypto/sha1.h" #include "Emu/Memory/Memory.h" #include "Emu/System.h" #include "Emu/IdManager.h" @@ -9,6 +10,7 @@ #include "PPUAnalyser.h" #include "PPUModule.h" #include "lv2/sys_sync.h" +#include "lv2/sys_prx.h" #ifdef LLVM_AVAILABLE #include "restore_new.h" @@ -93,7 +95,8 @@ cfg::map_entry g_cfg_ppu_decoder(cfg::root.core, "PPU Decoder" const ppu_decoder s_ppu_interpreter_precise; const ppu_decoder s_ppu_interpreter_fast; -static void ppu_initialize(); +extern void ppu_initialize(); +extern void ppu_initialize(const ppu_module& info); extern void ppu_execute_syscall(ppu_thread& ppu, u64 code); extern void ppu_execute_function(ppu_thread& ppu, u32 index); @@ -679,27 +682,27 @@ static void ppu_trace(u64 addr) LOG_NOTICE(PPU, "Trace: 0x%llx", addr); } -static u32 ppu_lwarx(u32 addr) +extern u32 ppu_lwarx(ppu_thread& ppu, u32 addr) { be_t reg_value; vm::reservation_acquire(®_value, addr, sizeof(reg_value)); return reg_value; } -static u64 ppu_ldarx(u32 addr) +extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr) { be_t reg_value; vm::reservation_acquire(®_value, addr, sizeof(reg_value)); return reg_value; } -static bool ppu_stwcx(u32 addr, u32 reg_value) +extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value) { const be_t data = reg_value; return vm::reservation_update(addr, &data, sizeof(data)); } -static bool ppu_stdcx(u32 addr, u64 reg_value) +extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value) { const be_t data = reg_value; return vm::reservation_update(addr, &data, sizeof(data)); @@ -716,9 +719,14 @@ static bool adde_carry(u64 a, u64 b, bool c) #endif } -static void ppu_initialize() +extern void ppu_initialize() { - const auto _funcs = fxm::get_always>(); + const auto _funcs = fxm::withdraw>(); + + if (!_funcs) + { + return; + } if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm || _funcs->empty()) { @@ -739,38 +747,145 @@ static void ppu_initialize() return; } - std::unordered_map link_table + std::size_t fpos = 0; + + while (fpos < _funcs->size()) { - { "__mptr", (u64)&vm::g_base_addr }, - { "__cptr", (u64)&s_ppu_compiled }, - { "__trap", (u64)&ppu_trap }, - { "__end", (u64)&ppu_unreachable }, - { "__check", (u64)&ppu_check }, - { "__trace", (u64)&ppu_trace }, - { "__hlecall", (u64)&ppu_execute_function }, - { "__syscall", (u64)&ppu_execute_syscall }, - { "__get_tb", (u64)&get_timebased_time }, - { "__lwarx", (u64)&ppu_lwarx }, - { "__ldarx", (u64)&ppu_ldarx }, - { "__stwcx", (u64)&ppu_stwcx }, - { "__stdcx", (u64)&ppu_stdcx }, - { "__adde_get_ca", (u64)&adde_carry }, - { "__vexptefp", (u64)&sse_exp2_ps }, - { "__vlogefp", (u64)&sse_log2_ps }, - { "__vperm", (u64)&sse_altivec_vperm }, - { "__lvsl", (u64)&sse_altivec_lvsl }, - { "__lvsr", (u64)&sse_altivec_lvsr }, - { "__lvlx", (u64)&sse_cellbe_lvlx }, - { "__lvrx", (u64)&sse_cellbe_lvrx }, - { "__stvlx", (u64)&sse_cellbe_stvlx }, - { "__stvrx", (u64)&sse_cellbe_stvrx }, - }; + // Split module (TODO) + ppu_module info; + info.name = fmt::format("%05X", _funcs->at(fpos).addr); + info.funcs.reserve(2000); + + while (fpos < _funcs->size() && info.funcs.size() < 2000) + { + info.funcs.emplace_back(std::move(_funcs->at(fpos++))); + } + + if (!Emu.IsStopped()) + { + ppu_initialize(info); + } + } + + idm::select([](u32, lv2_prx& prx) + { + if (!Emu.IsStopped()) + { + ppu_initialize(prx); + } + }); +} + +extern void ppu_initialize(const ppu_module& info) +{ + if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm) + { + for (const auto& func : info.funcs) + { + ppu_register_function_at(func.addr, func.size, nullptr); + } + + return; + } + + // Compute module hash + std::string obj_name; + { + sha1_context ctx; + u8 output[20]; + sha1_starts(&ctx); + + for (const auto& func : info.funcs) + { + if (func.size == 0) + { + continue; + } + + const be_t addr = func.addr; + const be_t size = func.size; + sha1_update(&ctx, reinterpret_cast(&addr), sizeof(addr)); + sha1_update(&ctx, reinterpret_cast(&size), sizeof(size)); + + for (const auto& block : func.blocks) + { + if (block.second == 0) + { + continue; + } + + sha1_update(&ctx, vm::ps3::_ptr(block.first), block.second); + } + } + + sha1_finish(&ctx, output); + + // Version, module name and hash: vX-liblv2.sprx-0123456789ABCDEF.obj + fmt::append(obj_name, "v0-%s-%016X.obj", info.name, reinterpret_cast&>(output)); + } #ifdef LLVM_AVAILABLE using namespace llvm; + if (!fxm::check()) + { + std::unordered_map link_table + { + { "__mptr", (u64)&vm::g_base_addr }, + { "__cptr", (u64)&s_ppu_compiled }, + { "__trap", (u64)&ppu_trap }, + { "__end", (u64)&ppu_unreachable }, + { "__check", (u64)&ppu_check }, + { "__trace", (u64)&ppu_trace }, + { "__hlecall", (u64)&ppu_execute_function }, + { "__syscall", (u64)&ppu_execute_syscall }, + { "__get_tb", (u64)&get_timebased_time }, + { "__lwarx", (u64)&ppu_lwarx }, + { "__ldarx", (u64)&ppu_ldarx }, + { "__stwcx", (u64)&ppu_stwcx }, + { "__stdcx", (u64)&ppu_stdcx }, + { "__adde_get_ca", (u64)&adde_carry }, + { "__vexptefp", (u64)&sse_exp2_ps }, + { "__vlogefp", (u64)&sse_log2_ps }, + { "__vperm", (u64)&sse_altivec_vperm }, + { "__lvsl", (u64)&sse_altivec_lvsl }, + { "__lvsr", (u64)&sse_altivec_lvsr }, + { "__lvlx", (u64)&sse_cellbe_lvlx }, + { "__lvrx", (u64)&sse_cellbe_lvrx }, + { "__stvlx", (u64)&sse_cellbe_stvlx }, + { "__stvrx", (u64)&sse_cellbe_stvrx }, + }; + + for (u64 index = 0; index < 1024; index++) + { + if (auto sc = ppu_get_syscall(index)) + { + link_table.emplace(ppu_get_syscall_name(index), (u64)sc); + } + } + + for (u64 index = 1; ; index++) + { + if (auto func = ppu_get_function(index)) + { + link_table.emplace(ppu_get_module_function_name(index), (u64)func); + } + else + { + break; + } + } + + const auto jit = fxm::make(std::move(link_table)); + + LOG_SUCCESS(PPU, "LLVM: JIT initialized (%s)", jit->cpu()); + } + + // Initialize compiler + const auto jit = fxm::get(); + // Create LLVM module - std::unique_ptr module = std::make_unique("", g_llvm_ctx); + std::unique_ptr module = std::make_unique(obj_name, g_llvm_ctx); // Initialize target module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); @@ -783,16 +898,44 @@ static void ppu_initialize() const auto _func = FunctionType::get(_void, { translator->GetContextType()->getPointerTo() }, false); // Initialize function list - for (const auto& info : *_funcs) + for (const auto& func : info.funcs) { - if (info.size) + if (func.size) { - const auto f = cast(module->getOrInsertFunction(fmt::format("__0x%x", info.addr), _func)); + const auto f = cast(module->getOrInsertFunction(fmt::format("__0x%x", func.addr), _func)); f->addAttribute(1, Attribute::NoAlias); - translator->AddFunction(info.addr, f); + translator->AddFunction(func.addr, f); } } + if (fs::file cached{Emu.GetCachePath() + obj_name}) + { + std::string buf; + buf.reserve(cached.size()); + cached.read(buf, cached.size()); + auto buffer = llvm::MemoryBuffer::getMemBuffer(buf, obj_name); + auto result = llvm::object::ObjectFile::createObjectFile(*buffer); + + if (result) + { + jit->load(std::move(module), std::move(result.get())); + + for (const auto& func : info.funcs) + { + if (func.size) + { + const std::uintptr_t link = jit->get(fmt::format("__0x%x", func.addr)); + s_ppu_compiled[func.addr / 4] = ::narrow(link); + } + } + + LOG_SUCCESS(PPU, "LLVM: Loaded executable: %s", obj_name); + return; + } + + LOG_ERROR(PPU, "LLVM: Failed to load executable: %s", obj_name); + } + legacy::FunctionPassManager pm(module.get()); // Basic optimizations @@ -831,11 +974,11 @@ static void ppu_initialize() Emu.CallAfter([=]() { - dlg->Create("Recompiling PPU executable.\nPlease wait..."); + dlg->Create("Compiling PPU executable: " + info.name + "\nPlease wait..."); }); // Translate functions - for (size_t fi = 0; fi < _funcs->size(); fi++) + for (size_t fi = 0, fmax = info.funcs.size(); fi < fmax; fi++) { if (Emu.IsStopped()) { @@ -843,21 +986,19 @@ static void ppu_initialize() return; } - auto& info = _funcs->at(fi); - - if (info.size) + if (info.funcs[fi].size) { - // Update dialog - Emu.CallAfter([=, max = _funcs->size()]() + // Update dialog + Emu.CallAfter([=, max = info.funcs.size()]() { - dlg->ProgressBarSetMsg(0, fmt::format("Compiling %u of %u", fi + 1, max)); + dlg->ProgressBarSetMsg(0, fmt::format("Compiling %u of %u", fi + 1, fmax)); - if (fi * 100 / max != (fi + 1) * 100 / max) + if (fi * 100 / fmax != (fi + 1) * 100 / fmax) dlg->ProgressBarInc(0, 1); }); // Translate - const auto func = translator->TranslateToIR(info, vm::_ptr(info.addr)); + const auto func = translator->TranslateToIR(info.funcs[fi], vm::_ptr(info.funcs[fi].addr)); // Run optimization passes pm.run(*func); @@ -883,7 +1024,6 @@ static void ppu_initialize() { const auto n = ppu_get_syscall_name(index); const auto f = cast(module->getOrInsertFunction(n, _func)); - link_table.emplace(n, reinterpret_cast(ptr)); // Call the syscall directly ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)})); @@ -898,7 +1038,6 @@ static void ppu_initialize() { const auto n = ppu_get_module_function_name(index); const auto f = cast(module->getOrInsertFunction(n, _func)); - link_table.emplace(n, reinterpret_cast(ptr)); // Call the function directly ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)})); @@ -966,30 +1105,20 @@ static void ppu_initialize() return; } - LOG_SUCCESS(PPU, "LLVM: %zu functions generated", module->getFunctionList().size()); + LOG_NOTICE(PPU, "LLVM: %zu functions generated", module->getFunctionList().size()); - Module* module_ptr = module.get(); - - const auto jit = fxm::make(std::move(module), std::move(link_table)); - - if (!jit) - { - LOG_FATAL(PPU, "LLVM: Multiple modules are not yet supported"); - return; - } + jit->make(std::move(module), Emu.GetCachePath() + obj_name); // Get and install function addresses - for (const auto& info : *_funcs) + for (const auto& func : info.funcs) { - if (info.size) + if (func.size) { - const std::uintptr_t link = jit->get(fmt::format("__0x%x", info.addr)); - s_ppu_compiled[info.addr / 4] = ::narrow(link); - - LOG_TRACE(PPU, "** Function __0x%x -> 0x%llx (size=0x%x, toc=0x%x, attr %#x)", info.addr, link, info.size, info.toc, info.attr); + const std::uintptr_t link = jit->get(fmt::format("__0x%x", func.addr)); + s_ppu_compiled[func.addr / 4] = ::narrow(link); } } - LOG_SUCCESS(PPU, "LLVM: Compilation finished (%s)", sys::getHostCPUName().data()); + LOG_SUCCESS(PPU, "LLVM: Created executable: %s", obj_name); #endif } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index ae5bec9670..9447c8d568 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -245,12 +245,12 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t* bin, // Bloat the beginning of each block: check state const auto vstate = m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1)); - const auto vblock = BasicBlock::Create(m_context, fmt::format("l0c_%llx", m_current_addr), m_function); - const auto vcheck = BasicBlock::Create(m_context, fmt::format("lcc_%llx", m_current_addr), m_function); + const auto vblock = BasicBlock::Create(m_context, fmt::format("l0c_%llx", block.first), m_function); + const auto vcheck = BasicBlock::Create(m_context, fmt::format("lcc_%llx", block.first), m_function); m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), vblock, vcheck, m_md_unlikely); m_ir->SetInsertPoint(vcheck); - Call(GetType(), "__check", m_thread, m_ir->getInt64(m_current_addr)); + Call(GetType(), "__check", m_thread, m_ir->getInt64(block.first)); m_ir->CreateBr(vblock); m_ir->SetInsertPoint(vblock); @@ -2355,7 +2355,7 @@ void PPUTranslator::MFOCRF(ppu_opcode_t op) void PPUTranslator::LWARX(ppu_opcode_t op) { - SetGpr(op.rd, Call(GetType(), "__lwarx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); + SetGpr(op.rd, Call(GetType(), "__lwarx", m_thread, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); } void PPUTranslator::LDX(ppu_opcode_t op) @@ -2491,7 +2491,7 @@ void PPUTranslator::MULHW(ppu_opcode_t op) void PPUTranslator::LDARX(ppu_opcode_t op) { - SetGpr(op.rd, Call(GetType(), "__ldarx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); + SetGpr(op.rd, Call(GetType(), "__ldarx", m_thread, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); } void PPUTranslator::DCBF(ppu_opcode_t op) @@ -2601,7 +2601,7 @@ void PPUTranslator::STDX(ppu_opcode_t op) void PPUTranslator::STWCX(ppu_opcode_t op) { - const auto bit = Call(GetType(), "__stwcx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32)); + const auto bit = Call(GetType(), "__stwcx", m_thread, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32)); SetCrField(0, m_ir->getFalse(), m_ir->getFalse(), bit); } @@ -2662,7 +2662,7 @@ void PPUTranslator::SUBFZE(ppu_opcode_t op) void PPUTranslator::STDCX(ppu_opcode_t op) { - const auto bit = Call(GetType(), "__stdcx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs)); + const auto bit = Call(GetType(), "__stdcx", m_thread, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs)); SetCrField(0, m_ir->getFalse(), m_ir->getFalse(), bit); } diff --git a/rpcs3/Emu/Cell/lv2/sys_prx.cpp b/rpcs3/Emu/Cell/lv2/sys_prx.cpp index 0b110c788b..83698df989 100644 --- a/rpcs3/Emu/Cell/lv2/sys_prx.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_prx.cpp @@ -10,7 +10,8 @@ namespace vm { using namespace ps3; } -extern std::shared_ptr ppu_load_prx(const ppu_prx_object&); +extern std::shared_ptr ppu_load_prx(const ppu_prx_object&, const std::string&); +extern void ppu_initialize(const ppu_module&); logs::channel sys_prx("sys_prx", logs::level::notice); @@ -25,13 +26,15 @@ s32 prx_load_module(std::string path, u64 flags, vm::ptr idlist; }; -struct lv2_prx final : lv2_obj +struct lv2_prx final : lv2_obj, ppu_module { static const u32 id_base = 0x23000000; bool is_started = false; std::unordered_map specials; - std::vector funcs; vm::ps3::ptr argv)> start = vm::null; vm::ps3::ptr argv)> stop = vm::null; diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 9210284a26..739aab8981 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -49,7 +49,7 @@ extern u64 get_system_time(); extern void ppu_load_exec(const ppu_exec_object&); extern void spu_load_exec(const spu_exec_object&); extern void arm_load_exec(const arm_exec_object&); -extern std::shared_ptr ppu_load_prx(const ppu_prx_object&); +extern std::shared_ptr ppu_load_prx(const ppu_prx_object&, const std::string&); extern void ppu_finalize(); fs::file g_tty; @@ -321,7 +321,7 @@ void Emulator::Load() g_system = system_type::ps3; m_status = Ready; vm::ps3::init(); - ppu_load_prx(ppu_prx); + ppu_load_prx(ppu_prx, ""); } else if (spu_exec.open(elf_file) == elf_error::ok) {