diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index 3422cf529d..bb7606ecf3 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -1,10 +1,190 @@ +#include "types.h" #include "JIT.h" +#include "StrFmt.h" +#include "File.h" +#include "Log.h" +#include "mutex.h" +#include "sysinfo.h" +#include "VirtualMemory.h" #include -asmjit::JitRuntime& asmjit::get_global_runtime() +// Memory manager mutex +shared_mutex s_mutex2; + +#ifdef __linux__ +#define CAN_OVERCOMMIT +#endif + +static u8* get_jit_memory() +{ + // Reserve 2G memory (magic static) + static void* const s_memory2 = []() -> void* + { + void* ptr = utils::memory_reserve(0x80000000); + +#ifdef CAN_OVERCOMMIT + utils::memory_commit(ptr, 0x80000000); + utils::memory_protect(ptr, 0x40000000, utils::protection::wx); +#endif + return ptr; + }(); + + return static_cast(s_memory2); +} + +// Allocation counters (1G code, 1G data subranges) +static atomic_t s_code_pos{0}, s_data_pos{0}; + +// Snapshot of code generated before main() +static std::vector s_code_init, s_data_init; + +template & Ctr, uint Off, utils::protection Prot> +static u8* add_jit_memory(std::size_t size, uint align) +{ + // Select subrange + u8* pointer = get_jit_memory() + Off; + + if (UNLIKELY(!size && !align)) + { + // Return subrange info + return pointer; + } + +#ifndef CAN_OVERCOMMIT + std::lock_guard lock(s_mutex2); +#endif + + u64 olda, newa; + + // Simple allocation by incrementing pointer to the next free data + const u64 pos = Ctr.atomic_op([&](u64& ctr) -> u64 + { + const u64 _pos = ::align(ctr, align); + const u64 _new = ::align(_pos + size, align); + + if (UNLIKELY(_new > 0x40000000)) + { + return -1; + } + + // Check the necessity to commit more memory + olda = ::align(ctr, 0x10000); + newa = ::align(_new, 0x10000); + + ctr = _new; + return _pos; + }); + + if (UNLIKELY(pos == -1)) + { + LOG_FATAL(GENERAL, "JIT: Out of memory (size=0x%x, align=0x%x, off=0x%x)", size, align, Off); + return nullptr; + } + + if (UNLIKELY(olda != newa)) + { +#ifdef CAN_OVERCOMMIT + // TODO: possibly madvise +#else + // Commit more memory + utils::memory_commit(pointer + olda, newa - olda, Prot); +#endif + } + + return pointer + pos; +} + +jit_runtime::jit_runtime() + : HostRuntime() +{ +} + +jit_runtime::~jit_runtime() +{ +} + +asmjit::Error jit_runtime::_add(void** dst, asmjit::CodeHolder* code) noexcept +{ + std::size_t codeSize = code->getCodeSize(); + if (UNLIKELY(!codeSize)) + { + *dst = nullptr; + return asmjit::kErrorNoCodeGenerated; + } + + void* p = jit_runtime::alloc(codeSize, 16); + if (UNLIKELY(!p)) + { + *dst = nullptr; + return asmjit::kErrorNoVirtualMemory; + } + + std::size_t relocSize = code->relocate(p); + if (UNLIKELY(!relocSize)) + { + *dst = nullptr; + return asmjit::kErrorInvalidState; + } + + flush(p, relocSize); + *dst = p; + + return asmjit::kErrorOk; +} + +asmjit::Error jit_runtime::_release(void* ptr) noexcept +{ + return asmjit::kErrorOk; +} + +u8* jit_runtime::alloc(std::size_t size, uint align, bool exec) noexcept +{ + if (exec) + { + return add_jit_memory(size, align); + } + else + { + return add_jit_memory(size, align); + } +} + +void jit_runtime::initialize() +{ + if (!s_code_init.empty() || !s_data_init.empty()) + { + return; + } + + // Create code/data snapshot + s_code_init.resize(s_code_pos); + std::memcpy(s_code_init.data(), alloc(0, 0, true), s_code_pos); + s_data_init.resize(s_data_pos); + std::memcpy(s_data_init.data(), alloc(0, 0, false), s_data_pos); +} + +void jit_runtime::finalize() noexcept +{ + // Reset JIT memory +#ifdef CAN_OVERCOMMIT + utils::memory_reset(get_jit_memory(), 0x80000000); + utils::memory_protect(get_jit_memory(), 0x40000000, utils::protection::wx); +#else + utils::memory_decommit(get_jit_memory(), 0x80000000); +#endif + + s_code_pos = 0; + s_data_pos = 0; + + // Restore code/data snapshot + std::memcpy(alloc(s_code_init.size(), 1, true), s_code_init.data(), s_code_init.size()); + std::memcpy(alloc(s_data_init.size(), 1, false), s_data_init.data(), s_data_init.size()); +} + +::jit_runtime& asmjit::get_global_runtime() { // Magic static - static asmjit::JitRuntime g_rt; + static ::jit_runtime g_rt; return g_rt; } @@ -38,14 +218,6 @@ void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code #include #include -#include "types.h" -#include "StrFmt.h" -#include "File.h" -#include "Log.h" -#include "mutex.h" -#include "sysinfo.h" -#include "VirtualMemory.h" - #ifdef _MSC_VER #pragma warning(push, 0) #endif @@ -95,12 +267,6 @@ static void* const s_memory = []() -> void* return utils::memory_reserve(s_memory_size); }(); -// Reserve 2G of memory, should replace previous area for ASLR compatibility -static void* const s_memory2 = utils::memory_reserve(0x80000000); - -static u64 s_code_pos = 0; -static u64 s_data_pos = 0; - static void* s_next = s_memory; #ifdef _WIN32 @@ -135,11 +301,6 @@ extern void jit_finalize() utils::memory_decommit(s_memory, s_memory_size); s_next = s_memory; - - utils::memory_decommit(s_memory2, 0x80000000); - - s_code_pos = 0; - s_data_pos = 0; } // Helper class @@ -322,15 +483,6 @@ struct MemoryManager : llvm::RTDyldMemoryManager // Simple memory manager struct MemoryManager2 : llvm::RTDyldMemoryManager { - // Patchwork again... - void* const m_memory = s_memory2; - - u8* const m_code = static_cast(m_memory) + 0x00000000; - u8* const m_data = static_cast(m_memory) + 0x40000000; - - u64& m_code_pos = s_code_pos; - u64& m_data_pos = s_data_pos; - MemoryManager2() = default; ~MemoryManager2() override @@ -339,64 +491,12 @@ struct MemoryManager2 : llvm::RTDyldMemoryManager u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override { - std::lock_guard lock(s_mutex); - - // Simple allocation - const u64 old = m_code_pos; - const u64 pos = ::align(m_code_pos, align); - m_code_pos = ::align(pos + size, align); - - if (m_code_pos > 0x40000000) - { - LOG_FATAL(GENERAL, "LLVM: Out of code memory (size=0x%x, align=0x%x)", size, align); - return nullptr; - } - - const u64 olda = ::align(old, 0x10000); - const u64 newa = ::align(m_code_pos, 0x10000); - - if (olda != newa) - { - // Commit more memory - utils::memory_commit(m_code + olda, newa - olda, utils::protection::wx); - } - - if (!sec_id && sec_name.empty()) - { - // Special case: don't log - return m_code + pos; - } - - LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%x, align=0x%x)", sec_id, sec_name.data(), m_code + pos, size, align); - return m_code + pos; + return jit_runtime::alloc(size, align, true); } u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override { - std::lock_guard lock(s_mutex); - - // Simple allocation - const u64 old = m_data_pos; - const u64 pos = ::align(m_data_pos, align); - m_data_pos = ::align(pos + size, align); - - if (m_data_pos > 0x40000000) - { - LOG_FATAL(GENERAL, "LLVM: Out of data memory (size=0x%x, align=0x%x)", size, align); - return nullptr; - } - - const u64 olda = ::align(old, 0x10000); - const u64 newa = ::align(m_data_pos, 0x10000); - - if (olda != newa) - { - // Commit more memory - utils::memory_commit(m_data + olda, newa - olda); - } - - LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%x, align=0x%x, %s)", sec_id, sec_name.data(), m_data + pos, size, align, is_ro ? "ro" : "rw"); - return m_data + pos; + return jit_runtime::alloc(size, align, false); } bool finalizeMemory(std::string* = nullptr) override @@ -662,12 +762,4 @@ u64 jit_compiler::get(const std::string& name) return m_engine->getGlobalValueAddress(name); } -u8* jit_compiler::alloc(u32 size) -{ - // Dummy memory manager object - MemoryManager2 mm; - - return mm.allocateCodeSection(size, 16, 0, {}); -} - #endif diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 575016cdf5..8060c86a5c 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -7,10 +7,32 @@ #include #include +// ASMJIT runtime for emitting code in a single 2G region +struct jit_runtime final : asmjit::HostRuntime +{ + jit_runtime(); + ~jit_runtime() override; + + // Allocate executable memory + asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override; + + // Do nothing (deallocation is delayed) + asmjit::Error _release(void* p) noexcept override; + + // Allocate memory + static u8* alloc(std::size_t size, uint align, bool exec = true) noexcept; + + // Should be called at least once after global initialization + static void initialize(); + + // Deallocate all memory + static void finalize() noexcept; +}; + namespace asmjit { // Should only be used to build global functions - JitRuntime& get_global_runtime(); + ::jit_runtime& get_global_runtime(); // Emit xbegin and adjacent loop, return label at xbegin Label build_transaction_enter(X86Assembler& c, Label fallback); @@ -130,9 +152,6 @@ public: // Get compiled function address u64 get(const std::string& name); - // Allocate writable executable memory (alignment is assumed 16) - static u8* alloc(u32 size); - // Get CPU info static std::string cpu(const std::string& _cpu); diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 49f25cfb25..c8baea5c95 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -43,7 +43,6 @@ void spu_recompiler::init() { m_cache = fxm::get(); m_spurt = fxm::get_always(); - m_asmrt = m_spurt->get_asmjit_rt(); } } @@ -105,7 +104,7 @@ spu_function_t spu_recompiler::compile(std::vector&& func_rv) } CodeHolder code; - code.init(m_asmrt->getCodeInfo()); + code.init(m_asmrt.getCodeInfo()); code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign; X86Assembler compiler(&code); @@ -842,11 +841,13 @@ spu_function_t spu_recompiler::compile(std::vector&& func_rv) // Compile and get function address spu_function_t fn; - if (m_asmrt->add(&fn, &code)) + if (m_asmrt.add(&fn, &code)) { LOG_FATAL(SPU, "Failed to build a function"); } + m_spurt->add(*fn_info.first, fn); + if (g_cfg.core.spu_debug) { // Add ASMJIT logs @@ -863,239 +864,6 @@ spu_function_t spu_recompiler::compile(std::vector&& func_rv) m_cache->add(func); } - lock.lock(); - - // Register function (possibly temporarily) - fn_location = fn; - - // Generate a dispatcher (übertrampoline) - std::vector addrv{func[0]}; - const auto beg = m_spurt->m_map.lower_bound(addrv); - addrv[0] += 4; - const auto _end = m_spurt->m_map.lower_bound(addrv); - const u32 size0 = std::distance(beg, _end); - - if (size0 == 1) - { - m_spurt->m_dispatcher[func[0] / 4] = fn; - } - else - { - CodeHolder code; - code.init(m_asmrt->getCodeInfo()); - - X86Assembler compiler(&code); - this->c = &compiler; - - struct work - { - u32 size; - u32 level; - Label label; - std::map, spu_function_t>::iterator beg; - std::map, spu_function_t>::iterator end; - }; - - std::vector workload; - workload.reserve(size0); - workload.emplace_back(); - workload.back().size = size0; - workload.back().level = 1; - workload.back().beg = beg; - workload.back().end = _end; - - for (std::size_t i = 0; i < workload.size(); i++) - { - // Get copy of the workload info - work w = workload[i]; - - // Split range in two parts - auto it = w.beg; - auto it2 = w.beg; - u32 size1 = w.size / 2; - u32 size2 = w.size - size1; - std::advance(it2, w.size / 2); - - while (true) - { - it = it2; - size1 = w.size - size2; - - if (w.level >= w.beg->first.size()) - { - // Cannot split: smallest function is a prefix of bigger ones (TODO) - break; - } - - const u32 x1 = w.beg->first.at(w.level); - - if (!x1) - { - // Cannot split: some functions contain holes at this level - w.level++; - continue; - } - - // Adjust ranges (forward) - while (it != w.end && x1 == it->first.at(w.level)) - { - it++; - size1++; - } - - if (it == w.end) - { - // Cannot split: words are identical within the range at this level - w.level++; - } - else - { - size2 = w.size - size1; - break; - } - } - - if (w.label.isValid()) - { - c->align(kAlignCode, 16); - c->bind(w.label); - } - - if (w.level >= w.beg->first.size()) - { - // If functions cannot be compared, assume smallest function - LOG_ERROR(SPU, "Trampoline simplified at 0x%x (level=%u)", func[0], w.level); - c->jmp(imm_ptr(w.beg->second ? w.beg->second : &dispatch)); - continue; - } - - // Value for comparison - const u32 x = it->first.at(w.level); - - // Adjust ranges (backward) - while (true) - { - it--; - - if (it->first.at(w.level) != x) - { - it++; - break; - } - - verify(HERE), it != w.beg; - size1--; - size2++; - } - - c->cmp(x86::dword_ptr(*ls, start + (w.level - 1) * 4), x); - - // Low subrange target label - Label label_below; - - if (size1 == 1) - { - label_below = c->newLabel(); - c->jb(label_below); - } - else - { - workload.push_back(w); - workload.back().end = it; - workload.back().size = size1; - workload.back().label = c->newLabel(); - c->jb(workload.back().label); - } - - // Second subrange target - const auto target = it->second ? it->second : &dispatch; - - if (size2 == 1) - { - c->jmp(imm_ptr(target)); - } - else - { - it2 = it; - - // Select additional midrange for equality comparison - while (it2 != w.end && it2->first.at(w.level) == x) - { - size2--; - it2++; - } - - if (it2 != w.end) - { - // High subrange target label - Label label_above; - - if (size2 == 1) - { - label_above = c->newLabel(); - c->ja(label_above); - } - else - { - workload.push_back(w); - workload.back().beg = it2; - workload.back().size = size2; - workload.back().label = c->newLabel(); - c->ja(workload.back().label); - } - - const u32 size3 = w.size - size1 - size2; - - if (size3 == 1) - { - c->jmp(imm_ptr(target)); - } - else - { - workload.push_back(w); - workload.back().beg = it; - workload.back().end = it2; - workload.back().size = size3; - workload.back().label = c->newLabel(); - c->jmp(workload.back().label); - } - - if (label_above.isValid()) - { - c->bind(label_above); - c->jmp(imm_ptr(it2->second ? it2->second : &dispatch)); - } - } - else - { - workload.push_back(w); - workload.back().beg = it; - workload.back().size = w.size - size1; - workload.back().label = c->newLabel(); - c->jmp(workload.back().label); - } - } - - if (label_below.isValid()) - { - c->bind(label_below); - c->jmp(imm_ptr(w.beg->second ? w.beg->second : &dispatch)); - } - } - - spu_function_t tr; - - if (m_asmrt->add(&tr, &code)) - { - LOG_FATAL(SPU, "Failed to build a trampoline"); - } - - m_spurt->m_dispatcher[func[0] / 4] = tr; - } - - lock.unlock(); - m_spurt->m_cond.notify_all(); - return fn; } diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h index f8a093d6fe..178c1a943a 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h @@ -21,7 +21,7 @@ public: private: // ASMJIT runtime - asmjit::JitRuntime* m_asmrt; + ::jit_runtime m_asmrt; // emitter: asmjit::X86Assembler* c; diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index acb2da2325..e52b923930 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -253,15 +253,6 @@ spu_runtime::spu_runtime() LOG_SUCCESS(SPU, "SPU Recompiler Runtime initialized..."); } -asmjit::JitRuntime* spu_runtime::get_asmjit_rt() -{ - std::lock_guard lock(m_mutex); - - m_asmjit_rts.emplace_back(std::make_unique()); - - return m_asmjit_rts.back().get(); -} - void spu_runtime::add(std::pair, spu_function_t>& where, spu_function_t compiled) { std::unique_lock lock(m_mutex); @@ -289,11 +280,7 @@ void spu_runtime::add(std::pair, spu_function_t>& where, else { // Allocate some writable executable memory -#ifdef LLVM_AVAILABLE - const auto wxptr = jit_compiler::alloc(size0 * 20); -#else - u8* const wxptr = new u8[size0 * 20]; // dummy -#endif + u8* const wxptr = jit_runtime::alloc(size0 * 20, 16); // Raw assembly pointer u8* raw = wxptr; @@ -315,11 +302,7 @@ void spu_runtime::add(std::pair, spu_function_t>& where, if (!target && !tr_dispatch) { // Generate a special trampoline with pause instruction -#ifdef LLVM_AVAILABLE - const auto trptr = jit_compiler::alloc(16); -#else - u8* const trptr = new u8[16]; // dummy -#endif + u8* const trptr = jit_runtime::alloc(16, 16); trptr[0] = 0xf3; // pause trptr[1] = 0x90; trptr[2] = 0xff; // jmp [rip] @@ -623,6 +606,7 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip) else { // Far jumps: extremely rare and disabled due to implementation complexity + LOG_ERROR(SPU, "Impossible far jump"); bytes[0] = 0x0f; // nop (8-byte form) bytes[1] = 0x1f; bytes[2] = 0x84; diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 7d88ea9c94..f1c3ad1bd6 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -51,18 +51,12 @@ public: std::string m_cache_path; private: - // Temporarily: asmjit runtime collection - std::deque> m_asmjit_rts; - // Trampoline to spu_recompiler_base::dispatch spu_function_t tr_dispatch = nullptr; public: spu_runtime(); - // Get new ASMJIT runtime - asmjit::JitRuntime* get_asmjit_rt(); - // Add compiled function and generate trampoline if necessary void add(std::pair, spu_function_t>& where, spu_function_t compiled); }; diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index b1a4aa7d73..e1cdbe36c4 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -39,6 +39,7 @@ #include "Utilities/GDBDebugServer.h" #include "Utilities/sysinfo.h" +#include "Utilities/JIT.h" #if defined(_WIN32) || defined(HAVE_VULKAN) #include "Emu/RSX/VK/VulkanAPI.h" @@ -274,6 +275,8 @@ void fmt_class_string::format(std::string& out, u64 arg) void Emulator::Init() { + jit_runtime::initialize(); + if (!g_tty) { g_tty.open(fs::get_cache_dir() + "TTY.log", fs::rewrite + fs::append); @@ -1537,6 +1540,7 @@ void Emulator::Stop(bool restart) extern void jit_finalize(); jit_finalize(); #endif + jit_runtime::finalize(); if (restart) {