From a19b721c2614f3b35e2c3af36b41c15b34f08518 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 22 Jun 2016 16:37:51 +0300 Subject: [PATCH] LLVM: Added JIT.cpp Refactoring, used -fno-rtti option --- Utilities/JIT.cpp | 327 ++++++++++++++ Utilities/JIT.h | 50 +++ rpcs3/CMakeLists.txt | 3 + rpcs3/Emu/Cell/PPUThread.cpp | 761 ++++++++------------------------- rpcs3/Emu/Cell/PPUTranslator.h | 4 + rpcs3/emucore.vcxproj | 30 +- rpcs3/emucore.vcxproj.filters | 18 + 7 files changed, 595 insertions(+), 598 deletions(-) create mode 100644 Utilities/JIT.cpp create mode 100644 Utilities/JIT.h diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp new file mode 100644 index 0000000000..3f279db3d4 --- /dev/null +++ b/Utilities/JIT.cpp @@ -0,0 +1,327 @@ +#ifdef LLVM_AVAILABLE + +#include +#include +#include +#include +#include + +#include "types.h" +#include "Macro.h" +#include "StrFmt.h" +#include "File.h" +#include "Log.h" + +#ifdef _MSC_VER +#pragma warning(push, 0) +#endif +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/ExecutionEngine/JITEventListener.h" +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#ifdef _WIN32 +#include +#else +#include +#include +#include +#include +#include +#endif + +#include "JIT.h" + +// Global LLVM context (thread-unsafe) +llvm::LLVMContext g_llvm_ctx; + +// Size of virtual memory area reserved: 512 MB +static const u64 s_memory_size = 0x20000000; + +// Try to reserve a portion of virtual memory in the first 2 GB address space beforehand, if possible. +static void* const s_memory = []() -> void* +{ +#ifdef _WIN32 + for (u64 addr = 0x1000000; addr <= 0x60000000; addr += 0x1000000) + { + if (VirtualAlloc((void*)addr, s_memory_size, MEM_RESERVE, PAGE_NOACCESS)) + { + return (void*)addr; + } + } + + return VirtualAlloc(NULL, s_memory_size, MEM_RESERVE, PAGE_NOACCESS); +#else + return ::mmap((void*)0x10000000, s_memory_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); +#endif +}(); + +// EH frames +static u8* s_unwind_info; +static u64 s_unwind_size; + +#ifdef _WIN32 +static std::vector s_unwind; // Custom .pdata section replacement +#endif + +// Helper class +struct MemoryManager final : llvm::RTDyldMemoryManager +{ + std::unordered_map table; + + MemoryManager(std::unordered_map&& table) + : table(std::move(table)) + { + } + + [[noreturn]] static void null() + { + throw std::runtime_error("Null function" HERE); + } + + virtual u64 getSymbolAddress(const std::string& name) override + { + if (u64 addr = RTDyldMemoryManager::getSymbolAddress(name)) + { + // This may be bad if LLVM requests some built-in functions like fma. + LOG_ERROR(GENERAL, "LLVM: Symbol requested %s -> 0x%016llx", name, addr); + return addr; + } + + const auto found = table.find(name); + + if (found != table.end()) + { + return found->second; + } + + // It's fine if some function is never called, for example. + LOG_ERROR(GENERAL, "LLVM: Linkage failed for %s", name); + return (u64)null; + } + + virtual u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override + { + // Simple allocation + const u64 next = ::align((u64)m_next + size, 4096); + + if (next > (u64)s_memory + s_memory_size) + { + LOG_FATAL(GENERAL, "LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align); + return nullptr; + } + +#ifdef _WIN32 + if (!VirtualAlloc(m_next, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE)) +#else + if (::mprotect(m_next, size, PROT_READ | PROT_WRITE | PROT_EXEC)) +#endif + { + LOG_FATAL(GENERAL, "LLVM: Failed to allocate memory at 0x%p", m_next); + return nullptr; + } + + LOG_SUCCESS(GENERAL, "LLVM: Code section %u '%s' allocated -> 0x%p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), m_next, size, align); + return (u8*)std::exchange(m_next, (void*)next); + } + + virtual u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override + { + // Simple allocation + const u64 next = ::align((u64)m_next + size, 4096); + + if (next > (u64)s_memory + s_memory_size) + { + LOG_FATAL(GENERAL, "LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align); + return nullptr; + } + +#ifdef _WIN32 + if (!VirtualAlloc(m_next, size, MEM_COMMIT, PAGE_READWRITE)) +#else + if (::mprotect(m_next, size, PROT_READ | PROT_WRITE)) +#endif + { + LOG_FATAL(GENERAL, "LLVM: Failed to allocate memory at 0x%p", m_next); + return nullptr; + } + + LOG_SUCCESS(GENERAL, "LLVM: Data section %u '%s' allocated -> 0x%p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), m_next, size, align, is_ro ? "ro" : "rw"); + return (u8*)std::exchange(m_next, (void*)next); + } + + virtual bool finalizeMemory(std::string* = nullptr) override + { + // TODO: make sections read-only when necessary + return false; + } + + virtual void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override + { + s_unwind_info = addr; + s_unwind_size = size; + + return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size); + } + + virtual void deregisterEHFrames(u8* addr, u64 load_addr, std::size_t size) override + { + LOG_ERROR(GENERAL, "deregisterEHFrames() called"); // Not expected + + return RTDyldMemoryManager::deregisterEHFrames(addr, load_addr, size); + } + + ~MemoryManager() + { +#ifdef _WIN32 + if (!RtlDeleteFunctionTable(s_unwind.data())) + { + LOG_FATAL(GENERAL, "RtlDeleteFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError()); + } + + if (!VirtualFree(s_memory, 0, MEM_DECOMMIT)) + { + LOG_FATAL(GENERAL, "VirtualFree(0x%p) failed! Error %u", s_memory, GetLastError()); + } +#else + if (::mprotect(s_memory, s_memory_size, PROT_NONE)) + { + LOG_FATAL(GENERAL, "mprotect(0x%p) failed! Error %d", s_memory, errno); + } + + // TODO: unregister EH frames if necessary +#endif + } + +private: + void* m_next = s_memory; +}; + +// Helper class +struct EventListener final : llvm::JITEventListener +{ + virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override + { + const llvm::StringRef elf = obj.getData(); + fs::file(fs::get_config_dir() + "LLVM.obj", fs::rewrite) + .write(elf.data(), elf.size()); + } +}; + +static EventListener s_listener; + +jit_compiler::jit_compiler(std::unique_ptr&& _module, std::unordered_map&& table) +{ + EXPECTS(s_memory); + + std::string result; + + const auto module_ptr = _module.get(); + + // Initialization + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + LLVMLinkInMCJIT(); + + m_engine.reset(llvm::EngineBuilder(std::move(_module)) + .setErrorStr(&result) + .setMCJITMemoryManager(std::make_unique(std::move(table))) + .setOptLevel(llvm::CodeGenOpt::Aggressive) + .setRelocationModel(llvm::Reloc::PIC_) + .setCodeModel((u64)s_memory <= 0x60000000 ? llvm::CodeModel::Medium : llvm::CodeModel::Large) // TODO + .setMCPU(llvm::sys::getHostCPUName()) + .create()); + + if (!m_engine) + { + throw fmt::exception("LLVM: Failed to create ExecutionEngine: %s", result); + } + + m_engine->setProcessAllSections(true); // ??? + m_engine->RegisterJITEventListener(&s_listener); + m_engine->finalizeObject(); + + for (auto& func : module_ptr->functions()) + { + if (!func.empty()) + { + const std::string& name = func.getName(); + + // Register compiled function + m_map[name] = m_engine->getFunctionAddress(name); + } + + // Delete IR to lower memory consumption + func.deleteBody(); + } + +#ifdef _WIN32 + // Register .xdata UNWIND_INFO (.pdata section is empty for some reason) + std::set func_set; + + for (const auto& pair : m_map) + { + func_set.emplace(pair.second); + } + + // Hack (cannot obtain last function size) + func_set.emplace(::align(*--func_set.end() + 4096, 4096)); + + const u64 base = (u64)s_memory; + const u8* bits = s_unwind_info; + + s_unwind.clear(); + s_unwind.reserve(m_map.size()); + + for (auto it = func_set.begin(), end = --func_set.end(); it != end; it++) + { + const u64 addr = *it; + const u64 next = *func_set.upper_bound(addr); + + // Generate RUNTIME_FUNCTION record + RUNTIME_FUNCTION uw; + uw.BeginAddress = static_cast(addr - base); + uw.EndAddress = static_cast(next - base); + uw.UnwindData = static_cast((u64)bits - base); + s_unwind.emplace_back(uw); + + // Parse .xdata UNWIND_INFO record + const u8 flags = *bits++; // Version and flags + const u8 prolog = *bits++; // Size of prolog + const u8 count = *bits++; // Count of unwind codes + const u8 frame = *bits++; // Frame Reg + Off + bits += ::align(std::max(1, count), 2) * sizeof(u16); // UNWIND_CODE array + + if (flags != 1) + { + LOG_ERROR(GENERAL, "LLVM: unsupported UNWIND_INFO version/flags (0x%02x)", flags); + break; + } + + LOG_TRACE(GENERAL, "LLVM: .xdata at 0x%llx: function 0x%x..0x%x: p0x%02x, c0x%02x, f0x%02x", uw.UnwindData + base, uw.BeginAddress + base, uw.EndAddress + base, prolog, count, frame); + } + + if (s_unwind_info + s_unwind_size != bits) + { + LOG_FATAL(GENERAL, "LLVM: .xdata analysis failed! (0x%p != 0x%p)", s_unwind_info + s_unwind_size, bits); + } + else if (!RtlAddFunctionTable(s_unwind.data(), (DWORD)s_unwind.size(), base)) + { + LOG_FATAL(GENERAL, "RtlAddFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError()); + } + else + { + LOG_SUCCESS(GENERAL, "LLVM: UNWIND_INFO registered (addr=0x%p, size=0x%llx)", s_unwind_info, s_unwind_size); + } +#endif +} + +jit_compiler::~jit_compiler() +{ +} + +#endif diff --git a/Utilities/JIT.h b/Utilities/JIT.h new file mode 100644 index 0000000000..24c27d068d --- /dev/null +++ b/Utilities/JIT.h @@ -0,0 +1,50 @@ +#pragma once + +#ifdef LLVM_AVAILABLE + +#include +#include +#include + +#include "types.h" + +#ifdef _MSC_VER +#pragma warning(push, 0) +#endif +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +extern llvm::LLVMContext g_llvm_ctx; + +// Temporary compiler interface +class jit_compiler final +{ + // Execution instance + std::unique_ptr m_engine; + + // Compiled functions + std::unordered_map m_map; + +public: + jit_compiler(std::unique_ptr&&, std::unordered_map&&); + ~jit_compiler(); + + // Get compiled function address + std::uintptr_t get(const std::string& name) const + { + const auto found = m_map.find(name); + + if (found != m_map.end()) + { + return found->second; + } + + return 0; + } +}; + +#endif diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 8812204613..0929157806 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -146,6 +146,9 @@ else() else() llvm_map_components_to_libnames(LLVM_LIBS mcjit vectorize ipo x86codegen x86disassembler mcdisassembler) endif() + if (NOT MSVC) + set_source_files_properties(${RPCS3_SRC_DIR}/../Utilities/JIT.cpp PROPERTIES COMPILE_FLAGS -fno-rtti) + endif() endif() link_directories( diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 93033fed96..eb50c750c6 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -8,6 +8,36 @@ #include "PPUAnalyser.h" #include "PPUModule.h" +#ifdef LLVM_AVAILABLE +#ifdef _MSC_VER +#pragma warning(push, 0) +#endif +#include "llvm/Support/FormattedStream.h" +#include "llvm/IR/LLVMContext.h" +//#include "llvm/IR/Dominators.h" +#include "llvm/IR/Verifier.h" +//#include "llvm/IR/InstIterator.h" +#include "llvm/IR/LegacyPassManager.h" +//#include "llvm/IR/Module.h" +//#include "llvm/IR/Function.h" +//#include "llvm/Analysis/Passes.h" +//#include "llvm/Analysis/BasicAliasAnalysis.h" +//#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +//#include "llvm/Analysis/LoopInfo.h" +//#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Vectorize.h" +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#include "Utilities/JIT.h" +#include "PPUTranslator.h" +#endif + enum class ppu_decoder_type { precise, @@ -322,54 +352,6 @@ void PPUThread::fast_call(u32 addr, u32 rtoc) //} } -#ifdef LLVM_AVAILABLE -#ifdef _MSC_VER -#pragma warning(push, 0) -#endif -//#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -//#include "llvm/Support/Host.h" -#include "llvm/Support/FormattedStream.h" -//#include "llvm/Support/Debug.h" -//#include "llvm/CodeGen/CommandFlags.h" -//#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/LLVMContext.h" -//#include "llvm/IR/Dominators.h" -#include "llvm/IR/Verifier.h" -//#include "llvm/IR/InstIterator.h" -#include "llvm/IR/LegacyPassManager.h" -//#include "llvm/IR/Module.h" -//#include "llvm/IR/Function.h" -//#include "llvm/Analysis/Passes.h" -//#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" -//#include "llvm/Analysis/LoopInfo.h" -//#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/Lint.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Vectorize.h" - -#include "llvm/ExecutionEngine/ExecutionEngine.h" -#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" -#include "llvm/ExecutionEngine/JITEventListener.h" -//#include "llvm/Object/ObjectFile.h" -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -#include "PPUTranslator.h" - -#ifdef _WIN32 -#include -#else -#include -#include -#include -#include -#include -#endif - const ppu_decoder s_ppu_itype; extern u64 get_timebased_time(); @@ -386,326 +368,159 @@ extern __m128i sse_cellbe_lvrx(u64 addr); extern void sse_cellbe_stvlx(u64 addr, __m128i a); extern void sse_cellbe_stvrx(u64 addr, __m128i a); -struct Listener final : llvm::JITEventListener +[[noreturn]] static void ppu_trap(u64 addr) { - virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override - { - const llvm::StringRef elf = obj.getData(); - fs::file(fs::get_config_dir() + "LLVM.obj", fs::rewrite) - .write(elf.data(), elf.size()); - } -}; + throw fmt::exception("Trap! (0x%llx)", addr); +} -static Listener s_listener; - -// Memory size: 512 MB -static const u64 s_memory_size = 0x20000000; - -// Try to reserve a portion of virtual memory in the first 2 GB address space, if possible. -static void* const s_memory = []() -> void* +static void ppu_trace(u64 addr) { -#ifdef _WIN32 - for (u64 addr = 0x1000000; addr <= 0x60000000; addr += 0x1000000) + LOG_NOTICE(PPU, "Trace: 0x%llx", addr); +} + +static void ppu_hlecall(PPUThread& ppu, u32 index) +{ + ppu_execute_function(ppu, index); + if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily +} + +static void ppu_syscall(PPUThread& ppu, u64 code) +{ + ppu_execute_syscall(ppu, code); + if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily +} + +static u32 ppu_tbl() +{ + return (u32)get_timebased_time(); +} + +static void ppu_call(PPUThread& ppu, u32 addr) +{ + const auto found = s_ppu_compiled.find(addr); + + if (found != s_ppu_compiled.end()) { - if (VirtualAlloc((void*)addr, s_memory_size, MEM_RESERVE, PAGE_NOACCESS)) - { - return (void*)addr; - } + return found->second(ppu); } - return VirtualAlloc(NULL, s_memory_size, MEM_RESERVE, PAGE_NOACCESS); + const auto op = vm::read32(addr).value(); + const auto itype = s_ppu_itype.decode(op); + + // Allow HLE callbacks without compiling them + if (itype == ppu_itype::HACK && vm::read32(addr + 4) == ppu_instructions::BLR()) + { + return ppu_hlecall(ppu, op & 0x3ffffff); + } + + ppu_trap(addr); +} + +static __m128 sse_rcp_ps(__m128 A) +{ + return _mm_rcp_ps(A); +} + +static __m128 sse_rsqrt_ps(__m128 A) +{ + return _mm_rsqrt_ps(A); +} + +static float sse_rcp_ss(float A) +{ + _mm_store_ss(&A, _mm_rcp_ss(_mm_load_ss(&A))); + return A; +} + +static float sse_rsqrt_ss(float A) +{ + _mm_store_ss(&A, _mm_rsqrt_ss(_mm_load_ss(&A))); + return A; +} + +static u32 ppu_lwarx(u32 addr) +{ + be_t reg_value; + vm::reservation_acquire(®_value, addr, sizeof(reg_value)); + return reg_value; +} + +static u64 ppu_ldarx(u32 addr) +{ + be_t reg_value; + vm::reservation_acquire(®_value, addr, sizeof(reg_value)); + return reg_value; +} + +static bool ppu_stwcx(u32 addr, u32 reg_value) +{ + const be_t data = reg_value; + return vm::reservation_update(addr, &data, sizeof(data)); +} + +static bool ppu_stdcx(u32 addr, u64 reg_value) +{ + const be_t data = reg_value; + return vm::reservation_update(addr, &data, sizeof(data)); +} + +static bool adde_carry(u64 a, u64 b, bool c) +{ +#ifdef _MSC_VER + return _addcarry_u64(c, a, b, nullptr) != 0; #else - return ::mmap((void*)0x10000000, s_memory_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + bool result; + __asm__("addb $0xff, %[c] \n adcq %[a], %[b] \n setb %[result]" : [a] "+&r" (a), [b] "+&r" (b), [c] "+&r" (c), [result] "=r" (result)); + return result; #endif -}(); +} -// EH frames -static u8* s_unwind_info; -static u64 s_unwind_size; - -#ifdef _WIN32 -// Custom .pdata section replacement -static std::vector s_unwind; -#endif - -struct MemoryManager final : llvm::RTDyldMemoryManager +// Interpreter call for simple vector instructions +static __m128i ppu_vec3op(decltype(&ppu_interpreter::UNK) func, __m128i _a, __m128i _b, __m128i _c) { - static PPUThread* context(u64 addr) // Unused + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + ppu.VR[21].vi = _a; + ppu.VR[22].vi = _b; + ppu.VR[23].vi = _c; + + ppu_opcode_t op{}; + op.vd = 20; + op.va = 21; + op.vb = 22; + op.vc = 23; + func(ppu, op); + + return ppu.VR[20].vi; +} + +extern void ppu_initialize(const std::string& name, const std::vector>& funcs, u32 entry) +{ + if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm || funcs.empty()) { - //trace(addr); - return static_cast(get_current_cpu_thread()); + return; } - [[noreturn]] static void trap(u64 addr) + std::unordered_map link_table { - LOG_ERROR(PPU, "Trap! (0x%llx)", addr); - throw fmt::exception("Trap! (0x%llx)", addr); - } - - static void trace(u64 addr) - { - LOG_NOTICE(PPU, "Trace: 0x%llx", addr); - } - - static void hlecall(PPUThread& ppu, u32 index) - { - ppu_execute_function(ppu, index); - if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily - } - - static void syscall(PPUThread& ppu, u64 code) - { - ppu_execute_syscall(ppu, code); - if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily - } - - static u32 tbl() - { - return (u32)get_timebased_time(); - } - - static void call(PPUThread& ppu, u32 addr) - { - const auto found = s_ppu_compiled.find(addr); - - if (found != s_ppu_compiled.end()) - { - return found->second(ppu); - } - - const auto op = vm::read32(addr).value(); - const auto itype = s_ppu_itype.decode(op); - - // Allow HLE callbacks without compiling them - if (itype == ppu_itype::HACK && vm::read32(addr + 4) == ppu_instructions::BLR()) - { - return hlecall(ppu, op & 0x3ffffff); - } - - trap(addr); - } - - static __m128 sse_rcp_ps(__m128 A) - { - return _mm_rcp_ps(A); - } - - static __m128 sse_rsqrt_ps(__m128 A) - { - return _mm_rsqrt_ps(A); - } - - static float sse_rcp_ss(float A) - { - _mm_store_ss(&A, _mm_rcp_ss(_mm_load_ss(&A))); - return A; - } - - static float sse_rsqrt_ss(float A) - { - _mm_store_ss(&A, _mm_rsqrt_ss(_mm_load_ss(&A))); - return A; - } - - static u32 lwarx(u32 addr) - { - be_t reg_value; - vm::reservation_acquire(®_value, addr, sizeof(reg_value)); - return reg_value; - } - - static u64 ldarx(u32 addr) - { - be_t reg_value; - vm::reservation_acquire(®_value, addr, sizeof(reg_value)); - return reg_value; - } - - static bool stwcx(u32 addr, u32 reg_value) - { - const be_t data = reg_value; - return vm::reservation_update(addr, &data, sizeof(data)); - } - - static bool stdcx(u32 addr, u64 reg_value) - { - const be_t data = reg_value; - return vm::reservation_update(addr, &data, sizeof(data)); - } - - static bool adde_carry(u64 a, u64 b, bool c) - { - return _addcarry_u64(c, a, b, nullptr) != 0; - } - - // Interpreter call for simple vector instructions - static __m128i vec3op(decltype(&ppu_interpreter::UNK) func, __m128i _a, __m128i _b, __m128i _c) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - ppu.VR[21].vi = _a; - ppu.VR[22].vi = _b; - ppu.VR[23].vi = _c; - - ppu_opcode_t op{}; - op.vd = 20; - op.va = 21; - op.vb = 22; - op.vc = 23; - func(ppu, op); - - return ppu.VR[20].vi; - } - - // Interpreter call for simple vector instructions with immediate - static __m128i veciop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, __m128i _b) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - ppu.VR[22].vi = _b; - - op.vd = 20; - op.vb = 22; - func(ppu, op); - - return ppu.VR[20].vi; - } - - // Interpreter call for FP instructions - static f64 fpop(decltype(&ppu_interpreter::UNK) func, f64 _a, f64 _b, f64 _c) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - ppu.FPR[21] = _a; - ppu.FPR[22] = _b; - ppu.FPR[23] = _c; - - ppu_opcode_t op{}; - op.frd = 20; - op.fra = 21; - op.frb = 22; - op.frc = 23; - func(ppu, op); - - return ppu.FPR[20]; - } - - // Interpreter call for GPR instructions writing result to RA - static u64 aimmop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - const u64 a = ppu.GPR[op.ra]; - const u64 s = ppu.GPR[op.rs]; - ppu.GPR[op.rs] = _s; - - func(ppu, op); - - const u64 r = ppu.GPR[op.ra]; - ppu.GPR[op.ra] = a; - ppu.GPR[op.rs] = s; - return r; - } - - // Interpreter call for GPR instructions writing result to RA - static u64 aimmbop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s, u64 _b) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - const u64 a = ppu.GPR[op.ra]; - const u64 s = ppu.GPR[op.rs]; - const u64 b = ppu.GPR[op.rb]; - ppu.GPR[op.rs] = _s; - ppu.GPR[op.rb] = _b; - - func(ppu, op); - - const u64 r = ppu.GPR[op.ra]; - ppu.GPR[op.ra] = a; - ppu.GPR[op.rs] = s; - ppu.GPR[op.rb] = b; - return r; - } - - // Interpreter call for GPR instructions writing result to RA (destructive) - static u64 aaimmop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s, u64 _a) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - const u64 s = ppu.GPR[op.rs]; - const u64 a = ppu.GPR[op.ra]; - ppu.GPR[op.rs] = _s; - ppu.GPR[op.ra] = _a; - - func(ppu, op); - - const u64 r = ppu.GPR[op.ra]; - ppu.GPR[op.rs] = s; - ppu.GPR[op.ra] = a; - return r; - } - - static u64 immaop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _a) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - const u64 a = ppu.GPR[op.ra]; - const u64 d = ppu.GPR[op.rd]; - ppu.GPR[op.ra] = _a; - - func(ppu, op); - - const u64 r = ppu.GPR[op.rd]; - ppu.GPR[op.ra] = a; - ppu.GPR[op.rd] = d; - return r; - } - - static u64 immabop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _a, u64 _b) - { - PPUThread& ppu = static_cast(*get_current_cpu_thread()); - const u64 a = ppu.GPR[op.ra]; - const u64 b = ppu.GPR[op.rb]; - const u64 d = ppu.GPR[op.rd]; - ppu.GPR[op.ra] = _a; - ppu.GPR[op.rb] = _b; - - func(ppu, op); - - const u64 r = ppu.GPR[op.rd]; - ppu.GPR[op.ra] = a; - ppu.GPR[op.rb] = b; - ppu.GPR[op.rd] = d; - return r; - } - - // No operation on specific u64 value (silly optimization barrier) - static u64 nop64(u64 value) - { - return value; - } - - std::unordered_map table - { - { "__memory", (u64)vm::base(0) }, - { "__trap", (u64)&trap }, - { "__trace", (u64)&trace }, - { "__hlecall", (u64)&hlecall }, - { "__syscall", (u64)&syscall }, - { "__get_tbl", (u64)&tbl }, - { "__call", (u64)&call }, - { "__lwarx", (u64)&lwarx }, - { "__ldarx", (u64)&ldarx }, - { "__stwcx", (u64)&stwcx }, - { "__stdcx", (u64)&stdcx }, + { "__memory", (u64)vm::g_base_addr }, + { "__memptr", (u64)&vm::g_base_addr }, + { "__trap", (u64)&ppu_trap }, + { "__trace", (u64)&ppu_trace }, + { "__hlecall", (u64)&ppu_hlecall }, + { "__syscall", (u64)&ppu_syscall }, + { "__get_tbl", (u64)&ppu_tbl }, + { "__call", (u64)&ppu_call }, + { "__lwarx", (u64)&ppu_lwarx }, + { "__ldarx", (u64)&ppu_ldarx }, + { "__stwcx", (u64)&ppu_stwcx }, + { "__stdcx", (u64)&ppu_stdcx }, + { "__vec3op", (u64)&ppu_vec3op }, { "__adde_get_ca", (u64)&adde_carry }, { "__vexptefp", (u64)&sse_exp2_ps }, { "__vlogefp", (u64)&sse_log2_ps }, { "__vperm", (u64)&sse_altivec_vperm }, { "__vrefp", (u64)&sse_rcp_ps }, { "__vrsqrtefp", (u64)&sse_rsqrt_ps }, - { "__vec3op", (u64)&vec3op }, - { "__veciop", (u64)&veciop }, - { "__aimmop", (u64)&aimmop }, - { "__aimmbop", (u64)&aimmbop }, - { "__aaimmop", (u64)&aaimmop }, - { "__immaop", (u64)&immaop }, - { "__immabop", (u64)&immabop }, - { "__fpop", (u64)&fpop }, - { "__nop64", (u64)&nop64 }, { "__lvsl", (u64)&sse_altivec_lvsl }, { "__lvsr", (u64)&sse_altivec_lvsr }, { "__lvlx", (u64)&sse_cellbe_lvlx }, @@ -716,142 +531,20 @@ struct MemoryManager final : llvm::RTDyldMemoryManager { "__frsqrte", (u64)&sse_rsqrt_ss }, }; - virtual u64 getSymbolAddress(const std::string& name) override - { - if (u64 addr = RTDyldMemoryManager::getSymbolAddress(name)) - { - LOG_ERROR(GENERAL, "LLVM: Linkage requested %s -> 0x%016llx", name, addr); - return addr; - } - - const auto found = table.find(name); - - if (found != table.end()) - { - return found->second; - } - - LOG_FATAL(GENERAL, "LLVM: Linkage failed for %s", name); - return (u64)trap; - } - - virtual u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override - { - // Simple allocation (TODO) - const auto ptr = m_next; m_next = (void*)::align((u64)m_next + size, 4096); - -#ifdef _WIN32 - if (!VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE)) -#else - if (::mprotect(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC)) -#endif - { - LOG_FATAL(GENERAL, "LLVM: Failed to allocate code section '%s'", sec_name.data()); - return nullptr; - } - - LOG_SUCCESS(GENERAL, "LLVM: Code section '%s' allocated -> 0x%p", sec_name.data(), ptr); - return (u8*)ptr; - } - - virtual u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override - { - // Simple allocation (TODO) - const auto ptr = m_next; m_next = (void*)::align((u64)m_next + size, 4096); - -#ifdef _WIN32 - if (!VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE)) -#else - if (::mprotect(ptr, size, PROT_READ | PROT_WRITE)) -#endif - { - LOG_FATAL(GENERAL, "LLVM: Failed to allocate data section '%s'", sec_name.data()); - return nullptr; - } - - LOG_SUCCESS(GENERAL, "LLVM: Data section '%s' allocated -> 0x%p", sec_name.data(), ptr); - return (u8*)ptr; - } - - virtual bool finalizeMemory(std::string* = nullptr) override - { - // TODO: make sections read-only when necessary - return false; - } - - virtual void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override - { - s_unwind_info = addr; - s_unwind_size = size; - - return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size); - } - - virtual void deregisterEHFrames(u8* addr, u64 load_addr, std::size_t size) override - { - LOG_ERROR(GENERAL, "deregisterEHFrames() called"); // Not expected - - return RTDyldMemoryManager::deregisterEHFrames(addr, load_addr, size); - } - - ~MemoryManager() - { -#ifdef _WIN32 - if (!RtlDeleteFunctionTable(s_unwind.data())) - { - LOG_FATAL(GENERAL, "RtlDeleteFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError()); - } - - if (!VirtualFree(s_memory, 0, MEM_DECOMMIT)) - { - LOG_FATAL(GENERAL, "VirtualFree(0x%p) failed! Error %u", s_memory, GetLastError()); - } -#else - if (::mprotect(s_memory, s_memory_size, PROT_NONE)) - { - LOG_FATAL(GENERAL, "mprotect(0x%p) failed! Error %d", s_memory, errno); - } - - // TODO: unregister EH frames if necessary -#endif - } - -private: - void* m_next = s_memory; -}; - -llvm::LLVMContext g_context; - -extern void ppu_initialize(const std::string& name, const std::vector>& funcs, u32 entry) -{ - if (!s_memory) - { - throw std::runtime_error("LLVM: Memory not allocated, report to the developers." HERE); - } - - if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm || funcs.empty()) - { - return; - } - +#ifdef LLVM_AVAILABLE using namespace llvm; - // Initialization - InitializeNativeTarget(); - InitializeNativeTargetAsmPrinter(); - LLVMLinkInMCJIT(); - // Create LLVM module - std::unique_ptr module = std::make_unique(name, g_context); + std::unique_ptr module = std::make_unique(name, g_llvm_ctx); // Initialize target module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); // Initialize translator - std::unique_ptr translator = std::make_unique(g_context, module.get(), 0, entry); + std::unique_ptr translator = std::make_unique(g_llvm_ctx, module.get(), 0, entry); // Define some types - const auto _void = Type::getVoidTy(g_context); + const auto _void = Type::getVoidTy(g_llvm_ctx); const auto _func = FunctionType::get(_void, { translator->GetContextType()->getPointerTo() }, false); // Initialize function list @@ -859,7 +552,9 @@ extern void ppu_initialize(const std::string& name, const std::vectorAddFunction(info.first, cast(module->getOrInsertFunction(fmt::format("__sub_%x", info.first), _func))); + const auto f = cast(module->getOrInsertFunction(fmt::format("__sub_%x", info.first), _func)); + f->addAttribute(1, Attribute::NoAlias); + translator->AddFunction(info.first, f); } translator->AddBlockInfo(info.first); @@ -874,18 +569,16 @@ extern void ppu_initialize(const std::string& name, const std::vectorTranslateToIR(pseudo_addr, pseudo_addr, nullptr, [](PPUTranslator* _this) - // { - // (_this->*s_current)(op); - // _this->ReturnFromFunction(); - // }); - - // pm.run(*func); - //} - legacy::PassManager mpm; // Remove unused functions, structs, global variables, etc mpm.add(createStripDeadPrototypesPass()); + //mpm.add(createFunctionInliningPass()); mpm.run(*module); + // TODO: replacing __syscall/__hlecall + // TODO: improve __call and s_ppu_compiled + std::string result; raw_string_ostream out(result); @@ -941,25 +622,14 @@ extern void ppu_initialize(const std::string& name, const std::vector engine(EngineBuilder(std::move(module)) - .setErrorStr(&result) - .setMCJITMemoryManager(std::make_unique()) - .setOptLevel(llvm::CodeGenOpt::Aggressive) - .setRelocationModel(Reloc::PIC_) - .setCodeModel((u64)s_memory <= 0x60000000 ? CodeModel::Medium : CodeModel::Large) - .setMCPU(sys::getHostCPUName()) - .create()); + const auto jit = fxm::make(std::move(module), std::move(link_table)); - if (!engine) + if (!jit) { - throw fmt::exception("LLVM: Failed to create ExecutionEngine: %s", result); + LOG_FATAL(PPU, "LLVM: Multiple modules are not yet supported"); + return; } - engine->setProcessAllSections(true); - //engine->setVerifyModules(true); - engine->RegisterJITEventListener(&s_listener); - engine->finalizeObject(); - s_ppu_compiled.clear(); // Get function addresses @@ -967,72 +637,13 @@ extern void ppu_initialize(const std::string& name, const std::vectorgetFunctionAddress(fmt::format("__sub_%x", info.first)); + const std::uintptr_t link = jit->get(fmt::format("__sub_%x", info.first)); s_ppu_compiled.emplace(info.first, (void(*)(PPUThread&))link); LOG_NOTICE(PPU, "** Function __sub_%x -> 0x%llx (addr=0x%x, size=0x%x)", info.first, link, info.first, info.second); } } - // Delete IR to lower memory consumption - for (auto& func : module_ptr->functions()) - { - func.deleteBody(); - } - -#ifdef _WIN32 - // Register .xdata UNWIND_INFO (.pdata section is empty for some reason) - std::set func_set; - - for (const auto& pair : s_ppu_compiled) - { - // Get addresses - func_set.emplace((u64)pair.second); - } - - func_set.emplace(::align(*--func_set.end() + 4096, 4096)); - - const u64 base = (u64)s_memory; - const u8* bits = s_unwind_info; - - s_unwind.clear(); - s_unwind.reserve(s_ppu_compiled.size()); - - for (auto it = func_set.begin(), end = --func_set.end(); it != end; it++) - { - const u64 addr = *it; - const u64 next = *func_set.upper_bound(addr); - - // Generate RUNTIME_FUNCTION record - RUNTIME_FUNCTION uw; - uw.BeginAddress = static_cast(addr - base); - uw.EndAddress = static_cast(next - base); - uw.UnwindData = static_cast((u64)bits - base); - s_unwind.emplace_back(uw); - - // Parse .xdata record - VERIFY(*bits++ == 1); // Version and flags - bits++; // Size of prolog - const u8 count = *bits++; // Count of unwind codes - bits++; // Frame Reg + Off - bits += ::align(count, 2) * sizeof(u16); // UNWIND_CODE array - while (!*bits && bits < s_unwind_info + s_unwind_size) bits++; // Skip strange zero padding (???) - } - - VERIFY(bits == s_unwind_info + s_unwind_size); - VERIFY(RtlAddFunctionTable(s_unwind.data(), (DWORD)s_unwind.size(), base)); - LOG_SUCCESS(GENERAL, "LLVM: UNWIND_INFO registered (addr=0x%p, size=0x%llx)", s_unwind_info, s_unwind_size); -#endif - - fxm::import(WRAP_EXPR(engine)); - LOG_SUCCESS(PPU, "LLVM: Compilation finished (%s)", sys::getHostCPUName().data()); -} - -#else - -extern void ppu_initialize(const std::string& name, const std::vector>& funcs, u32 entry) -{ -} - #endif +} diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index 8fa4664b20..e56ca7a31c 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -1,5 +1,7 @@ #pragma once +#ifdef LLVM_AVAILABLE + #include #include #include @@ -830,3 +832,5 @@ public: void UNK(ppu_opcode_t op); }; + +#endif diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 0e10df8f90..54c248e965 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -64,36 +64,19 @@ ..\llvm\include;..\llvm_build\include; - %windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd" - Updating git-version.h - - - %windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd" - Updating git-version.h - - - %windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd" - Updating git-version.h - - - %windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd" - Updating git-version.h - - - %windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd" - Updating git-version.h + %windir%\sysnative\cmd.exe /c "$(SolutionDir)\Utilities\git-version-gen.cmd" + Updating git-version.h - NotUsing - NotUsing - NotUsing - NotUsing - NotUsing + NotUsing + + NotUsing + NotUsing @@ -404,6 +387,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index bdc7dfe983..c0420a3774 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -872,6 +872,15 @@ Source Files + + Source Files + + + Source Files + + + Utilities + @@ -1666,5 +1675,14 @@ Header Files + + Header Files + + + Header Files + + + Utilities + \ No newline at end of file