From aea094730bf926ffe26e2ad418dd9c7d4ab4d19f Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Fri, 23 Jun 2017 00:52:09 +0300 Subject: [PATCH] PPU LLVM: paradigm shift For now, compile only one block at time Use tail calls to move between blocks Fully write PPU context (except CIA) This fixes many compatibility problems --- Utilities/JIT.cpp | 189 +++++---- Utilities/JIT.h | 25 +- rpcs3/Emu/Cell/PPUAnalyser.cpp | 218 +++++++++- rpcs3/Emu/Cell/PPUAnalyser.h | 4 + rpcs3/Emu/Cell/PPUFunction.h | 7 +- rpcs3/Emu/Cell/PPUInterpreter.cpp | 3 +- rpcs3/Emu/Cell/PPUModule.cpp | 6 - rpcs3/Emu/Cell/PPUModule.h | 2 +- rpcs3/Emu/Cell/PPUThread.cpp | 671 +++++++++++++++++++----------- rpcs3/Emu/Cell/PPUTranslator.cpp | 665 ++++++++++------------------- rpcs3/Emu/Cell/PPUTranslator.h | 135 +++--- rpcs3/Emu/Cell/lv2/lv2.cpp | 1 + rpcs3/Emu/Memory/vm.cpp | 6 +- 13 files changed, 1076 insertions(+), 856 deletions(-) diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index 90e133159c..21886630bf 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include "types.h" #include "StrFmt.h" @@ -20,6 +21,7 @@ #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/ObjectCache.h" #ifdef _MSC_VER #pragma warning(pop) #endif @@ -50,11 +52,13 @@ static void* const s_memory = []() -> void* return utils::memory_reserve(s_memory_size); }(); +static void* s_next; + // Code section static u8* s_code_addr; -static u64 s_code_size; #ifdef _WIN32 +static std::deque> s_unwater; static std::vector> s_unwind; // .pdata #endif @@ -67,9 +71,9 @@ struct MemoryManager final : llvm::RTDyldMemoryManager MemoryManager(std::unordered_map& table) : m_link(table) - , m_next(s_memory) , m_tramps(nullptr) { + s_next = s_memory; } [[noreturn]] static void null() @@ -77,7 +81,7 @@ struct MemoryManager final : llvm::RTDyldMemoryManager 
fmt::throw_exception("Null function" HERE); } - virtual u64 getSymbolAddress(const std::string& name) override + llvm::JITSymbol findSymbol(const std::string& name) override { auto& addr = m_link[name]; @@ -92,7 +96,6 @@ struct MemoryManager final : llvm::RTDyldMemoryManager } else { - // It's fine if some function is never called, for example. LOG_ERROR(GENERAL, "LLVM: Linkage failed: %s", name); addr = (u64)null; } @@ -104,9 +107,9 @@ struct MemoryManager final : llvm::RTDyldMemoryManager // Allocate memory for trampolines if (!m_tramps) { - m_tramps = reinterpret_cast(m_next); - utils::memory_commit(m_next, 4096, utils::protection::wx); - m_next = (u8*)((u64)m_next + 4096); + m_tramps = reinterpret_cast(s_next); + utils::memory_commit(s_next, 4096, utils::protection::wx); + s_next = (u8*)((u64)s_next + 4096); } // Create a trampoline @@ -129,13 +132,13 @@ struct MemoryManager final : llvm::RTDyldMemoryManager } } - return addr; + return {addr, llvm::JITSymbolFlags::Exported}; } virtual u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override { // Simple allocation - const u64 next = ::align((u64)m_next + size, 4096); + const u64 next = ::align((u64)s_next + size, 4096); if (next > (u64)s_memory + s_memory_size) { @@ -143,18 +146,17 @@ struct MemoryManager final : llvm::RTDyldMemoryManager return nullptr; } - utils::memory_commit(m_next, size, utils::protection::wx); - s_code_addr = (u8*)m_next; - s_code_size = size; + utils::memory_commit(s_next, size, utils::protection::wx); + s_code_addr = (u8*)s_next; - LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), m_next, size, align); - return (u8*)std::exchange(m_next, (void*)next); + LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), s_next, size, align); + return (u8*)std::exchange(s_next, (void*)next); } virtual u8* 
allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override { // Simple allocation - const u64 next = ::align((u64)m_next + size, 4096); + const u64 next = ::align((u64)s_next + size, 4096); if (next > (u64)s_memory + s_memory_size) { @@ -167,10 +169,10 @@ struct MemoryManager final : llvm::RTDyldMemoryManager LOG_ERROR(GENERAL, "LLVM: Writeable data section not supported!"); } - utils::memory_commit(m_next, size); + utils::memory_commit(s_next, size); - LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), m_next, size, align, is_ro ? "ro" : "rw"); - return (u8*)std::exchange(m_next, (void*)next); + LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), s_next, size, align, is_ro ? "ro" : "rw"); + return (u8*)std::exchange(s_next, (void*)next); } virtual bool finalizeMemory(std::string* = nullptr) override @@ -191,17 +193,15 @@ struct MemoryManager final : llvm::RTDyldMemoryManager { #ifdef _WIN32 // Use s_memory as a BASE, compute the difference - const u64 code_diff = (u64)s_code_addr - (u64)s_memory; const u64 unwind_diff = (u64)addr - (u64)s_memory; // Fix RUNTIME_FUNCTION records (.pdata section) - auto& pdata = s_unwind.back(); + auto pdata = std::move(s_unwater.front()); + s_unwater.pop_front(); for (auto& rf : pdata) { - rf.BeginAddress += static_cast(code_diff); - rf.EndAddress += static_cast(code_diff); - rf.UnwindData += static_cast(unwind_diff); + rf.UnwindData += static_cast(unwind_diff); } // Register .xdata UNWIND_INFO structs @@ -209,6 +209,10 @@ struct MemoryManager final : llvm::RTDyldMemoryManager { LOG_ERROR(GENERAL, "RtlAddFunctionTable() failed! 
Error %u", GetLastError()); } + else + { + s_unwind.emplace_back(std::move(pdata)); + } #endif return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size); @@ -239,24 +243,13 @@ struct MemoryManager final : llvm::RTDyldMemoryManager utils::memory_decommit(s_memory, s_memory_size); } - -private: - void* m_next; }; // Helper class struct EventListener final : llvm::JITEventListener { - std::string path; - virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override { - if (!path.empty()) - { - const llvm::StringRef elf = obj.getData(); - fs::file(path, fs::rewrite).write(elf.data(), elf.size()); - } - #ifdef _WIN32 for (auto it = obj.section_begin(), end = obj.section_end(); it != end; ++it) { @@ -282,7 +275,17 @@ struct EventListener final : llvm::JITEventListener } } - s_unwind.emplace_back(std::move(rfs)); + // Use s_memory as a BASE, compute the difference + const u64 code_diff = (u64)s_code_addr - (u64)s_memory; + + // Fix RUNTIME_FUNCTION records (.pdata section) + for (auto& rf : rfs) + { + rf.BeginAddress += static_cast(code_diff); + rf.EndAddress += static_cast(code_diff); + } + + s_unwater.emplace_back(std::move(rfs)); } } #endif @@ -291,6 +294,46 @@ struct EventListener final : llvm::JITEventListener static EventListener s_listener; +// Helper class +class ObjectCache final : public llvm::ObjectCache +{ + const std::string& m_path; + +public: + ObjectCache(const std::string& path) + : m_path(path) + { + } + + ~ObjectCache() override = default; + + void notifyObjectCompiled(const llvm::Module* module, llvm::MemoryBufferRef obj) override + { + std::string name = m_path; + name.append(module->getName()); + fs::file(name, fs::rewrite).write(obj.getBufferStart(), obj.getBufferSize()); + LOG_SUCCESS(GENERAL, "LLVM: Created module: %s", module->getName().data()); + } + + std::unique_ptr getObject(const llvm::Module* module) override + { + std::string name = m_path; + 
name.append(module->getName()); + + if (fs::file cached{name, fs::read}) + { + auto buf = llvm::MemoryBuffer::getNewUninitMemBuffer(cached.size()); + cached.read(const_cast(buf->getBufferStart()), buf->getBufferSize()); + LOG_SUCCESS(GENERAL, "LLVM: Loaded module: %s", module->getName().data()); + return buf; + } + else + { + return nullptr; + } + } +}; + jit_compiler::jit_compiler(std::unordered_map init_linkage_info, std::string _cpu) : m_link(std::move(init_linkage_info)) , m_cpu(std::move(_cpu)) @@ -321,58 +364,54 @@ jit_compiler::jit_compiler(std::unordered_map init_ } m_engine->RegisterJITEventListener(&s_listener); + + LOG_SUCCESS(GENERAL, "LLVM: JIT initialized (%s)", m_cpu); } -void jit_compiler::load(std::unique_ptr module, std::unique_ptr object) +void jit_compiler::add(std::unique_ptr module, const std::string& path) { - s_listener.path.clear(); - - auto* module_ptr = module.get(); + ObjectCache cache{path}; + m_engine->setObjectCache(&cache); + const auto ptr = module.get(); m_engine->addModule(std::move(module)); - m_engine->addObjectFile(std::move(object)); - m_engine->finalizeObject(); + m_engine->generateCodeForModule(ptr); + m_engine->setObjectCache(nullptr); - m_map.clear(); - - for (auto& func : module_ptr->functions()) + for (auto& func : ptr->functions()) { - const std::string& name = func.getName(); - - if (!m_link.count(name)) - { - // Register compiled function - m_map[name] = m_engine->getFunctionAddress(name); - } - } -} - -void jit_compiler::make(std::unique_ptr module, std::string path) -{ - s_listener.path = std::move(path); - - auto* module_ptr = module.get(); - - m_engine->addModule(std::move(module)); - m_engine->finalizeObject(); - - m_map.clear(); - - for (auto& func : module_ptr->functions()) - { - if (!func.empty()) - { - const std::string& name = func.getName(); - - // Register compiled function - m_map[name] = m_engine->getFunctionAddress(name); - } - // Delete IR to lower memory consumption func.deleteBody(); } } +void 
jit_compiler::fin(const std::string& path) +{ + m_engine->finalizeObject(); +} + +void jit_compiler::add(std::unordered_map data) +{ + std::size_t size = 0; + + for (auto&& pair : data) + { + size += ::align(pair.second.size(), 16); + } + + utils::memory_commit(s_next, size, utils::protection::wx); + std::memset(s_next, 0xc3, ::align(size, 4096)); + + for (auto&& pair : data) + { + std::memcpy(s_next, pair.second.data(), pair.second.size()); + m_link.emplace(pair.first, (u64)s_next); + s_next = (void*)::align((u64)s_next + pair.second.size(), 16); + } + + s_next = (void*)::align((u64)s_next, 4096); +} + jit_compiler::~jit_compiler() { } diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 291c76691f..eeb3a2a7c0 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -28,27 +28,30 @@ class jit_compiler final // Execution instance std::unique_ptr m_engine; - // Compiled functions - std::unordered_map m_map; - // Linkage cache - std::unordered_map m_link; + std::unordered_map m_link; + + // Compiled functions + std::unordered_map m_map; // Arch std::string m_cpu; public: - jit_compiler(std::unordered_map, std::string _cpu); + jit_compiler(std::unordered_map, std::string _cpu); ~jit_compiler(); - // Compile module - void make(std::unique_ptr, std::string); + // Add module + void add(std::unique_ptr module, const std::string& path); - // Load object - void load(std::unique_ptr, std::unique_ptr); + // Finalize + void fin(const std::string& path); + + // Add functions directly (name -> code) + void add(std::unordered_map); // Get compiled function address - std::uintptr_t get(const std::string& name) const + u64 get(const std::string& name) const { const auto found = m_map.find(name); @@ -57,7 +60,7 @@ public: return found->second; } - return 0; + return m_engine->getFunctionAddress(name); } // Get CPU info diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp index 42c2713ff4..cdc9b02139 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.cpp +++ 
b/rpcs3/Emu/Cell/PPUAnalyser.cpp @@ -324,6 +324,205 @@ namespace ppu_patterns abort1, abort2, }; + + const ppu_pattern get_context[] + { + ADDI(r3, r3, 0xf), + CLRRDI(r3, r3, 4), + STD(r1, r3, 0), + STD(r2, r3, 8), + STD(r14, r3, 0x18), + STD(r15, r3, 0x20), + STD(r16, r3, 0x28), + STD(r17, r3, 0x30), + STD(r18, r3, 0x38), + STD(r19, r3, 0x40), + STD(r20, r3, 0x48), + STD(r21, r3, 0x50), + STD(r22, r3, 0x58), + STD(r23, r3, 0x60), + STD(r24, r3, 0x68), + STD(r25, r3, 0x70), + STD(r26, r3, 0x78), + STD(r27, r3, 0x80), + STD(r28, r3, 0x88), + STD(r29, r3, 0x90), + STD(r30, r3, 0x98), + STD(r31, r3, 0xa0), + MFLR(r0), + STD(r0, r3, 0xa8), + 0x7c000026, // mfcr r0 + STD(r0, r3, 0xb0), + STFD(f14, r3, 0xb8), + STFD(f15, r3, 0xc0), + STFD(F16, r3, 0xc8), + STFD(f17, r3, 0xd0), + STFD(f18, r3, 0xd8), + STFD(f19, r3, 0xe0), + STFD(f20, r3, 0xe8), + STFD(f21, r3, 0xf0), + STFD(f22, r3, 0xf8), + STFD(f23, r3, 0x100), + STFD(f24, r3, 0x108), + STFD(f25, r3, 0x110), + STFD(f26, r3, 0x118), + STFD(f27, r3, 0x120), + STFD(f28, r3, 0x128), + STFD(f29, r3, 0x130), + STFD(f30, r3, 0x138), + STFD(f31, r3, 0x140), + 0x7c0042A6, // mfspr r0, vrsave + STD(r0, r3, 0x148), + ADDI(r4, r3, 0x150), + ADDI(r5, r3, 0x160), + ADDI(r6, r3, 0x170), + ADDI(r7, r3, 0x180), + STVX(v20, r0, r4), + STVX(v21, r0, r5), + STVX(v22, r0, r6), + STVX(v23, r0, r7), + ADDI(r4, r4, 0x40), + ADDI(r5, r5, 0x40), + ADDI(r6, r6, 0x40), + ADDI(r7, r7, 0x40), + STVX(v24, r0, r4), + STVX(v25, r0, r5), + STVX(v26, r0, r6), + STVX(v27, r0, r7), + ADDI(r4, r4, 0x40), + ADDI(r5, r5, 0x40), + ADDI(r6, r6, 0x40), + ADDI(r7, r7, 0x40), + STVX(v28, r0, r4), + STVX(v29, r0, r5), + STVX(v30, r0, r6), + STVX(v31, r0, r7), + LI(r3, 0), + BLR(), + }; + + const ppu_pattern set_context[] + { + ADDI(r3, r3, 0xf), + CLRRDI(r3, r3, 4), + LD(r1, r3, 0), + LD(r2, r3, 8), + LD(r14, r3, 0x18), + LD(r15, r3, 0x20), + LD(r16, r3, 0x28), + LD(r17, r3, 0x30), + LD(r18, r3, 0x38), + LD(r19, r3, 0x40), + LD(r20, r3, 0x48), + LD(r21, r3, 0x50), 
+ LD(r22, r3, 0x58), + LD(r23, r3, 0x60), + LD(r24, r3, 0x68), + LD(r25, r3, 0x70), + LD(r26, r3, 0x78), + LD(r27, r3, 0x80), + LD(r28, r3, 0x88), + LD(r29, r3, 0x90), + LD(r30, r3, 0x98), + LD(r31, r3, 0xa0), + LD(r0, r3, 0xa8), + MTLR(r0), + LD(r0, r3, 0xb0), + 0x7c101120, // mtocrf 1, r0 + 0x7c102120, // mtocrf 2, r0 + 0x7c104120, // mtocrf 4, r0 + 0x7c108120, // mtocrf 8, r0 + 0x7c110120, // mtocrf 0x10, r0 + 0x7c120120, // mtocrf 0x20, r0 + 0x7c140120, // mtocrf 0x40, r0 + 0x7c180120, // mtocrf 0x80, r0 + LFD(f14, r3, 0xb8), + LFD(f15, r3, 0xc0), + LFD(F16, r3, 0xc8), + LFD(f17, r3, 0xd0), + LFD(f18, r3, 0xd8), + LFD(f19, r3, 0xe0), + LFD(f20, r3, 0xe8), + LFD(f21, r3, 0xf0), + LFD(f22, r3, 0xf8), + LFD(f23, r3, 0x100), + LFD(f24, r3, 0x108), + LFD(f25, r3, 0x110), + LFD(f26, r3, 0x118), + LFD(f27, r3, 0x120), + LFD(f28, r3, 0x128), + LFD(f29, r3, 0x130), + LFD(f30, r3, 0x138), + LFD(f31, r3, 0x140), + LD(r0, r3, 0x148), + 0x7c0043A6, //mtspr vrsave, r0 + ADDI(r5, r3, 0x150), + ADDI(r6, r3, 0x160), + ADDI(r7, r3, 0x170), + ADDI(r8, r3, 0x180), + LVX(v20, r0, r5), + LVX(v21, r0, r6), + LVX(v22, r0, r7), + LVX(v23, r0, r8), + ADDI(r5, r5, 0x40), + ADDI(r6, r6, 0x40), + ADDI(r7, r7, 0x40), + ADDI(r8, r8, 0x40), + LVX(v24, r0, r5), + LVX(v25, r0, r6), + LVX(v26, r0, r7), + LVX(v27, r0, r8), + ADDI(r5, r5, 0x40), + ADDI(r6, r6, 0x40), + ADDI(r7, r7, 0x40), + ADDI(r8, r8, 0x40), + LVX(v28, r0, r5), + LVX(v29, r0, r6), + LVX(v30, r0, r7), + LVX(v31, r0, r8), + LI(r3, 0), + 0x7c041810, // subfc r0, r4, r3 + 0x7c640194, // addze r3, r4 + BLR(), + }; + + const ppu_pattern x26c[] + { + LI(r9, 0), + STD(r9, r6, 0), + MR(r1, r6), + STDU(r1, r1, -0x70), + STD(r9, r1, 0), + CLRLDI(r7, r3, 32), + LWZ(r0, r7, 0), + MTCTR(r0), + LWZ(r2, r7, 4), + MR(r3, r4), + MR(r4, r5), + BCTRL(), + }; + + const ppu_pattern x2a0[] + { + MR(r8, r1), + 0x7d212850, // subf r9, r1, r5 + 0x7c21496a, // stdux r1, r1, r9 + MFLR(r0), + STD(r0, r8, 0x10), + STD(r2, r1, 0x28), + CLRLDI(r7, r3, 32), + 
LWZ(r0, r7, 0), + MTCTR(r0), + LWZ(r2, r7, 4), + MR(r3, r4), + BCTRL(), + LD(r2, r1, 0x28), + LD(r9, r1, 0x0), + LD(r0, r9, 0x10), + MTLR(r0), + MR(r1, r9), + BLR(), + }; } std::vector ppu_analyse(const std::vector>& segs, const std::vector>& secs, u32 lib_toc, u32 entry) @@ -374,6 +573,7 @@ std::vector ppu_analyse(const std::vector>& se func_queue.emplace_back(func); func.addr = addr; func.toc = toc; + func.name = fmt::format("__0x%x", func.addr); LOG_TRACE(PPU, "Function 0x%x added (toc=0x%x)", addr, toc); return func; }; @@ -1009,7 +1209,7 @@ std::vector ppu_analyse(const std::vector>& se } func.attr += ppu_attr::no_size; - add_block(iaddr); + add_block(jt_addr); block_queue.clear(); } else @@ -1031,6 +1231,20 @@ std::vector ppu_analyse(const std::vector>& se block.second = _ptr.addr() - block.first; break; } + else if (type == ppu_itype::SC) + { + add_block(_ptr.addr()); + block.second = _ptr.addr() - block.first; + break; + } + else if (type == ppu_itype::STDU && test(func.attr, ppu_attr::no_size) && (op.opcode == *_ptr || *_ptr == ppu_instructions::BLR())) + { + // Hack + LOG_SUCCESS(PPU, "[0x%x] Instruction repetition: 0x%08x", iaddr, op.opcode); + add_block(_ptr.addr()); + block.second = _ptr.addr() - block.first; + break; + } } } @@ -1228,7 +1442,7 @@ std::vector ppu_analyse(const std::vector>& se for (auto&& pair : funcs) { auto& func = pair.second; - LOG_TRACE(PPU, "Function __0x%x (size=0x%x, toc=0x%x, attr %#x)", func.addr, func.size, func.toc, func.attr); + LOG_TRACE(PPU, "Function %s (size=0x%x, toc=0x%x, attr %#x)", func.name, func.size, func.toc, func.attr); result.emplace_back(std::move(func)); } diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index e82445e823..ac2f04aec8 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -17,6 +17,9 @@ enum class ppu_attr : u32 uses_r0, entry_point, complex_stack, + special, + //call_use_context, + //call_trace, __bitset_enum_max }; @@ -35,6 +38,7 @@ struct 
ppu_function std::map blocks; // Basic blocks: addr -> size std::set calls; // Set of called functions std::set callers; + std::string name; // Function name }; // PPU Module Information diff --git a/rpcs3/Emu/Cell/PPUFunction.h b/rpcs3/Emu/Cell/PPUFunction.h index 80405d92b8..0e476af4bc 100644 --- a/rpcs3/Emu/Cell/PPUFunction.h +++ b/rpcs3/Emu/Cell/PPUFunction.h @@ -5,13 +5,14 @@ using ppu_function_t = bool(*)(ppu_thread&); // BIND_FUNC macro "converts" any appropriate HLE function to ppu_function_t, binding it to PPU thread context. -#define BIND_FUNC(func) (static_cast([](ppu_thread& ppu) -> bool {\ +#define BIND_FUNC(func, ...) (static_cast([](ppu_thread& ppu) -> bool {\ const auto old_f = ppu.last_function;\ ppu.last_function = #func;\ ppu_func_detail::do_call(ppu, func);\ - ppu.test_state();\ ppu.last_function = old_f;\ - return true;\ + ppu.cia += 4;\ + __VA_ARGS__;\ + return false;\ })) struct ppu_va_args_t diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index c532c398c1..6596b27ea6 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -261,7 +261,6 @@ static add_flags_result_t add64_flags(u64 a, u64 b, bool c) extern u64 get_timebased_time(); extern void ppu_execute_syscall(ppu_thread& ppu, u64 code); -extern void ppu_execute_function(ppu_thread& ppu, u32 index); extern u32 ppu_lwarx(ppu_thread& ppu, u32 addr); extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr); @@ -2875,7 +2874,7 @@ bool ppu_interpreter::SC(ppu_thread& ppu, ppu_opcode_t op) } ppu_execute_syscall(ppu, ppu.gpr[11]); - return true; + return false; } bool ppu_interpreter::B(ppu_thread& ppu, ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index a77550006b..f6e9802d8d 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -621,12 +621,6 @@ static void ppu_load_imports(const std::shared_ptr& link, u32 std::shared_ptr ppu_load_prx(const ppu_prx_object& elf, const 
std::string& name) { - if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm && name == "libfiber.sprx") - { - LOG_FATAL(PPU, "libfiber.sprx is not compatible with PPU LLVM Recompiler. Use PPU Interpreter."); - Emu.Pause(); - } - std::vector> segments; std::vector> sections; diff --git a/rpcs3/Emu/Cell/PPUModule.h b/rpcs3/Emu/Cell/PPUModule.h index 3df05d87da..8c88b839a9 100644 --- a/rpcs3/Emu/Cell/PPUModule.h +++ b/rpcs3/Emu/Cell/PPUModule.h @@ -236,7 +236,7 @@ inline RT ppu_execute_function_or_callback(const char* name, ppu_thread& ppu, Ar #define CALL_FUNC(ppu, func, ...) ppu_execute_function_or_callback(#func, ppu, __VA_ARGS__) -#define REG_FNID(module, nid, func) ppu_module_manager::register_static_function(#module, ppu_select_name(#func, nid), BIND_FUNC(func), ppu_generate_id(nid)) +#define REG_FNID(module, nid, func) ppu_module_manager::register_static_function(#module, ppu_select_name(#func, nid), BIND_FUNC(func, ppu.cia = (u32)ppu.lr & ~3), ppu_generate_id(nid)) #define REG_FUNC(module, func) REG_FNID(module, #func, func) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index acc7c7e204..0dd858f106 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -102,6 +102,7 @@ const ppu_decoder s_ppu_interpreter_fast; extern void ppu_initialize(); extern void ppu_initialize(const ppu_module& info); +static void ppu_initialize2(const ppu_module& info); extern void ppu_execute_syscall(ppu_thread& ppu, u64 code); // Get pointer to executable cache @@ -371,7 +372,7 @@ std::string ppu_thread::dump() const fmt::append(ret, "XER = [CA=%u | OV=%u | SO=%u | CNT=%u]\n", xer.ca, xer.ov, xer.so, xer.cnt); fmt::append(ret, "VSCR = [SAT=%u | NJ=%u]\n", sat, nj); fmt::append(ret, "FPSCR = [FL=%u | FG=%u | FE=%u | FU=%u]\n", fpscr.fl, fpscr.fg, fpscr.fe, fpscr.fu); - fmt::append(ret, "\nCall stack:\n=========\n0x%08x (0x0) called\n", g_cfg.core.ppu_decoder == ppu_decoder_type::llvm ? 
0 : cia); + fmt::append(ret, "\nCall stack:\n=========\n0x%08x (0x0) called\n", cia); // Determine stack range u32 stack_ptr = static_cast(gpr[1]); @@ -474,7 +475,11 @@ void ppu_thread::exec_task() { if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm) { - reinterpret_cast(static_cast(ppu_ref(cia)))(*this); + while (!test(state, cpu_flag::ret + cpu_flag::exit + cpu_flag::stop)) + { + reinterpret_cast(static_cast(ppu_ref(cia)))(*this); + } + return; } @@ -769,19 +774,21 @@ extern __m128i sse_cellbe_lvrx(u64 addr); extern void sse_cellbe_stvlx(u64 addr, __m128i a); extern void sse_cellbe_stvrx(u64 addr, __m128i a); -[[noreturn]] static void ppu_trap(u64 addr) +[[noreturn]] static void ppu_trap(ppu_thread& ppu, u64 addr) { + ppu.cia = ::narrow(addr); fmt::throw_exception("Trap! (0x%llx)", addr); } -[[noreturn]] static void ppu_unreachable(u64 addr) +[[noreturn]] static void ppu_error(ppu_thread& ppu, u64 addr, u32 op) { - fmt::throw_exception("Unreachable! (0x%llx)", addr); + ppu.cia = ::narrow(addr); + fmt::throw_exception("Unknown/Illegal opcode 0x08x (0x%llx)", op, addr); } static void ppu_check(ppu_thread& ppu, u64 addr) { - ppu.cia = addr; + ppu.cia = ::narrow(addr); ppu.test_state(); } @@ -867,6 +874,141 @@ static bool adde_carry(u64 a, u64 b, bool c) #endif } +static std::string ppu_context_prologue() +{ + std::string c; + //c += "\xCC"; +#ifndef _WIN32 + c += "\x48\x89\xF9"; // mov rcx, rdi +#endif + c += "\x48\xB8"; // mov rax, imm64 + uptr ptr = (uptr)&vm::g_base_addr; + c.append((const char*)&ptr, 8); + c += "\x48\x8B"; // mov rax, [rax] + c += '\0'; + c += "\x48\x03\x41"; // add rax, [ppu+r3] + c += char(offset32(&ppu_thread::gpr, 3)); + c += "\x48\x83\xC0\x0F"; // add rax, 15 + c += "\x48\x83\xE0\xF0"; // and rax, -16 + return c; +} + +const auto ppu_get_context = []() -> std::string +{ + std::string c = ppu_context_prologue(); + c += "\x48\x8B\x51"; // mov rdx, [rcx+r1] + c += char(offset32(&ppu_thread::gpr, 1)); + c += "\x48\x89\x10"; // mov [rax], rdx 
+ c += "\x48\x8B\x51"; // mov rdx, [rcx+r2] + c += char(offset32(&ppu_thread::gpr, 2)); + c += "\x48\x89\x50\x08"; // mov [rax+8], rdx + c += "\x48\x8B\x54\x24\xF8"; // mov rdx, [rsp-8] + c += "\x48\x89\x50\x10"; // mov [rax+0x10], rdx + + c += "\x48\x89\x60\x18"; // mov [rax+0x18], rsp + c += "\x48\x89\x58\x20"; // mov [rax+0x20], rbx + c += "\x48\x89\x68\x28"; // mov [rax+0x28], rbp +#ifdef _WIN32 + c += "\x48\x89\x70\x30"; // mov [rax+0x30], rsi + c += "\x48\x89\x78\x38"; // mov [rax+0x38], rdi +#endif + c += "\x4C\x89\x60\x40"; // mov [rax+0x40], r12 + c += "\x4C\x89\x68\x48"; // mov [rax+0x48], r13 + c += "\x4C\x89\x70\x50"; // mov [rax+0x50], r14 + c += "\x4C\x89\x78\x58"; // mov [rax+0x58], r15 + +#ifdef _WIN32 + c += "\x66\x0F\x7F\x70\x60"; // movdqa [rax+0x60], xmm6 + c += "\x66\x0F\x7F\x78\x70"; // movdqa [rax+0x70], xmm7 + c += "\x66\x44\x0F\x7F\x80\x80\x00\x00\x00"s; // ... + c += "\x66\x44\x0F\x7F\x88\x90\x00\x00\x00"s; + c += "\x66\x44\x0F\x7F\x90\xA0\x00\x00\x00"s; + c += "\x66\x44\x0F\x7F\x98\xB0\x00\x00\x00"s; + c += "\x66\x44\x0F\x7F\xA0\xC0\x00\x00\x00"s; + c += "\x66\x44\x0F\x7F\xA8\xD0\x00\x00\x00"s; + c += "\x66\x44\x0F\x7F\xB0\xE0\x00\x00\x00"s; + c += "\x66\x44\x0F\x7F\xB8\xF0\x00\x00\x00"s; +#endif + + c += "\x48\xC7\x41"; // mov [rcx+r3], 0 + c += char(offset32(&ppu_thread::gpr, 3)); + c.append(4, '\0'); + //c += "\xCC"; + c += "\xC3"; // ret + return c; +}(); + +const auto ppu_set_context = []() -> std::string +{ + std::string c = ppu_context_prologue(); + c += "\xCC"; + c += "\x48\x8B\x10"; // mov rdx, [rax] + c += "\x48\x89\x51"; // mov [rcx+r1], rdx + c += char(offset32(&ppu_thread::gpr, 1)); + + c += "\x48\x8B\x50\x08"; // mov rdx, [rax+8] + c += "\x48\x89\x51"; // mov [rcx+r2], rdx + c += char(offset32(&ppu_thread::gpr, 2)); + + c += "\x48\x8B\x60\x18"; // mov rsp, [rax+0x18] + c += "\x48\x8B\x58\x20"; // mov rbx, [rax+0x20] + c += "\x48\x8B\x68\x28"; // mov rbp, [rax+0x28] +#ifdef _WIN32 + c += "\x48\x8B\x70\x30"; // mov rsi, 
[rax+0x30] + c += "\x48\x8B\x78\x38"; // mov rdi, [rax+0x38] +#endif + c += "\x4C\x8B\x60\x40"; // mov r12, [rax+0x40] + c += "\x4C\x8B\x68\x48"; // mov r13, [rax+0x48] + c += "\x4C\x8B\x70\x50"; // mov r14, [rax+0x50] + c += "\x4C\x8B\x78\x58"; // mov r15, [rax+0x58] + +#ifdef _WIN32 + c += "\x66\x0F\x6F\x70\x60"; // movdqa xmm6, [rax+0x60] + c += "\x66\x0F\x6F\x78\x70"; // movdqa xmm7, [rax+0x70] + c += "\x66\x44\x0F\x6F\x80\x80\x00\x00\x00"s; // ... + c += "\x66\x44\x0F\x6F\x88\x90\x00\x00\x00"s; + c += "\x66\x44\x0F\x6F\x90\xA0\x00\x00\x00"s; + c += "\x66\x44\x0F\x6F\x98\xB0\x00\x00\x00"s; + c += "\x66\x44\x0F\x6F\xA0\xC0\x00\x00\x00"s; + c += "\x66\x44\x0F\x6F\xA8\xD0\x00\x00\x00"s; + c += "\x66\x44\x0F\x6F\xB0\xE0\x00\x00\x00"s; + c += "\x66\x44\x0F\x6F\xB8\xF0\x00\x00\x00"s; +#endif + + c += "\x48\x8B\x50\x10"; // mov rdx, [rax+0x10] + c += "\x48\x89\x54\x24\xF8"; // mov [rsp-8], rdx + c += "\x48\x8B\x51"; // mov rdx, [rcx+r4] + c += char(offset32(&ppu_thread::gpr, 4)); + c += "\x48\x85\xD2"; // test rdx, rdx + c += "\x0F\x94\xC2"; // setz dl + c += "\x48\x0F\xB6\xD2"; // movzx rdx, dl + c += "\x48\x89\x51"; // mov [rcx+r3], rdx + c += char(offset32(&ppu_thread::gpr, 3)); + c += "\xC3"; // ret + return c; +}(); + +const auto ppu_use_context = []() -> std::string +{ + std::string c; + c += "\x48\xB8"; // mov rax, imm64 + uptr ptr = (uptr)&vm::g_exec_addr; + c.append((const char*)&ptr, 8); + c += "\x48\x8B\x20"; // mov rsp, [rax] +#ifdef _WIN32 + c += "\x48\x01\xD4"; // add rsp,rdx +#else + c += "\x48\x01\xFC"; // add rsp,rsi +#endif + //c += "\x48\x83\xE4\xE0"; // and rsp, -0x20 +#ifdef _WIN32 + c += "\x41\xFF\xD0"; // call r8 +#else + c += "\xFF\xD2"; // call rdx +#endif + return c; +}(); + extern void ppu_initialize() { const auto _funcs = fxm::withdraw>(); @@ -876,25 +1018,8 @@ extern void ppu_initialize() return; } - std::size_t fpos = 0; - - while (fpos < _funcs->size()) - { - // Split module (TODO) - ppu_module info; - info.name = fmt::format("%05X", 
_funcs->at(fpos).addr); - info.funcs.reserve(2000); - - while (fpos < _funcs->size() && info.funcs.size() < 2000) - { - info.funcs.emplace_back(std::move(_funcs->at(fpos++))); - } - - if (!Emu.IsStopped()) - { - ppu_initialize(info); - } - } + // Initialize main module + ppu_initialize({"", std::move(*_funcs)}); std::vector prx_list; @@ -903,12 +1028,10 @@ extern void ppu_initialize() prx_list.emplace_back(&prx); }); + // Initialize preloaded libraries for (auto ptr : prx_list) { - if (!Emu.IsStopped()) - { - ppu_initialize(*ptr); - } + ppu_initialize(*ptr); } } @@ -936,6 +1059,136 @@ extern void ppu_initialize(const ppu_module& info) return; } +#ifdef LLVM_AVAILABLE + using namespace llvm; + + // Initialize JIT compiler + if (!fxm::check()) + { + std::unordered_map link_table + { + { "__mptr", (u64)&vm::g_base_addr }, + { "__cptr", (u64)&vm::g_exec_addr }, + { "__trap", (u64)&ppu_trap }, + { "__error", (u64)&ppu_error }, + { "__check", (u64)&ppu_check }, + { "__trace", (u64)&ppu_trace }, + { "__syscall", (u64)&ppu_execute_syscall }, + { "__get_tb", (u64)&get_timebased_time }, + { "__lwarx", (u64)&ppu_lwarx }, + { "__ldarx", (u64)&ppu_ldarx }, + { "__stwcx", (u64)&ppu_stwcx }, + { "__stdcx", (u64)&ppu_stdcx }, + { "__vexptefp", (u64)&sse_exp2_ps }, + { "__vlogefp", (u64)&sse_log2_ps }, + { "__vperm", (u64)&sse_altivec_vperm }, + { "__lvsl", (u64)&sse_altivec_lvsl }, + { "__lvsr", (u64)&sse_altivec_lvsr }, + { "__lvlx", (u64)&sse_cellbe_lvlx }, + { "__lvrx", (u64)&sse_cellbe_lvrx }, + { "__stvlx", (u64)&sse_cellbe_stvlx }, + { "__stvrx", (u64)&sse_cellbe_stvrx }, + }; + + for (u64 index = 0; index < 1024; index++) + { + if (auto sc = ppu_get_syscall(index)) + { + link_table.emplace(ppu_get_syscall_name(index), (u64)sc); + } + } + + fxm::make(std::move(link_table), g_cfg.core.llvm_cpu); + } +#endif + + // Split module into fragments <= 1 MiB + std::size_t fpos = 0; + + ppu_module part; + part.funcs.reserve(65536); + + while (fpos < info.funcs.size()) + { + const auto 
fstart = fpos; + + std::size_t bsize = 0; + + part.funcs.clear(); + + while (fpos < info.funcs.size()) + { + auto& func = info.funcs[fpos]; + + if (bsize + func.size > 1024 * 1024 && bsize) + { + break; + } + + for (auto&& block : func.blocks) + { + bsize += block.second; + + // Also split functions blocks into functions (TODO) + ppu_function entry; + entry.addr = block.first; + entry.size = block.second; + entry.toc = func.toc; + fmt::append(entry.name, "__0x%x", block.first); + part.funcs.emplace_back(std::move(entry)); + } + + fpos++; + } + + part.name.clear(); + + if (info.name.size()) + { + part.name += '-'; + part.name += info.name; + } + + if (fstart) + { + fmt::append(part.name, "+%06X", info.funcs.at(fstart).addr); + } + else if (fpos < info.funcs.size()) + { + part.name.append("+0"); + } + + ppu_initialize2(part); + } + +#ifdef LLVM_AVAILABLE + const auto jit = fxm::check_unlocked(); + + jit->fin(Emu.GetCachePath()); + + // Get and install function addresses + for (const auto& func : info.funcs) + { + if (!func.size) continue; + + for (const auto& block : func.blocks) + { + if (block.second) + { + ppu_ref(block.first) = ::narrow(jit->get(fmt::format("__0x%x", block.first))); + } + } + } +#endif +} + +static void ppu_initialize2(const ppu_module& module_part) +{ + if (Emu.IsStopped()) + { + return; + } + // Compute module hash std::string obj_name; { @@ -943,7 +1196,7 @@ extern void ppu_initialize(const ppu_module& info) u8 output[20]; sha1_starts(&ctx); - for (const auto& func : info.funcs) + for (const auto& func : module_part.funcs) { if (func.size == 0) { @@ -964,59 +1217,19 @@ extern void ppu_initialize(const ppu_module& info) sha1_update(&ctx, vm::ps3::_ptr(block.first), block.second); } + + sha1_update(&ctx, vm::ps3::_ptr(func.addr), func.size); } sha1_finish(&ctx, output); // Version, module name and hash: vX-liblv2.sprx-0123456789ABCDEF.obj - fmt::append(obj_name, "v1-%s-%016X.obj", info.name, reinterpret_cast&>(output)); + fmt::append(obj_name, 
"b1%s-%016X.obj", module_part.name, reinterpret_cast&>(output)); } #ifdef LLVM_AVAILABLE using namespace llvm; - if (!fxm::check()) - { - std::unordered_map link_table - { - { "__mptr", (u64)&vm::g_base_addr }, - { "__cptr", (u64)&vm::g_exec_addr }, - { "__trap", (u64)&ppu_trap }, - { "__end", (u64)&ppu_unreachable }, - { "__check", (u64)&ppu_check }, - { "__trace", (u64)&ppu_trace }, - { "__syscall", (u64)&ppu_execute_syscall }, - { "__get_tb", (u64)&get_timebased_time }, - { "__lwarx", (u64)&ppu_lwarx }, - { "__ldarx", (u64)&ppu_ldarx }, - { "__stwcx", (u64)&ppu_stwcx }, - { "__stdcx", (u64)&ppu_stdcx }, - { "__adde_get_ca", (u64)&adde_carry }, - { "__vexptefp", (u64)&sse_exp2_ps }, - { "__vlogefp", (u64)&sse_log2_ps }, - { "__vperm", (u64)&sse_altivec_vperm }, - { "__lvsl", (u64)&sse_altivec_lvsl }, - { "__lvsr", (u64)&sse_altivec_lvsr }, - { "__lvlx", (u64)&sse_cellbe_lvlx }, - { "__lvrx", (u64)&sse_cellbe_lvrx }, - { "__stvlx", (u64)&sse_cellbe_stvlx }, - { "__stvrx", (u64)&sse_cellbe_stvrx }, - }; - - for (u64 index = 0; index < 1024; index++) - { - if (auto sc = ppu_get_syscall(index)) - { - link_table.emplace(ppu_get_syscall_name(index), (u64)sc); - } - } - - const auto jit = fxm::make(std::move(link_table), g_cfg.core.llvm_cpu); - - LOG_SUCCESS(PPU, "LLVM: JIT initialized (%s)", jit->cpu()); - } - - // Initialize compiler const auto jit = fxm::get(); // Create LLVM module @@ -1030,216 +1243,186 @@ extern void ppu_initialize(const ppu_module& info) // Define some types const auto _void = Type::getVoidTy(g_llvm_ctx); - const auto _func = FunctionType::get(_void, { translator->GetContextType()->getPointerTo() }, false); + const auto _func = FunctionType::get(_void, {translator->GetContextType()->getPointerTo()}, false); // Initialize function list - for (const auto& func : info.funcs) + for (const auto& func : module_part.funcs) { if (func.size) { - const auto f = cast(module->getOrInsertFunction(fmt::format("__0x%x", func.addr), _func)); + const auto f = 
cast(module->getOrInsertFunction(func.name, _func)); f->addAttribute(1, Attribute::NoAlias); - translator->AddFunction(func.addr, f); } } - if (fs::file cached{Emu.GetCachePath() + obj_name}) - { - std::string buf; - buf.reserve(cached.size()); - cached.read(buf, cached.size()); - auto buffer = llvm::MemoryBuffer::getMemBuffer(buf, obj_name); - auto result = llvm::object::ObjectFile::createObjectFile(*buffer); - - if (result) - { - jit->load(std::move(module), std::move(result.get())); + std::shared_ptr dlg; - for (const auto& func : info.funcs) + // Check cached file + if (!fs::is_file(Emu.GetCachePath() + obj_name)) + { + legacy::FunctionPassManager pm(module.get()); + + // Basic optimizations + pm.add(createCFGSimplificationPass()); + pm.add(createPromoteMemoryToRegisterPass()); + pm.add(createEarlyCSEPass()); + pm.add(createTailCallEliminationPass()); + pm.add(createReassociatePass()); + pm.add(createInstructionCombiningPass()); + //pm.add(createBasicAAWrapperPass()); + //pm.add(new MemoryDependenceAnalysis()); + pm.add(createLICMPass()); + pm.add(createLoopInstSimplifyPass()); + pm.add(createNewGVNPass()); + pm.add(createDeadStoreEliminationPass()); + pm.add(createSCCPPass()); + pm.add(createInstructionCombiningPass()); + pm.add(createInstructionSimplifierPass()); + pm.add(createAggressiveDCEPass()); + pm.add(createCFGSimplificationPass()); + //pm.add(createLintPass()); // Check + + // Initialize message dialog + dlg = Emu.GetCallbacks().get_msg_dialog(); + dlg->type.se_normal = true; + dlg->type.bg_invisible = true; + dlg->type.progress_bar_count = 1; + dlg->on_close = [](s32 status) + { + Emu.CallAfter([]() { - if (func.size) - { - const std::uintptr_t uptr = jit->get(fmt::format("__0x%x", func.addr)); - ppu_ref(func.addr) = ::narrow(uptr); - } - } - - LOG_SUCCESS(PPU, "LLVM: Loaded executable: %s", obj_name); - return; - } - - LOG_ERROR(PPU, "LLVM: Failed to load executable: %s", obj_name); - } - - legacy::FunctionPassManager pm(module.get()); - - // Basic 
optimizations - pm.add(createCFGSimplificationPass()); - pm.add(createPromoteMemoryToRegisterPass()); - pm.add(createEarlyCSEPass()); - pm.add(createTailCallEliminationPass()); - pm.add(createReassociatePass()); - pm.add(createInstructionCombiningPass()); - //pm.add(createBasicAAWrapperPass()); - //pm.add(new MemoryDependenceAnalysis()); - pm.add(createLICMPass()); - pm.add(createLoopInstSimplifyPass()); - pm.add(createNewGVNPass()); - pm.add(createDeadStoreEliminationPass()); - pm.add(createSCCPPass()); - pm.add(createInstructionCombiningPass()); - pm.add(createInstructionSimplifierPass()); - pm.add(createAggressiveDCEPass()); - pm.add(createCFGSimplificationPass()); - //pm.add(createLintPass()); // Check - - // Initialize message dialog - const auto dlg = Emu.GetCallbacks().get_msg_dialog(); - dlg->type.se_normal = true; - dlg->type.bg_invisible = true; - dlg->type.progress_bar_count = 1; - dlg->on_close = [](s32 status) - { - Emu.CallAfter([]() - { - // Abort everything - Emu.Stop(); - }); - }; - - Emu.CallAfter([=]() - { - dlg->Create("Compiling PPU executable: " + info.name + "\nPlease wait..."); - }); - - // Translate functions - for (size_t fi = 0, fmax = info.funcs.size(); fi < fmax; fi++) - { - if (Emu.IsStopped()) - { - LOG_SUCCESS(PPU, "LLVM: Translation cancelled"); - return; - } - - if (info.funcs[fi].size) - { - // Update dialog - Emu.CallAfter([=, max = info.funcs.size()]() - { - dlg->ProgressBarSetMsg(0, fmt::format("Compiling %u of %u", fi + 1, fmax)); - - if (fi * 100 / fmax != (fi + 1) * 100 / fmax) - dlg->ProgressBarInc(0, 1); + // Abort everything + Emu.Stop(); }); + }; - // Translate - const auto func = translator->TranslateToIR(info.funcs[fi], vm::_ptr(info.funcs[fi].addr)); + Emu.CallAfter([=]() + { + dlg->Create("Compiling PPU module " + obj_name + "\nPlease wait..."); + }); - // Run optimization passes - pm.run(*func); - - const auto _syscall = module->getFunction("__syscall"); - - for (auto i = inst_begin(*func), end = inst_end(*func); i 
!= end;) + // Translate functions + for (size_t fi = 0, fmax = module_part.funcs.size(); fi < fmax; fi++) + { + if (Emu.IsStopped()) { - const auto inst = &*i++; + LOG_SUCCESS(PPU, "LLVM: Translation cancelled"); + return; + } - if (const auto ci = dyn_cast(inst)) + if (module_part.funcs[fi].size && !test(module_part.funcs[fi].attr & ppu_attr::special)) + { + // Update dialog + Emu.CallAfter([=, max = module_part.funcs.size()]() { - const auto cif = ci->getCalledFunction(); - const auto op1 = ci->getNumArgOperands() > 1 ? ci->getArgOperand(1) : nullptr; + dlg->ProgressBarSetMsg(0, fmt::format("Compiling %u of %u", fi + 1, fmax)); - if (cif == _syscall && op1 && isa(op1)) + if (fi * 100 / fmax != (fi + 1) * 100 / fmax) + dlg->ProgressBarInc(0, 1); + }); + + // Translate + const auto func = translator->Translate(module_part.funcs[fi]); + + // Run optimization passes + pm.run(*func); + + const auto _syscall = module->getFunction("__syscall"); + + for (auto i = inst_begin(*func), end = inst_end(*func); i != end;) + { + const auto inst = &*i++; + + if (const auto ci = dyn_cast(inst)) { - // Try to determine syscall using the value from r11 (requires constant propagation) - const u64 index = cast(op1)->getZExtValue(); + const auto cif = ci->getCalledFunction(); + const auto op1 = ci->getNumArgOperands() > 1 ? 
ci->getArgOperand(1) : nullptr; - if (const auto ptr = ppu_get_syscall(index)) + if (cif == _syscall && op1 && isa(op1)) { - const auto n = ppu_get_syscall_name(index); - const auto f = cast(module->getOrInsertFunction(n, _func)); + // Try to determine syscall using the value from r11 (requires constant propagation) + const u64 index = cast(op1)->getZExtValue(); - // Call the syscall directly - ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)})); + if (const auto ptr = ppu_get_syscall(index)) + { + const auto n = ppu_get_syscall_name(index); + const auto f = cast(module->getOrInsertFunction(n, _func)); + + // Call the syscall directly + ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)})); + } } + + continue; } - continue; - } - - if (const auto li = dyn_cast(inst)) - { - // TODO: more careful check - if (li->getNumUses() == 0) + if (const auto li = dyn_cast(inst)) { - // Remove unreferenced volatile loads - li->eraseFromParent(); + // TODO: more careful check + if (li->getNumUses() == 0) + { + // Remove unreferenced volatile loads + li->eraseFromParent(); + } + + continue; } - continue; - } - - if (const auto si = dyn_cast(inst)) - { - // TODO: more careful check - if (isa(si->getOperand(0)) && si->getParent() == &func->getEntryBlock()) + if (const auto si = dyn_cast(inst)) { - // Remove undef volatile stores - si->eraseFromParent(); - } + // TODO: more careful check + if (isa(si->getOperand(0)) && si->getParent() == &func->getEntryBlock()) + { + // Remove undef volatile stores + si->eraseFromParent(); + } - continue; + continue; + } } } } - } - legacy::PassManager mpm; + legacy::PassManager mpm; - // Remove unused functions, structs, global variables, etc - mpm.add(createStripDeadPrototypesPass()); - //mpm.add(createFunctionInliningPass()); - mpm.add(createDeadInstEliminationPass()); - mpm.run(*module); + // Remove unused functions, structs, global variables, etc + mpm.add(createStripDeadPrototypesPass()); + 
//mpm.add(createFunctionInliningPass()); + mpm.add(createDeadInstEliminationPass()); + mpm.run(*module); - // Update dialog - Emu.CallAfter([=]() - { - dlg->ProgressBarSetMsg(0, "Generating code..."); - dlg->ProgressBarInc(0, 100); - }); - - std::string result; - raw_string_ostream out(result); - - if (g_cfg.core.llvm_logs) - { - out << *module; // print IR - fs::file(Emu.GetCachePath() + obj_name + ".log", fs::rewrite).write(out.str()); - result.clear(); - } - - if (verifyModule(*module, &out)) - { - out.flush(); - LOG_ERROR(PPU, "LLVM: Verification failed for %s:\n%s", obj_name, result); - return; - } - - LOG_NOTICE(PPU, "LLVM: %zu functions generated", module->getFunctionList().size()); - - jit->make(std::move(module), Emu.GetCachePath() + obj_name); - - // Get and install function addresses - for (const auto& func : info.funcs) - { - if (func.size) + // Update dialog + Emu.CallAfter([=]() { - const std::uintptr_t uptr = jit->get(fmt::format("__0x%x", func.addr)); - ppu_ref(func.addr) = ::narrow(uptr); + dlg->ProgressBarSetMsg(0, "Generating code, this may take a long time..."); + dlg->ProgressBarInc(0, 100); + }); + + std::string result; + raw_string_ostream out(result); + + if (g_cfg.core.llvm_logs) + { + out << *module; // print IR + fs::file(Emu.GetCachePath() + obj_name + ".log", fs::rewrite).write(out.str()); + result.clear(); } + + if (verifyModule(*module, &out)) + { + out.flush(); + LOG_ERROR(PPU, "LLVM: Verification failed for %s:\n%s", obj_name, result); + return; + } + + LOG_NOTICE(PPU, "LLVM: %zu functions generated", module->getFunctionList().size()); } - LOG_SUCCESS(PPU, "LLVM: Created executable: %s", obj_name); + // Access JIT compiler + if (const auto jit = fxm::check_unlocked()) + { + // Load or compile module + jit->add(std::move(module), Emu.GetCachePath()); + } #endif } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 99c52bcf04..7eff3e4fc2 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ 
b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -85,6 +85,10 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, u64 base) thread_struct.insert(thread_struct.end(), 32, GetType()); // vr[0..31] thread_struct.insert(thread_struct.end(), 32, GetType()); // cr[0..31] thread_struct.insert(thread_struct.end(), 2, GetType()); // lr, ctr + thread_struct.insert(thread_struct.end(), 2, GetType()); // vrsave, cia + thread_struct.insert(thread_struct.end(), 3, GetType()); // so, ov, ca + thread_struct.insert(thread_struct.end(), 1, GetType()); // cnt + thread_struct.insert(thread_struct.end(), 6, GetType()); // sat, nj, FPCC m_thread_type = StructType::create(m_context, thread_struct, "context_t"); @@ -109,212 +113,48 @@ Type* PPUTranslator::GetContextType() return m_thread_type; } -void PPUTranslator::AddFunction(u64 addr, Function* func, FunctionType* type) +Function* PPUTranslator::Translate(const ppu_function& info) { - if (!m_func_types.emplace(addr, type).second || !m_func_list.emplace(addr, func).second) - { - fmt::throw_exception("AddFunction(0x%08llx: %s) failed: function already exists", addr, func->getName().data()); - } -} - -Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t* bin, void(*custom)(PPUTranslator*)) -{ - m_function = m_func_list[info.addr]; - m_function_type = m_func_types[info.addr]; + m_function = m_module->getFunction(info.name); m_start_addr = info.addr; m_end_addr = info.addr + info.size; - m_blocks.clear(); + std::fill(std::begin(m_globals), std::end(m_globals), nullptr); std::fill(std::begin(m_locals), std::end(m_locals), nullptr); + std::fill(std::begin(m_writes), std::end(m_writes), false); + std::fill(std::begin(m_reads), std::end(m_reads), false); - IRBuilder<> builder(BasicBlock::Create(m_context, "__entry", m_function)); - m_ir = &builder; + /* Create builders */ + IRBuilder<> irb(m_entry = BasicBlock::Create(m_context, "__entry", m_function)); + m_ir = &irb; + + m_body = BasicBlock::Create(m_context, "__body", 
m_function); + irb.SetInsertPoint(m_body); /* Create context variables */ - //m_thread = Call(m_thread_type->getPointerTo(), AttributeSet::get(m_context, AttributeSet::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadOnly}), "__context", m_ir->getInt64(info.addr)); m_thread = &*m_function->getArgumentList().begin(); m_base_loaded = m_ir->CreateLoad(m_base); - - // Non-volatile registers with special meaning (TODO) - if (test(info.attr, ppu_attr::uses_r0)) m_g_gpr[0] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 0, ".r0g"); - m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 1, ".spg"); - m_g_gpr[2] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 2, ".rtoc"); - m_g_gpr[13] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 13, ".tls"); - m_g_lr = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 128, ".glr"); - m_gpr[1] = m_ir->CreateAlloca(GetType(), nullptr, ".sp"); - - // Registers used for args or results (TODO) - for (u32 i = 3; i <= 10; i++) m_g_gpr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + i, fmt::format(".r%u", i)); - for (u32 i = 1; i <= 13; i++) m_g_fpr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 35 + i, fmt::format(".f%u", i)); - for (u32 i = 2; i <= 13; i++) m_g_vr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 67 + i, fmt::format(".v%u", i)); - - /* Create local variables */ - for (u32 i = 0; i < 32; i++) if (!m_gpr[i]) m_gpr[i] = m_g_gpr[i] ? m_g_gpr[i] : m_ir->CreateAlloca(GetType(), nullptr, fmt::format(".r%d", i)); - for (u32 i = 0; i < 32; i++) if (!m_fpr[i]) m_fpr[i] = m_g_fpr[i] ? m_g_fpr[i] : m_ir->CreateAlloca(GetType(), nullptr, fmt::format(".f%d", i)); - for (u32 i = 0; i < 32; i++) if (!m_vr[i]) m_vr[i] = m_g_vr[i] ? 
m_g_vr[i] : m_ir->Insert(new AllocaInst(GetType(), nullptr, 16, fmt::format(".v%d", i))); - - for (u32 i = 0; i < 32; i++) - { - static const char* const names[] - { - "lt", - "gt", - "eq", - "so", - }; - - //m_cr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 99 + i, fmt::format("cr%u.%s", i / 4, names[i % 4])); - m_cr[i] = m_ir->CreateAlloca(GetType(), 0, fmt::format("cr%u.%s", i / 4, names[i % 4])); - } - - m_reg_lr = m_ir->CreateAlloca(GetType(), nullptr, ".lr"); - m_reg_ctr = m_ir->CreateAlloca(GetType(), nullptr, ".ctr"); - m_reg_vrsave = m_ir->CreateAlloca(GetType(), nullptr, ".vrsave"); - - m_xer_so = m_ir->CreateAlloca(GetType(), nullptr, "xer.so"); - m_xer_ov = m_ir->CreateAlloca(GetType(), nullptr, "xer.ov"); - m_xer_ca = m_ir->CreateAlloca(GetType(), nullptr, ".carry"); - m_xer_count = m_ir->CreateAlloca(GetType(), nullptr, "xer.count"); - - m_vscr_nj = m_ir->CreateAlloca(GetType(), nullptr, "vscr.nj"); - m_vscr_sat = m_ir->CreateAlloca(GetType(), nullptr, "vscr.sat"); - - //m_fpscr_fx = m_fpscr[0] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.fx"); - //m_fpscr_ox = m_fpscr[3] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ox"); - //m_fpscr_ux = m_fpscr[4] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ux"); - //m_fpscr_zx = m_fpscr[5] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.zx"); - //m_fpscr_xx = m_fpscr[6] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.xx"); - //m_fpscr_vxsnan = m_fpscr[7] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxsnan"); - //m_fpscr_vxisi = m_fpscr[8] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxisi"); - //m_fpscr_vxidi = m_fpscr[9] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxidi"); - //m_fpscr_vxzdz = m_fpscr[10] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxzdz"); - //m_fpscr_vximz = m_fpscr[11] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vximz"); - //m_fpscr_vxvc = m_fpscr[12] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxvc"); - //m_fpscr_fr = m_fpscr[13] = 
m_ir->CreateAlloca(GetType(), nullptr, "fpscr.fr"); - //m_fpscr_fi = m_fpscr[14] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.fi"); - //m_fpscr_c = m_fpscr[15] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.c"); - m_fpscr_lt = m_fpscr[16] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.lt"); - m_fpscr_gt = m_fpscr[17] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.gt"); - m_fpscr_eq = m_fpscr[18] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.eq"); - m_fpscr_un = m_fpscr[19] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.un"); - //m_fpscr_reserved = m_fpscr[20] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.52"); - //m_fpscr_vxsoft = m_fpscr[21] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxsoft"); - //m_fpscr_vxsqrt = m_fpscr[22] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxsqrt"); - //m_fpscr_vxcvi = m_fpscr[23] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxcvi"); - //m_fpscr_ve = m_fpscr[24] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ve"); - //m_fpscr_oe = m_fpscr[25] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.oe"); - //m_fpscr_ue = m_fpscr[26] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ue"); - //m_fpscr_ze = m_fpscr[27] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ze"); - //m_fpscr_xe = m_fpscr[28] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.xe"); - //m_fpscr_ni = m_fpscr[29] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ni"); - //m_fpscr_rnh = m_fpscr[30] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.rn.msb"); - //m_fpscr_rnl = m_fpscr[31] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.rn.lsb"); - - /* Initialize local variables */ - m_ir->CreateStore(m_ir->CreateLoad(m_g_gpr[1]), m_gpr[1]); // SP - m_ir->CreateStore(m_ir->CreateLoad(m_g_lr), m_reg_lr); // LR - m_ir->CreateStore(m_ir->getFalse(), m_xer_so); // XER.SO - m_ir->CreateStore(m_ir->getFalse(), m_vscr_sat); // VSCR.SAT - m_ir->CreateStore(m_ir->getTrue(), m_vscr_nj); - - // TODO: only loaded r0, r11, r12 (extended arguments for program initialization) - 
if (!m_g_gpr[0]) m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 0)), m_gpr[0]); - if (!m_g_gpr[11]) m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 11)), m_gpr[11]); - if (!m_g_gpr[12]) m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 12)), m_gpr[12]); - - m_jtr = BasicBlock::Create(m_context, "__jtr", m_function); - - // Create basic blocks - for (auto&& block : info.blocks) - { - if (block.second && block.first >= m_start_addr && block.first < m_end_addr) - { - m_blocks[block.first] = BasicBlock::Create(m_context, fmt::format("loc_%llx", block.first), m_function); - } - } - - // Finalize entry block - m_ir->CreateBr(m_blocks.at(m_start_addr)); // Process blocks - for (auto&& block : info.blocks) + const auto block = std::make_pair(info.addr, info.size); { - if (!m_blocks.count(block.first)) - { - continue; - } - - // Start block - m_ir->SetInsertPoint(m_blocks.at(block.first)); - - // Bloat the beginning of each block: check state - const auto vstate = m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1), true); - const auto vblock = BasicBlock::Create(m_context, fmt::format("l0c_%llx", block.first), m_function); - const auto vcheck = BasicBlock::Create(m_context, fmt::format("lcc_%llx", block.first), m_function); - - m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), vblock, vcheck, m_md_unlikely); - m_ir->SetInsertPoint(vcheck); - Call(GetType(), "__check", m_thread, m_ir->getInt64(block.first)); - m_ir->CreateBr(vblock); - m_ir->SetInsertPoint(vblock); - // Process the instructions for (m_current_addr = block.first; m_current_addr < block.first + block.second; m_current_addr += 4) { - if (m_ir->GetInsertBlock()->getTerminator()) + if (m_body->getTerminator()) { break; } - const u32 op = *(m_bin = bin + (m_current_addr - m_start_addr) / sizeof(u32)); + const u32 op = vm::ps3::read32(vm::cast(m_current_addr)); 
(this->*(s_ppu_decoder.decode(op)))({op}); } // Finalize current block if necessary (create branch to the next address) - if (!m_ir->GetInsertBlock()->getTerminator()) + if (!m_body->getTerminator()) { - if (m_blocks.count(m_current_addr)) - { - m_ir->CreateBr(m_blocks.at(m_current_addr)); - } - else - { - Call(GetType(), "__end", m_ir->getInt64(m_current_addr)); - m_ir->CreateUnreachable(); - } - } - } - - // Run custom IR generation function (TODO) - if (custom) custom(this); - - m_ir->SetInsertPoint(m_jtr); - - if (m_jtr->use_empty()) - { - m_ir->CreateUnreachable(); - } - else - { - const auto _ctr = m_ir->CreateLoad(m_reg_ctr); - const auto _default = BasicBlock::Create(m_context, "__jtr.def", m_function); - const auto _switch = m_ir->CreateSwitch(_ctr, _default, ::size32(m_blocks)); - - for (const auto& pair : m_blocks) - { - _switch->addCase(m_ir->getInt64(pair.first), pair.second); - } - - m_ir->SetInsertPoint(_default); - CallFunction(0, true, _ctr); - } - - for (auto&& block : *m_function) - { - if (!block.getTerminator()) - { - m_ir->SetInsertPoint(&block); - m_ir->CreateUnreachable(); + FlushRegisters(); + CallFunction(m_current_addr); } } @@ -347,15 +187,17 @@ Value* PPUTranslator::RotateLeft(Value* arg, Value* n) return m_ir->CreateOr(m_ir->CreateShl(arg, m_ir->CreateAnd(n, mask)), m_ir->CreateLShr(arg, m_ir->CreateAnd(m_ir->CreateNeg(n), mask))); } -void PPUTranslator::CallFunction(u64 target, bool tail, Value* indirect) +void PPUTranslator::CallFunction(u64 target, Value* indirect) { - const auto func = indirect ? nullptr : m_func_list[target]; - - const auto callee_type = func ? 
m_func_types[target] : nullptr; - - if (func) + if (!indirect) { - m_ir->CreateCall(func, {m_thread}); + if (target < 0x10000 || target >= -0x10000) + { + Trap(m_current_addr); + return; + } + + m_ir->CreateCall(m_module->getOrInsertFunction(fmt::format("__0x%llx", target), FunctionType::get(GetType(), {m_thread_type->getPointerTo()}, false)), {m_thread}); } else { @@ -365,48 +207,64 @@ void PPUTranslator::CallFunction(u64 target, bool tail, Value* indirect) m_ir->CreateCall(m_ir->CreateIntToPtr(m_ir->CreateLoad(ptr), FunctionType::get(GetType(), {m_thread_type->getPointerTo()}, false)->getPointerTo()), {m_thread}); } - if (!tail) - { - UndefineVolatileRegisters(); - } - - if (tail) - { - m_ir->CreateRetVoid(); - } + m_ir->CreateRetVoid(); } -void PPUTranslator::UndefineVolatileRegisters() +void PPUTranslator::FlushRegisters() { - const auto undef_i64 = GetUndef(); - const auto undef_f64 = GetUndef(); - const auto undef_vec = GetUndef(); - const auto undef_bool = GetUndef(); + if (m_entry->getTerminator()) + { + return; + } - // Undefine local volatile registers - SetGpr(0, undef_i64); // r0 - SetFpr(0, undef_f64); // f0: volatile scratch register - SetVr(0, undef_vec); // v0: volatile scratch register - SetVr(1, undef_vec); // v1: volatile scratch register + auto process = [&](Value*& local, u32 index) + { + // Create pointer to the global variable + m_ir->SetInsertPoint(m_entry); + const auto ptr = m_ir->CreateStructGEP(nullptr, m_thread, index); - m_ir->CreateStore(undef_i64, m_reg_lr); // LR - m_ir->CreateStore(undef_i64, m_reg_ctr); // CTR - m_ir->CreateStore(undef_bool, m_xer_ca); // XER.CA + // Load variable if necessary + if (m_reads[&local - m_locals]) + { + m_ir->CreateStore(m_ir->CreateLoad(ptr), local); + } - m_ir->CreateStore(undef_bool, m_fpscr_lt); - m_ir->CreateStore(undef_bool, m_fpscr_gt); - m_ir->CreateStore(undef_bool, m_fpscr_eq); - m_ir->CreateStore(undef_bool, m_fpscr_un); + m_ir->SetInsertPoint(m_body); - SetCrField(0, undef_bool, 
undef_bool, undef_bool, undef_bool); // cr0 - SetCrField(1, undef_bool, undef_bool, undef_bool, undef_bool); // cr1 - SetCrField(5, undef_bool, undef_bool, undef_bool, undef_bool); // cr5 - SetCrField(6, undef_bool, undef_bool, undef_bool, undef_bool); // cr6 - SetCrField(7, undef_bool, undef_bool, undef_bool, undef_bool); // cr7 + // Store variable if necessary + if (m_writes[&local - m_locals]) + { + m_ir->CreateStore(m_ir->CreateLoad(local), ptr); + } - // Cannot undef sticky flags because it makes |= op meaningless - //m_ir->CreateStore(m_ir->getFalse(), m_xer_so); // XER.SO - //m_ir->CreateStore(m_ir->getFalse(), m_vscr_sat); // VSCR.SAT + // Save global + m_globals[&local - m_locals] = ptr; + }; + + + for (u32 i = 0; i < 32; i++) if (m_gpr[i]) process(m_gpr[i], 3 + i); + for (u32 i = 0; i < 32; i++) if (m_fpr[i]) process(m_fpr[i], 35 + i); + for (u32 i = 0; i < 32; i++) if (m_vr[i]) process(m_vr[i], 67 + i); + for (u32 i = 0; i < 32; i++) if (m_cr[i]) process(m_cr[i], 99 + i); + if (m_lr) process(m_lr, 131); + if (m_ctr) process(m_ctr, 132); + if (m_vrsave) process(m_vrsave, 133); + if (m_so) process(m_so, 135); + if (m_ov) process(m_ov, 136); + if (m_ca) process(m_ca, 137); + if (m_cnt) process(m_cnt, 138); + if (m_sat) process(m_sat, 139); + if (m_nj) process(m_nj, 140); + for (u32 i = 16; i < 20; i++) if (m_fc[i]) process(m_fc[i], 141 + i - 16); + + m_ir->SetInsertPoint(m_entry); + const auto vstate = m_ir->CreateLoad(m_ir->CreateStructGEP(nullptr, m_thread, 1), true); + const auto vcheck = BasicBlock::Create(m_context, "__test", m_function); + m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), m_body, vcheck, m_md_likely); + m_ir->SetInsertPoint(vcheck); + Call(GetType(), "__check", m_thread, m_ir->getInt64(m_start_addr)); + m_ir->CreateRetVoid(); + m_ir->SetInsertPoint(m_body); } Value* PPUTranslator::Solid(Value* value) @@ -565,10 +423,15 @@ Value* PPUTranslator::Trunc(Value* value, Type* type) void PPUTranslator::UseCondition(MDNode* hint, Value* cond) { + 
FlushRegisters(); + if (cond) { - const auto local = BasicBlock::Create(m_context, fmt::format("loc_%llx.cond", m_current_addr/* - m_start_addr*/), m_function); - m_ir->CreateCondBr(cond, local, m_blocks.at(m_current_addr + 4), hint); + const auto local = BasicBlock::Create(m_context, fmt::format("loc_%llx.cond", m_current_addr), m_function); + const auto next = BasicBlock::Create(m_context, fmt::format("loc_%llx.next", m_current_addr), m_function); + m_ir->CreateCondBr(cond, local, next, hint); + m_ir->SetInsertPoint(next); + CallFunction(m_current_addr + 4); m_ir->SetInsertPoint(local); } } @@ -618,15 +481,15 @@ void PPUTranslator::CompilationError(const std::string& error) void PPUTranslator::MFVSCR(ppu_opcode_t op) { - const auto vscr = m_ir->CreateOr(ZExt(m_ir->CreateLoad(m_vscr_sat), GetType()), m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_vscr_nj), GetType()), 16)); + const auto vscr = m_ir->CreateOr(ZExt(RegLoad(m_sat), GetType()), m_ir->CreateShl(ZExt(RegLoad(m_nj), GetType()), 16)); SetVr(op.vd, m_ir->CreateInsertElement(ConstantVector::getSplat(4, m_ir->getInt32(0)), vscr, m_ir->getInt32(m_is_be ? 3 : 0))); } void PPUTranslator::MTVSCR(ppu_opcode_t op) { const auto vscr = m_ir->CreateExtractElement(GetVr(op.vb, VrType::vi32), m_ir->getInt32(m_is_be ? 3 : 0)); - m_ir->CreateStore(Trunc(m_ir->CreateLShr(vscr, 16), GetType()), m_vscr_nj); - m_ir->CreateStore(Trunc(vscr, GetType()), m_vscr_sat); + RegStore(Trunc(m_ir->CreateLShr(vscr, 16), GetType()), m_nj); + RegStore(Trunc(vscr, GetType()), m_sat); } void PPUTranslator::VADDCUW(ppu_opcode_t op) @@ -1746,36 +1609,21 @@ void PPUTranslator::ADDIS(ppu_opcode_t op) void PPUTranslator::BC(ppu_opcode_t op) { - const u64 target = (op.aa ? 
0 : m_current_addr) + op.bt14; - - const auto cond = CheckBranchCondition(op.bo, op.bi); - - if ((target > m_start_addr && target < m_end_addr) || (target == m_start_addr && !op.lk)) + if (op.lk) { - // Local branch - - if (op.lk && target != m_current_addr) - { - CompilationError("BCL: local branch"); - Call(GetType(), "__trace", m_ir->getInt64(m_current_addr)); - m_ir->CreateStore(m_ir->getInt64(m_current_addr + 4), m_reg_lr); - } - else if (cond) - { - m_ir->CreateCondBr(cond, m_blocks.at(target), m_blocks.at(m_current_addr + 4), CheckBranchProbability(op.bo)); - return; - } - else - { - m_ir->CreateBr(m_blocks.at(target)); - return; - } + RegInit(m_lr); } - // External branch - UseCondition(CheckBranchProbability(op.bo), cond); - if (op.lk) m_ir->CreateStore(m_ir->getInt64(m_current_addr + 4), m_g_lr); - CallFunction(target, !op.lk); + const u64 target = (op.aa ? 0 : m_current_addr) + op.bt14; + + UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi)); + + if (op.lk) + { + m_ir->CreateStore(m_ir->getInt64(m_current_addr + 4), m_g_lr); + } + + CallFunction(target); } void PPUTranslator::SC(ppu_opcode_t op) @@ -1785,34 +1633,24 @@ void PPUTranslator::SC(ppu_opcode_t op) return UNK(op); } - Call(GetType(), op.lev ? "__lv1call" : "__syscall", m_thread, m_ir->CreateLoad(m_gpr[11])); - UndefineVolatileRegisters(); + const auto num = GetGpr(11); + FlushRegisters(); + m_ir->CreateStore(m_ir->getInt32(m_current_addr), m_ir->CreateStructGEP(nullptr, m_thread, 134)); + Call(GetType(), op.lev ? "__lv1call" : "__syscall", m_thread, num); + m_ir->CreateRetVoid(); } void PPUTranslator::B(ppu_opcode_t op) { const u64 target = (op.aa ? 
0 : m_current_addr) + op.bt24; - if ((target > m_start_addr && target < m_end_addr) || (target == m_start_addr && !op.lk)) + if (op.lk) { - // Local branch - - if (op.lk && target != m_current_addr) - { - CompilationError("BL: local branch"); - Call(GetType(), "__trace", m_ir->getInt64(m_current_addr)); - m_ir->CreateStore(m_ir->getInt64(m_current_addr + 4), m_reg_lr); - } - else - { - m_ir->CreateBr(m_blocks.at(target)); - return; - } + RegStore(m_ir->getInt64(m_current_addr + 4), m_lr); } - - // External branch or recursive call - if (op.lk) m_ir->CreateStore(m_ir->getInt64(m_current_addr + 4), m_g_lr); - CallFunction(target, !op.lk); + + FlushRegisters(); + CallFunction(target); } void PPUTranslator::MCRF(ppu_opcode_t op) @@ -1826,19 +1664,16 @@ void PPUTranslator::MCRF(ppu_opcode_t op) void PPUTranslator::BCLR(ppu_opcode_t op) { + const auto target = RegLoad(m_lr); + UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi)); if (op.lk) { - // Sort of indirect call m_ir->CreateStore(m_ir->getInt64(m_current_addr + 4), m_g_lr); - CallFunction(0, false, m_ir->CreateLoad(m_reg_lr)); - } - else - { - // Simple return - m_ir->CreateRetVoid(); } + + CallFunction(0, target); } void PPUTranslator::CRNOR(ppu_opcode_t op) @@ -1892,63 +1727,21 @@ void PPUTranslator::CROR(ppu_opcode_t op) void PPUTranslator::BCCTR(ppu_opcode_t op) { + if (op.lk) + { + RegInit(m_lr); + } + + const auto target = RegLoad(m_ctr); + UseCondition(CheckBranchProbability(op.bo | 0x4), CheckBranchCondition(op.bo | 0x4, op.bi)); - // Jumptable: sorted set of possible targets - std::set targets; - - // Detect a possible jumptable - for (u64 jt_addr = m_current_addr + sizeof(u32), addr = jt_addr; addr < m_end_addr; addr += sizeof(u32)) + if (op.lk) { - const u64 target = jt_addr + static_cast(*++m_bin); - - if (target == jt_addr) - { - break; - } - - if (target % 4 || target < m_start_addr || target >= m_end_addr) - { - break; - } - - targets.emplace(target); - } - - if (!op.lk) - 
{ - if (!targets.empty()) - { - // Create switch with special default case - const auto _default = BasicBlock::Create(m_context, fmt::format("loc_%llx.def", m_current_addr/* - m_start_addr*/), m_function); - const auto _switch = m_ir->CreateSwitch(m_ir->CreateLoad(m_reg_ctr), _default, ::size32(targets)); - - for (const u64 target : targets) - { - _switch->addCase(m_ir->getInt64(target), m_blocks.at(target)); - } - - m_ir->SetInsertPoint(_default); - Call(GetType(), "__end", m_ir->getInt64(m_current_addr)); - m_ir->CreateUnreachable(); - } - else - { - // Indirect branch - m_ir->CreateBr(m_jtr); - } - } - else - { - if (!targets.empty()) - { - CompilationError("BCCTRL with a jumptable"); - } - - // Indirect call m_ir->CreateStore(m_ir->getInt64(m_current_addr + 4), m_g_lr); - CallFunction(0, false, m_ir->CreateLoad(m_reg_ctr)); } + + CallFunction(0, target); } void PPUTranslator::RLWIMI(ppu_opcode_t op) @@ -2260,7 +2053,7 @@ void PPUTranslator::CMP(ppu_opcode_t op) void PPUTranslator::TW(ppu_opcode_t op) { - if (op.opcode != ppu_instructions::TRAP()) UseCondition(m_md_unlikely, CheckTrapCondition(op.bo, GetGpr(op.ra, 32), GetGpr(op.rb, 32))); + UseCondition(m_md_unlikely, CheckTrapCondition(op.bo, GetGpr(op.ra, 32), GetGpr(op.rb, 32))); Trap(m_current_addr); } @@ -2780,19 +2573,19 @@ void PPUTranslator::MFSPR(ppu_opcode_t op) switch (const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5)) { case 0x001: // MFXER - result = ZExt(m_ir->CreateLoad(m_xer_count), GetType()); - result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_xer_so), GetType()), 29)); - result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_xer_ov), GetType()), 30)); - result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_xer_ca), GetType()), 31)); + result = ZExt(RegLoad(m_cnt), GetType()); + result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(RegLoad(m_so), GetType()), 29)); + result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(RegLoad(m_ov), 
GetType()), 30)); + result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(RegLoad(m_ca), GetType()), 31)); break; case 0x008: // MFLR - result = m_ir->CreateLoad(m_reg_lr); + result = RegLoad(m_lr); break; case 0x009: // MFCTR - result = m_ir->CreateLoad(m_reg_ctr); + result = RegLoad(m_ctr); break; case 0x100: - result = ZExt(m_ir->CreateLoad(m_reg_vrsave)); + result = ZExt(RegLoad(m_vrsave)); break; case 0x10C: // MFTB result = Call(GetType(), m_pure_attr, "__get_tb"); @@ -2924,19 +2717,19 @@ void PPUTranslator::MTSPR(ppu_opcode_t op) switch (const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5)) { case 0x001: // MTXER - m_ir->CreateStore(Trunc(m_ir->CreateLShr(value, 31), GetType()), m_xer_ca); - m_ir->CreateStore(Trunc(m_ir->CreateLShr(value, 30), GetType()), m_xer_ov); - m_ir->CreateStore(Trunc(m_ir->CreateLShr(value, 29), GetType()), m_xer_so); - m_ir->CreateStore(Trunc(value, GetType()), m_xer_count); + RegStore(Trunc(m_ir->CreateLShr(value, 31), GetType()), m_ca); + RegStore(Trunc(m_ir->CreateLShr(value, 30), GetType()), m_ov); + RegStore(Trunc(m_ir->CreateLShr(value, 29), GetType()), m_so); + RegStore(Trunc(value, GetType()), m_cnt); break; case 0x008: // MTLR - m_ir->CreateStore(value, m_reg_lr); + RegStore(value, m_lr); break; case 0x009: // MTCTR - m_ir->CreateStore(value, m_reg_ctr); + RegStore(value, m_ctr); break; case 0x100: - m_ir->CreateStore(Trunc(value), m_reg_vrsave); + RegStore(Trunc(value), m_vrsave); break; default: Call(GetType(), fmt::format("__mtspr_%u", n), value); @@ -2990,7 +2783,7 @@ void PPUTranslator::LDBRX(ppu_opcode_t op) void PPUTranslator::LSWX(ppu_opcode_t op) { - Call(GetType(), "__lswx", m_ir->getInt32(op.rd), m_ir->CreateLoad(m_xer_count), op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)); + Call(GetType(), "__lswx", m_ir->getInt32(op.rd), RegLoad(m_cnt), op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)); } void PPUTranslator::LWBRX(ppu_opcode_t op) @@ -3103,7 +2896,7 @@ void PPUTranslator::STDBRX(ppu_opcode_t op) void PPUTranslator::STSWX(ppu_opcode_t op) { - Call(GetType(), "__stswx", m_ir->getInt32(op.rs), m_ir->CreateLoad(m_xer_count), op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)); + Call(GetType(), "__stswx", m_ir->getInt32(op.rs), RegLoad(m_cnt), op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)); } void PPUTranslator::STWBRX(ppu_opcode_t op) @@ -3651,7 +3444,7 @@ void PPUTranslator::MTFSB1(ppu_opcode_t op) SetFPSCRBit(op.crbd, m_ir->getTrue(), true); - if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); + if (op.rc) SetCrFieldFPCC(1); } void PPUTranslator::MCRFS(ppu_opcode_t op) @@ -3671,7 +3464,7 @@ void PPUTranslator::MTFSB0(ppu_opcode_t op) SetFPSCRBit(op.crbd, m_ir->getFalse(), false); - if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); + if (op.rc) SetCrFieldFPCC(1); } void PPUTranslator::MTFSFI(ppu_opcode_t op) @@ -3683,7 +3476,7 @@ void PPUTranslator::MTFSFI(ppu_opcode_t op) if (op.crfd != 0) SetFPSCRBit(op.crfd * 4 + 2, m_ir->getInt1((op.i & 2) != 0), false); SetFPSCRBit(op.crfd * 4 + 3, m_ir->getInt1((op.i & 1) != 0), false); - if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); + if (op.rc) SetCrFieldFPCC(1); } void PPUTranslator::MFFS(ppu_opcode_t op) @@ -3692,17 +3485,14 @@ void PPUTranslator::MFFS(ppu_opcode_t op) Value* result = m_ir->getInt64(0); - for (u32 i = 0; i < 32; i++) + for (u32 i = 16; i < 20; i++) { - if (const auto bit = m_fpscr[i] ? 
m_ir->CreateLoad(m_fpscr[i]) : GetFPSCRBit(i)) - { - result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(bit, GetType()), i ^ 31)); - } + result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(RegLoad(m_fc[i]), GetType()), i ^ 31)); } SetFpr(op.frd, result); - if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); + if (op.rc) SetCrFieldFPCC(1); } void PPUTranslator::MTFSF(ppu_opcode_t op) @@ -3711,7 +3501,7 @@ void PPUTranslator::MTFSF(ppu_opcode_t op) const auto value = GetFpr(op.frb, 32, true); - for (u32 i = 0; i < 32; i++) + for (u32 i = 16; i < 20; i++) { if (i != 1 && i != 2 && (op.flm & (128 >> (i / 4))) != 0) { @@ -3719,7 +3509,7 @@ void PPUTranslator::MTFSF(ppu_opcode_t op) } } - if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); + if (op.rc) SetCrFieldFPCC(1); } void PPUTranslator::FCMPU(ppu_opcode_t op) @@ -3844,7 +3634,7 @@ void PPUTranslator::FSEL(ppu_opcode_t op) const auto c = GetFpr(op.frc); SetFpr(op.frd, m_ir->CreateSelect(m_ir->CreateFCmpOGE(a, ConstantFP::get(GetType(), 0.0)), c, b)); - if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); + if (op.rc) SetCrFieldFPCC(1); } void PPUTranslator::FMUL(ppu_opcode_t op) @@ -3971,28 +3761,28 @@ void PPUTranslator::FNEG(ppu_opcode_t op) const auto b = GetFpr(op.frb); SetFpr(op.frd, m_ir->CreateFNeg(b)); - if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); + if (op.rc) SetCrFieldFPCC(1); } void PPUTranslator::FMR(ppu_opcode_t op) { SetFpr(op.frd, GetFpr(op.frb)); - if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); + if (op.rc) 
SetCrFieldFPCC(1); } void PPUTranslator::FNABS(ppu_opcode_t op) { SetFpr(op.frd, m_ir->CreateFNeg(Call(GetType(), "llvm.fabs.f64", GetFpr(op.frb)))); - if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); + if (op.rc) SetCrFieldFPCC(1); } void PPUTranslator::FABS(ppu_opcode_t op) { SetFpr(op.frd, Call(GetType(), "llvm.fabs.f64", GetFpr(op.frb))); - if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); + if (op.rc) SetCrFieldFPCC(1); } void PPUTranslator::FCTID(ppu_opcode_t op) @@ -4031,34 +3821,25 @@ void PPUTranslator::FCFID(ppu_opcode_t op) void PPUTranslator::UNK(ppu_opcode_t op) { - CompilationError(fmt::format("Unknown/illegal opcode 0x%08x", op.opcode)); - m_ir->CreateUnreachable(); + FlushRegisters(); + Call(GetType(), "__error", m_thread, m_ir->getInt64(m_current_addr), m_ir->getInt32(op.opcode)); + m_ir->CreateRetVoid(); } Value* PPUTranslator::GetGpr(u32 r, u32 num_bits) { - return m_ir->CreateTrunc(m_ir->CreateLoad(m_gpr[r]), m_ir->getIntNTy(num_bits)); + return m_ir->CreateTrunc(RegLoad(m_gpr[r]), m_ir->getIntNTy(num_bits)); } void PPUTranslator::SetGpr(u32 r, Value* value) { - const auto i64_val = m_ir->CreateZExt(value, GetType()); - - if (true) // Update local: all regs - { - m_ir->CreateStore(i64_val, m_gpr[r]); - } - - if (r == 1) // Update global: SP - { - m_ir->CreateStore(i64_val, m_g_gpr[r]); - } + RegStore(m_ir->CreateZExt(value, GetType()), m_gpr[r]); } Value* PPUTranslator::GetFpr(u32 r, u32 bits, bool as_int) { - const auto value = m_ir->CreateAlignedLoad(m_fpr[r], 8); + const auto value = RegLoad(m_fpr[r]); if (!as_int && bits == 64) { @@ -4081,11 +3862,15 @@ void PPUTranslator::SetFpr(u32 r, Value* val) val->getType() == GetType() ? m_ir->CreateBitCast(val, GetType()) : val->getType() == GetType() ? 
m_ir->CreateFPExt(val, GetType()) : val; - m_ir->CreateAlignedStore(f64_val, m_fpr[r], 8); + RegStore(f64_val, m_fpr[r]); } Value* PPUTranslator::GetVr(u32 vr, VrType type) { + RegInit(m_vr[vr]); + + m_reads[&m_vr[vr] - m_locals] = true; + const auto value = m_ir->CreateAlignedLoad(m_vr[vr], 16); switch (type) @@ -4102,6 +3887,10 @@ Value* PPUTranslator::GetVr(u32 vr, VrType type) void PPUTranslator::SetVr(u32 vr, Value* value) { + RegInit(m_vr[vr]); + + m_writes[&m_vr[vr] - m_locals] = true; + const auto type = value->getType(); const auto size = type->getPrimitiveSizeInBits(); @@ -4124,12 +3913,12 @@ void PPUTranslator::SetVr(u32 vr, Value* value) Value* PPUTranslator::GetCrb(u32 crb) { - return m_ir->CreateLoad(m_cr[crb]); + return RegLoad(m_cr[crb]); } void PPUTranslator::SetCrb(u32 crb, Value* value) { - m_ir->CreateStore(value, m_cr[crb]); + RegStore(value, m_cr[crb]); } void PPUTranslator::SetCrField(u32 group, Value* lt, Value* gt, Value* eq, Value* so) @@ -4137,7 +3926,7 @@ void PPUTranslator::SetCrField(u32 group, Value* lt, Value* gt, Value* eq, Value SetCrb(group * 4 + 0, lt ? lt : GetUndef()); SetCrb(group * 4 + 1, gt ? gt : GetUndef()); SetCrb(group * 4 + 2, eq ? eq : GetUndef()); - SetCrb(group * 4 + 3, so ? so : m_ir->CreateLoad(m_xer_so)); + SetCrb(group * 4 + 3, so ? 
so : RegLoad(m_so)); } void PPUTranslator::SetCrFieldSignedCmp(u32 n, Value* a, Value* b) @@ -4156,12 +3945,17 @@ void PPUTranslator::SetCrFieldUnsignedCmp(u32 n, Value* a, Value* b) SetCrField(n, lt, gt, eq); } +void PPUTranslator::SetCrFieldFPCC(u32 n) +{ + SetCrField(n, GetFPSCRBit(16), GetFPSCRBit(17), GetFPSCRBit(18), GetFPSCRBit(19)); +} + void PPUTranslator::SetFPCC(Value* lt, Value* gt, Value* eq, Value* un, bool set_cr) { - m_ir->CreateStore(lt, m_fpscr_lt); - m_ir->CreateStore(gt, m_fpscr_gt); - m_ir->CreateStore(eq, m_fpscr_eq); - m_ir->CreateStore(un, m_fpscr_un); + SetFPSCRBit(16, lt, false); + SetFPSCRBit(17, gt, false); + SetFPSCRBit(18, eq, false); + SetFPSCRBit(19, un, false); if (set_cr) SetCrField(1, lt, gt, eq, un); } @@ -4189,101 +3983,101 @@ void PPUTranslator::SetFPRF(Value* value, bool set_cr) void PPUTranslator::SetFPSCR_FR(Value* value) { - m_ir->CreateStore(value, m_fpscr_fr); + //m_ir->CreateStore(value, m_fpscr_fr); } void PPUTranslator::SetFPSCR_FI(Value* value) { - m_ir->CreateStore(value, m_fpscr_fi); - SetFPSCRException(m_fpscr_xx, value); + //m_ir->CreateStore(value, m_fpscr_fi); + //SetFPSCRException(m_fpscr_xx, value); } void PPUTranslator::SetFPSCRException(Value* ptr, Value* value) { - m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(ptr), value), ptr); - m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_fpscr_fx), value), m_fpscr_fx); + //m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(ptr), value), ptr); + //m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_fpscr_fx), value), m_fpscr_fx); } Value* PPUTranslator::GetFPSCRBit(u32 n) { - if (n == 1 && m_fpscr[24]) - { - // Floating-Point Enabled Exception Summary (FEX) 24-29 - Value* value = m_ir->CreateLoad(m_fpscr[24]); - for (u32 i = 25; i <= 29; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i])); - return value; - } + //if (n == 1 && m_fpscr[24]) + //{ + // // Floating-Point Enabled Exception Summary (FEX) 24-29 + // Value* value = 
m_ir->CreateLoad(m_fpscr[24]); + // for (u32 i = 25; i <= 29; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i])); + // return value; + //} - if (n == 2 && m_fpscr[7]) - { - // Floating-Point Invalid Operation Exception Summary (VX) 7-12, 21-23 - Value* value = m_ir->CreateLoad(m_fpscr[7]); - for (u32 i = 8; i <= 12; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i])); - for (u32 i = 21; i <= 23; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i])); - return value; - } + //if (n == 2 && m_fpscr[7]) + //{ + // // Floating-Point Invalid Operation Exception Summary (VX) 7-12, 21-23 + // Value* value = m_ir->CreateLoad(m_fpscr[7]); + // for (u32 i = 8; i <= 12; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i])); + // for (u32 i = 21; i <= 23; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i])); + // return value; + //} - if (n >= 32 || !m_fpscr[n]) + if (n < 16 || n > 19) { return nullptr; // ??? } // Get bit - const auto value = m_ir->CreateLoad(m_fpscr[n]); + const auto value = RegLoad(m_fc[n]); - if (n == 0 || (n >= 3 && n <= 12) || (n >= 21 && n <= 23)) - { - // Clear FX or exception bits - m_ir->CreateStore(m_ir->getFalse(), m_fpscr[n]); - } + //if (n == 0 || (n >= 3 && n <= 12) || (n >= 21 && n <= 23)) + //{ + // // Clear FX or exception bits + // m_ir->CreateStore(m_ir->getFalse(), m_fpscr[n]); + //} return value; } void PPUTranslator::SetFPSCRBit(u32 n, Value* value, bool update_fx) { - if (n >= 32 || !m_fpscr[n]) + if (n < 16 || n > 19) { //CompilationError("SetFPSCRBit(): inaccessible bit " + std::to_string(n)); return; // ??? 
} - if (update_fx) - { - if ((n >= 3 && n <= 12) || (n >= 21 && n <= 23)) - { - // Update FX bit if necessary - m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_fpscr_fx), value), m_fpscr_fx); - } - } + //if (update_fx) + //{ + // if ((n >= 3 && n <= 12) || (n >= 21 && n <= 23)) + // { + // // Update FX bit if necessary + // m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_fpscr_fx), value), m_fpscr_fx); + // } + //} //if (n >= 24 && n <= 28) CompilationError("SetFPSCRBit: exception enable bit " + std::to_string(n)); //if (n == 29) CompilationError("SetFPSCRBit: NI bit"); //if (n >= 30) CompilationError("SetFPSCRBit: RN bit"); // Store the bit - m_ir->CreateStore(value, m_fpscr[n]); + RegStore(value, m_fc[n]); } Value* PPUTranslator::GetCarry() { - return m_ir->CreateLoad(m_xer_ca); + return RegLoad(m_ca); } void PPUTranslator::SetCarry(Value* bit) { - m_ir->CreateStore(bit, m_xer_ca); + RegStore(bit, m_ca); } void PPUTranslator::SetOverflow(Value* bit) { - m_ir->CreateStore(bit, m_xer_ov); - m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_xer_so), bit), m_xer_so); + RegStore(bit, m_ov); + RegStore(m_ir->CreateOr(RegLoad(m_so), bit), m_so); } void PPUTranslator::SetSat(Value* bit) { - m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_vscr_sat), bit), m_vscr_sat); + RegStore(m_ir->CreateOr(RegLoad(m_sat), bit), m_sat); } Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right) @@ -4297,9 +4091,10 @@ Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right) return trap_condition; } -Value* PPUTranslator::Trap(u64 addr) +void PPUTranslator::Trap(u64 addr) { - return Call(GetType(), /*AttributeSet::get(m_context, AttributeSet::FunctionIndex, Attribute::NoReturn),*/ "__trap", m_ir->getInt64(m_current_addr)); + Call(GetType(), "__trap", m_thread, m_ir->getInt64(m_current_addr)); + m_ir->CreateRetVoid(); } Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi) @@ -4310,10 +4105,10 @@ Value* 
PPUTranslator::CheckBranchCondition(u32 bo, u32 bi) const bool bo3 = (bo & 0x02) != 0; // Decrement counter if necessary - const auto ctr = bo2 ? nullptr : m_ir->CreateSub(m_ir->CreateLoad(m_reg_ctr), m_ir->getInt64(1)); + const auto ctr = bo2 ? nullptr : m_ir->CreateSub(RegLoad(m_ctr), m_ir->getInt64(1)); // Store counter if necessary - if (ctr) m_ir->CreateStore(ctr, m_reg_ctr); + if (ctr) RegStore(ctr, m_ctr); // Generate counter condition const auto use_ctr = bo2 ? nullptr : m_ir->CreateICmp(bo3 ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, ctr, m_ir->getInt64(0)); diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index 43c616f9ad..8f33f0020b 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -119,36 +119,18 @@ class PPUTranslator final //: public CPUTranslator // Attributes for function calls which are "pure" and may be optimized away if their results are unused const llvm::AttributeSet m_pure_attr; - // Available functions: types (not set or nullptr for untyped) - std::unordered_map m_func_types; - - // Available functions - std::unordered_map m_func_list; - - // LLVM IR builder + // IR builder llvm::IRBuilder<>* m_ir; // LLVM function llvm::Function* m_function; - // LLVM function type (may be null) - llvm::FunctionType* m_function_type; - // Function range u64 m_start_addr, m_end_addr, m_current_addr; - // Basic blocks for current function - std::unordered_map m_blocks; - - // JT resolver block - llvm::BasicBlock* m_jtr; - llvm::MDNode* m_md_unlikely; llvm::MDNode* m_md_likely; - // Current binary data - be_t* m_bin{}; - /* Variables */ // Memory base @@ -161,62 +143,65 @@ class PPUTranslator final //: public CPUTranslator // Callable functions llvm::Value* m_call; + // Main block + llvm::BasicBlock* m_body; + llvm::BasicBlock* m_entry; + // Thread context struct llvm::StructType* m_thread_type; - llvm::Value* m_globals[96]{}; - llvm::Value** const m_g_gpr = m_globals + 0; - llvm::Value** const m_g_fpr 
= m_globals + 32; - llvm::Value** const m_g_vr = m_globals + 64; - - llvm::Value* m_locals[96]{}; + llvm::Value* m_globals[169]; + llvm::Value* m_locals[169]; llvm::Value** const m_gpr = m_locals + 0; llvm::Value** const m_fpr = m_locals + 32; llvm::Value** const m_vr = m_locals + 64; + llvm::Value** const m_cr = m_locals + 96; + llvm::Value** const m_fc = m_locals + 128; - llvm::Value* m_cr[32]{}; - llvm::Value* m_g_lr; - llvm::Value* m_reg_lr; - llvm::Value* m_reg_ctr; // CTR register (counter) - llvm::Value* m_reg_vrsave; - llvm::Value* m_xer_so; // XER.SO bit, summary overflow - llvm::Value* m_xer_ov; // XER.OV bit, overflow flag - llvm::Value* m_xer_ca; // XER.CA bit, carry flag - llvm::Value* m_xer_count; - llvm::Value* m_vscr_nj; // VSCR.NJ bit, non-Java mode - llvm::Value* m_vscr_sat; // VSCR.SAT bit, sticky saturation flag + std::array m_writes; + std::array m_reads; - llvm::Value* m_fpscr[32]{}; - llvm::Value* m_fpscr_fx; // bit 32 (first) - llvm::Value* m_fpscr_ox; // bit 35 (4th) - llvm::Value* m_fpscr_ux; - llvm::Value* m_fpscr_zx; - llvm::Value* m_fpscr_xx; - llvm::Value* m_fpscr_vxsnan; - llvm::Value* m_fpscr_vxisi; - llvm::Value* m_fpscr_vxidi; - llvm::Value* m_fpscr_vxzdz; - llvm::Value* m_fpscr_vximz; - llvm::Value* m_fpscr_vxvc; - llvm::Value* m_fpscr_fr; - llvm::Value* m_fpscr_fi; - llvm::Value* m_fpscr_c; - llvm::Value* m_fpscr_lt; - llvm::Value* m_fpscr_gt; - llvm::Value* m_fpscr_eq; - llvm::Value* m_fpscr_un; - llvm::Value* m_fpscr_reserved; - llvm::Value* m_fpscr_vxsoft; - llvm::Value* m_fpscr_vxsqrt; - llvm::Value* m_fpscr_vxcvi; - llvm::Value* m_fpscr_ve; - llvm::Value* m_fpscr_oe; - llvm::Value* m_fpscr_ue; - llvm::Value* m_fpscr_ze; - llvm::Value* m_fpscr_xe; - llvm::Value* m_fpscr_ni; - llvm::Value* m_fpscr_rnh; // RN high bit - llvm::Value* m_fpscr_rnl; // RN low bit +#define DEF_VALUE(loc, glb, pos)\ + llvm::Value*& loc = m_locals[pos];\ + llvm::Value*& glb = m_globals[pos]; + + DEF_VALUE(m_lr, m_g_lr, 160); + DEF_VALUE(m_ctr, 
m_g_ctr, 161); // CTR register (counter) + DEF_VALUE(m_vrsave, m_g_vrsave, 162); + DEF_VALUE(m_so, m_g_so, 163); // XER.SO bit, summary overflow + DEF_VALUE(m_ov, m_g_ov, 164); // XER.OV bit, overflow flag + DEF_VALUE(m_ca, m_g_ca, 165); // XER.CA bit, carry flag + DEF_VALUE(m_cnt, m_g_cnt, 166); + DEF_VALUE(m_nj, m_g_nj, 167); // VSCR.NJ bit, non-Java mode + DEF_VALUE(m_sat, m_g_sat, 168); // VSCR.SAT bit, sticky saturation flag + +#undef DEF_VALUE + + template + void RegInit(llvm::Value*& local) + { + if (!local) + { + local = new llvm::AllocaInst(GetType(), nullptr, sizeof(T)); + m_entry->getInstList().push_back(llvm::cast(local)); + } + } + + template + llvm::Value* RegLoad(llvm::Value*& local) + { + RegInit(local); + m_reads.at(&local - m_locals) = true; + return m_ir->CreateLoad(local); + } + + template + void RegStore(llvm::Value* value, llvm::Value*& local) + { + RegInit(local); + m_writes.at(&local - m_locals) = true; + m_ir->CreateStore(value, local); + } public: @@ -233,10 +218,10 @@ public: llvm::Value* RotateLeft(llvm::Value* arg, llvm::Value* n); // Emit function call - void CallFunction(u64 target, bool tail, llvm::Value* indirect = nullptr); + void CallFunction(u64 target, llvm::Value* indirect = nullptr); - // Set some registers to undef (after function call) - void UndefineVolatileRegisters(); + // Write global registers + void FlushRegisters(); // Load gpr llvm::Value* GetGpr(u32 r, u32 num_bits = 64); @@ -339,6 +324,9 @@ public: // Set CR field based on unsigned comparison void SetCrFieldUnsignedCmp(u32 n, llvm::Value* a, llvm::Value* b); + // Set CR field from FPSCR CC fields + void SetCrFieldFPCC(u32 n); + // Set FPSCR CC fields provided, optionally updating CR1 void SetFPCC(llvm::Value* lt, llvm::Value* gt, llvm::Value* eq, llvm::Value* un, bool set_cr = false); @@ -376,7 +364,7 @@ public: llvm::Value* CheckTrapCondition(u32 to, llvm::Value* left, llvm::Value* right); // Emit trap - llvm::Value* Trap(u64 addr); + void Trap(u64 addr); // Get
condition for branch instructions llvm::Value* CheckBranchCondition(u32 bo, u32 bi); @@ -440,11 +428,8 @@ public: // Get thread context struct type llvm::Type* GetContextType(); - // Add function - void AddFunction(u64 addr, llvm::Function* func, llvm::FunctionType* type = nullptr); - // Parses PPU opcodes and translate them into LLVM IR - llvm::Function* TranslateToIR(const ppu_function& info, be_t* bin, void(*custom)(PPUTranslator*) = nullptr); + llvm::Function* Translate(const ppu_function& info); void MFVSCR(ppu_opcode_t op); void MTVSCR(ppu_opcode_t op); diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 151ea9cfa7..f0124774fa 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -979,6 +979,7 @@ extern void ppu_execute_syscall(ppu_thread& ppu, u64 code) { LOG_TODO(HLE, "Unimplemented syscall %s -> CELL_OK", ppu_get_syscall_name(code)); ppu.gpr[3] = 0; + ppu.cia += 4; } return; diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index ee8ec91a10..4bbcb96de2 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -348,7 +348,8 @@ namespace vm } } - void* real_addr = vm::base(addr); + void* real_addr = g_base_addr + addr; + void* exec_addr = g_exec_addr + addr; #ifdef _WIN32 auto protection = flags & page_writable ? PAGE_READWRITE : (flags & page_readable ? PAGE_READONLY : PAGE_NOACCESS); @@ -455,7 +456,8 @@ namespace vm } } - void* real_addr = vm::base(addr); + void* real_addr = g_base_addr + addr; + void* exec_addr = g_exec_addr + addr; #ifdef _WIN32 verify(__func__), ::VirtualFree(real_addr, size, MEM_DECOMMIT);