From f9ee8978ff6d86cd8394210b72f2ba694bc19e07 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 19 Jan 2021 20:40:15 +0300 Subject: [PATCH] PPU LLVM: improve analyser Compile possibly executable holes between detected functions. Add unused "PPU LLVM Greedy Mode" option (for future updates). Add "nounwind" attribute to compiled functions (reduces size). --- Utilities/JIT.cpp | 32 ++++- rpcs3/Emu/Cell/PPUAnalyser.cpp | 203 +++++++++++++++++++++++++++++-- rpcs3/Emu/Cell/PPUModule.cpp | 2 +- rpcs3/Emu/Cell/PPUThread.cpp | 69 +++++------ rpcs3/Emu/Cell/PPUTranslator.cpp | 17 --- rpcs3/Emu/system_config.h | 1 + 6 files changed, 256 insertions(+), 68 deletions(-) diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index 83c27cf094..28c3545a37 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -7,6 +7,7 @@ #include "mutex.h" #include "util/vm.hpp" #include "util/asm.hpp" +#include #include #include @@ -310,12 +311,29 @@ const bool jit_initialize = []() -> bool fmt::throw_exception("Null function: %s", name); } +namespace vm +{ + extern u8* const g_sudo_addr; +} + static shared_mutex null_mtx; static std::unordered_map null_funcs; static u64 make_null_function(const std::string& name) { + if (name.starts_with("__0x")) + { + u32 addr = -1; + auto res = std::from_chars(name.c_str() + 4, name.c_str() + name.size(), addr, 16); + + if (res.ec == std::errc() && res.ptr == name.c_str() + name.size() && addr < 0x8000'0000) + { + // Point the garbage to reserved, non-executable memory + return reinterpret_cast(vm::g_sudo_addr + addr); + } + } + std::lock_guard lock(null_mtx); if (u64& func_ptr = null_funcs[name]) [[likely]] @@ -376,8 +394,12 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager if (!addr) { - jit_log.error("Function '%s' linked but not found.", name); addr = make_null_function(name); + + if (!addr) + { + fmt::throw_exception("Failed to link '%s'", name); + } } return {addr, llvm::JITSymbolFlags::Exported}; @@ -453,8 +475,12 @@ struct MemoryManager2 : llvm::RTDyldMemoryManager if (!addr) { - jit_log.error("Function '%s' linked but not found.", name); addr = make_null_function(name); + + if (!addr) + { + fmt::throw_exception("Failed to link '%s' (MM2)", name); + } } return {addr, llvm::JITSymbolFlags::Exported}; @@ -730,7 +756,7 @@ jit_compiler::jit_compiler(const std::unordered_map& _link, co for (auto&& [name, addr] : _link) { - m_engine->addGlobalMapping(name, addr); + m_engine->updateGlobalMapping(name, addr); } } diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp index b184f79ba8..9d52391c4a 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.cpp +++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp @@ -3,6 +3,7 @@ #include "PPUOpcodes.h" #include "PPUModule.h" +#include "Emu/system_config.h" #include #include "util/yaml.hpp" @@ -577,7 +578,6 @@ void ppu_module::analyse(u32 lib_toc, u32 entry) func_queue.emplace_back(func); func.addr = addr; func.toc = toc; - func.name = fmt::format("__0x%x", func.addr); ppu_log.trace("Function 0x%x added (toc=0x%x)", addr, toc); return func; }; @@ -1432,7 +1432,7 @@ void ppu_module::analyse(u32 lib_toc, u32 entry) // Just ensure that functions don't overlap if (func.addr + func.size > next) { - ppu_log.warning("Function overlap: [0x%x] 0x%x -> 0x%x", func.addr, func.size, next - func.addr); + ppu_log.trace("Function overlap: [0x%x] 0x%x -> 0x%x", func.addr, func.size, next - func.addr); continue; //func.size = next - func.addr; // Also invalidate blocks @@ -1502,7 +1502,7 @@ void ppu_module::analyse(u32 lib_toc, u32 entry) if (_ptr.addr() >= next) { - ppu_log.warning("Function gap: [0x%x] 0x%x bytes at 0x%x", func.addr, next - start, start); + ppu_log.trace("Function gap: [0x%x] 0x%x bytes at 0x%x", func.addr, next - start, start); break; } } @@ -1522,15 +1522,200 @@ void ppu_module::analyse(u32 lib_toc, u32 entry) } } - // Convert map to vector (destructive) - for (auto&& pair : fmap) + ppu_log.notice("Function analysis: %zu functions (%zu enqueued)", fmap.size(), func_queue.size()); + + // Decompose functions to basic blocks + for (auto&& [_, func] : as_rvalue(std::move(fmap))) { - auto& func = pair.second; - ppu_log.trace("Function %s (size=0x%x, toc=0x%x, attr %#x)", func.name, func.size, func.toc, func.attr); - funcs.emplace_back(std::move(func)); + for (auto [addr, size] : func.blocks) + { + if (!size) + { + continue; + } + + auto& block = fmap[addr]; + + if (block.addr || block.size) + { + ppu_log.trace("Block __0x%x exists (size=0x%x)", block.addr, block.size); + continue; + } + + block.addr = addr; + block.size = size; + block.toc = func.toc; + ppu_log.trace("Block __0x%x added (func=0x%x, size=0x%x, toc=0x%x)", block.addr, _, block.size, block.toc); + } } - ppu_log.notice("Function analysis: %zu functions (%zu enqueued)", funcs.size(), func_queue.size()); + // Simple callable block analysis + std::vector> block_queue; + block_queue.reserve(128000); + + std::unordered_set block_set; + + u32 exp = start; + u32 lim = end; + + // Start with full scan + block_queue.emplace_back(exp, lim); + + // block_queue may grow + for (usz i = 0; i < block_queue.size(); i++) + { + std::tie(exp, lim) = block_queue[i]; + + if (lim == 0) + { + // Find next function + const auto found = fmap.upper_bound(exp); + + if (found != fmap.cend()) + { + lim = found->first; + } + + ppu_log.trace("Block rescan: addr=0x%x, lim=0x%x", exp, lim); + } + + while (exp < lim) + { + u32 i_pos = exp; + + bool is_good = true; + + for (; i_pos < lim; i_pos += 4) + { + const u32 opc = vm::_ref(i_pos); + + switch (auto type = s_ppu_itype.decode(opc)) + { + case ppu_itype::UNK: + case ppu_itype::ECIWX: + case ppu_itype::ECOWX: + { + // Seemingly bad instruction, skip this block + is_good = false; + break; + } + case ppu_itype::TD: + case ppu_itype::TDI: + case ppu_itype::TW: + case ppu_itype::TWI: + case ppu_itype::B: + case ppu_itype::BC: + { + if (type == ppu_itype::B || type == ppu_itype::BC) + { + if (entry == 0 && ppu_opcode_t{opc}.aa) + { + // Ignore absolute branches in PIC (PRX) + is_good = false; + break; + } + + const u32 target = (opc & 2 ? 0 : i_pos) + (type == ppu_itype::B ? +ppu_opcode_t{opc}.bt24 : +ppu_opcode_t{opc}.bt14); + + if (target < start || target >= end) + { + // Sanity check + is_good = false; + break; + } + + const auto found = fmap.find(target); + + if (target != i_pos && found == fmap.cend()) + { + if (block_set.count(target) == 0) + { + ppu_log.trace("Block target found: 0x%x (i_pos=0x%x)", target, i_pos); + block_queue.emplace_back(target, 0); + block_set.emplace(target); + } + } + } + + [[fallthrough]]; + } + case ppu_itype::BCCTR: + case ppu_itype::BCLR: + case ppu_itype::SC: + { + if (type == ppu_itype::SC && opc != ppu_instructions::SC(0)) + { + // Strict garbage filter + is_good = false; + break; + } + + if (type == ppu_itype::BCCTR && opc & 0xe000) + { + // Garbage filter + is_good = false; + break; + } + + if (type == ppu_itype::BCLR && opc & 0xe000) + { + // Garbage filter + is_good = false; + break; + } + + // Good block terminator found, add single block + break; + } + default: + { + // Normal instruction: keep scanning + continue; + } + } + + break; + } + + if (i_pos < lim) + { + i_pos += 4; + } + + if (is_good) + { + auto& block = fmap[exp]; + + if (!block.addr) + { + block.addr = exp; + block.size = i_pos - exp; + ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size); + } + } + + exp = i_pos; + } + } + + // Remove overlaps in blocks + for (auto it = fmap.begin(), end = fmap.end(); it != fmap.end(); it++) + { + const auto next = std::next(it); + + if (next != end && next->first < it->first + it->second.size) + { + it->second.size = next->first - it->first; + } + } + + // Convert map to vector (destructive) + for (auto&& pair : as_rvalue(std::move(fmap))) + { + funcs.emplace_back(std::move(pair.second)); + } + + ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size()); } void ppu_acontext::UNK(ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index de5ed47364..6a89d1d874 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -467,7 +467,7 @@ static auto ppu_load_exports(ppu_linkage_info* link, u32 exports_start, u32 expo if (i < lib.num_func) { - ppu_loader.notice("** Special: [%s] at 0x%x", ppu_get_function_name({}, nid), addr); + ppu_loader.notice("** Special: [%s] at 0x%x [0x%x, 0x%x]", ppu_get_function_name({}, nid), addr, vm::_ref(addr), vm::_ref(addr + 4)); } else { diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index e2bee2add4..f41f4ec887 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -2595,30 +2595,41 @@ bool ppu_initialize(const ppu_module& info, bool check_only) // Overall block size in bytes usz bsize = 0; + usz bcount = 0; while (fpos < info.funcs.size()) { auto& func = info.funcs[fpos]; + if (!func.size) + { + fpos++; + continue; + } + if (bsize + func.size > 100 * 1024 && bsize) { - break; + if (bcount >= 1000) + { + break; + } } - for (auto&& block : func.blocks) + // Copy block or function entry + ppu_function& entry = part.funcs.emplace_back(func); + + // Fixup some information + entry.name = fmt::format("__0x%x", entry.addr - reloc); + + if (entry.blocks.empty()) { - bsize += block.second; - - // Also split functions blocks into functions (TODO) - ppu_function entry; - entry.addr = block.first; - entry.size = block.second; - entry.toc = func.toc; - fmt::append(entry.name, "__0x%x", block.first - reloc); - part.funcs.emplace_back(std::move(entry)); + entry.blocks.emplace(func.addr, func.size); } + bsize += func.size; + fpos++; + bcount++; } // Compute module hash to generate (hopefully) unique object name @@ -2726,6 +2737,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only) java_mode_handling, accurate_cache_line_stores, reservations_128_byte, + greedy_mode, __bitset_enum_max }; @@ -2736,25 +2748,17 @@ bool ppu_initialize(const ppu_module& info, bool check_only) settings += ppu_settings::non_win32; #endif if (g_cfg.core.llvm_accurate_dfma) - { settings += ppu_settings::accurate_fma; - } if (g_cfg.core.llvm_ppu_accurate_vector_nan) - { settings += ppu_settings::accurate_ppu_vector_nan; - } if (g_cfg.core.llvm_ppu_jm_handling) - { settings += ppu_settings::java_mode_handling; - } if (has_dcbz == 2) - { settings += ppu_settings::accurate_cache_line_stores; - } if (g_cfg.core.ppu_128_reservations_loop_max_length) - { settings += ppu_settings::reservations_128_byte; - } + if (g_cfg.core.ppu_llvm_greedy_mode) + settings += ppu_settings::greedy_mode; // Write version, hash, CPU, settings fmt::append(obj_name, "v3-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu)); @@ -2899,21 +2903,15 @@ bool ppu_initialize(const ppu_module& info, bool check_only) { if (!func.size) continue; - for (const auto& block : func.blocks) - { - if (block.second) - { - const u64 addr = jit->get(fmt::format("__0x%x", block.first - reloc)); - jit_mod.funcs.emplace_back(reinterpret_cast(addr)); - ppu_ref(block.first) = addr; - } - } + const u64 addr = ensure(jit->get(fmt::format("__0x%x", func.addr - reloc))); + jit_mod.funcs.emplace_back(reinterpret_cast(addr)); + ppu_ref(func.addr) = addr; } // Initialize global variables for (auto& var : globals) { - const u64 addr = jit->get(var.first); + const u64 addr = ensure(jit->get(var.first)); jit_mod.vars.emplace_back(reinterpret_cast(addr)); @@ -2932,13 +2930,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only) { if (!func.size) continue; - for (const auto& block : func.blocks) - { - if (block.second) - { - ppu_ref(block.first) = reinterpret_cast(jit_mod.funcs[index++]); - } - } + ppu_ref(func.addr) = ensure(reinterpret_cast(jit_mod.funcs[index++])); } index = 0; @@ -2989,6 +2981,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co { const auto f = cast(_module->getOrInsertFunction(func.name, _func).getCallee()); f->addAttribute(1, Attribute::NoAlias); + f->addFnAttr(Attribute::NoUnwind); } } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 0ae31ffc86..4582155bd9 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -1975,7 +1975,6 @@ void PPUTranslator::SC(ppu_opcode_t op) if (index < 1024) { - // Call the syscall directly Call(GetType(), fmt::format("%s", ppu_syscall_code(index)), m_thread)->setTailCallKind(llvm::CallInst::TCK_Tail); m_ir->CreateRetVoid(); return; @@ -2491,7 +2490,6 @@ void PPUTranslator::MFOCRF(ppu_opcode_t op) if (pos >= 8 || 0x80u >> pos != op.crm) { - CompilationError("MFOCRF: Undefined behaviour"); SetGpr(op.rd, UndefValue::get(GetType())); return; } @@ -2771,7 +2769,6 @@ void PPUTranslator::MTOCRF(ppu_opcode_t op) if (pos >= 8 || 0x80u >> pos != op.crm) { - CompilationError("MTOCRF: Undefined behaviour"); return; } } @@ -3220,7 +3217,6 @@ void PPUTranslator::LDBRX(ppu_opcode_t op) void PPUTranslator::LSWX(ppu_opcode_t op) { - CompilationError("Unsupported instruction LSWX. Please report."); Call(GetType(), "__lswx_not_supported", m_ir->getInt32(op.rd), RegLoad(m_cnt), op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)); } @@ -3338,7 +3334,6 @@ void PPUTranslator::STDBRX(ppu_opcode_t op) void PPUTranslator::STSWX(ppu_opcode_t op) { - CompilationError("Unsupported instruction STSWX. Please report."); Call(GetType(), "__stswx_not_supported", m_ir->getInt32(op.rs), RegLoad(m_cnt), op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)); } @@ -4154,8 +4149,6 @@ void PPUTranslator::FNMADDS(ppu_opcode_t op) void PPUTranslator::MTFSB1(ppu_opcode_t op) { - CompilationError("MTFSB1"); - SetFPSCRBit(op.crbd, m_ir->getTrue(), true); if (op.rc) SetCrFieldFPCC(1); @@ -4163,8 +4156,6 @@ void PPUTranslator::MTFSB1(ppu_opcode_t op) void PPUTranslator::MCRFS(ppu_opcode_t op) { - CompilationError("MCRFS"); - const auto lt = GetFPSCRBit(op.crfs * 4 + 0); const auto gt = GetFPSCRBit(op.crfs * 4 + 1); const auto eq = GetFPSCRBit(op.crfs * 4 + 2); @@ -4174,8 +4165,6 @@ void PPUTranslator::MCRFS(ppu_opcode_t op) void PPUTranslator::MTFSB0(ppu_opcode_t op) { - CompilationError("MTFSB0"); - SetFPSCRBit(op.crbd, m_ir->getFalse(), false); if (op.rc) SetCrFieldFPCC(1); @@ -4183,8 +4172,6 @@ void PPUTranslator::MTFSB0(ppu_opcode_t op) void PPUTranslator::MTFSFI(ppu_opcode_t op) { - CompilationError("MTFSFI"); - SetFPSCRBit(op.crfd * 4 + 0, m_ir->getInt1((op.i & 8) != 0), false); if (op.crfd != 0) SetFPSCRBit(op.crfd * 4 + 1, m_ir->getInt1((op.i & 4) != 0), false); if (op.crfd != 0) SetFPSCRBit(op.crfd * 4 + 2, m_ir->getInt1((op.i & 2) != 0), false); @@ -4195,8 +4182,6 @@ void PPUTranslator::MTFSFI(ppu_opcode_t op) void PPUTranslator::MFFS(ppu_opcode_t op) { - ppu_log.warning("LLVM: [0x%08x] Warning: MFFS", m_addr + (m_reloc ? m_reloc->addr : 0)); - Value* result = m_ir->getInt64(0); for (u32 i = 16; i < 20; i++) @@ -4211,8 +4196,6 @@ void PPUTranslator::MFFS(ppu_opcode_t op) void PPUTranslator::MTFSF(ppu_opcode_t op) { - ppu_log.warning("LLVM: [0x%08x] Warning: MTFSF", m_addr + (m_reloc ? m_reloc->addr : 0)); - const auto value = GetFpr(op.frb, 32, true); for (u32 i = 16; i < 20; i++) diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 3ee03253fd..ec4a558b03 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -32,6 +32,7 @@ struct cfg_root : cfg::node cfg::_bool llvm_logs{ this, "Save LLVM logs" }; cfg::string llvm_cpu{ this, "Use LLVM CPU" }; cfg::_int<0, INT32_MAX> llvm_threads{ this, "Max LLVM Compile Threads", 0 }; + cfg::_bool ppu_llvm_greedy_mode{ this, "PPU LLVM Greedy Mode", false, false }; cfg::_bool thread_scheduler_enabled{ this, "Enable thread scheduler", thread_scheduler_enabled_def }; cfg::_bool set_daz_and_ftz{ this, "Set DAZ and FTZ", false }; cfg::_enum spu_decoder{ this, "SPU Decoder", spu_decoder_type::llvm };