From 77594dc66c6f100a0fbc381bee570f40d94dd48e Mon Sep 17 00:00:00 2001 From: Ivan Date: Thu, 7 Jul 2016 21:42:39 +0300 Subject: [PATCH] PPU LLVM: New analyser (#1858) Minor fixes VEX prefix support --- Utilities/Thread.cpp | 51 +++ rpcs3/Emu/Cell/PPUAnalyser.cpp | 588 ++++++++++++++++++++++++++ rpcs3/Emu/Cell/PPUAnalyser.h | 52 ++- rpcs3/Emu/Cell/PPUModule.cpp | 499 +++------------------- rpcs3/Emu/Cell/PPUModule.h | 5 +- rpcs3/Emu/Cell/PPUThread.cpp | 46 +- rpcs3/Emu/Cell/PPUTranslator.cpp | 34 +- rpcs3/Emu/Cell/PPUTranslator.h | 3 +- rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp | 2 +- rpcs3/Emu/Cell/lv2/sys_prx.h | 4 +- rpcs3/Emu/System.h | 1 + rpcs3/emucore.vcxproj | 3 +- rpcs3/emucore.vcxproj.filters | 3 + 13 files changed, 798 insertions(+), 493 deletions(-) create mode 100644 rpcs3/Emu/Cell/PPUAnalyser.cpp diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index 607bf03945..999c31355e 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -590,6 +590,57 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz } break; } + case 0xc4: // 3-byte VEX prefix + case 0xc5: // 2-byte VEX prefix + { + // Last prefix byte: op2 or op3 + const u8 opx = op1 == 0xc5 ? op2 : op3; + + // Implied prefixes + rex |= op2 & 0x80 ? 0 : 0x4; // REX.R + rex |= op1 == 0xc4 && op3 & 0x80 ? 0x8 : 0; // REX.W ??? + oso = (opx & 0x3) == 0x1; + repe = (opx & 0x3) == 0x2; + repne = (opx & 0x3) == 0x3; + + const u8 vopm = op1 == 0xc5 ? 1 : op2 & 0x1f; + const u8 vop1 = op1 == 0xc5 ? op3 : code[2]; + const u8 vlen = (opx & 0x4) ? 32 : 16; + const u8 vreg = (~opx >> 3) & 0xf; + out_length += op1 == 0xc5 ? 2 : 3; + code += op1 == 0xc5 ? 2 : 3; + + if (vopm == 0x1) switch (vop1) // Implied leading byte 0x0F + { + case 0x11: + case 0x29: + { + if (!repe && !repne) // VMOVAPS/VMOVAPD/VMOVUPS/VMOVUPD mem,reg + { + out_op = X64OP_STORE; + out_reg = get_modRM_reg_xmm(code, rex); + out_size = vlen; + out_length += get_modRM_size(code); + return; + } + break; + } + case 0x7f: + { + if (repe || oso) // VMOVDQU/VMOVDQA mem,reg + { + out_op = X64OP_STORE; + out_reg = get_modRM_reg_xmm(code, rex); + out_size = vlen; + out_length += get_modRM_size(code); + return; + } + break; + } + } + + break; + } case 0xc6: { if (!lock && !oso && get_modRM_reg(code, 0) == 0) // MOV r8/m8, imm8 diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp new file mode 100644 index 0000000000..a4edaa0e98 --- /dev/null +++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp @@ -0,0 +1,588 @@ +#include "stdafx.h" +#include "PPUOpcodes.h" +#include "PPUModule.h" +#include "PPUAnalyser.h" + +#include + +#include "yaml-cpp/yaml.h" + +const ppu_decoder s_ppu_itype; +const ppu_decoder s_ppu_iname; + +void ppu_validate(const std::string& fname, const std::vector& funcs, u32 reloc) +{ + // Load custom PRX configuration if available + if (fs::file yml{fname + ".yml"}) + { + const auto cfg = YAML::Load(yml.to_string()); + + u32 index = 0; + + // Validate detected functions using information provided + for (const auto func : cfg["functions"]) + { + const u32 addr = func["addr"].as(-1); + const u32 size = func["size"].as(0); + + if (addr != -1 && index < funcs.size()) + { + u32 found = funcs[index].addr - reloc; + + while (addr > found && index + 1 < funcs.size()) + { + LOG_ERROR(LOADER, "%s.yml : validation failed at 0x%x (0x%x, 0x%x)", fname, found, addr, size); + index++; + found = funcs[index].addr - reloc; + } + + if (addr < found) + { + LOG_ERROR(LOADER, "%s.yml : function not found (0x%x, 0x%x)", fname, addr, size); + 
continue;
+				}
+
+				if (size && size < funcs[index].size)
+				{
+					LOG_ERROR(LOADER, "%s.yml : function size mismatch at 0x%x(size=0x%x) (0x%x, 0x%x)", fname, found, funcs[index].size, addr, size);
+				}
+
+				if (size > funcs[index].size)
+				{
+					LOG_ERROR(LOADER, "%s.yml : function size mismatch at 0x%x(size=0x%x) (0x%x, 0x%x)", fname, found, funcs[index].size, addr, size);
+				}
+
+				index++;
+			}
+			else
+			{
+				LOG_ERROR(LOADER, "%s.yml : function not found at the end (0x%x, 0x%x)", fname, addr, size);
+				break;
+			}
+		}
+
+		if (!index)
+		{
+			return; // ???
+		}
+
+		while (index < funcs.size())
+		{
+			if (funcs[index].size)
+			{
+				LOG_ERROR(LOADER, "%s.yml : function not covered at 0x%x (size=0x%x)", fname, funcs[index].addr, funcs[index].size);
+			}
+
+			index++;
+		}
+
+		LOG_SUCCESS(LOADER, "%s.yml : validation completed", fname);
+	}
+}
+
+std::vector<ppu_function> ppu_analyse(const std::vector<std::pair<u32, u32>>& segs, const std::vector<std::pair<u32, u32>>& secs, u32 entry, u32 lib_toc)
+{
+	// Assume first segment is executable
+	const u32 start = segs[0].first;
+	const u32 end = segs[0].first + segs[0].second;
+	const u32 start_toc = entry ? +vm::read32(entry + 4) : lib_toc;
+
+	// Known TOCs (usually only 1)
+	std::unordered_set<u32> TOCs;
+
+	// Known functions
+	std::map<u32, ppu_function> funcs;
+
+	// Function analysis workload
+	std::vector<std::reference_wrapper<ppu_function>> func_queue;
+
+	// Register new function
+	auto add_func = [&](u32 addr, u32 toc, u32 origin) -> ppu_function&
+	{
+		ppu_function& func = funcs[addr];
+
+		if (func.addr)
+		{
+			// Update TOC (TODO: this doesn't work well)
+			if (func.toc == 0 || toc == -1)
+			{
+				func.toc = toc;
+			}
+			else if (toc && func.toc != -1 && func.toc != toc)
+			{
+				//LOG_WARNING(PPU, "Function 0x%x: TOC mismatch (0x%x vs 0x%x)", addr, toc, func.toc);
+				func.toc = -1;
+			}
+
+			return func;
+		}
+
+		func_queue.emplace_back(func);
+		func.addr = addr;
+		func.toc = toc;
+		LOG_TRACE(PPU, "Function 0x%x added (toc=0x%x, origin=0x%x)", addr, toc, origin);
+		return func;
+	};
+
+	// Register new TOC and find basic set of functions
+	auto add_toc = [&](u32 toc)
+	{
+		if (!toc || toc == -1 || !TOCs.emplace(toc).second)
+		{
+			return;
+		}
+
+		// Grope for OPD section (TODO: optimization, better constraints)
+		for (const auto& seg : segs)
+		{
+			for (vm::cptr<u32> ptr = vm::cast(seg.first); ptr.addr() < seg.first + seg.second; ptr++)
+			{
+				if (ptr[0] >= start && ptr[0] < end && ptr[0] % 4 == 0 && ptr[1] == toc)
+				{
+					// New function
+					LOG_NOTICE(PPU, "OPD*: [0x%x] 0x%x (TOC=0x%x)", ptr, ptr[0], ptr[1]);
+					add_func(*ptr, toc, ptr.addr());
+					ptr++;
+				}
+			}
+		}
+	};
+
+	// Get next function address
+	auto get_limit = [&](u32 addr) -> u32
+	{
+		const auto found = funcs.lower_bound(addr);
+
+		if (found != funcs.end())
+		{
+			return found->first;
+		}
+
+		return end;
+	};
+
+	// Find OPD section
+	for (const auto& sec : secs)
+	{
+		const u32 sec_end = sec.first + sec.second;
+
+		if (entry >= sec.first && entry < sec_end)
+		{
+			for (vm::cptr<u32> ptr = vm::cast(sec.first); ptr.addr() < sec_end; ptr += 2)
+			{
+				// Add function and TOC
+				const u32 addr = ptr[0];
+				const u32 toc = ptr[1];
+				LOG_NOTICE(PPU, "OPD: [0x%x] 0x%x (TOC=0x%x)", ptr, addr, toc);
+				TOCs.emplace(toc);
+
+				auto& func = add_func(addr, toc, ptr.addr());
+			}
+
+			break;
+		}
+	}
+
+	// Otherwise, register initial set of functions (likely including the entry point)
+	add_toc(start_toc);
+
+	// Find eh_frame section
+	for (const auto& sec : secs)
+	{
+		const u32 sec_end = sec.first + sec.second;
+
+		if (sec.first + 32 >= sec_end || vm::read64(sec.first) != 0x0000001c00000000 || vm::read16(sec.first + 8) != 0x017a)
+		{
+			continue;
+		}
+
+		for (vm::cptr<u32> ptr = vm::cast(sec.first); ptr.addr() < sec_end - 4; ptr = vm::cast(ptr.addr() + ptr[0] + 4))
+		{
+			if (const u32 off = ptr[1])
+			{
+				const u32 addr = ptr[3] + (ptr + 2).addr(); // Function offset (64 bit)
+				const u32 size = ptr[5]; // Function size (64 bit)
+
+				LOG_NOTICE(PPU, ".eh_frame: [0x%x] 0x%x, 0x%x (size=0x%x)", ptr, ptr[0], ptr[1], size);
+
+				if (!ptr[3]) continue; // TODO (some entries have zero offset)
+
+				auto& func = add_func(addr, 0, ptr.addr());
+				func.attr += ppu_attr::known_size;
+				func.size = size;
+			}
+		}
+	}
+
+	// Main loop (func_queue may grow)
+	for (std::size_t i = 0; i < func_queue.size(); i++)
+	{
+		ppu_function& func = func_queue[i];
+
+		if (func.blocks.empty())
+		{
+			// Special function analysis
+			const vm::cptr<u32> ptr = vm::cast(func.addr);
+			const vm::cptr<u32> fend = vm::cast(end);
+
+			using namespace ppu_instructions;
+
+			if (ptr + 1 <= fend && (ptr[0] & 0xfc000001) == B({}, {}))
+			{
+				// Simple gate
+				func.size = 0x4;
+				func.blocks.emplace(func.addr, func.size);
+				const u32 target = ppu_branch_target(ptr[0] & 0x2 ? 0 : ptr.addr(), s32(ptr[0]) << 6 >> 6);
+				add_func(target, func.toc, func.addr);
+				continue;
+			}
+
+			if (ptr + 4 <= fend &&
+				ptr[0] == STD(r2, r1, 0x28) &&
+				(ptr[1] & 0xffff0000) == ADDIS(r2, r2, {}) &&
+				(ptr[2] & 0xffff0000) == ADDI(r2, r2, {}) &&
+				(ptr[3] & 0xfc000001) == B({}, {}))
+			{
+				// TOC change gate
+				func.size = 0x10;
+				func.blocks.emplace(func.addr, func.size);
+				const u32 new_toc = func.toc && func.toc != -1 ? func.toc + (ptr[1] << 16) + s16(ptr[2]) : 0;
+				const u32 target = ppu_branch_target(ptr[3] & 0x2 ? 0 : (ptr + 3).addr(), s32(ptr[3]) << 6 >> 6);
+				add_func(target, new_toc, func.addr);
+				add_toc(new_toc);
+				continue;
+			}
+
+			if (ptr + 8 <= fend &&
+				(ptr[0] & 0xffff0000) == LI(r12, 0) &&
+				(ptr[1] & 0xffff0000) == ORIS(r12, r12, 0) &&
+				(ptr[2] & 0xffff0000) == LWZ(r12, r12, 0) &&
+				ptr[3] == STD(r2, r1, 0x28) &&
+				ptr[4] == LWZ(r0, r12, 0) &&
+				ptr[5] == LWZ(r2, r12, 4) &&
+				ptr[6] == MTCTR(r0) &&
+				ptr[7] == BCTR())
+			{
+				// The most used simple import stub
+				func.size = 0x20;
+				func.blocks.emplace(func.addr, func.size);
+				continue;
+			}
+
+			if (ptr + 3 <= fend &&
+				(ptr[0] & 0xffff0000) == LI(r0, 0) &&
+				(ptr[1] & 0xffff0000) == ORIS(r0, r0, 0) &&
+				(ptr[2] & 0xfc000003) == B({}, {}, {}))
+			{
+				// Import stub with r0 usage
+				func.attr += ppu_attr::uses_r0;
+			}
+
+			// TODO: detect no_return, scribe more TODOs
+
+			// Acknowledge completion
+			func.blocks.emplace(vm::cast(func.addr), 0);
+		}
+
+		// Block analysis workload
+		std::vector<std::reference_wrapper<std::pair<const u32, u32>>> block_queue;
+
+		// Add new block for analysis
+		auto add_block = [&](u32 addr) -> bool
+		{
+			const auto _pair = func.blocks.emplace(addr, 0);
+
+			if (_pair.second)
+			{
+				block_queue.emplace_back(*_pair.first);
+				return true;
+			}
+
+			return false;
+		};
+
+		for (auto& block : func.blocks)
+		{
+			if (!block.second)
+			{
+				block_queue.emplace_back(block);
+			}
+		}
+
+		// TODO: lower priority?
+ if (func.attr & ppu_attr::no_size) + { + const u32 next = get_limit(func.blocks.crbegin()->first + 1); + + // Find more block entries + for (const auto& seg : segs) + { + for (vm::cptr ptr = vm::cast(seg.first); ptr.addr() < seg.first + seg.second; ptr++) + { + const u32 value = *ptr; + + if (value % 4 == 0 && value >= func.addr && value < next) + { + add_block(value); + } + } + } + } + + const bool was_empty = block_queue.empty(); + + // Block loop (block_queue may grow, may be aborted via clearing) + for (std::size_t j = 0; j < block_queue.size(); j++) + { + auto& block = block_queue[j].get(); + + for (vm::cptr _ptr = vm::cast(block.first); _ptr.addr() < end;) + { + const u32 iaddr = _ptr.addr(); + const ppu_opcode_t op{*_ptr++}; + const ppu_itype::type type = s_ppu_itype.decode(op.opcode); + + if (type == ppu_itype::UNK) + { + // Invalid blocks will remain empty + break; + } + else if (type == ppu_itype::B || type == ppu_itype::BC) + { + const u32 target = ppu_branch_target(op.aa ? 0 : iaddr, type == ppu_itype::B ? +op.ll : +op.simm16); + const bool is_call = op.lk && target != iaddr; + const auto pfunc = is_call ? &add_func(target, 0, func.addr) : nullptr; + + if (pfunc && pfunc->blocks.empty()) + { + // Postpone analysis (no info) + block_queue.clear(); + break; + } + + // Add next block if necessary + if ((is_call && !pfunc->attr.test(ppu_attr::no_return)) || (type == ppu_itype::BC && (op.bo & 0x14) != 0x14)) + { + add_block(_ptr.addr()); + } + + if (op.lk && (target == iaddr || pfunc->attr.test(ppu_attr::no_return))) + { + // Nothing + } + else if (is_call || target < func.addr/* || target >= get_limit(_ptr.addr())*/) + { + // Add function call (including obvious tail call) + add_func(target, 0, func.addr); + } + else + { + // Add block + add_block(target); + } + + block.second = _ptr.addr() - block.first; + break; + } + else if (type == ppu_itype::BCLR) + { + if (op.lk || (op.bo & 0x14) != 0x14) + { + add_block(_ptr.addr()); + } + + block.second = _ptr.addr() - block.first; + break; + } + else if (type == ppu_itype::BCCTR) + { + if (op.lk || (op.bo & 0x10) != 0x10) + { + add_block(_ptr.addr()); + } + else + { + // Analyse jumptable (TODO) + const u32 jt_addr = _ptr.addr(); + const u32 jt_end = end; + + for (; _ptr.addr() < jt_end; _ptr++) + { + const u32 addr = jt_addr + *_ptr; + + if (addr == jt_addr) + { + // TODO (cannot branch to jumptable itself) + break; + } + + if (addr % 4 || addr < func.addr || addr >= jt_end) + { + break; + } + + add_block(addr); + } + + if (jt_addr != jt_end && _ptr.addr() == jt_addr) + { + // Acknowledge jumptable detection failure + func.attr += ppu_attr::no_size; + add_block(iaddr); + block_queue.clear(); + } + } + + block.second = _ptr.addr() - block.first; + break; + } + } + } + + if (block_queue.empty() && !was_empty) + { + // Block aborted: abort function, postpone + func_queue.emplace_back(func); + continue; + } + + // Finalization: determine function size + for (const auto& block : func.blocks) + { + const u32 expected = func.addr + func.size; + + if (func.attr & ppu_attr::known_size) + { + continue; + } + + if (expected == block.first) + { + func.size += block.second; + } + else if (expected + 4 == block.first && vm::read32(expected) == ppu_instructions::NOP()) + { + func.size += block.second + 4; + } + else if (expected < block.first) + { + //block.second = 0; + continue; + } + + // Function min size constraint (TODO) + for (vm::cptr _ptr = vm::cast(block.first); _ptr.addr() < block.first + block.second;) + { + const u32 iaddr = _ptr.addr(); 
+ const ppu_opcode_t op{*_ptr++}; + const ppu_itype::type type = s_ppu_itype.decode(op.opcode); + + if (type == ppu_itype::BCCTR && !op.lk) + { + const u32 jt_base = _ptr.addr() - func.addr; + + for (; _ptr.addr() < block.first + block.second; _ptr++) + { + func.size = std::max(func.size, jt_base + *_ptr); + } + + break; + } + else if (type == ppu_itype::BC && !op.lk) + { + const u32 target = ppu_branch_target(op.aa ? 0 : iaddr, +op.simm16); + + func.size = std::max(func.size, target - func.addr); + + break; + } + } + } + + // Finalization: normalize blocks + for (auto& block : func.blocks) + { + const auto next = func.blocks.upper_bound(block.first); + + // Normalize block if necessary + if (next != func.blocks.end()) + { + block.second = next->first - block.first; + } + + // Invalidate blocks out of the function + const u32 fend = func.addr + func.size; + const u32 bend = block.first + block.second; + + if (block.first >= fend) + { + block.second = 0; + } + else if (bend > fend) + { + block.second -= bend - fend; + } + } + + // Finalization: process remaining tail calls + for (const auto& block : func.blocks) + { + for (vm::cptr _ptr = vm::cast(block.first); _ptr.addr() < block.first + block.second;) + { + const u32 iaddr = _ptr.addr(); + const ppu_opcode_t op{*_ptr++}; + const ppu_itype::type type = s_ppu_itype.decode(op.opcode); + + if (type == ppu_itype::B || type == ppu_itype::BC) + { + const u32 target = ppu_branch_target(op.aa ? 0 : iaddr, type == ppu_itype::B ? +op.ll : +op.simm16); + + if (target < func.addr || target >= func.addr + func.size) + { + add_func(target, func.toc, func.addr); + } + } + else if (type == ppu_itype::BCCTR && !op.lk) + { + // Jumptable (do not touch entries) + break; + } + } + } + } + + // Function shrinkage (TODO: it's potentially dangerous but improvable) + for (auto& _pair : funcs) + { + auto& func = _pair.second; + + // Next function start + const u32 next = get_limit(_pair.first + 1); + + // Just ensure that functions don't overlap + if (func.addr + func.size > next) + { + LOG_WARNING(PPU, "Function overlap: [0x%x] 0x%x -> 0x%x", func.addr, func.size, next - func.addr); + continue; //func.size = next - func.addr; + + // Also invalidate blocks + for (auto& block : func.blocks) + { + if (block.first + block.second > next) + { + block.second = block.first >= next ? 0 : next - block.first; + } + } + } + } + + // Convert map to vector (destructive) + std::vector result; + + for (auto&& func : funcs) + { + result.emplace_back(std::move(func.second)); + } + + return result; +} diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index 3716028f5e..f9e9d43271 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -1,5 +1,33 @@ #pragma once +#include + +#include "../Utilities/BitSet.h" + +// PPU Function Attributes +enum class ppu_attr : u32 +{ + known_size, + no_return, + no_size, + uses_r0, +}; + +// PPU Function Information +struct ppu_function +{ + u32 addr{}; + u32 toc{}; + u32 size{}; + bitset_t attr{}; + + std::map blocks; // Basic blocks: addr -> size +}; + +extern void ppu_validate(const std::string& fname, const std::vector& funcs, u32 reloc); + +extern std::vector ppu_analyse(const std::vector>& segs, const std::vector>& secs, u32 entry, u32 lib_toc); + // PPU Instruction Type struct ppu_itype { @@ -396,19 +424,18 @@ struct ppu_itype } }; +// Encode instruction name: 6 bits per character (0x20..0x5f), max 10 +static constexpr u64 ppu_iname_encode(const char* ptr, u64 value = 0) +{ + return *ptr == '\0' ? 
value : ppu_iname_encode(ptr + 1, (*ptr - 0x20) | (value << 6)); +} + struct ppu_iname { - // Aggregate to store instruction name - struct type { const char* name; }; +#define NAME(x) x = ppu_iname_encode(#x), - // Enable address-of operator for ppu_decoder<> - friend constexpr const char* operator &(type value) + enum type : u64 { - return value.name; - } - -#define NAME(x) static constexpr type x{#x}; - NAME(UNK) NAME(MFVSCR) NAME(MTVSCR) @@ -790,6 +817,13 @@ struct ppu_iname NAME(FCTID) NAME(FCTIDZ) NAME(FCFID) + }; #undef NAME + + // Enable address-of operator for ppu_decoder<> + friend constexpr type operator &(type value) + { + return value; + } }; diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index 49e02818eb..ec7ff94b85 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -14,10 +14,6 @@ #include #include -#include "yaml-cpp/yaml.h" - -const ppu_decoder s_ppu_itype; -//const ppu_decoder s_ppu_iname; LOG_CHANNEL(cellAdec); LOG_CHANNEL(cellAtrac); @@ -120,7 +116,7 @@ extern std::string ppu_get_variable_name(const std::string& module, u32 vnid); extern void sys_initialize_tls(PPUThread&, u64, u32, u32, u32); -extern void ppu_initialize(const std::string& name, const std::vector>& set, u32 entry); +extern void ppu_initialize(const std::string& name, const std::vector& set, u32 entry); // Function lookup table. Not supposed to grow after emulation start. std::vector g_ppu_function_cache; @@ -350,7 +346,7 @@ static void ppu_initialize_modules() // Detect import stub at specified address and inject HACK instruction with index immediate. static bool ppu_patch_import_stub(u32 addr, u32 index) { - const auto data = vm::cptr::make(addr); + const auto data = vm::_ptr(addr); using namespace ppu_instructions; @@ -366,10 +362,10 @@ static bool ppu_patch_import_stub(u32 addr, u32 index) data[6] == MTCTR(r0) && data[7] == BCTR()) { - std::memset(vm::base(addr), 0, 32); - vm::write32(addr + 0, STD(r2, r1, 0x28)); // Save RTOC - vm::write32(addr + 4, HACK(index)); - vm::write32(addr + 8, BLR()); + data[0] = STD(r2, r1, 0x28); // Save RTOC + data[1] = HACK(index); + data[2] = BLR(); + std::fill(data + 3, data + 8, NOP()); return true; } @@ -397,9 +393,9 @@ static bool ppu_patch_import_stub(u32 addr, u32 index) sub[0xd] == MTLR(r0) && sub[0xe] == BLR()) { - vm::write32(addr + 0, HACK(index)); - vm::write32(addr + 4, BLR()); - vm::write32(addr + 8, 0); + data[0] = HACK(index); + data[1] = BLR(); + data[2] = NOP(); return true; } } @@ -422,13 +418,13 @@ static bool ppu_patch_import_stub(u32 addr, u32 index) data[0xe] == MTLR(r0) && data[0xf] == BLR()) { - std::memset(vm::base(addr), 0, 64); - vm::write32(addr + 0, HACK(index)); - vm::write32(addr + 4, BLR()); + data[0] = HACK(index); + data[1] = BLR(); + std::fill(data + 2, data + 16, NOP()); return true; } - if (vm::check_addr(addr, 64) && + if (vm::check_addr(addr, 60) && data[0x0] == MFLR(r0) && data[0x1] == STD(r0, r1, 0x10) && data[0x2] == STDU(r1, r1, -0x80) && @@ -445,9 +441,9 @@ static bool ppu_patch_import_stub(u32 addr, u32 index) data[0xd] == MTLR(r0) && data[0xe] == BLR()) { - std::memset(vm::base(addr), 0, 64); - vm::write32(addr + 0, HACK(index)); - vm::write32(addr + 4, BLR()); + data[0] = HACK(index); + data[1] = BLR(); + std::fill(data + 2, data + 15, NOP()); return true; } @@ -467,9 +463,9 @@ static bool ppu_patch_import_stub(u32 addr, u32 index) data[0xc] == LD(r2, r1, 0x28) && data[0xd] == BLR()) { - std::memset(vm::base(addr), 0, 56); - vm::write32(addr + 0, HACK(index)); - 
vm::write32(addr + 4, BLR()); + data[0] = HACK(index); + data[1] = BLR(); + std::fill(data + 2, data + 14, NOP()); return true; } @@ -670,16 +666,12 @@ static auto ppu_load_exports(const std::shared_ptr& link, u32 return result; } -static u32 ppu_load_imports(const std::shared_ptr& link, u32 imports_start, u32 imports_end) +static void ppu_load_imports(const std::shared_ptr& link, u32 imports_start, u32 imports_end) { - u32 result = imports_start; - for (u32 addr = imports_start; addr < imports_end;) { const auto& lib = vm::_ref(addr); - result = std::min(result, lib.name.addr()); - const std::string module_name(lib.name.get_ptr()); LOG_NOTICE(LOADER, "** Imported module '%s' (0x%x, 0x%x)", module_name, lib.unk4, lib.unk5); @@ -737,404 +729,13 @@ static u32 ppu_load_imports(const std::shared_ptr& link, u32 i addr += lib.size ? lib.size : sizeof(ppu_prx_module_info); } - - return result; -} - -// Returns max branch address of jumptable -never_inline static u32 ppu_is_jumptable(vm::ptr& start_ptr, u32 start, u32 end) -{ - u32 max_addr = 0; - - if (end - start_ptr.addr() < 8) - { - return 0; - } - - for (vm::ptr ptr = start_ptr; ptr.addr() < end; ptr++) - { - const u32 addr = start_ptr.addr() + *ptr; - - if (addr % 4 || addr < start || addr >= end) - { - if (ptr - start_ptr < 2) - { - return 0; - } - - start_ptr = ptr; - return max_addr; - } - - max_addr = std::max(max_addr, addr); - } - - start_ptr = vm::cast(end); - return max_addr; -} - -// Guess whether the function cannot be divided at specific position `split` -static bool ppu_is_coherent(u32 start, u32 end, u32 split) -{ - // Check if the block before `split` is directly connected (can fall through) - for (vm::ptr rptr = vm::cast(split - 4);; rptr--) - { - const u32 _last = *rptr; - - // Skip NOPs - if (_last == ppu_instructions::NOP()) - { - if (rptr.addr() == start) return true; - continue; - } - - switch (const auto type = s_ppu_itype.decode(_last)) - { - case ppu_itype::UNK: - case ppu_itype::TD: - case ppu_itype::TDI: - case ppu_itype::TW: - case ppu_itype::TWI: - { - break; - } - - case ppu_itype::B: - { - if (ppu_opcode_t{_last}.lk) return true; - break; - } - - case ppu_itype::BC: - case ppu_itype::BCLR: - { - if (ppu_opcode_t{_last}.lk || (ppu_opcode_t{_last}.bo & 0x14) != 0x14) return true; - break; - } - - case ppu_itype::BCCTR: - { - if (ppu_opcode_t{_last}.lk || (ppu_opcode_t{_last}.bo & 0x10) == 0) return true; - break; - } - - default: - { - return true; - } - } - - break; - } - - // Find branches from one part to another - for (vm::ptr ptr = vm::cast(start); ptr.addr() < split; ptr++) - { - const u32 value = *ptr; - const auto type = s_ppu_itype.decode(value); - - const ppu_opcode_t op{value}; - - if (type == ppu_itype::B || type == ppu_itype::BC) - { - const u32 target = ppu_branch_target(op.aa ? 0 : ptr.addr(), type == ppu_itype::B ? +op.ll : +op.simm16); - - if (target % 4 == 0 && target >= split && target < end) - { - return !op.lk; - } - } - - if (type == ppu_itype::BCCTR && !op.lk) - { - const u32 max = ppu_is_jumptable(++ptr, start, end); - - if (max && max >= split) - { - return true; - } - - ptr--; - } - } - - // TODO: ??? 
- return false; -} - -static std::vector> ppu_analyse(u32 start, u32 end, const std::vector>& segs, u32 rtoc) -{ - // Function entries (except the last one) - std::set result - { - end, - }; - - // Instruction usage stats - //std::unordered_map stats; - - // Jumptable entries (addr->size) - std::unordered_map jts; - - // Block entries - std::set blocks; - - // First pass; Detect branch + link instructions - for (vm::ptr ptr = vm::cast(start); ptr.addr() < end; ptr++) - { - const u32 value = *ptr; - const auto type = s_ppu_itype.decode(value); - //const auto name = s_ppu_iname.decode(value); - - const ppu_opcode_t op{value}; - - if (type == ppu_itype::B || type == ppu_itype::BC) - { - const u32 target = ppu_branch_target(op.aa ? 0 : ptr.addr(), type == ppu_itype::B ? +op.ll : +op.simm16); - - if (op.lk && target % 4 == 0 && target >= start && target < end && target != ptr.addr()) - { - LOG_NOTICE(PPU, "BCall: 0x%x -> 0x%x", ptr, target); - result.emplace(target); - } - - if (!op.lk && target % 4 == 0 && target >= start && target < end) - { - blocks.emplace(target); - } - } - - if (type == ppu_itype::BCCTR && !op.lk) - { - const auto jt = ++ptr; - - if (ppu_is_jumptable(ptr, start, end)) - { - LOG_NOTICE(PPU, "JTable: 0x%x .. 0x%x", jt, ptr); - jts.emplace(jt.addr(), ptr.addr() - jt.addr()); - - for (auto _ptr = jt; _ptr != ptr; _ptr++) - { - blocks.emplace(jt.addr() + *_ptr); - } - } - else - { - LOG_NOTICE(PPU, "BCCTR: 0x%x", ptr - 1); - } - - ptr--; - } - - //stats[name]++; - } - - // Find OPD table - for (const auto& seg : segs) - { - for (vm::ptr ptr = vm::cast(seg.first); ptr.addr() < seg.first + seg.second; ptr++) - { - if (ptr[0] >= start && ptr[0] < end && ptr[0] % 4 == 0 && ptr[1] == rtoc) - { - while (ptr[0] >= start && ptr[0] < end && ptr[0] % 4 == 0 && !jts.count(ptr[0]) /*&& ptr[1] == rtoc*/) - { - LOG_NOTICE(PPU, "OPD: 0x%x -> 0x%x (rtoc=0x%x)", ptr, ptr[0], ptr[1]); - result.emplace(ptr[0]); - ptr += 2; - } - - break; - } - } - } - - // Find more block entries - for (const auto& seg : segs) - { - for (vm::ptr ptr = vm::cast(seg.first); ptr.addr() < seg.first + seg.second; ptr++) - { - const u32 value = *ptr; - - if (value % 4 == 0 && value >= start && value < end) - { - blocks.emplace(value); - } - } - } - - // Detect tail calls - std::deque task{result.begin(), result.end()}; - - while (!task.empty()) - { - const u32 f_start = task.front(); - const auto f_up = result.upper_bound(f_start); - - if (f_up != result.end()) for (vm::ptr ptr = vm::cast(f_start); ptr.addr() < *f_up; ptr++) - { - const u32 value = *ptr; - const auto type = s_ppu_itype.decode(value); - - const ppu_opcode_t op{value}; - - if (type == ppu_itype::B || type == ppu_itype::BC) - { - const u32 target = ppu_branch_target(op.aa ? 0 : ptr.addr(), type == ppu_itype::B ? 
+op.ll : +op.simm16); - - if (!op.lk && target % 4 == 0 && target >= start && target < end && (target < f_start || target >= *f_up)) - { - auto _lower = result.lower_bound(target); - - if (*_lower == target || _lower == result.begin()) - { - continue; - } - - const u32 f2_end = *_lower; - const u32 f2_start = *--_lower; - - if (ppu_is_coherent(f2_start, f2_end, target)) - { - continue; - } - - LOG_NOTICE(LOADER, "Tail call: 0x%x -> 0x%x", ptr, target); - result.emplace(target); - - // Rescan two new functions if the insertion took place - task.push_back(target); - task.push_back(f2_start); - } - } - } - - task.pop_front(); - } - - // Fill (addr, size) vector - std::vector> vr; - - for (auto it = result.begin(), end = --result.end(); it != end; it++) - { - const u32 addr = *it; - const auto _up = result.upper_bound(addr); - - // Set initial (addr, size) - vr.emplace_back(std::make_pair(addr, *_up - addr)); - - // Analyse function against its end - for (u32& size = vr.back().second; size;) - { - const auto next = result.upper_bound(addr + size); - - if (next != result.end() && ppu_is_coherent(addr, *next, addr + size)) - { - // Extend and check again - const u32 new_size = *next - addr; - - LOG_NOTICE(LOADER, "Extended: 0x%x (0x%x --> 0x%x)", addr, size, new_size); - size = new_size; - continue; - } - - break; - } - } - - // Add blocks as (addr, 0) // TODO - for (auto value : blocks) - { - vr.emplace_back(std::make_pair(value, 0)); - } - - // Print some stats - //{ - // std::multimap> sorted; - - // for (const auto& pair : stats) - // sorted.emplace(pair.second, pair.first); - - // for (const auto& stat : sorted) - // LOG_NOTICE(PPU, "Stats: (%llu) %s", stat.first, stat.second); - //} - - return vr; -} - -static void ppu_validate(const std::string& fname, const std::vector>& funcs, u32 reloc) -{ - // Load custom PRX configuration if available - if (fs::file yml{fname + ".yml"}) - { - const auto cfg = YAML::Load(yml.to_string()); - - u32 index = 0; - - // Validate detected functions using information provided - for (const auto func : cfg["functions"]) - { - const u32 addr = func["addr"].as(-1); - const u32 size = func["size"].as(0); - - if (addr != -1 && index < funcs.size()) - { - u32 found = funcs[index].first - reloc; - - while (addr > found && index + 1 < funcs.size()) - { - LOG_ERROR(LOADER, "%s.yml : validation failed at 0x%x (0x%x, 0x%x)", fname, found, addr, size); - index++; - found = funcs[index].first - reloc; - } - - if (addr < found) - { - LOG_ERROR(LOADER, "%s.yml : function not found (0x%x, 0x%x)", fname, addr, size); - continue; - } - - if (size && size < funcs[index].second) - { - LOG_WARNING(LOADER, "%s.yml : function size mismatch at 0x%x(size=0x%x) (0x%x, 0x%x)", fname, found, funcs[index].second, addr, size); - } - - if (size > funcs[index].second) - { - LOG_ERROR(LOADER, "%s.yml : function size mismatch at 0x%x(size=0x%x) (0x%x, 0x%x)", fname, found, funcs[index].second, addr, size); - } - - index++; - } - else - { - LOG_ERROR(LOADER, "%s.yml : function not found at the end (0x%x, 0x%x)", fname, addr, size); - break; - } - } - - if (!index) - { - return; // ??? 
- } - - while (index < funcs.size()) - { - if (funcs[index].second) - { - LOG_ERROR(LOADER, "%s.yml : function not covered at 0x%x (size=0x%x)", fname, funcs[index].first, funcs[index].second); - } - - index++; - } - - LOG_SUCCESS(LOADER, "%s.yml : validation completed", fname); - } } template<> std::shared_ptr ppu_prx_loader::load() const { std::vector> segments; + std::vector> sections; // Unused for (const auto& prog : progs) { @@ -1269,17 +870,9 @@ std::shared_ptr ppu_prx_loader::load() const prx->specials = ppu_load_exports(link, lib_info->exports_start, lib_info->exports_end); - const std::initializer_list addr_list - { - ppu_load_imports(link, lib_info->imports_start, lib_info->imports_end), + ppu_load_imports(link, lib_info->imports_start, lib_info->imports_end); - lib_info.addr(), - lib_info->imports_start, - lib_info->exports_start, - }; - - // Get functions - prx->func = ppu_analyse(segments[0].first, std::min(addr_list), segments, lib_info->toc); + prx->funcs = ppu_analyse(segments, sections, 0, lib_info->toc); } else { @@ -1309,9 +902,11 @@ void ppu_exec_loader::load() const // Segment info std::vector> segments; + // Section info (optional) + std::vector> sections; + // Functions - std::vector> exec_set; - u32 exec_end{}; + std::vector exec_set; // Allocate memory at fixed positions for (const auto& prog : progs) @@ -1333,8 +928,20 @@ void ppu_exec_loader::load() const segments.emplace_back(std::make_pair(addr, size)); - if (prog.p_flags & 1) // Test EXEC flag - exec_end = addr + size; + //if (prog.p_flags & 1) exec_end = addr + size; // Test EXEC flag + } + } + + for (const auto& s : shdrs) + { + LOG_NOTICE(LOADER, "** Section: sh_type=0x%x, addr=0x%llx, size=0x%llx, flags=0x%x", s.sh_type, s.sh_addr, s.sh_size, s.sh_flags); + + const u32 addr = vm::cast(s.sh_addr); + const u32 size = vm::cast(s.sh_size); + + if (s.sh_type == 1 && addr && size) + { + sections.emplace_back(std::make_pair(addr, size)); } } @@ -1422,6 +1029,8 @@ void ppu_exec_loader::load() const LOG_NOTICE(LOADER, "* libent_start = *0x%x", proc_prx_param.libent_start); LOG_NOTICE(LOADER, "* libstub_start = *0x%x", proc_prx_param.libstub_start); + LOG_NOTICE(LOADER, "* unk0 = 0x%x", proc_prx_param.unk0); + LOG_NOTICE(LOADER, "* unk2 = 0x%x", proc_prx_param.unk2); if (proc_prx_param.magic != 0x1b434cec) { @@ -1429,10 +1038,7 @@ void ppu_exec_loader::load() const } ppu_load_exports(link, proc_prx_param.libent_start, proc_prx_param.libent_end); - - const u32 min_addr = ppu_load_imports(link, proc_prx_param.libstub_start, proc_prx_param.libstub_end); - - exec_end = std::min(min_addr, exec_end); + ppu_load_imports(link, proc_prx_param.libstub_start, proc_prx_param.libstub_end); } break; } @@ -1481,9 +1087,9 @@ void ppu_exec_loader::load() const } // Add functions - exec_set.insert(exec_set.end(), prx->func.begin(), prx->func.end()); + exec_set.insert(exec_set.end(), prx->funcs.begin(), prx->funcs.end()); - ppu_validate(lle_dir + '/' + name, prx->func, prx->func[0].first); + ppu_validate(lle_dir + '/' + name, prx->funcs, prx->funcs[0].addr); } else { @@ -1615,8 +1221,7 @@ void ppu_exec_loader::load() const } // Analyse executable - const u32 entry_rtoc = vm::read32(vm::cast(header.e_entry, HERE) + 4); - const auto funcs = ppu_analyse(segments[0].first, exec_end, segments, entry_rtoc); + const auto funcs = ppu_analyse(segments, sections, static_cast(header.e_entry), 0); ppu_validate(vfs::get(Emu.GetPath()), funcs, 0); @@ -1628,11 +1233,6 @@ void ppu_exec_loader::load() const // TODO: adjust for liblv2 loading option 
using namespace ppu_instructions; - auto ppu_thr_stop_data = vm::ptr::make(vm::alloc(2 * 4, vm::main)); - Emu.SetCPUThreadStop(ppu_thr_stop_data.addr()); - ppu_thr_stop_data[0] = HACK(1); - ppu_thr_stop_data[1] = BLR(); - static const int branch_size = 10 * 4; auto make_branch = [](vm::ptr& ptr, u32 addr, bool last) @@ -1690,7 +1290,10 @@ void ppu_exec_loader::load() const make_branch(entry, static_cast(header.e_entry), true); // Register entry function (addr, size) - exec_set.emplace_back(std::make_pair(entry.addr() & -0x1000, entry.addr() & 0xfff)); + ppu_function entry_func; + entry_func.addr = entry.addr() & -0x1000; + entry_func.size = entry.addr() & 0xfff; + exec_set.emplace_back(entry_func); // Initialize recompiler ppu_initialize("", exec_set, static_cast(header.e_entry)); diff --git a/rpcs3/Emu/Cell/PPUModule.h b/rpcs3/Emu/Cell/PPUModule.h index 274c0964e9..9dbc922cf3 100644 --- a/rpcs3/Emu/Cell/PPUModule.h +++ b/rpcs3/Emu/Cell/PPUModule.h @@ -12,9 +12,8 @@ extern u32 ppu_generate_id(const char* name); // Flags set with REG_FUNC enum ppu_static_function_flags : u32 { - MFF_FORCED_HLE = (1 << 0), // Always call HLE function (TODO: deactivated) - - MFF_PERFECT = MFF_FORCED_HLE, // Indicates that function is completely implemented and can replace LLE implementation + MFF_FORCED_HLE = (1 << 0), // Always call HLE function + MFF_PERFECT = (1 << 1), // Indicates complete implementation and LLE interchangeability }; // HLE function information diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 38b437fb95..2b1181cd92 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -387,6 +387,11 @@ extern void sse_cellbe_stvrx(u64 addr, __m128i a); throw fmt::exception("Trap! (0x%llx)", addr); } +[[noreturn]] static void ppu_unreachable(u64 addr) +{ + throw fmt::exception("Unreachable! 
(0x%llx)", addr); +} + static void ppu_trace(u64 addr) { LOG_NOTICE(PPU, "Trace: 0x%llx", addr); @@ -469,10 +474,18 @@ static __m128i ppu_vec3op(decltype(&ppu_interpreter::UNK) func, __m128i _a, __m1 return ppu.VR[20].vi; } -extern void ppu_initialize(const std::string& name, const std::vector>& funcs, u32 entry) +extern void ppu_initialize(const std::string& name, const std::vector& funcs, u32 entry) { if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm || funcs.empty()) { + if (!Emu.GetCPUThreadStop()) + { + auto ppu_thr_stop_data = vm::ptr::make(vm::alloc(2 * 4, vm::main)); + Emu.SetCPUThreadStop(ppu_thr_stop_data.addr()); + ppu_thr_stop_data[0] = ppu_instructions::HACK(1); + ppu_thr_stop_data[1] = ppu_instructions::BLR(); + } + return; } @@ -481,6 +494,7 @@ extern void ppu_initialize(const std::string& name, const std::vector(module->getOrInsertFunction(fmt::format("__sub_%x", info.first), _func)); + const auto f = cast(module->getOrInsertFunction(fmt::format("__0x%x", info.addr), _func)); f->addAttribute(1, Attribute::NoAlias); - translator->AddFunction(info.first, f); + translator->AddFunction(info.addr, f); + } + + for (const auto& b : info.blocks) + { + if (b.second) + { + translator->AddBlockInfo(b.first); + } } - - translator->AddBlockInfo(info.first); } legacy::FunctionPassManager pm(module.get()); @@ -561,9 +581,9 @@ extern void ppu_initialize(const std::string& name, const std::vectorTranslateToIR(info.first, info.first + info.second, vm::_ptr(info.first)); + const auto func = translator->TranslateToIR(info, vm::_ptr(info.addr)); // Run optimization passes pm.run(*func); @@ -655,14 +675,12 @@ extern void ppu_initialize(const std::string& name, const std::vectorget(fmt::format("__sub_%x", addr)); - memory_helper::commit_page_memory(s_ppu_compiled + addr / 4, sizeof(ppu_function_t)); - s_ppu_compiled[addr / 4] = (ppu_function_t)link; + const std::uintptr_t link = jit->get(fmt::format("__0x%x", info.addr)); + ppu_register_function_at(info.addr, (ppu_function_t)link); - LOG_NOTICE(PPU, "** Function __sub_%x -> 0x%llx (addr=0x%x, size=0x%x)", addr, link, addr, info.second); + LOG_NOTICE(PPU, "** Function __0x%x -> 0x%llx (size=0x%x, toc=0x%x, attr %#x)", info.addr, link, info.size, info.toc, info.attr); } } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 7b25879f9e..89d360332c 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -109,23 +109,26 @@ void PPUTranslator::AddBlockInfo(u64 addr) m_block_info.emplace(addr); } -Function* PPUTranslator::TranslateToIR(u64 start_addr, u64 end_addr, be_t* bin, void(*custom)(PPUTranslator*)) +Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t* bin, void(*custom)(PPUTranslator*)) { - m_function = m_func_list[start_addr]; - m_function_type = m_func_types[start_addr]; - m_start_addr = start_addr; - m_end_addr = end_addr; + m_function = m_func_list[info.addr]; + m_function_type = m_func_types[info.addr]; + m_start_addr = info.addr; + m_end_addr = info.addr + info.size; m_blocks.clear(); m_value_usage.clear(); + std::fill(std::begin(m_globals), std::end(m_globals), nullptr); + std::fill(std::begin(m_locals), std::end(m_locals), nullptr); IRBuilder<> builder(BasicBlock::Create(m_context, "__entry", m_function)); m_ir = &builder; /* Create context variables */ - //m_thread = Call(m_thread_type->getPointerTo(), AttributeSet::get(m_context, AttributeSet::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadOnly}), "__context", m_ir->getInt64(start_addr)); + 
//m_thread = Call(m_thread_type->getPointerTo(), AttributeSet::get(m_context, AttributeSet::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadOnly}), "__context", m_ir->getInt64(info.addr)); m_thread = &*m_function->getArgumentList().begin(); // Non-volatile registers with special meaning (TODO) + if (info.attr & ppu_attr::uses_r0) m_g_gpr[0] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 0, ".r0g"); m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 1, ".sp"); m_g_gpr[2] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 2, ".rtoc"); m_g_gpr[13] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 13, ".tls"); @@ -202,8 +205,9 @@ Function* PPUTranslator::TranslateToIR(u64 start_addr, u64 end_addr, be_t* m_ir->CreateStore(m_ir->getFalse(), m_vscr_sat); // VSCR.SAT m_ir->CreateStore(m_ir->getTrue(), m_vscr_nj); - // TODO: only loaded r12 (extended argument for program initialization) - m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 12)), m_gpr[12]); + // TODO: only loaded r0 and r12 (r12 is extended argument for program initialization) + if (!m_g_gpr[0]) m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 0)), m_gpr[0]); + if (!m_g_gpr[12]) m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 12)), m_gpr[12]); m_jtr = BasicBlock::Create(m_context, "__jtr", m_function); @@ -212,13 +216,13 @@ Function* PPUTranslator::TranslateToIR(u64 start_addr, u64 end_addr, be_t* m_ir->CreateBr(start); m_ir->SetInsertPoint(start); - for (m_current_addr = start_addr; m_current_addr < end_addr;) + for (m_current_addr = m_start_addr; m_current_addr < m_end_addr;) { // Preserve current address (m_current_addr may be changed by the decoder) const u64 addr = m_current_addr; // Translate opcode - const u32 op = *(m_bin = bin + (addr - start_addr) / sizeof(u32)); + const u32 op = *(m_bin = bin + (addr - m_start_addr) / sizeof(u32)); (this->*(s_ppu_decoder.decode(op)))({op}); // Calculate next address if necessary @@ -243,7 +247,7 @@ Function* PPUTranslator::TranslateToIR(u64 start_addr, u64 end_addr, be_t* // Finalize past-the-end block if (!m_ir->GetInsertBlock()->getTerminator()) { - Call(GetType(), "__end", m_ir->getInt64(end_addr)); + Call(GetType(), "__end", m_ir->getInt64(m_end_addr)); m_ir->CreateUnreachable(); } @@ -256,7 +260,7 @@ Function* PPUTranslator::TranslateToIR(u64 start_addr, u64 end_addr, be_t* else { // Get block entries - const std::vector cases{m_block_info.upper_bound(start_addr), m_block_info.lower_bound(end_addr)}; + const std::vector cases{m_block_info.upper_bound(m_start_addr), m_block_info.lower_bound(m_end_addr)}; const auto _ctr = m_ir->CreateLoad(m_reg_ctr); const auto _default = BasicBlock::Create(m_context, "__jtr.def", m_function); @@ -594,7 +598,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align void PPUTranslator::CompilationError(const std::string& error) { - LOG_ERROR(PPU, "0x%08llx: Error: %s", m_current_addr, error); + LOG_ERROR(PPU, "[0x%08llx] 0x%08llx: Error: %s", m_start_addr, m_current_addr, error); } @@ -3095,7 +3099,7 @@ void PPUTranslator::SRAD(ppu_opcode_t op) const auto res_128 = m_ir->CreateAShr(arg_ext, shift_num); // i128 const auto result = Trunc(res_128); SetGpr(op.ra, result); - SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), m_ir->CreateICmpNE(arg_ext, m_ir->CreateShl(result, shift_num)))); + SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), 
m_ir->CreateICmpNE(arg_ext, m_ir->CreateShl(res_128, shift_num)))); if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); } @@ -3916,7 +3920,7 @@ void PPUTranslator::FCFID(ppu_opcode_t op) void PPUTranslator::UNK(ppu_opcode_t op) { - LOG_WARNING(PPU, "0x%08llx: Unknown/illegal opcode 0x%08x", m_current_addr, op.opcode); + CompilationError(fmt::format("Unknown/illegal opcode 0x%08x", op.opcode)); m_ir->CreateUnreachable(); } diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index 354c745b4f..905c54dbab 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -9,6 +9,7 @@ #include #include "../rpcs3/Emu/Cell/PPUOpcodes.h" +#include "../rpcs3/Emu/Cell/PPUAnalyser.h" #ifdef _MSC_VER #pragma warning(push, 0) @@ -448,7 +449,7 @@ public: void AddBlockInfo(u64 addr); // Parses PPU opcodes and translate them into LLVM IR - llvm::Function* TranslateToIR(u64 start_addr, u64 end_addr, be_t* bin, void(*custom)(PPUTranslator*) = nullptr); + llvm::Function* TranslateToIR(const ppu_function& info, be_t* bin, void(*custom)(PPUTranslator*) = nullptr); void MFVSCR(ppu_opcode_t op); void MTVSCR(ppu_opcode_t op); diff --git a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp index f191aa2dba..81c273d083 100644 --- a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp @@ -41,7 +41,7 @@ void _sys_ppu_thread_exit(PPUThread& ppu, u64 errorcode) } // Throw if this syscall was not called directly by the SC instruction (hack) - if (ppu.GPR[11] != 41 || ppu.custom_task) + if (ppu.LR == 0 || ppu.GPR[11] != 41 || ppu.custom_task) { throw cpu_state::exit; } diff --git a/rpcs3/Emu/Cell/lv2/sys_prx.h b/rpcs3/Emu/Cell/lv2/sys_prx.h index f9dc8578c8..ef11ffc1ab 100644 --- a/rpcs3/Emu/Cell/lv2/sys_prx.h +++ b/rpcs3/Emu/Cell/lv2/sys_prx.h @@ -1,5 +1,7 @@ #pragma once +#include "Emu/Cell/PPUAnalyser.h" + namespace vm { using namespace ps3; } // Return codes @@ -79,7 +81,7 @@ struct lv2_prx_t bool is_started = false; std::unordered_map specials; - std::vector> func; + std::vector funcs; vm::ptr argv)> start = vm::null; vm::ptr argv)> stop = vm::null; diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 48fbb8f44a..0d7d720b23 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -136,6 +136,7 @@ public: void ResetInfo() { m_info = {}; + m_cpu_thr_stop = 0; } void SetTLSData(u32 addr, u32 filesz, u32 memsz) diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 54c248e965..7a42722c77 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -97,6 +97,7 @@ + NotUsing @@ -673,4 +674,4 @@ - + \ No newline at end of file diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index c0420a3774..95e9c82e6f 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -881,6 +881,9 @@ Utilities + + Emu\Cell +