From 37212a632c3f9389449ed3e8462847f31ce8c955 Mon Sep 17 00:00:00 2001 From: Eladash Date: Tue, 29 Aug 2023 14:50:50 +0300 Subject: [PATCH] SPU: Refactor function discovery --- rpcs3/Emu/Cell/RawSPUThread.cpp | 2 +- rpcs3/Emu/Cell/SPURecompiler.cpp | 19 ++++++++--------- rpcs3/Emu/Cell/SPUThread.cpp | 36 ++++++++++++++++++++------------ rpcs3/Emu/Cell/SPUThread.h | 4 ++-- 4 files changed, 35 insertions(+), 26 deletions(-) diff --git a/rpcs3/Emu/Cell/RawSPUThread.cpp b/rpcs3/Emu/Cell/RawSPUThread.cpp index fa27229ca9..b322ab017f 100644 --- a/rpcs3/Emu/Cell/RawSPUThread.cpp +++ b/rpcs3/Emu/Cell/RawSPUThread.cpp @@ -383,7 +383,7 @@ void spu_load_exec(const spu_exec_object& elf) spu->status_npc = {SPU_STATUS_RUNNING, elf.header.e_entry}; atomic_storage::release(spu->pc, elf.header.e_entry); - const auto funcs = spu->discover_functions(spu->ls, umax); + const auto funcs = spu->discover_functions(0, { spu->ls , SPU_LS_SIZE }, true, umax); for (u32 addr : funcs) { diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 45e08df319..1441ed91d0 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -561,10 +561,7 @@ extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 s spu_section_data::data_t obj{vaddr, std::move(data)}; - std::vector ls_data(SPU_LS_SIZE); - std::memcpy(ls_data.data() + vaddr, ls_data_vaddr, size); - - obj.funcs = spu_thread::discover_functions(ls_data.data(), umax); + obj.funcs = spu_thread::discover_functions(vaddr, { reinterpret_cast(ls_data_vaddr), size }, true, umax); if (obj.funcs.empty()) { @@ -2111,7 +2108,7 @@ void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* /*rip*/ } } -std::vector spu_thread::discover_functions(const void* ls_start, u32 /*entry*/) +std::vector spu_thread::discover_functions(u32 base_addr, std::span ls, bool is_known_addr, u32 /*entry*/) { std::vector calls; calls.reserve(100); @@ -2119,14 +2116,16 @@ std::vector spu_thread::discover_functions(const void* ls_start, u32 /*entr // Discover functions // Use the most simple method: search for instructions that calls them // And then filter invalid cases (does not detect tail calls) - for (u32 i = 0x10; i < SPU_LS_SIZE; i += 0x10) + const v128 brasl_mask = is_known_addr ? v128::from32p(0x62) : v128::from32p(umax); + + for (u32 i = utils::align(base_addr, 0x10); i < std::min(base_addr + ls.size(), 0x3FFF0); i += 0x10) { // Search for BRSL and BRASL // TODO: BISL - const v128 inst = read_from_ptr>(static_cast(ls_start), i); + const v128 inst = read_from_ptr>(ls.data(), i - base_addr); const v128 shifted = gv_shr32(inst, 23); const v128 eq_brsl = gv_eq32(shifted, v128::from32p(0x66)); - const v128 eq_brasl = gv_eq32(shifted, v128::from32p(0x62)); + const v128 eq_brasl = gv_eq32(shifted, brasl_mask); const v128 result = eq_brsl | eq_brasl; if (!gv_testz(result)) @@ -2144,14 +2143,14 @@ std::vector spu_thread::discover_functions(const void* ls_start, u32 /*entr calls.erase(std::remove_if(calls.begin(), calls.end(), [&](u32 caller) { // Check the validity of both the callee code and the following caller code - return !is_exec_code(caller, ls_start) || !is_exec_code(caller + 4, ls_start); + return !is_exec_code(caller, ls, base_addr) || !is_exec_code(caller + 4, ls, base_addr); }), calls.end()); std::vector addrs; for (u32 addr : calls) { - const spu_opcode_t op{read_from_ptr>(static_cast(ls_start), addr)}; + const spu_opcode_t op{read_from_ptr>(ls, addr - base_addr)}; const u32 func = op_branch_targets(addr, op)[0]; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index e5648f9cd0..45d3f87234 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1196,7 +1196,7 @@ void spu_thread::dump_regs(std::string& ret, std::any& /*custom_data*/) const } } - if (i3 >= 0x80 && is_exec_code(i3, ls)) + if (i3 >= 0x80 && is_exec_code(i3, { ls, SPU_LS_SIZE })) { dis_asm.disasm(i3); fmt::append(ret, " -> %s", dis_asm.last_opcode); @@ -1300,7 +1300,7 @@ std::vector> spu_thread::dump_callstack_list() const return true; } - return !addr || !is_exec_code(addr, ls); + return !addr || !is_exec_code(addr, { ls, SPU_LS_SIZE }); }; if (first && lr._u32[3] != gpr0._u32[3] && !is_invalid(gpr0)) @@ -4019,17 +4019,22 @@ bool spu_thread::check_mfc_interrupts(u32 next_pc) return false; } -bool spu_thread::is_exec_code(u32 addr, const void* ls_ptr) +bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_addr) { - if (addr & ~0x3FFFC) - { - return false; - } - for (u32 i = 0; i < 30; i++) { + if (addr & ~0x3FFFC) + { + return false; + } + + if (addr < base_addr || addr >= base_addr + ls_ptr.size()) + { + return false; + } + const u32 addr0 = spu_branch_target(addr); - const u32 op = read_from_ptr>(static_cast(ls_ptr) + addr0); + const u32 op = read_from_ptr>(ls_ptr, addr0 - base_addr); const auto type = s_spu_itype.decode(op); if (type == spu_itype::UNK || !op) @@ -4055,9 +4060,14 @@ bool spu_thread::is_exec_code(u32 addr, const void* ls_ptr) continue; } + if (route_pc < base_addr || route_pc >= base_addr + ls_ptr.size()) + { + return false; + } + // Test the validity of a single instruction of the optional target // This function can't be too slow and is unlikely to improve results by a great deal - const u32 op0 = read_from_ptr>(static_cast(ls_ptr) + route_pc); + const u32 op0 = read_from_ptr>(ls_ptr, route_pc - base_addr); const auto type0 = s_spu_itype.decode(op); if (type == spu_itype::UNK || !op) @@ -6151,12 +6161,12 @@ spu_exec_object spu_thread::capture_memory_as_elf(std::span>(all_data.data(), pc0 - 4); + const u32 op = read_from_ptr>(all_data, pc0 - 4); // Try to find function entry (if they are placed sequentially search for BI $LR of previous function) if (!op || op == 0x35000000u || s_spu_itype.decode(op) == spu_itype::UNK) { - if (is_exec_code(pc0, all_data.data())) + if (is_exec_code(pc0, { all_data.data(), SPU_LS_SIZE })) break; } } @@ -6166,7 +6176,7 @@ spu_exec_object spu_thread::capture_memory_as_elf(std::span discover_functions(const void* ls_start, u32 /*entry*/); + static bool is_exec_code(u32 addr, std::span ls_ptr, u32 base_addr = 0); // Only a hint, do not rely on it other than debugging purposes + static std::vector discover_functions(u32 base_addr, std::span ls, bool is_known_addr, u32 /*entry*/); u32 get_ch_count(u32 ch); s64 get_ch_value(u32 ch); bool set_ch_value(u32 ch, u32 value);