diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp
index 68ab341af5..90449bde54 100644
--- a/Utilities/JIT.cpp
+++ b/Utilities/JIT.cpp
@@ -996,6 +996,11 @@ bool jit_compiler::check(const std::string& path)
 	return false;
 }
 
+void jit_compiler::update_global_mapping(const std::string& name, u64 addr)
+{
+	m_engine->updateGlobalMapping(name, addr);
+}
+
 void jit_compiler::fin()
 {
 	m_engine->finalizeObject();
diff --git a/Utilities/JIT.h b/Utilities/JIT.h
index 33a7317091..3fb5fb2bba 100644
--- a/Utilities/JIT.h
+++ b/Utilities/JIT.h
@@ -267,14 +267,14 @@ namespace asmjit
 
 // Build runtime function with asmjit::X86Assembler
 template <typename FT, typename F>
-inline FT build_function_asm(std::string_view name, F&& builder)
+inline FT build_function_asm(std::string_view name, F&& builder, ::jit_runtime* custom_runtime = nullptr)
 {
 #ifdef __APPLE__
 	pthread_jit_write_protect_np(false);
 #endif
 	using namespace asmjit;
 
-	auto& rt = get_global_runtime();
+	auto& rt = custom_runtime ? *custom_runtime : get_global_runtime();
 
 	CodeHolder code;
 	code.init(rt.environment());
@@ -362,6 +362,9 @@ public:
 	// Add object (path to obj file)
 	void add(const std::string& path);
 
+	// Update global mapping for a single value
+	void update_global_mapping(const std::string& name, u64 addr);
+
 	// Check object file
 	static bool check(const std::string& path);
 
diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
index 82e6b6aa59..17d6d4792e 100644
--- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
@@ -519,12 +519,14 @@ s32 _spurs::create_lv2_eq(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, vm::ptr<u32
 {
 	if (s32 rc = sys_event_queue_create(ppu, queueId, attr, SYS_EVENT_QUEUE_LOCAL, size))
 	{
+		static_cast<void>(ppu.test_stopped());
 		return rc;
 	}
 
 	if (_spurs::attach_lv2_eq(ppu, spurs, *queueId, port, 1, true))
 	{
 		sys_event_queue_destroy(ppu, *queueId, SYS_EVENT_QUEUE_DESTROY_FORCE);
+		static_cast<void>(ppu.test_stopped());
 	}
 
 	return CELL_OK;
@@ -623,6 +625,7 @@ s32 _spurs::detach_lv2_eq(vm::ptr<CellSpurs> spurs, u8 spuPort, bool spursCreate
 void _spurs::handler_wait_ready(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
 {
 	ensure(ppu_execute<&sys_lwmutex_lock>(ppu, spurs.ptr(&CellSpurs::mutex), 0) == 0);
+	static_cast<void>(ppu.test_stopped());
 
 	while (true)
 	{
@@ -686,6 +689,7 @@ void _spurs::handler_wait_ready(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
 		if (spurs->handlerDirty == 0)
 		{
 			ensure(ppu_execute<&sys_lwcond_wait>(ppu, spurs.ptr(&CellSpurs::cond), 0) == 0);
+			static_cast<void>(ppu.test_stopped());
 		}
 
 		spurs->handlerWaiting = 0;
@@ -693,6 +697,7 @@ void _spurs::handler_wait_ready(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
 
 	// If we reach here then a runnable workload was found
 	ensure(ppu_execute<&sys_lwmutex_unlock>(ppu, spurs.ptr(&CellSpurs::mutex)) == 0);
+	static_cast<void>(ppu.test_stopped());
 }
 
 void _spurs::handler_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
@@ -711,7 +716,10 @@ void _spurs::handler_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
 
 		ensure(sys_spu_thread_group_start(ppu, spurs->spuTG) == 0);
 
-		if (s32 rc = sys_spu_thread_group_join(ppu, spurs->spuTG, vm::null, vm::null); rc + 0u != CELL_EFAULT)
+		const s32 rc = sys_spu_thread_group_join(ppu, spurs->spuTG, vm::null, vm::null);
+		static_cast<void>(ppu.test_stopped());
+
+		if (rc + 0u != CELL_EFAULT)
 		{
 			if (rc + 0u == CELL_ESTAT)
 			{
@@ -810,6 +818,7 @@ s32 _spurs::wakeup_shutdown_completion_waiter(ppu_thread& ppu, vm::ptr<CellSpurs
 	{
 		ensure((wklF->x28 == 2u));
 		rc = sys_semaphore_post(ppu, static_cast<u32>(wklF->sem), 1);
+		static_cast<void>(ppu.test_stopped());
 	}
 
 	return rc;
@@ -823,6 +832,7 @@ void _spurs::event_helper_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
 	while (true)
 	{
 		ensure(sys_event_queue_receive(ppu, spurs->eventQueue, vm::null, 0) == 0);
+		static_cast<void>(ppu.test_stopped());
 
 		const u64 event_src = ppu.gpr[4];
 		const u64 event_data1 = ppu.gpr[5];
@@ -854,6 +864,8 @@ void _spurs::event_helper_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
 					sys_semaphore_post(ppu, static_cast<u32>(spurs->wklF2[i].sem), 1);
 				}
 			}
+
+			static_cast<void>(ppu.test_stopped());
 		}
 		else
 		{
@@ -883,6 +895,7 @@ void _spurs::event_helper_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
 			else if (data0 == 2)
 			{
 				ensure(sys_semaphore_post(ppu, static_cast<u32>(spurs->semPrv), 1) == 0);
+				static_cast<void>(ppu.test_stopped());
 			}
 			else if (data0 == 3)
 			{
@@ -2015,7 +2028,7 @@ void _spurs::trace_status_update(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
 {
 	u8 init;
 
-	spurs->sysSrvTrace.atomic_op([spurs, &init](CellSpurs::SrvTraceSyncVar& data)
+	vm::atomic_op(spurs->sysSrvTrace, [spurs, &init](CellSpurs::SrvTraceSyncVar& data)
 	{
 		if ((init = data.sysSrvTraceInitialised))
 		{
@@ -2026,8 +2039,9 @@ void _spurs::trace_status_update(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
 
 	if (init)
 	{
-		spurs->sysSrvMessage = 0xff;
+		vm::light_op(spurs->sysSrvMessage, [&](atomic_t<u8>& v){ v.release(0xff); });
 		ensure(sys_semaphore_wait(ppu, static_cast<u32>(spurs->semPrv), 0) == 0);
+		static_cast<void>(ppu.test_stopped());
 	}
 }
 
@@ -2457,7 +2471,7 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, vm::ptr<u32>
 
 	ensure((res_wkl <= 31));
 	vm::light_op(spurs->sysSrvMsgUpdateWorkload, [](atomic_t<u8>& v){ v.release(0xff); });
-	vm::light_op(spurs->sysSrvMessage, [](atomic_t<u8>& v){ v.release(0xff); });
+	vm::light_op(spurs->sysSrvMessage, [](atomic_t<u8>& v){ v.release(0xff); });
 
 	return CELL_OK;
 }
@@ -2551,7 +2565,7 @@ s32 cellSpursShutdownWorkload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid
 
 	if (old_state == SPURS_WKL_STATE_SHUTTING_DOWN)
 	{
-		vm::light_op(spurs->sysSrvMessage, [&](atomic_t<u8>& v){ v.release(0xff); });
+		vm::light_op(spurs->sysSrvMessage, [&](atomic_t<u8>& v){ v.release(0xff); });
 		return CELL_OK;
 	}
 
@@ -2807,7 +2821,7 @@ s32 cellSpursReadyCountStore(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid,
 		return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
 	}
 
-	vm::light_op(spurs->readyCount(wid), [&](atomic_t<u8>& v)
+	vm::light_op(spurs->readyCount(wid), [&](atomic_t<u8>& v)
 	{
 		v.release(static_cast<u8>(value));
 	});
@@ -3256,6 +3270,7 @@ s32 cellSpursEventFlagSet(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFlag
 
 			eventFlag->pendingRecvTaskEvents[ppuWaitSlot] = ppuEvents;
 			ensure(sys_event_port_send(eventFlag->eventPortId, 0, 0, 0) == 0);
+			static_cast<void>(ppu.test_stopped());
 		}
 
 		if (pendingRecv)
@@ -3325,7 +3340,7 @@ s32 _spurs::event_flag_wait(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFl
 	bool recv;
 	s32 rc;
 	u16 receivedEvents;
-	eventFlag->ctrl.atomic_op([eventFlag, mask, mode, block, &recv, &rc, &receivedEvents](CellSpursEventFlag::ControlSyncVar& ctrl)
+	vm::atomic_op(eventFlag->ctrl, [eventFlag, mask, mode, block, &recv, &rc, &receivedEvents](CellSpursEventFlag::ControlSyncVar& ctrl)
 	{
 		u16 relevantEvents = ctrl.events & *mask;
 		if (eventFlag->direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
@@ -3429,6 +3444,7 @@ s32 _spurs::event_flag_wait(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFl
 	{
 		// Block till something happens
 		ensure(sys_event_queue_receive(ppu, eventFlag->eventQueueId, vm::null, 0) == 0);
+		static_cast<void>(ppu.test_stopped());
 
 		s32 i = 0;
 		if (eventFlag->direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
@@ -3437,7 +3453,7 @@ s32 _spurs::event_flag_wait(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFl
 		}
 
 		*mask = eventFlag->pendingRecvTaskEvents[i];
-		eventFlag->ctrl.atomic_op([](auto& ctrl) { ctrl.ppuPendingRecv = 0; });
+		vm::atomic_op(eventFlag->ctrl, [](CellSpursEventFlag::ControlSyncVar& ctrl) { ctrl.ppuPendingRecv = 0; });
 	}
 
 	*mask = receivedEvents;
diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp
index aa2ac67ae4..bdcd9dee00 100644
--- a/rpcs3/Emu/Cell/PPUAnalyser.cpp
+++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp
@@ -38,8 +38,6 @@ void fmt_class_string<bs_t<ppu_attr>>::format(std::string& out, u64 arg)
 	format_bitset(out, arg, "[", ",", "]", &fmt_class_string<ppu_attr>::format);
 }
 
-u32 ppu_get_far_jump(u32 pc);
-
 void ppu_module::validate(u32 reloc)
 {
 	// Load custom PRX configuration if available
@@ -1202,12 +1200,6 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 			const ppu_opcode_t op{*_ptr++};
 			const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
 
-			if (ppu_get_far_jump(iaddr))
-			{
-				block.second = _ptr.addr() - block.first - 4;
-				break;
-			}
-
 			if (type == ppu_itype::UNK)
 			{
 				// Invalid blocks will remain empty
@@ -1397,11 +1389,6 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 			const ppu_opcode_t op{*_ptr++};
 			const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
 
-			if (ppu_get_far_jump(iaddr))
-			{
-				break;
-			}
-
 			if (type == ppu_itype::B || type == ppu_itype::BC)
 			{
 				const u32 target = (op.aa ? 0 : iaddr) + (type == ppu_itype::B ? +op.bt24 : +op.bt14);
@@ -1476,11 +1463,7 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 			const ppu_opcode_t op{*_ptr++};
 			const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
 
-			if (ppu_get_far_jump(addr))
-			{
-				_ptr.set(next);
-			}
-			else if (type == ppu_itype::UNK)
+			if (type == ppu_itype::UNK)
 			{
 				break;
 			}
@@ -1692,11 +1675,6 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 
 		for (; i_pos < lim; i_pos += 4)
 		{
-			if (ppu_get_far_jump(i_pos))
-			{
-				continue;
-			}
-
 			const u32 opc = vm::_ref<u32>(i_pos);
 
 			switch (auto type = s_ppu_itype.decode(opc))
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 7c7fe23455..a8b3ed69a0 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -616,9 +616,13 @@ struct ppu_far_jumps_t
 		bool link;
 		bool with_toc;
 		std::string module_name;
+		ppu_intrp_func_t func;
 	};
 
+	ppu_far_jumps_t(int) noexcept {}
+
 	std::unordered_map<u32, all_info_t> vals;
+	::jit_runtime rt;
 
 	mutable shared_mutex mutex;
 
@@ -679,17 +683,64 @@ struct ppu_far_jumps_t
 
 		return {};
 	}
+
+	template <bool Locked = true>
+	ppu_intrp_func_t gen_jump(u32 pc)
+	{
+		[[maybe_unused]] std::conditional_t<Locked, std::lock_guard<shared_mutex>, const shared_mutex&> lock(mutex);
+
+		auto it = vals.find(pc);
+
+		if (it == vals.end())
+		{
+			return nullptr;
+		}
+
+		if (!it->second.func)
+		{
+			it->second.func = build_function_asm<ppu_intrp_func_t>("", [&](native_asm& c, auto& args)
+			{
+				using namespace asmjit;
+
+#ifdef ARCH_X64
+				c.mov(args[0], x86::rbp);
+				c.mov(x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)), pc);
+				c.jmp(ppu_far_jump);
+#else
+				Label jmp_address = c.newLabel();
+				Label imm_address = c.newLabel();
+
+				c.ldr(args[1].r32(), arm::ptr(imm_address));
+				c.str(args[1].r32(), arm::Mem(args[0], ::offset32(&ppu_thread::cia)));
+				c.ldr(args[1], arm::ptr(jmp_address));
+				c.br(args[1]);
+
+				c.align(AlignMode::kCode, 16);
+				c.bind(jmp_address);
+				c.embedUInt64(reinterpret_cast<u64>(ppu_far_jump));
+				c.bind(imm_address);
+				c.embedUInt32(pc);
+#endif
+			}, &rt);
+		}
+
+		return it->second.func;
+	}
 };
 
 u32 ppu_get_far_jump(u32 pc)
 {
-	g_fxo->init<ppu_far_jumps_t>();
+	if (!g_fxo->is_init<ppu_far_jumps_t>())
+	{
+		return 0;
+	}
+
 	return g_fxo->get<ppu_far_jumps_t>().get_target(pc);
 }
 
-static void ppu_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*)
+static void ppu_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*)
 {
-	const u32 cia = g_fxo->get<ppu_far_jumps_t>().get_target(vm::get_addr(this_op), &ppu);
+	const u32 cia = g_fxo->get<ppu_far_jumps_t>().get_target(ppu.cia, &ppu);
 
 	if (!vm::check_addr(cia, vm::page_executable))
 	{
@@ -740,7 +791,7 @@ bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc, st
 		return false;
 	}
 
-	g_fxo->init<ppu_far_jumps_t>();
+	g_fxo->init<ppu_far_jumps_t>(0);
 
 	if (!module_name.empty())
 	{
@@ -759,7 +810,7 @@ bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc, st
 	std::lock_guard lock(jumps.mutex);
 	jumps.vals.insert_or_assign(entry, ppu_far_jumps_t::all_info_t{target, link, with_toc, std::move(module_name)});
 
-	ppu_register_function_at(entry, 4, &ppu_far_jump);
+	ppu_register_function_at(entry, 4, g_cfg.core.ppu_decoder == ppu_decoder_type::_static ? &ppu_far_jump : ensure(g_fxo->get<ppu_far_jumps_t>().gen_jump<false>(entry)));
 
 	return true;
 }
@@ -781,7 +832,10 @@ bool ppu_form_branch_to_code(u32 entry, u32 target)
 
 void ppu_remove_hle_instructions(u32 addr, u32 size)
 {
-	g_fxo->init<ppu_far_jumps_t>();
+	if (Emu.IsStopped() || !g_fxo->is_init<ppu_far_jumps_t>())
+	{
+		return;
+	}
 
 	auto& jumps = g_fxo->get<ppu_far_jumps_t>();
 
@@ -3392,6 +3446,19 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
 			}
 		}
 
+		if (jit)
+		{
+			const auto far_jump = ppu_get_far_jump(func.addr) ? g_fxo->get<ppu_far_jumps_t>().gen_jump(func.addr) : nullptr;
+
+			if (far_jump)
+			{
+				// Replace the function with ppu_far_jump
+				jit->update_global_mapping(fmt::format("__0x%x", func.addr - reloc), reinterpret_cast<u64>(far_jump));
+				fpos++;
+				continue;
+			}
+		}
+
 		// Copy block or function entry
 		ppu_function& entry = part.funcs.emplace_back(func);
 
@@ -3713,8 +3780,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
 				const auto addr = ensure(reinterpret_cast<ppu_intrp_func_t>(jit->get(name)));
 				jit_mod.funcs.emplace_back(addr);
 
-				if (ppu_ref(func.addr) != ppu_far_jump)
-					ppu_register_function_at(func.addr, 4, addr);
+				ppu_register_function_at(func.addr, 4, addr);
 
 				if (g_cfg.core.ppu_debug)
 					ppu_log.notice("Installing function %s at 0x%x: %p (reloc = 0x%x)", name, func.addr, ppu_ref(func.addr), reloc);
@@ -3733,8 +3799,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
 
 				const u64 addr = reinterpret_cast<u64>(ensure(jit_mod.funcs[index++]));
 
-				if (ppu_ref(func.addr) != ppu_far_jump)
-					ppu_register_function_at(func.addr, 4, addr);
+				ppu_register_function_at(func.addr, 4, addr);
 
 				if (g_cfg.core.ppu_debug)
 					ppu_log.notice("Reinstalling function at 0x%x: %p (reloc=0x%x)", func.addr, ppu_ref(func.addr), reloc);
diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp
index 50de63ec9a..e525b0e2fa 100644
--- a/rpcs3/Emu/Cell/PPUTranslator.cpp
+++ b/rpcs3/Emu/Cell/PPUTranslator.cpp
@@ -236,14 +236,6 @@ Function* PPUTranslator::Translate(const ppu_function& info)
 			m_rel = nullptr;
 		}
 
-		if (ppu_get_far_jump(m_addr + base))
-		{
-			// Branch into an HLEd instruction using the jump table
-			FlushRegisters();
-			CallFunction(0, m_reloc ? m_ir->CreateAdd(m_ir->getInt64(m_addr), m_seg0) : m_ir->getInt64(m_addr));
-			continue;
-		}
-
 		const u32 op = vm::read32(vm::cast(m_addr + base));
 		(this->*(s_ppu_decoder.decode(op)))({op});
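Not part of the patch: a minimal usage sketch of the new third parameter of build_function_asm, which redirects code emission into a caller-owned ::jit_runtime (the same mechanism ppu_far_jumps_t::gen_jump uses with its member rt). The names g_example_rt, make_increment and "example_inc" are illustrative only, and the sketch assumes RPCS3's Utilities/JIT.h is available; only the x86-64 path is shown for brevity.

// Illustrative sketch, not from the patch above.
#include "Utilities/JIT.h"

using inc_func_t = u32 (*)(u32);

// Hypothetical runtime owned by the caller, mirroring ppu_far_jumps_t::rt
static ::jit_runtime g_example_rt;

inline inc_func_t make_increment()
{
	return build_function_asm<inc_func_t>("example_inc", [](native_asm& c, auto& args)
	{
		using namespace asmjit;
#ifdef ARCH_X64
		// return arg0 + 1 (eax is the 32-bit return register)
		c.lea(x86::eax, x86::dword_ptr(args[0], 1));
		c.ret();
#endif
	}, &g_example_rt); // emitted into g_example_rt instead of the global runtime
}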