1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-21 18:22:33 +01:00

PPU LLVM: Fix HLE function injection

This commit is contained in:
Eladash 2022-08-17 16:53:05 +03:00 committed by Ivan
parent c0e3b86064
commit 1dd1062be1
6 changed files with 110 additions and 51 deletions

View File

@ -996,6 +996,11 @@ bool jit_compiler::check(const std::string& path)
return false;
}
// Point the symbol `name` at the host address `addr` by forwarding both to the
// engine's updateGlobalMapping().
// NOTE(review): m_engine is declared outside this view; presumably an
// llvm::ExecutionEngine, in which case an existing mapping for `name` is replaced - confirm.
void jit_compiler::update_global_mapping(const std::string& name, u64 addr)
{
m_engine->updateGlobalMapping(name, addr);
}
void jit_compiler::fin()
{
m_engine->finalizeObject();

View File

@ -267,14 +267,14 @@ namespace asmjit
// Build runtime function with asmjit::X86Assembler
template <typename FT, typename Asm = native_asm, typename F>
inline FT build_function_asm(std::string_view name, F&& builder)
inline FT build_function_asm(std::string_view name, F&& builder, ::jit_runtime* custom_runtime = nullptr)
{
#ifdef __APPLE__
pthread_jit_write_protect_np(false);
#endif
using namespace asmjit;
auto& rt = get_global_runtime();
auto& rt = custom_runtime ? *custom_runtime : get_global_runtime();
CodeHolder code;
code.init(rt.environment());
@ -362,6 +362,9 @@ public:
// Add object (path to obj file)
void add(const std::string& path);
// Update global mapping for a single value
void update_global_mapping(const std::string& name, u64 addr);
// Check object file
static bool check(const std::string& path);

View File

@ -519,12 +519,14 @@ s32 _spurs::create_lv2_eq(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, vm::ptr<u32
{
if (s32 rc = sys_event_queue_create(ppu, queueId, vm::make_var(attr), SYS_EVENT_QUEUE_LOCAL, size))
{
static_cast<void>(ppu.test_stopped());
return rc;
}
if (_spurs::attach_lv2_eq(ppu, spurs, *queueId, port, 1, true))
{
sys_event_queue_destroy(ppu, *queueId, SYS_EVENT_QUEUE_DESTROY_FORCE);
static_cast<void>(ppu.test_stopped());
}
return CELL_OK;
@ -623,6 +625,7 @@ s32 _spurs::detach_lv2_eq(vm::ptr<CellSpurs> spurs, u8 spuPort, bool spursCreate
void _spurs::handler_wait_ready(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
{
ensure(ppu_execute<&sys_lwmutex_lock>(ppu, spurs.ptr(&CellSpurs::mutex), 0) == 0);
static_cast<void>(ppu.test_stopped());
while (true)
{
@ -686,6 +689,7 @@ void _spurs::handler_wait_ready(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
if (spurs->handlerDirty == 0)
{
ensure(ppu_execute<&sys_lwcond_wait>(ppu, spurs.ptr(&CellSpurs::cond), 0) == 0);
static_cast<void>(ppu.test_stopped());
}
spurs->handlerWaiting = 0;
@ -693,6 +697,7 @@ void _spurs::handler_wait_ready(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
// If we reach here then a runnable workload was found
ensure(ppu_execute<&sys_lwmutex_unlock>(ppu, spurs.ptr(&CellSpurs::mutex)) == 0);
static_cast<void>(ppu.test_stopped());
}
void _spurs::handler_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
@ -711,7 +716,10 @@ void _spurs::handler_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
ensure(sys_spu_thread_group_start(ppu, spurs->spuTG) == 0);
if (s32 rc = sys_spu_thread_group_join(ppu, spurs->spuTG, vm::null, vm::null); rc + 0u != CELL_EFAULT)
const s32 rc = sys_spu_thread_group_join(ppu, spurs->spuTG, vm::null, vm::null);
static_cast<void>(ppu.test_stopped());
if (rc + 0u != CELL_EFAULT)
{
if (rc + 0u == CELL_ESTAT)
{
@ -810,6 +818,7 @@ s32 _spurs::wakeup_shutdown_completion_waiter(ppu_thread& ppu, vm::ptr<CellSpurs
{
ensure((wklF->x28 == 2u));
rc = sys_semaphore_post(ppu, static_cast<u32>(wklF->sem), 1);
static_cast<void>(ppu.test_stopped());
}
return rc;
@ -823,6 +832,7 @@ void _spurs::event_helper_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
while (true)
{
ensure(sys_event_queue_receive(ppu, spurs->eventQueue, vm::null, 0) == 0);
static_cast<void>(ppu.test_stopped());
const u64 event_src = ppu.gpr[4];
const u64 event_data1 = ppu.gpr[5];
@ -854,6 +864,8 @@ void _spurs::event_helper_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
sys_semaphore_post(ppu, static_cast<u32>(spurs->wklF2[i].sem), 1);
}
}
static_cast<void>(ppu.test_stopped());
}
else
{
@ -883,6 +895,7 @@ void _spurs::event_helper_entry(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
else if (data0 == 2)
{
ensure(sys_semaphore_post(ppu, static_cast<u32>(spurs->semPrv), 1) == 0);
static_cast<void>(ppu.test_stopped());
}
else if (data0 == 3)
{
@ -2015,7 +2028,7 @@ void _spurs::trace_status_update(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
{
u8 init;
spurs->sysSrvTrace.atomic_op([spurs, &init](CellSpurs::SrvTraceSyncVar& data)
vm::atomic_op(spurs->sysSrvTrace, [spurs, &init](CellSpurs::SrvTraceSyncVar& data)
{
if ((init = data.sysSrvTraceInitialised))
{
@ -2026,8 +2039,9 @@ void _spurs::trace_status_update(ppu_thread& ppu, vm::ptr<CellSpurs> spurs)
if (init)
{
spurs->sysSrvMessage = 0xff;
vm::light_op<true>(spurs->sysSrvMessage, [&](atomic_t<u8>& v){ v.release(0xff); });
ensure(sys_semaphore_wait(ppu, static_cast<u32>(spurs->semPrv), 0) == 0);
static_cast<void>(ppu.test_stopped());
}
}
@ -2457,7 +2471,7 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, vm::ptr<u32>
ensure((res_wkl <= 31));
vm::light_op(spurs->sysSrvMsgUpdateWorkload, [](atomic_t<u8>& v){ v.release(0xff); });
vm::light_op(spurs->sysSrvMessage, [](atomic_t<u8>& v){ v.release(0xff); });
vm::light_op<true>(spurs->sysSrvMessage, [](atomic_t<u8>& v){ v.release(0xff); });
return CELL_OK;
}
@ -2551,7 +2565,7 @@ s32 cellSpursShutdownWorkload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid
if (old_state == SPURS_WKL_STATE_SHUTTING_DOWN)
{
vm::light_op(spurs->sysSrvMessage, [&](atomic_t<u8>& v){ v.release(0xff); });
vm::light_op<true>(spurs->sysSrvMessage, [&](atomic_t<u8>& v){ v.release(0xff); });
return CELL_OK;
}
@ -2807,7 +2821,7 @@ s32 cellSpursReadyCountStore(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid,
return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
}
vm::light_op(spurs->readyCount(wid), [&](atomic_t<u8>& v)
vm::light_op<true>(spurs->readyCount(wid), [&](atomic_t<u8>& v)
{
v.release(static_cast<u8>(value));
});
@ -3256,6 +3270,7 @@ s32 cellSpursEventFlagSet(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFlag
eventFlag->pendingRecvTaskEvents[ppuWaitSlot] = ppuEvents;
ensure(sys_event_port_send(eventFlag->eventPortId, 0, 0, 0) == 0);
static_cast<void>(ppu.test_stopped());
}
if (pendingRecv)
@ -3325,7 +3340,7 @@ s32 _spurs::event_flag_wait(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFl
bool recv;
s32 rc;
u16 receivedEvents;
eventFlag->ctrl.atomic_op([eventFlag, mask, mode, block, &recv, &rc, &receivedEvents](CellSpursEventFlag::ControlSyncVar& ctrl)
vm::atomic_op(eventFlag->ctrl, [eventFlag, mask, mode, block, &recv, &rc, &receivedEvents](CellSpursEventFlag::ControlSyncVar& ctrl)
{
u16 relevantEvents = ctrl.events & *mask;
if (eventFlag->direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
@ -3429,6 +3444,7 @@ s32 _spurs::event_flag_wait(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFl
{
// Block till something happens
ensure(sys_event_queue_receive(ppu, eventFlag->eventQueueId, vm::null, 0) == 0);
static_cast<void>(ppu.test_stopped());
s32 i = 0;
if (eventFlag->direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
@ -3437,7 +3453,7 @@ s32 _spurs::event_flag_wait(ppu_thread& ppu, vm::ptr<CellSpursEventFlag> eventFl
}
*mask = eventFlag->pendingRecvTaskEvents[i];
eventFlag->ctrl.atomic_op([](auto& ctrl) { ctrl.ppuPendingRecv = 0; });
vm::atomic_op(eventFlag->ctrl, [](CellSpursEventFlag::ControlSyncVar& ctrl) { ctrl.ppuPendingRecv = 0; });
}
*mask = receivedEvents;

View File

@ -38,8 +38,6 @@ void fmt_class_string<bs_t<ppu_attr>>::format(std::string& out, u64 arg)
format_bitset(out, arg, "[", ",", "]", &fmt_class_string<ppu_attr>::format);
}
u32 ppu_get_far_jump(u32 pc);
void ppu_module::validate(u32 reloc)
{
// Load custom PRX configuration if available
@ -1202,12 +1200,6 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
const ppu_opcode_t op{*_ptr++};
const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
if (ppu_get_far_jump(iaddr))
{
block.second = _ptr.addr() - block.first - 4;
break;
}
if (type == ppu_itype::UNK)
{
// Invalid blocks will remain empty
@ -1397,11 +1389,6 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
const ppu_opcode_t op{*_ptr++};
const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
if (ppu_get_far_jump(iaddr))
{
break;
}
if (type == ppu_itype::B || type == ppu_itype::BC)
{
const u32 target = (op.aa ? 0 : iaddr) + (type == ppu_itype::B ? +op.bt24 : +op.bt14);
@ -1476,11 +1463,7 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
const ppu_opcode_t op{*_ptr++};
const ppu_itype::type type = s_ppu_itype.decode(op.opcode);
if (ppu_get_far_jump(addr))
{
_ptr.set(next);
}
else if (type == ppu_itype::UNK)
if (type == ppu_itype::UNK)
{
break;
}
@ -1692,11 +1675,6 @@ void ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
for (; i_pos < lim; i_pos += 4)
{
if (ppu_get_far_jump(i_pos))
{
continue;
}
const u32 opc = vm::_ref<u32>(i_pos);
switch (auto type = s_ppu_itype.decode(opc))

View File

@ -616,9 +616,13 @@ struct ppu_far_jumps_t
bool link;
bool with_toc;
std::string module_name;
ppu_intrp_func_t func;
};
ppu_far_jumps_t(int) noexcept {}
std::unordered_map<u32, all_info_t> vals;
::jit_runtime rt;
mutable shared_mutex mutex;
@ -679,17 +683,64 @@ struct ppu_far_jumps_t
return {};
}
// Return a host-code stub for the far jump registered at `pc`, or nullptr when
// `pc` has no entry in `vals`.
// The stub is assembled on the first request into this object's own runtime (`rt`)
// and cached in the entry's `func` field, so later calls return the same pointer.
// Locked = true holds `mutex` for the duration of the call; Locked = false only binds
// a reference to it and does no locking, so the caller must already hold `mutex`.
template <bool Locked = true>
ppu_intrp_func_t gen_jump(u32 pc)
{
// Either a real lock_guard (Locked) or a plain reference that locks nothing (!Locked)
[[maybe_unused]] std::conditional_t<Locked, std::lock_guard<shared_mutex>, const shared_mutex&> lock(mutex);
auto it = vals.find(pc);
if (it == vals.end())
{
// No far jump is registered at this address
return nullptr;
}
if (!it->second.func)
{
// First request for this address: assemble the stub and remember it in the entry
it->second.func = build_function_asm<ppu_intrp_func_t>("", [&](native_asm& c, auto& args)
{
using namespace asmjit;
#ifdef ARCH_X64
// Copy rbp into the first argument register.
// NOTE(review): presumably rbp carries the ppu_thread pointer in JIT-compiled code - confirm.
c.mov(args[0], x86::rbp);
// Write pc into ppu_thread::cia (ppu_far_jump looks the target up by ppu.cia), then jump to ppu_far_jump
c.mov(x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)), pc);
c.jmp(ppu_far_jump);
#else
// Non-x64 path: pc and the address of ppu_far_jump are embedded as literals after the code
Label jmp_address = c.newLabel();
Label imm_address = c.newLabel();
// Load the embedded pc and store it into ppu_thread::cia, addressed through args[0]
// NOTE(review): unlike the x64 path, args[0] is used as passed in - confirm it already holds the ppu_thread pointer.
c.ldr(args[1].r32(), arm::ptr(imm_address));
c.str(args[1].r32(), arm::Mem(args[0], ::offset32(&ppu_thread::cia)));
// Load the embedded address of ppu_far_jump and branch to it
c.ldr(args[1], arm::ptr(jmp_address));
c.br(args[1]);
// Literal data: 64-bit address of ppu_far_jump followed by the 32-bit pc
c.align(AlignMode::kCode, 16);
c.bind(jmp_address);
c.embedUInt64(reinterpret_cast<u64>(ppu_far_jump));
c.bind(imm_address);
c.embedUInt32(pc);
#endif
}, &rt);
}
return it->second.func;
}
};
// Look up the far-jump entry registered for `pc`.
// Returns 0 when the ppu_far_jumps_t container has not been created; otherwise
// returns whatever ppu_far_jumps_t::get_target(pc) reports for that address.
// This function is a pure lookup and must not create the container itself:
// ppu_form_branch_to_code creates it with init<ppu_far_jumps_t>(0). An unconditional
// init<ppu_far_jumps_t>() here would make the is_init guard below dead.
u32 ppu_get_far_jump(u32 pc)
{
	if (!g_fxo->is_init<ppu_far_jumps_t>())
	{
		// No HLE branch has been registered yet
		return 0;
	}

	return g_fxo->get<ppu_far_jumps_t>().get_target(pc);
}
static void ppu_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*)
static void ppu_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*)
{
const u32 cia = g_fxo->get<ppu_far_jumps_t>().get_target(vm::get_addr(this_op), &ppu);
const u32 cia = g_fxo->get<ppu_far_jumps_t>().get_target(ppu.cia, &ppu);
if (!vm::check_addr(cia, vm::page_executable))
{
@ -740,7 +791,7 @@ bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc, st
return false;
}
g_fxo->init<ppu_far_jumps_t>();
g_fxo->init<ppu_far_jumps_t>(0);
if (!module_name.empty())
{
@ -759,7 +810,7 @@ bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc, st
std::lock_guard lock(jumps.mutex);
jumps.vals.insert_or_assign(entry, ppu_far_jumps_t::all_info_t{target, link, with_toc, std::move(module_name)});
ppu_register_function_at(entry, 4, &ppu_far_jump);
ppu_register_function_at(entry, 4, g_cfg.core.ppu_decoder == ppu_decoder_type::_static ? &ppu_far_jump : ensure(g_fxo->get<ppu_far_jumps_t>().gen_jump<false>(entry)));
return true;
}
@ -781,7 +832,10 @@ bool ppu_form_branch_to_code(u32 entry, u32 target)
void ppu_remove_hle_instructions(u32 addr, u32 size)
{
g_fxo->init<ppu_far_jumps_t>();
if (Emu.IsStopped() || !g_fxo->is_init<ppu_far_jumps_t>())
{
return;
}
auto& jumps = g_fxo->get<ppu_far_jumps_t>();
@ -3392,6 +3446,19 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
}
}
if (jit)
{
const auto far_jump = ppu_get_far_jump(func.addr) ? g_fxo->get<ppu_far_jumps_t>().gen_jump(func.addr) : nullptr;
if (far_jump)
{
// Replace the function with ppu_far_jump
jit->update_global_mapping(fmt::format("__0x%x", func.addr - reloc), reinterpret_cast<u64>(far_jump));
fpos++;
continue;
}
}
// Copy block or function entry
ppu_function& entry = part.funcs.emplace_back(func);
@ -3713,8 +3780,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
const auto addr = ensure(reinterpret_cast<ppu_intrp_func_t>(jit->get(name)));
jit_mod.funcs.emplace_back(addr);
if (ppu_ref(func.addr) != ppu_far_jump)
ppu_register_function_at(func.addr, 4, addr);
ppu_register_function_at(func.addr, 4, addr);
if (g_cfg.core.ppu_debug)
ppu_log.notice("Installing function %s at 0x%x: %p (reloc = 0x%x)", name, func.addr, ppu_ref(func.addr), reloc);
@ -3733,8 +3799,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
const u64 addr = reinterpret_cast<uptr>(ensure(jit_mod.funcs[index++]));
if (ppu_ref(func.addr) != ppu_far_jump)
ppu_register_function_at(func.addr, 4, addr);
ppu_register_function_at(func.addr, 4, addr);
if (g_cfg.core.ppu_debug)
ppu_log.notice("Reinstalling function at 0x%x: %p (reloc=0x%x)", func.addr, ppu_ref(func.addr), reloc);

View File

@ -236,14 +236,6 @@ Function* PPUTranslator::Translate(const ppu_function& info)
m_rel = nullptr;
}
if (ppu_get_far_jump(m_addr + base))
{
// Branch into an HLEd instruction using the jump table
FlushRegisters();
CallFunction(0, m_reloc ? m_ir->CreateAdd(m_ir->getInt64(m_addr), m_seg0) : m_ir->getInt64(m_addr));
continue;
}
const u32 op = vm::read32(vm::cast(m_addr + base));
(this->*(s_ppu_decoder.decode(op)))({op});