mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-25 12:12:50 +01:00
PPU LLVM: upgrade to GHC call conv
Get rid of some global variables. Implement ppu_escape (not used yet). Bump PPU cache version to v4.
This commit is contained in:
parent
aeeceb7d0b
commit
0c034ad7de
@ -1,5 +1,6 @@
|
||||
#include "stdafx.h"
|
||||
#include "PPUFunction.h"
|
||||
#include "Utilities/JIT.h"
|
||||
|
||||
#include "PPUModule.h"
|
||||
|
||||
@ -1888,7 +1889,7 @@ extern std::string ppu_get_variable_name(const std::string& _module, u32 vnid)
|
||||
return fmt::format("0x%08X", vnid);
|
||||
}
|
||||
|
||||
std::vector<ppu_function_t>& ppu_function_manager::access()
|
||||
std::vector<ppu_function_t>& ppu_function_manager::access(bool ghc)
|
||||
{
|
||||
static std::vector<ppu_function_t> list
|
||||
{
|
||||
@ -1907,15 +1908,43 @@ std::vector<ppu_function_t>& ppu_function_manager::access()
|
||||
},
|
||||
};
|
||||
|
||||
return list;
|
||||
static std::vector<ppu_function_t> list_ghc
|
||||
{
|
||||
build_function_asm<ppu_function_t>([](asmjit::X86Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
c.mov(args[0], x86::rbp);
|
||||
c.jmp(imm_ptr(list[0]));
|
||||
}),
|
||||
build_function_asm<ppu_function_t>([](asmjit::X86Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
c.mov(args[0], x86::rbp);
|
||||
c.jmp(imm_ptr(list[1]));
|
||||
}),
|
||||
};
|
||||
|
||||
return ghc ? list_ghc : list;
|
||||
}
|
||||
|
||||
// Register a new HLE function.
// The function pointer is appended to the native list, and a freshly generated
// trampoline is appended to the GHC list. Returns the index of the new entry
// in the native list.
u32 ppu_function_manager::add_function(ppu_function_t function)
{
	auto& native_funcs = access();
	auto& ghc_funcs = access(true);

	native_funcs.push_back(function);

	// Generate trampoline: copy RBP into the first native argument, then jump to the function
	const auto trampoline = build_function_asm<ppu_function_t>([&](asmjit::X86Assembler& c, auto& args)
	{
		using namespace asmjit;

		c.mov(args[0], x86::rbp);
		c.jmp(imm_ptr(function));
	});

	ghc_funcs.push_back(trampoline);

	// Index of the entry that has just been appended
	return ::size32(native_funcs) - 1;
}
|
||||
|
||||
|
@ -256,7 +256,7 @@ class ppu_function_manager
|
||||
};
|
||||
|
||||
// Access global function list
|
||||
static std::vector<ppu_function_t>& access();
|
||||
static std::vector<ppu_function_t>& access(bool ghc = false);
|
||||
|
||||
static u32 add_function(ppu_function_t function);
|
||||
|
||||
@ -276,9 +276,9 @@ public:
|
||||
}
|
||||
|
||||
// Read all registered functions
|
||||
static inline const auto& get()
|
||||
static inline const auto& get(bool llvm = false)
|
||||
{
|
||||
return access();
|
||||
return access(llvm);
|
||||
}
|
||||
|
||||
static inline u32 func_addr(u32 index)
|
||||
|
@ -244,7 +244,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link)
|
||||
};
|
||||
|
||||
// Initialize double-purpose fake OPD array for HLE functions
|
||||
const auto& hle_funcs = ppu_function_manager::get();
|
||||
const auto& hle_funcs = ppu_function_manager::get(g_cfg.core.ppu_decoder == ppu_decoder_type::llvm);
|
||||
|
||||
// Allocate memory for the array (must be called after fixed allocations)
|
||||
ppu_function_manager::addr = vm::alloc(::size32(hle_funcs) * 8, vm::main);
|
||||
|
@ -130,6 +130,128 @@ static bool ppu_break(ppu_thread& ppu, ppu_opcode_t op);
|
||||
|
||||
extern void do_cell_atomic_128_store(u32 addr, const void* to_write);
|
||||
|
||||
// Gateway for PPU: converts from the native calling convention to the register layout
// expected by recompiled code, and saves the RSP value for ppu_escape.
// The only native argument is the ppu_thread pointer. The call target is looked up in the
// executable table using the thread's current `cia`, then called with:
//   r13 = vm::g_exec_addr    rbp = ppu_thread*    r12 = relocation base
//   rbx = vm::g_base_addr    r14 / rsi / rdi = gpr[0] / gpr[1] / gpr[2]
const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

#ifdef _WIN32
	// Save the non-volatile general purpose registers of the Win64 ABI
	c.push(x86::r15);
	c.push(x86::r14);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rsi);
	c.push(x86::rdi);
	c.push(x86::rbp);
	c.push(x86::rbx);

	// Frame layout (0xc8 bytes, keeps RSP 16-byte aligned after the eight pushes above):
	//   [rsp + 0x00, rsp + 0x20)  home (shadow) space for the callee
	//   [rsp + 0x20, rsp + 0xc0)  xmm6..xmm15
	//   [rsp + 0xc0, rsp + 0xc8)  alignment padding
	// The home space is required because the called code may tail-jump into a native Win64
	// function (e.g. the trampolines that `jmp` to HLE functions), and such a function is
	// allowed to write to the 32 bytes above its return address. Without this reservation
	// those writes would land on the saved xmm6/xmm7 values.
	c.sub(x86::rsp, 0xc8);
	c.movaps(x86::oword_ptr(x86::rsp, 0xb0), x86::xmm15);
	c.movaps(x86::oword_ptr(x86::rsp, 0xa0), x86::xmm14);
	c.movaps(x86::oword_ptr(x86::rsp, 0x90), x86::xmm13);
	c.movaps(x86::oword_ptr(x86::rsp, 0x80), x86::xmm12);
	c.movaps(x86::oword_ptr(x86::rsp, 0x70), x86::xmm11);
	c.movaps(x86::oword_ptr(x86::rsp, 0x60), x86::xmm10);
	c.movaps(x86::oword_ptr(x86::rsp, 0x50), x86::xmm9);
	c.movaps(x86::oword_ptr(x86::rsp, 0x40), x86::xmm8);
	c.movaps(x86::oword_ptr(x86::rsp, 0x30), x86::xmm7);
	c.movaps(x86::oword_ptr(x86::rsp, 0x20), x86::xmm6);
#else
	// Save callee-saved registers; the extra push of rax is dropped again by
	// `add rsp, 8` in the epilogue
	c.push(x86::rbp);
	c.push(x86::r15);
	c.push(x86::r14);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rbx);
	c.push(x86::rax);
#endif

	// Save native stack pointer for longjmp emulation
	c.mov(x86::qword_ptr(args[0], ::offset32(&ppu_thread::saved_native_sp)), x86::rsp);

	// Initialize args
	c.mov(x86::r13, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
	c.mov(x86::rbp, args[0]);
	c.mov(x86::edx, x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia))); // Load PC

	c.mov(x86::rax, x86::qword_ptr(x86::r13, x86::edx, 1, 0)); // Load call target

	// Split the table entry: the low 47 bits are the function address,
	// the upper 17 bits shifted left by 12 are the relocation base
	c.mov(x86::rdx, x86::rax);
	c.shl(x86::rax, 17);
	c.shr(x86::rax, 17);
	c.shr(x86::rdx, 47);
	c.shl(x86::rdx, 12);
	c.mov(x86::r12d, x86::edx); // Load relocation base

	c.mov(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
	c.mov(x86::r14, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 0))); // Load some registers
	c.mov(x86::rsi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 1)));
	c.mov(x86::rdi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 2)));

	if (utils::has_avx())
	{
		c.vzeroupper();
	}

	c.call(x86::rax);

	if (utils::has_avx())
	{
		c.vzeroupper();
	}

#ifdef _WIN32
	// Restore xmm6..xmm15 from the slots above the home space, then unwind the frame
	c.movaps(x86::xmm6, x86::oword_ptr(x86::rsp, 0x20));
	c.movaps(x86::xmm7, x86::oword_ptr(x86::rsp, 0x30));
	c.movaps(x86::xmm8, x86::oword_ptr(x86::rsp, 0x40));
	c.movaps(x86::xmm9, x86::oword_ptr(x86::rsp, 0x50));
	c.movaps(x86::xmm10, x86::oword_ptr(x86::rsp, 0x60));
	c.movaps(x86::xmm11, x86::oword_ptr(x86::rsp, 0x70));
	c.movaps(x86::xmm12, x86::oword_ptr(x86::rsp, 0x80));
	c.movaps(x86::xmm13, x86::oword_ptr(x86::rsp, 0x90));
	c.movaps(x86::xmm14, x86::oword_ptr(x86::rsp, 0xa0));
	c.movaps(x86::xmm15, x86::oword_ptr(x86::rsp, 0xb0));
	c.add(x86::rsp, 0xc8);
	c.pop(x86::rbx);
	c.pop(x86::rbp);
	c.pop(x86::rdi);
	c.pop(x86::rsi);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::r14);
	c.pop(x86::r15);
#else
	c.add(x86::rsp, +8);
	c.pop(x86::rbx);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::r14);
	c.pop(x86::r15);
	c.pop(x86::rbp);
#endif

	c.ret();
});
|
||||
|
||||
// Emulated longjmp out of recompiled code.
// Reloads the native stack pointer that ppu_gateway stored in ppu_thread::saved_native_sp
// and jumps through the quadword just below it, which is the slot where the gateway's
// `call` instruction pushed its return address.
extern const auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

	// Restore native stack pointer (longjmp emulation)
	const auto saved_sp = x86::qword_ptr(args[0], ::offset32(&ppu_thread::saved_native_sp));
	c.mov(x86::rsp, saved_sp);

	// Return to the return location
	const auto return_slot = x86::qword_ptr(x86::rsp, -8);
	c.jmp(return_slot);
});
|
||||
|
||||
void ppu_recompiler_fallback(ppu_thread& ppu);
|
||||
|
||||
// Trampoline for ppu_recompiler_fallback: copies RBP into the first native argument
// and jumps to the native function.
const auto ppu_recompiler_fallback_ghc = build_function_asm<void(*)(ppu_thread&)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

	const auto native_target = imm_ptr(ppu_recompiler_fallback);

	c.mov(args[0], x86::rbp);
	c.jmp(native_target);
});
|
||||
|
||||
// Get pointer to executable cache
|
||||
static u64& ppu_ref(u32 addr)
|
||||
{
|
||||
@ -174,26 +296,33 @@ void ppu_recompiler_fallback(ppu_thread& ppu)
|
||||
|
||||
const auto& table = g_ppu_interpreter_fast.get_table();
|
||||
|
||||
u64 ctr = 0;
|
||||
|
||||
while (true)
|
||||
{
|
||||
// Run instructions in interpreter
|
||||
if (const u32 op = vm::read32(ppu.cia); table[ppu_decode(op)](ppu, {op})) [[likely]]
|
||||
if (const u32 op = vm::read32(ppu.cia); ctr++, table[ppu_decode(op)](ppu, {op})) [[likely]]
|
||||
{
|
||||
ppu.cia += 4;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (uptr func = ppu_ref(ppu.cia); func != reinterpret_cast<uptr>(ppu_recompiler_fallback))
|
||||
if (uptr func = ppu_ref(ppu.cia); (func << 17 >> 17) != reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
|
||||
{
|
||||
// We found a recompiler function at cia, return
|
||||
return;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ppu.test_stopped())
|
||||
{
|
||||
return;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (g_cfg.core.ppu_debug)
|
||||
{
|
||||
ppu_log.warning("Exiting interpreter at 0x%x (executed %u functions)", ppu.cia, ctr);
|
||||
}
|
||||
}
|
||||
|
||||
void ppu_reservation_fallback(ppu_thread& ppu)
|
||||
@ -262,12 +391,23 @@ extern void ppu_register_range(u32 addr, u32 size)
|
||||
utils::memory_commit(&ppu_ref(addr), size * 2, utils::protection::rw);
|
||||
vm::page_protect(addr, utils::align(size, 0x10000), 0, vm::page_executable);
|
||||
|
||||
const u64 fallback = g_cfg.core.ppu_decoder == ppu_decoder_type::llvm ? reinterpret_cast<uptr>(ppu_recompiler_fallback) : reinterpret_cast<uptr>(ppu_fallback);
|
||||
const u64 fallback = reinterpret_cast<uptr>(ppu_fallback);
|
||||
const u64 seg_base = addr;
|
||||
|
||||
size &= ~3; // Loop assumes `size = n * 4`, enforce that by rounding down
|
||||
|
||||
while (size)
|
||||
{
|
||||
ppu_ref(addr) = fallback;
|
||||
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
|
||||
{
|
||||
// Assume addr is the start of first segment of PRX
|
||||
ppu_ref(addr) = reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc) | (seg_base << (32 + 3));
|
||||
}
|
||||
else
|
||||
{
|
||||
ppu_ref(addr) = fallback;
|
||||
}
|
||||
|
||||
addr += 4;
|
||||
size -= 4;
|
||||
}
|
||||
@ -278,7 +418,7 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr)
|
||||
// Initialize specific function
|
||||
if (ptr)
|
||||
{
|
||||
ppu_ref(addr) = reinterpret_cast<uptr>(ptr);
|
||||
ppu_ref(addr) = (reinterpret_cast<uptr>(ptr) & 0x7fff'ffff'ffffu) | (ppu_ref(addr) & ~0x7fff'ffff'ffffu);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -833,9 +973,15 @@ void ppu_thread::exec_task()
|
||||
{
|
||||
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
|
||||
{
|
||||
while (!(state & (cpu_flag::ret + cpu_flag::exit + cpu_flag::stop)))
|
||||
while (true)
|
||||
{
|
||||
reinterpret_cast<ppu_function_t>(ppu_ref(cia))(*this);
|
||||
if (state) [[unlikely]]
|
||||
{
|
||||
if (check_state())
|
||||
break;
|
||||
}
|
||||
|
||||
ppu_gateway(this);
|
||||
}
|
||||
|
||||
return;
|
||||
@ -1927,9 +2073,9 @@ namespace
|
||||
// Compiled PPU module info
|
||||
struct jit_module
|
||||
{
|
||||
std::vector<u64*> vars;
|
||||
std::vector<ppu_function_t> funcs;
|
||||
std::shared_ptr<jit_compiler> pjit;
|
||||
bool init = false;
|
||||
};
|
||||
|
||||
struct jit_module_manager
|
||||
@ -2554,9 +2700,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
// Compiler instance (deferred initialization)
|
||||
std::shared_ptr<jit_compiler>& jit = jit_mod.pjit;
|
||||
|
||||
// Global variables to initialize
|
||||
std::vector<std::pair<std::string, u64>> globals;
|
||||
|
||||
// Split module into fragments <= 1 MiB
|
||||
usz fpos = 0;
|
||||
|
||||
@ -2574,7 +2717,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
|
||||
bool compiled_new = false;
|
||||
|
||||
while (jit_mod.vars.empty() && fpos < info.funcs.size())
|
||||
while (!jit_mod.init && fpos < info.funcs.size())
|
||||
{
|
||||
// Initialize compiler instance
|
||||
if (!jit && get_current_cpu_thread())
|
||||
@ -2582,17 +2725,11 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
jit = std::make_shared<jit_compiler>(s_link_table, g_cfg.core.llvm_cpu);
|
||||
}
|
||||
|
||||
// First function in current module part
|
||||
const auto fstart = fpos;
|
||||
|
||||
// Copy module information (TODO: optimize)
|
||||
ppu_module part;
|
||||
part.copy_part(info);
|
||||
part.funcs.reserve(16000);
|
||||
|
||||
// Unique suffix for each module part
|
||||
const u32 suffix = info.funcs.at(fstart).addr - reloc;
|
||||
|
||||
// Overall block size in bytes
|
||||
usz bsize = 0;
|
||||
usz bcount = 0;
|
||||
@ -2761,7 +2898,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
settings += ppu_settings::greedy_mode;
|
||||
|
||||
// Write version, hash, CPU, settings
|
||||
fmt::append(obj_name, "v3-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
|
||||
fmt::append(obj_name, "v4-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
|
||||
}
|
||||
|
||||
if (Emu.IsStopped())
|
||||
@ -2771,16 +2908,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
|
||||
if (!check_only)
|
||||
{
|
||||
globals.emplace_back(fmt::format("__mptr%x", suffix), reinterpret_cast<u64>(vm::g_base_addr));
|
||||
globals.emplace_back(fmt::format("__cptr%x", suffix), reinterpret_cast<u64>(vm::g_exec_addr));
|
||||
|
||||
// Initialize segments for relocations
|
||||
for (u32 i = 0, num = 0; i < info.segs.size(); i++)
|
||||
{
|
||||
if (!info.segs[i].addr) continue;
|
||||
globals.emplace_back(fmt::format("__seg%u_%x", num++, suffix), info.segs[i].addr);
|
||||
}
|
||||
|
||||
link_workload.emplace_back(obj_name, false);
|
||||
}
|
||||
|
||||
@ -2894,7 +3021,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
}
|
||||
|
||||
// Jit can be null if the loop doesn't ever enter.
|
||||
if (jit && jit_mod.vars.empty())
|
||||
if (jit && !jit_mod.init)
|
||||
{
|
||||
jit->fin();
|
||||
|
||||
@ -2903,23 +3030,16 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
{
|
||||
if (!func.size) continue;
|
||||
|
||||
const u64 addr = ensure(jit->get(fmt::format("__0x%x", func.addr - reloc)));
|
||||
const auto name = fmt::format("__0x%x", func.addr - reloc);
|
||||
const u64 addr = ensure(jit->get(name));
|
||||
jit_mod.funcs.emplace_back(reinterpret_cast<ppu_function_t>(addr));
|
||||
ppu_ref(func.addr) = addr;
|
||||
ppu_ref(func.addr) = (addr & 0x7fff'ffff'ffffu) | (ppu_ref(func.addr) & ~0x7fff'ffff'ffffu);
|
||||
|
||||
if (g_cfg.core.ppu_debug)
|
||||
ppu_log.notice("Installing function %s at 0x%x: %p (reloc = 0x%x)", name, func.addr, ppu_ref(func.addr), reloc);
|
||||
}
|
||||
|
||||
// Initialize global variables
|
||||
for (auto& var : globals)
|
||||
{
|
||||
const u64 addr = ensure(jit->get(var.first));
|
||||
|
||||
jit_mod.vars.emplace_back(reinterpret_cast<u64*>(addr));
|
||||
|
||||
if (addr)
|
||||
{
|
||||
*reinterpret_cast<u64*>(addr) = var.second;
|
||||
}
|
||||
}
|
||||
jit_mod.init = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -2930,23 +3050,14 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
{
|
||||
if (!func.size) continue;
|
||||
|
||||
ppu_ref(func.addr) = ensure(reinterpret_cast<uptr>(jit_mod.funcs[index++]));
|
||||
const u64 addr = ensure(reinterpret_cast<uptr>(jit_mod.funcs[index++]));
|
||||
ppu_ref(func.addr) = (addr & 0x7fff'ffff'ffffu) | (ppu_ref(func.addr) & ~0x7fff'ffff'ffffu);
|
||||
|
||||
if (g_cfg.core.ppu_debug)
|
||||
ppu_log.notice("Reinstalling function at 0x%x: %p (reloc=0x%x)", func.addr, ppu_ref(func.addr), reloc);
|
||||
}
|
||||
|
||||
index = 0;
|
||||
|
||||
// Rewrite global variables
|
||||
while (index < jit_mod.vars.size())
|
||||
{
|
||||
*jit_mod.vars[index++] = reinterpret_cast<u64>(vm::g_base_addr);
|
||||
*jit_mod.vars[index++] = reinterpret_cast<u64>(vm::g_exec_addr);
|
||||
|
||||
for (const auto& seg : info.segs)
|
||||
{
|
||||
if (!seg.addr) continue;
|
||||
*jit_mod.vars[index++] = seg.addr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return compiled_new;
|
||||
@ -2971,8 +3082,15 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
|
||||
PPUTranslator translator(jit.get_context(), _module.get(), module_part, jit.get_engine());
|
||||
|
||||
// Define some types
|
||||
const auto _void = Type::getVoidTy(jit.get_context());
|
||||
const auto _func = FunctionType::get(_void, {translator.GetContextType()->getPointerTo()}, false);
|
||||
const auto _func = FunctionType::get(translator.get_type<void>(), {
|
||||
translator.get_type<u8*>(), // Exec base
|
||||
translator.GetContextType()->getPointerTo(), // PPU context
|
||||
translator.get_type<u64>(), // Segment address (for PRX)
|
||||
translator.get_type<u8*>(), // Memory base
|
||||
translator.get_type<u64>(), // r0
|
||||
translator.get_type<u64>(), // r1
|
||||
translator.get_type<u64>(), // r2
|
||||
}, false);
|
||||
|
||||
// Initialize function list
|
||||
for (const auto& func : module_part.funcs)
|
||||
@ -2980,7 +3098,8 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
|
||||
if (func.size)
|
||||
{
|
||||
const auto f = cast<Function>(_module->getOrInsertFunction(func.name, _func).getCallee());
|
||||
f->addAttribute(1, Attribute::NoAlias);
|
||||
f->setCallingConv(CallingConv::GHC);
|
||||
f->addAttribute(2, Attribute::NoAlias);
|
||||
f->addFnAttr(Attribute::NoUnwind);
|
||||
}
|
||||
}
|
||||
|
@ -267,6 +267,8 @@ public:
|
||||
// Thread name
|
||||
atomic_ptr<std::string> ppu_tname;
|
||||
|
||||
u64 saved_native_sp = 0; // Host thread's stack pointer for emulated longjmp
|
||||
|
||||
u64 last_ftsc = 0;
|
||||
u64 last_ftime = 0;
|
||||
u32 last_faddr = 0;
|
||||
|
@ -26,14 +26,6 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
|
||||
// Bind context
|
||||
cpu_translator::initialize(context, engine);
|
||||
|
||||
// There is no weak linkage on JIT, so let's create variables with different names for each module part
|
||||
const u32 gsuffix = m_info.relocs.empty() ? info.funcs[0].addr : info.funcs[0].addr - m_info.segs[0].addr;
|
||||
|
||||
// Memory base
|
||||
m_base = new GlobalVariable(*_module, ArrayType::get(GetType<char>(), 0x100000000)->getPointerTo(), true, GlobalValue::ExternalLinkage, 0, fmt::format("__mptr%x", gsuffix));
|
||||
m_base->setInitializer(ConstantPointerNull::get(cast<PointerType>(m_base->getType()->getPointerElementType())));
|
||||
m_base->setExternallyInitialized(true);
|
||||
|
||||
// Thread context struct (TODO: safer member access)
|
||||
const u32 off0 = offset32(&ppu_thread::state);
|
||||
const u32 off1 = offset32(&ppu_thread::gpr);
|
||||
@ -56,11 +48,6 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
|
||||
|
||||
m_thread_type = StructType::create(m_context, thread_struct, "context_t");
|
||||
|
||||
// Callable
|
||||
m_call = new GlobalVariable(*_module, ArrayType::get(GetType<u64>(), 0x40000000)->getPointerTo(), true, GlobalValue::ExternalLinkage, 0, fmt::format("__cptr%x", gsuffix));
|
||||
m_call->setInitializer(ConstantPointerNull::get(cast<PointerType>(m_call->getType()->getPointerElementType())));
|
||||
m_call->setExternallyInitialized(true);
|
||||
|
||||
const auto md_name = MDString::get(m_context, "branch_weights");
|
||||
const auto md_low = ValueAsMetadata::get(ConstantInt::get(GetType<u32>(), 1));
|
||||
const auto md_high = ValueAsMetadata::get(ConstantInt::get(GetType<u32>(), 666));
|
||||
@ -69,16 +56,6 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
|
||||
m_md_likely = MDTuple::get(m_context, {md_name, md_high, md_low});
|
||||
m_md_unlikely = MDTuple::get(m_context, {md_name, md_low, md_high});
|
||||
|
||||
// Create segment variables
|
||||
for (const auto& seg : m_info.segs)
|
||||
{
|
||||
if (!seg.addr) continue;
|
||||
auto gv = new GlobalVariable(*_module, GetType<u64>(), true, GlobalValue::ExternalLinkage, 0, fmt::format("__seg%u_%x", m_segs.size(), gsuffix));
|
||||
gv->setInitializer(ConstantInt::get(GetType<u64>(), seg.addr));
|
||||
gv->setExternallyInitialized(true);
|
||||
m_segs.emplace_back(gv);
|
||||
}
|
||||
|
||||
// Sort relevant relocations (TODO)
|
||||
const auto caddr = m_info.segs[0].addr;
|
||||
const auto cend = caddr + m_info.segs[0].size;
|
||||
@ -194,11 +171,18 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
||||
}
|
||||
}
|
||||
|
||||
m_thread = &*m_function->arg_begin();
|
||||
m_base_loaded = m_ir->CreateLoad(m_base);
|
||||
m_thread = &*(m_function->arg_begin() + 1);
|
||||
m_base = &*(m_function->arg_begin() + 3);
|
||||
m_exec = &*(m_function->arg_begin() + 0);
|
||||
m_seg0 = &*(m_function->arg_begin() + 2);
|
||||
|
||||
m_gpr[0] = &*(m_function->arg_begin() + 4);
|
||||
m_gpr[1] = &*(m_function->arg_begin() + 5);
|
||||
m_gpr[2] = &*(m_function->arg_begin() + 6);
|
||||
|
||||
const auto body = BasicBlock::Create(m_context, "__body", m_function);
|
||||
|
||||
//Call(GetType<void>(), "__trace", GetAddr());
|
||||
if (need_check)
|
||||
{
|
||||
// Check status register in the entry block
|
||||
@ -302,7 +286,7 @@ Value* PPUTranslator::GetAddr(u64 _add)
|
||||
if (m_reloc)
|
||||
{
|
||||
// Load segment address from global variable, compute actual instruction address
|
||||
return m_ir->CreateAdd(m_ir->getInt64(m_addr + _add), m_ir->CreateLoad(m_segs[m_reloc - m_info.segs.data()]));
|
||||
return m_ir->CreateAdd(m_ir->getInt64(m_addr + _add), m_seg0);
|
||||
}
|
||||
|
||||
return m_ir->getInt64(m_addr + _add);
|
||||
@ -351,20 +335,23 @@ Value* PPUTranslator::RotateLeft(Value* arg, Value* n)
|
||||
|
||||
void PPUTranslator::CallFunction(u64 target, Value* indirect)
|
||||
{
|
||||
const auto type = FunctionType::get(GetType<void>(), {m_thread_type->getPointerTo()}, false);
|
||||
const auto type = m_function->getFunctionType();
|
||||
const auto block = m_ir->GetInsertBlock();
|
||||
|
||||
FunctionCallee callee;
|
||||
|
||||
auto seg0 = m_seg0;
|
||||
|
||||
if (!indirect)
|
||||
{
|
||||
if ((!m_reloc && target < 0x10000) || target >= u64{} - 0x10000)
|
||||
if ((!m_reloc && target < 0x10000) || target >= 0x100000000u - 0x10000)
|
||||
{
|
||||
Trap();
|
||||
return;
|
||||
}
|
||||
|
||||
callee = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type);
|
||||
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target), type);
|
||||
cast<Function>(callee.getCallee())->setCallingConv(CallingConv::GHC);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -379,13 +366,19 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
|
||||
}
|
||||
}
|
||||
|
||||
const auto pos = m_ir->CreateLShr(indirect, 2, "", true);
|
||||
const auto ptr = m_ir->CreateGEP(m_ir->CreateLoad(m_call), {m_ir->getInt64(0), pos});
|
||||
callee = FunctionCallee(type, m_ir->CreateIntToPtr(m_ir->CreateLoad(ptr), type->getPointerTo()));
|
||||
const auto pos = m_ir->CreateShl(indirect, 1);
|
||||
const auto ptr = m_ir->CreateGEP(m_exec, pos);
|
||||
const auto val = m_ir->CreateLoad(m_ir->CreateBitCast(ptr, get_type<u64*>()));
|
||||
callee = FunctionCallee(type, m_ir->CreateIntToPtr(m_ir->CreateAnd(val, 0x7fff'ffff'ffff), type->getPointerTo()));
|
||||
|
||||
// Load new segment address
|
||||
seg0 = m_ir->CreateShl(m_ir->CreateLShr(val, 47), 12);
|
||||
}
|
||||
|
||||
m_ir->SetInsertPoint(block);
|
||||
m_ir->CreateCall(callee, {m_thread})->setTailCallKind(llvm::CallInst::TCK_Tail);
|
||||
const auto c = m_ir->CreateCall(callee, {m_exec, m_thread, seg0, m_base, GetGpr(0), GetGpr(1), GetGpr(2)});
|
||||
c->setTailCallKind(llvm::CallInst::TCK_Tail);
|
||||
c->setCallingConv(CallingConv::GHC);
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
|
||||
@ -627,7 +620,7 @@ void PPUTranslator::UseCondition(MDNode* hint, Value* cond)
|
||||
|
||||
llvm::Value* PPUTranslator::GetMemory(llvm::Value* addr, llvm::Type* type)
|
||||
{
|
||||
return bitcast(m_ir->CreateGEP(m_base_loaded, {m_ir->getInt64(0), addr}), type->getPointerTo());
|
||||
return bitcast(m_ir->CreateGEP(m_base, addr), type->getPointerTo());
|
||||
}
|
||||
|
||||
Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
|
||||
|
@ -36,18 +36,17 @@ class PPUTranslator final : public cpu_translator
|
||||
|
||||
/* Variables */
|
||||
|
||||
// Segments
|
||||
std::vector<llvm::GlobalVariable*> m_segs;
|
||||
|
||||
// Memory base
|
||||
llvm::GlobalVariable* m_base;
|
||||
llvm::Value* m_base_loaded;
|
||||
llvm::Value* m_base;
|
||||
|
||||
// Thread context
|
||||
llvm::Value* m_thread;
|
||||
|
||||
// Callable functions
|
||||
llvm::GlobalVariable* m_call;
|
||||
llvm::Value* m_exec;
|
||||
|
||||
// Segment 0 address
|
||||
llvm::Value* m_seg0;
|
||||
|
||||
// Thread context struct
|
||||
llvm::StructType* m_thread_type;
|
||||
|
Loading…
Reference in New Issue
Block a user