1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-25 12:12:50 +01:00

Added jit_runtime class

jit_runtime is a memory manager for ASMJIT; it replaces asmjit::JitRuntime
Unified memory manager for ASMJIT and LLVM
Unified SPU trampoline generation
Remove previous workarounds
This commit is contained in:
Nekotekina 2019-01-26 23:15:45 +03:00
parent d1f544c755
commit 4292997a01
7 changed files with 219 additions and 358 deletions

View File

@ -1,10 +1,190 @@
#include "types.h"
#include "JIT.h" #include "JIT.h"
#include "StrFmt.h"
#include "File.h"
#include "Log.h"
#include "mutex.h"
#include "sysinfo.h"
#include "VirtualMemory.h"
#include <immintrin.h> #include <immintrin.h>
asmjit::JitRuntime& asmjit::get_global_runtime() // Memory manager mutex
// Memory manager mutex: add_jit_memory() holds it for the whole allocation
// (counter update and memory commit) when CAN_OVERCOMMIT is not defined
shared_mutex s_mutex2;
// CAN_OVERCOMMIT is defined on Linux only. With it, get_jit_memory() commits the
// entire JIT area right after reserving it, and add_jit_memory() neither takes
// s_mutex2 nor commits memory incrementally.
#ifdef __linux__
#define CAN_OVERCOMMIT
#endif
// Returns the base address of the 2G JIT area, reserving it on the first call.
static u8* get_jit_memory()
{
	// Function-local static: the initialiser below runs exactly once
	static u8* const s_base = []
	{
		void* const area = utils::memory_reserve(0x80000000);

#ifdef CAN_OVERCOMMIT
		// Commit the whole area up front, then open the first 1G (code subrange) for write+execute
		utils::memory_commit(area, 0x80000000);
		utils::memory_protect(area, 0x40000000, utils::protection::wx);
#endif

		return static_cast<u8*>(area);
	}();

	return s_base;
}
// Allocation cursors, one per 1G subrange: code first, data second.
// Each holds the offset of the next free byte inside its subrange.
static atomic_t<u64> s_code_pos{0};
static atomic_t<u64> s_data_pos{0};

// Snapshots of the code and data subranges (everything generated before main()),
// captured by jit_runtime::initialize() and written back by jit_runtime::finalize()
static std::vector<u8> s_code_init;
static std::vector<u8> s_data_init;
// Bump allocator over one 1G subrange of the JIT area.
//
// Template parameters:
//   Ctr  - allocation cursor of the subrange (offset of the next free byte)
//   Off  - offset of the subrange from the start of the JIT area
//   Prot - protection used when more memory has to be committed
//
// Parameters:
//   size  - number of bytes requested
//   align - requested alignment; NOTE(review): assumed to be a power of two, as ::align
//           appears to expect - confirm against its definition. Zero is treated as 1.
//
// Returns a pointer to the allocated bytes, or nullptr (after logging a fatal error)
// when the request does not fit into the subrange. The special request size == 0 and
// align == 0 allocates nothing and returns the base address of the subrange.
template <atomic_t<u64>& Ctr, uint Off, utils::protection Prot>
static u8* add_jit_memory(std::size_t size, uint align)
{
	// Capacity of a single subrange (1G)
	constexpr u64 c_range = 0x40000000;

	// Value returned by the atomic operation when the request cannot be satisfied
	constexpr u64 c_fail = ~u64{0};

	// Select subrange
	u8* pointer = get_jit_memory() + Off;

	if (UNLIKELY(!size && !align))
	{
		// Return subrange info
		return pointer;
	}

	if (UNLIKELY(!align))
	{
		// A zero alignment is meaningless for a non-empty request: fall back to byte alignment
		// instead of feeding zero into the alignment arithmetic
		align = 1;
	}

#ifndef CAN_OVERCOMMIT
	// Without overcommit the cursor update and the commit below must happen as one step
	std::lock_guard lock(s_mutex2);
#endif

	// 64K-aligned cursor positions before and after the allocation (set on success only)
	u64 olda = 0, newa = 0;

	// Simple allocation by incrementing pointer to the next free data
	const u64 pos = Ctr.atomic_op([&](u64& ctr) -> u64
	{
		const u64 _pos = ::align(ctr, align);

		// Reject before adding: `_pos + size` could otherwise wrap around for a huge size
		// and slip past the limit check
		if (UNLIKELY(_pos > c_range || size > c_range - _pos))
		{
			return c_fail;
		}

		const u64 _new = ::align(_pos + size, align);

		if (UNLIKELY(_new > c_range))
		{
			return c_fail;
		}

		// Check the necessity to commit more memory
		olda = ::align(ctr, 0x10000);
		newa = ::align(_new, 0x10000);

		ctr = _new;
		return _pos;
	});

	if (UNLIKELY(pos == c_fail))
	{
		LOG_FATAL(GENERAL, "JIT: Out of memory (size=0x%x, align=0x%x, off=0x%x)", size, align, Off);
		return nullptr;
	}

	if (UNLIKELY(olda != newa))
	{
#ifdef CAN_OVERCOMMIT
		// TODO: possibly madvise
#else
		// Commit more memory
		utils::memory_commit(pointer + olda, newa - olda, Prot);
#endif
	}

	return pointer + pos;
}
// Nothing to set up per instance: the memory area and allocation counters are file-level statics
jit_runtime::jit_runtime() = default;

// Nothing to release per instance either
jit_runtime::~jit_runtime() = default;
// Relocates the code held by `code` into newly allocated executable memory.
// On success stores the address in *dst and returns kErrorOk; on any failure
// stores nullptr in *dst and returns the matching asmjit error code.
asmjit::Error jit_runtime::_add(void** dst, asmjit::CodeHolder* code) noexcept
{
	// Common failure path: clear the output, hand the error code back
	const auto fail = [dst](asmjit::Error err) -> asmjit::Error
	{
		*dst = nullptr;
		return err;
	};

	const std::size_t code_size = code->getCodeSize();

	if (UNLIKELY(code_size == 0))
	{
		return fail(asmjit::kErrorNoCodeGenerated);
	}

	// Executable memory, 16-byte aligned
	void* const target = jit_runtime::alloc(code_size, 16);

	if (UNLIKELY(target == nullptr))
	{
		return fail(asmjit::kErrorNoVirtualMemory);
	}

	const std::size_t reloc_size = code->relocate(target);

	if (UNLIKELY(reloc_size == 0))
	{
		return fail(asmjit::kErrorInvalidState);
	}

	flush(target, reloc_size);
	*dst = target;
	return asmjit::kErrorOk;
}
// Releasing a single function is a no-op; this function frees nothing and always reports success
asmjit::Error jit_runtime::_release(void* ptr) noexcept
{
	static_cast<void>(ptr); // intentionally unused
	return asmjit::kErrorOk;
}
// Allocates `size` bytes aligned to `align` from the JIT area.
// exec == true  -> code subrange (offset 0, write+execute protection)
// exec == false -> data subrange (offset 0x40000000, read+write protection)
// Returns whatever add_jit_memory returns for the selected subrange.
u8* jit_runtime::alloc(std::size_t size, uint align, bool exec) noexcept
{
	if (!exec)
	{
		return add_jit_memory<s_data_pos, 0x40000000, utils::protection::rw>(size, align);
	}

	return add_jit_memory<s_code_pos, 0x0, utils::protection::wx>(size, align);
}
void jit_runtime::initialize()
{
if (!s_code_init.empty() || !s_data_init.empty())
{
return;
}
// Create code/data snapshot
s_code_init.resize(s_code_pos);
std::memcpy(s_code_init.data(), alloc(0, 0, true), s_code_pos);
s_data_init.resize(s_data_pos);
std::memcpy(s_data_init.data(), alloc(0, 0, false), s_data_pos);
}
void jit_runtime::finalize() noexcept
{
// Reset JIT memory
#ifdef CAN_OVERCOMMIT
utils::memory_reset(get_jit_memory(), 0x80000000);
utils::memory_protect(get_jit_memory(), 0x40000000, utils::protection::wx);
#else
utils::memory_decommit(get_jit_memory(), 0x80000000);
#endif
s_code_pos = 0;
s_data_pos = 0;
// Restore code/data snapshot
std::memcpy(alloc(s_code_init.size(), 1, true), s_code_init.data(), s_code_init.size());
std::memcpy(alloc(s_data_init.size(), 1, false), s_data_init.data(), s_data_init.size());
}
::jit_runtime& asmjit::get_global_runtime()
{ {
// Magic static // Magic static
static asmjit::JitRuntime g_rt; static ::jit_runtime g_rt;
return g_rt; return g_rt;
} }
@ -38,14 +218,6 @@ void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code
#include <array> #include <array>
#include <deque> #include <deque>
#include "types.h"
#include "StrFmt.h"
#include "File.h"
#include "Log.h"
#include "mutex.h"
#include "sysinfo.h"
#include "VirtualMemory.h"
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning(push, 0) #pragma warning(push, 0)
#endif #endif
@ -95,12 +267,6 @@ static void* const s_memory = []() -> void*
return utils::memory_reserve(s_memory_size); return utils::memory_reserve(s_memory_size);
}(); }();
// Reserve 2G of memory, should replace previous area for ASLR compatibility
static void* const s_memory2 = utils::memory_reserve(0x80000000);
static u64 s_code_pos = 0;
static u64 s_data_pos = 0;
static void* s_next = s_memory; static void* s_next = s_memory;
#ifdef _WIN32 #ifdef _WIN32
@ -135,11 +301,6 @@ extern void jit_finalize()
utils::memory_decommit(s_memory, s_memory_size); utils::memory_decommit(s_memory, s_memory_size);
s_next = s_memory; s_next = s_memory;
utils::memory_decommit(s_memory2, 0x80000000);
s_code_pos = 0;
s_data_pos = 0;
} }
// Helper class // Helper class
@ -322,15 +483,6 @@ struct MemoryManager : llvm::RTDyldMemoryManager
// Simple memory manager // Simple memory manager
struct MemoryManager2 : llvm::RTDyldMemoryManager struct MemoryManager2 : llvm::RTDyldMemoryManager
{ {
// Patchwork again...
void* const m_memory = s_memory2;
u8* const m_code = static_cast<u8*>(m_memory) + 0x00000000;
u8* const m_data = static_cast<u8*>(m_memory) + 0x40000000;
u64& m_code_pos = s_code_pos;
u64& m_data_pos = s_data_pos;
MemoryManager2() = default; MemoryManager2() = default;
~MemoryManager2() override ~MemoryManager2() override
@ -339,64 +491,12 @@ struct MemoryManager2 : llvm::RTDyldMemoryManager
u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
{ {
std::lock_guard lock(s_mutex); return jit_runtime::alloc(size, align, true);
// Simple allocation
const u64 old = m_code_pos;
const u64 pos = ::align(m_code_pos, align);
m_code_pos = ::align(pos + size, align);
if (m_code_pos > 0x40000000)
{
LOG_FATAL(GENERAL, "LLVM: Out of code memory (size=0x%x, align=0x%x)", size, align);
return nullptr;
}
const u64 olda = ::align(old, 0x10000);
const u64 newa = ::align(m_code_pos, 0x10000);
if (olda != newa)
{
// Commit more memory
utils::memory_commit(m_code + olda, newa - olda, utils::protection::wx);
}
if (!sec_id && sec_name.empty())
{
// Special case: don't log
return m_code + pos;
}
LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%x, align=0x%x)", sec_id, sec_name.data(), m_code + pos, size, align);
return m_code + pos;
} }
u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override
{ {
std::lock_guard lock(s_mutex); return jit_runtime::alloc(size, align, false);
// Simple allocation
const u64 old = m_data_pos;
const u64 pos = ::align(m_data_pos, align);
m_data_pos = ::align(pos + size, align);
if (m_data_pos > 0x40000000)
{
LOG_FATAL(GENERAL, "LLVM: Out of data memory (size=0x%x, align=0x%x)", size, align);
return nullptr;
}
const u64 olda = ::align(old, 0x10000);
const u64 newa = ::align(m_data_pos, 0x10000);
if (olda != newa)
{
// Commit more memory
utils::memory_commit(m_data + olda, newa - olda);
}
LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%x, align=0x%x, %s)", sec_id, sec_name.data(), m_data + pos, size, align, is_ro ? "ro" : "rw");
return m_data + pos;
} }
bool finalizeMemory(std::string* = nullptr) override bool finalizeMemory(std::string* = nullptr) override
@ -662,12 +762,4 @@ u64 jit_compiler::get(const std::string& name)
return m_engine->getGlobalValueAddress(name); return m_engine->getGlobalValueAddress(name);
} }
// Allocates `size` bytes of code memory with 16-byte alignment through a
// throwaway MemoryManager2 (section id 0, empty section name)
u8* jit_compiler::alloc(u32 size)
{
	return MemoryManager2().allocateCodeSection(size, 16, 0, {});
}
#endif #endif

View File

@ -7,10 +7,32 @@
#include <array> #include <array>
#include <functional> #include <functional>
// ASMJIT runtime for emitting code in a single 2G region.
// The allocation entry points are static, so every instance hands out memory
// from the same region.
struct jit_runtime final : asmjit::HostRuntime
{
jit_runtime();
~jit_runtime() override;
// Allocate executable memory (16-byte aligned), relocate `code` into it and store
// the address in *dst. On failure *dst is set to nullptr and an error code is returned.
asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override;
// Do nothing (deallocation is delayed)
asmjit::Error _release(void* p) noexcept override;
// Allocate `size` bytes aligned to `align`: from the executable subrange when `exec`
// is true, from the data subrange otherwise. Returns nullptr if the subrange is full.
// alloc(0, 0, exec) allocates nothing and returns the base address of the subrange.
static u8* alloc(std::size_t size, uint align, bool exec = true) noexcept;
// Should be called at least once after global initialization.
// Takes a snapshot of everything allocated so far; finalize() restores it.
static void initialize();
// Deallocate all memory, then restore the snapshot taken by initialize()
static void finalize() noexcept;
};
namespace asmjit namespace asmjit
{ {
// Should only be used to build global functions // Should only be used to build global functions
JitRuntime& get_global_runtime(); ::jit_runtime& get_global_runtime();
// Emit xbegin and adjacent loop, return label at xbegin // Emit xbegin and adjacent loop, return label at xbegin
Label build_transaction_enter(X86Assembler& c, Label fallback); Label build_transaction_enter(X86Assembler& c, Label fallback);
@ -130,9 +152,6 @@ public:
// Get compiled function address // Get compiled function address
u64 get(const std::string& name); u64 get(const std::string& name);
// Allocate writable executable memory (alignment is assumed 16)
static u8* alloc(u32 size);
// Get CPU info // Get CPU info
static std::string cpu(const std::string& _cpu); static std::string cpu(const std::string& _cpu);

View File

@ -43,7 +43,6 @@ void spu_recompiler::init()
{ {
m_cache = fxm::get<spu_cache>(); m_cache = fxm::get<spu_cache>();
m_spurt = fxm::get_always<spu_runtime>(); m_spurt = fxm::get_always<spu_runtime>();
m_asmrt = m_spurt->get_asmjit_rt();
} }
} }
@ -105,7 +104,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
} }
CodeHolder code; CodeHolder code;
code.init(m_asmrt->getCodeInfo()); code.init(m_asmrt.getCodeInfo());
code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign; code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign;
X86Assembler compiler(&code); X86Assembler compiler(&code);
@ -842,11 +841,13 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
// Compile and get function address // Compile and get function address
spu_function_t fn; spu_function_t fn;
if (m_asmrt->add(&fn, &code)) if (m_asmrt.add(&fn, &code))
{ {
LOG_FATAL(SPU, "Failed to build a function"); LOG_FATAL(SPU, "Failed to build a function");
} }
m_spurt->add(*fn_info.first, fn);
if (g_cfg.core.spu_debug) if (g_cfg.core.spu_debug)
{ {
// Add ASMJIT logs // Add ASMJIT logs
@ -863,239 +864,6 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
m_cache->add(func); m_cache->add(func);
} }
lock.lock();
// Register function (possibly temporarily)
fn_location = fn;
// Generate a dispatcher (übertrampoline)
std::vector<u32> addrv{func[0]};
const auto beg = m_spurt->m_map.lower_bound(addrv);
addrv[0] += 4;
const auto _end = m_spurt->m_map.lower_bound(addrv);
const u32 size0 = std::distance(beg, _end);
if (size0 == 1)
{
m_spurt->m_dispatcher[func[0] / 4] = fn;
}
else
{
CodeHolder code;
code.init(m_asmrt->getCodeInfo());
X86Assembler compiler(&code);
this->c = &compiler;
struct work
{
u32 size;
u32 level;
Label label;
std::map<std::vector<u32>, spu_function_t>::iterator beg;
std::map<std::vector<u32>, spu_function_t>::iterator end;
};
std::vector<work> workload;
workload.reserve(size0);
workload.emplace_back();
workload.back().size = size0;
workload.back().level = 1;
workload.back().beg = beg;
workload.back().end = _end;
for (std::size_t i = 0; i < workload.size(); i++)
{
// Get copy of the workload info
work w = workload[i];
// Split range in two parts
auto it = w.beg;
auto it2 = w.beg;
u32 size1 = w.size / 2;
u32 size2 = w.size - size1;
std::advance(it2, w.size / 2);
while (true)
{
it = it2;
size1 = w.size - size2;
if (w.level >= w.beg->first.size())
{
// Cannot split: smallest function is a prefix of bigger ones (TODO)
break;
}
const u32 x1 = w.beg->first.at(w.level);
if (!x1)
{
// Cannot split: some functions contain holes at this level
w.level++;
continue;
}
// Adjust ranges (forward)
while (it != w.end && x1 == it->first.at(w.level))
{
it++;
size1++;
}
if (it == w.end)
{
// Cannot split: words are identical within the range at this level
w.level++;
}
else
{
size2 = w.size - size1;
break;
}
}
if (w.label.isValid())
{
c->align(kAlignCode, 16);
c->bind(w.label);
}
if (w.level >= w.beg->first.size())
{
// If functions cannot be compared, assume smallest function
LOG_ERROR(SPU, "Trampoline simplified at 0x%x (level=%u)", func[0], w.level);
c->jmp(imm_ptr(w.beg->second ? w.beg->second : &dispatch));
continue;
}
// Value for comparison
const u32 x = it->first.at(w.level);
// Adjust ranges (backward)
while (true)
{
it--;
if (it->first.at(w.level) != x)
{
it++;
break;
}
verify(HERE), it != w.beg;
size1--;
size2++;
}
c->cmp(x86::dword_ptr(*ls, start + (w.level - 1) * 4), x);
// Low subrange target label
Label label_below;
if (size1 == 1)
{
label_below = c->newLabel();
c->jb(label_below);
}
else
{
workload.push_back(w);
workload.back().end = it;
workload.back().size = size1;
workload.back().label = c->newLabel();
c->jb(workload.back().label);
}
// Second subrange target
const auto target = it->second ? it->second : &dispatch;
if (size2 == 1)
{
c->jmp(imm_ptr(target));
}
else
{
it2 = it;
// Select additional midrange for equality comparison
while (it2 != w.end && it2->first.at(w.level) == x)
{
size2--;
it2++;
}
if (it2 != w.end)
{
// High subrange target label
Label label_above;
if (size2 == 1)
{
label_above = c->newLabel();
c->ja(label_above);
}
else
{
workload.push_back(w);
workload.back().beg = it2;
workload.back().size = size2;
workload.back().label = c->newLabel();
c->ja(workload.back().label);
}
const u32 size3 = w.size - size1 - size2;
if (size3 == 1)
{
c->jmp(imm_ptr(target));
}
else
{
workload.push_back(w);
workload.back().beg = it;
workload.back().end = it2;
workload.back().size = size3;
workload.back().label = c->newLabel();
c->jmp(workload.back().label);
}
if (label_above.isValid())
{
c->bind(label_above);
c->jmp(imm_ptr(it2->second ? it2->second : &dispatch));
}
}
else
{
workload.push_back(w);
workload.back().beg = it;
workload.back().size = w.size - size1;
workload.back().label = c->newLabel();
c->jmp(workload.back().label);
}
}
if (label_below.isValid())
{
c->bind(label_below);
c->jmp(imm_ptr(w.beg->second ? w.beg->second : &dispatch));
}
}
spu_function_t tr;
if (m_asmrt->add(&tr, &code))
{
LOG_FATAL(SPU, "Failed to build a trampoline");
}
m_spurt->m_dispatcher[func[0] / 4] = tr;
}
lock.unlock();
m_spurt->m_cond.notify_all();
return fn; return fn;
} }

View File

@ -21,7 +21,7 @@ public:
private: private:
// ASMJIT runtime // ASMJIT runtime
asmjit::JitRuntime* m_asmrt; ::jit_runtime m_asmrt;
// emitter: // emitter:
asmjit::X86Assembler* c; asmjit::X86Assembler* c;

View File

@ -253,15 +253,6 @@ spu_runtime::spu_runtime()
LOG_SUCCESS(SPU, "SPU Recompiler Runtime initialized..."); LOG_SUCCESS(SPU, "SPU Recompiler Runtime initialized...");
} }
// Creates a new ASMJIT runtime, stores it in m_asmjit_rts (which owns it) and
// returns a non-owning pointer to it
asmjit::JitRuntime* spu_runtime::get_asmjit_rt()
{
	// The collection is modified below, so hold the runtime mutex for the duration
	std::lock_guard lock(m_mutex);

	return m_asmjit_rts.emplace_back(std::make_unique<asmjit::JitRuntime>()).get();
}
void spu_runtime::add(std::pair<const std::vector<u32>, spu_function_t>& where, spu_function_t compiled) void spu_runtime::add(std::pair<const std::vector<u32>, spu_function_t>& where, spu_function_t compiled)
{ {
std::unique_lock lock(m_mutex); std::unique_lock lock(m_mutex);
@ -289,11 +280,7 @@ void spu_runtime::add(std::pair<const std::vector<u32>, spu_function_t>& where,
else else
{ {
// Allocate some writable executable memory // Allocate some writable executable memory
#ifdef LLVM_AVAILABLE u8* const wxptr = jit_runtime::alloc(size0 * 20, 16);
const auto wxptr = jit_compiler::alloc(size0 * 20);
#else
u8* const wxptr = new u8[size0 * 20]; // dummy
#endif
// Raw assembly pointer // Raw assembly pointer
u8* raw = wxptr; u8* raw = wxptr;
@ -315,11 +302,7 @@ void spu_runtime::add(std::pair<const std::vector<u32>, spu_function_t>& where,
if (!target && !tr_dispatch) if (!target && !tr_dispatch)
{ {
// Generate a special trampoline with pause instruction // Generate a special trampoline with pause instruction
#ifdef LLVM_AVAILABLE u8* const trptr = jit_runtime::alloc(16, 16);
const auto trptr = jit_compiler::alloc(16);
#else
u8* const trptr = new u8[16]; // dummy
#endif
trptr[0] = 0xf3; // pause trptr[0] = 0xf3; // pause
trptr[1] = 0x90; trptr[1] = 0x90;
trptr[2] = 0xff; // jmp [rip] trptr[2] = 0xff; // jmp [rip]
@ -623,6 +606,7 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
else else
{ {
// Far jumps: extremely rare and disabled due to implementation complexity // Far jumps: extremely rare and disabled due to implementation complexity
LOG_ERROR(SPU, "Impossible far jump");
bytes[0] = 0x0f; // nop (8-byte form) bytes[0] = 0x0f; // nop (8-byte form)
bytes[1] = 0x1f; bytes[1] = 0x1f;
bytes[2] = 0x84; bytes[2] = 0x84;

View File

@ -51,18 +51,12 @@ public:
std::string m_cache_path; std::string m_cache_path;
private: private:
// Temporarily: asmjit runtime collection
std::deque<std::unique_ptr<asmjit::JitRuntime>> m_asmjit_rts;
// Trampoline to spu_recompiler_base::dispatch // Trampoline to spu_recompiler_base::dispatch
spu_function_t tr_dispatch = nullptr; spu_function_t tr_dispatch = nullptr;
public: public:
spu_runtime(); spu_runtime();
// Get new ASMJIT runtime
asmjit::JitRuntime* get_asmjit_rt();
// Add compiled function and generate trampoline if necessary // Add compiled function and generate trampoline if necessary
void add(std::pair<const std::vector<u32>, spu_function_t>& where, spu_function_t compiled); void add(std::pair<const std::vector<u32>, spu_function_t>& where, spu_function_t compiled);
}; };

View File

@ -39,6 +39,7 @@
#include "Utilities/GDBDebugServer.h" #include "Utilities/GDBDebugServer.h"
#include "Utilities/sysinfo.h" #include "Utilities/sysinfo.h"
#include "Utilities/JIT.h"
#if defined(_WIN32) || defined(HAVE_VULKAN) #if defined(_WIN32) || defined(HAVE_VULKAN)
#include "Emu/RSX/VK/VulkanAPI.h" #include "Emu/RSX/VK/VulkanAPI.h"
@ -274,6 +275,8 @@ void fmt_class_string<enter_button_assign>::format(std::string& out, u64 arg)
void Emulator::Init() void Emulator::Init()
{ {
jit_runtime::initialize();
if (!g_tty) if (!g_tty)
{ {
g_tty.open(fs::get_cache_dir() + "TTY.log", fs::rewrite + fs::append); g_tty.open(fs::get_cache_dir() + "TTY.log", fs::rewrite + fs::append);
@ -1537,6 +1540,7 @@ void Emulator::Stop(bool restart)
extern void jit_finalize(); extern void jit_finalize();
jit_finalize(); jit_finalize();
#endif #endif
jit_runtime::finalize();
if (restart) if (restart)
{ {