diff --git a/.gitmodules b/.gitmodules
index 2379e83933..fac2313a0f 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -5,7 +5,7 @@
[submodule "asmjit"]
path = 3rdparty/asmjit/asmjit
url = ../../asmjit/asmjit.git
- branch = oldstable
+ branch = aarch64
ignore = dirty
[submodule "llvm"]
path = llvm
diff --git a/3rdparty/asmjit/asmjit b/3rdparty/asmjit/asmjit
index 723f58581a..eae7197fce 160000
--- a/3rdparty/asmjit/asmjit
+++ b/3rdparty/asmjit/asmjit
@@ -1 +1 @@
-Subproject commit 723f58581afc0f4cb16ba13396ff77e425896847
+Subproject commit eae7197fce03fd52a6e71ca89207a88ce270fb1a
diff --git a/3rdparty/asmjit/asmjit.vcxproj b/3rdparty/asmjit/asmjit.vcxproj
index a786d83b84..6b1af24e38 100644
--- a/3rdparty/asmjit/asmjit.vcxproj
+++ b/3rdparty/asmjit/asmjit.vcxproj
@@ -11,66 +11,146 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
{AC40FF01-426E-4838-A317-66354CEFAE88}
diff --git a/3rdparty/asmjit/asmjit.vcxproj.filters b/3rdparty/asmjit/asmjit.vcxproj.filters
index 0505172685..ac62c220ba 100644
--- a/3rdparty/asmjit/asmjit.vcxproj.filters
+++ b/3rdparty/asmjit/asmjit.vcxproj.filters
@@ -1,65 +1,145 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp
index 7029850d25..ced02827e1 100644
--- a/Utilities/JIT.cpp
+++ b/Utilities/JIT.cpp
@@ -113,8 +113,32 @@ static u8* add_jit_memory(usz size, uint align)
return pointer + pos;
}
+const asmjit::Environment& jit_runtime_base::environment() const noexcept // Returns the environment used to initialize CodeHolder objects (see code.init(rt.environment()) in JIT.h)
+{
+ static const asmjit::Environment g_env = asmjit::Environment::host(); // Function-local static: built once from the host environment, then shared by every runtime instance
+
+ return g_env;
+}
+
+void* jit_runtime_base::_add(asmjit::CodeHolder* code) noexcept // Finalizes `code`, copies it into memory obtained from the virtual _alloc() and returns that address
+{
+ ensure(!code->flatten()); // Every asmjit step below is checked with ensure() instead of returning an asmjit::Error to the caller
+ ensure(!code->resolveUnresolvedLinks());
+ usz codeSize = ensure(code->codeSize()); // ensure() passes the value through; presumably a zero size trips the check - confirm against ensure()'s definition
+ auto p = ensure(this->_alloc(codeSize, 64)); // Requests 64-byte alignment from the runtime-specific allocator; the result is also checked by ensure()
+ ensure(!code->relocateToBase(uptr(p))); // Relocate against the final address before the bytes are copied
+
+ asmjit::VirtMem::ProtectJitReadWriteScope rwScope(p, codeSize); // Scope object covering the copy below - presumably makes [p, p + codeSize) writable for its lifetime; confirm in asmjit VirtMem docs
+
+ for (asmjit::Section* section : code->_sections)
+ {
+ std::memcpy(p + section->offset(), section->data(), section->bufferSize()); // Each section is placed at its own offset from the base pointer
+ }
+
+ return p;
+}
+
jit_runtime::jit_runtime()
- : HostRuntime()
{
}
@@ -122,38 +146,9 @@ jit_runtime::~jit_runtime()
{
}
-asmjit::Error jit_runtime::_add(void** dst, asmjit::CodeHolder* code) noexcept
+uchar* jit_runtime::_alloc(usz size, usz align) noexcept // Allocation hook used by jit_runtime_base::_add(); replaces the removed _add/_release overrides
{
- usz codeSize = code->getCodeSize();
- if (!codeSize) [[unlikely]]
- {
- *dst = nullptr;
- return asmjit::kErrorNoCodeGenerated;
- }
-
- void* p = jit_runtime::alloc(codeSize, 16);
- if (!p) [[unlikely]]
- {
- *dst = nullptr;
- return asmjit::kErrorNoVirtualMemory;
- }
-
- usz relocSize = code->relocate(p);
- if (!relocSize) [[unlikely]]
- {
- *dst = nullptr;
- return asmjit::kErrorInvalidState;
- }
-
- flush(p, relocSize);
- *dst = p;
-
- return asmjit::kErrorOk;
-}
-
-asmjit::Error jit_runtime::_release(void*) noexcept
-{
- return asmjit::kErrorOk;
+ return jit_runtime::alloc(size, align, true); // Forwards to the static allocator with exec = true (third parameter of alloc())
}
u8* jit_runtime::alloc(usz size, uint align, bool exec) noexcept
@@ -200,12 +195,12 @@ void jit_runtime::finalize() noexcept
std::memcpy(alloc(s_data_init.size(), 1, false), s_data_init.data(), s_data_init.size());
}
-asmjit::Runtime& asmjit::get_global_runtime()
+jit_runtime_base& asmjit::get_global_runtime()
{
// 16 MiB for internal needs
static constexpr u64 size = 1024 * 1024 * 16;
- struct custom_runtime final : asmjit::HostRuntime
+ struct custom_runtime final : jit_runtime_base
{
custom_runtime() noexcept
{
@@ -214,7 +209,7 @@ asmjit::Runtime& asmjit::get_global_runtime()
{
if (auto ptr = utils::memory_reserve(size, reinterpret_cast(addr)))
{
- m_pos.raw() = static_cast(ptr);
+ m_pos.raw() = static_cast(ptr);
break;
}
}
@@ -226,49 +221,26 @@ asmjit::Runtime& asmjit::get_global_runtime()
utils::memory_commit(m_pos, size, utils::protection::wx);
}
- custom_runtime(const custom_runtime&) = delete;
-
- custom_runtime& operator=(const custom_runtime&) = delete;
-
- asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override
+ uchar* _alloc(usz size, usz align) noexcept override // Carves `size` bytes out of the reserved region by advancing m_pos; returns nullptr when the request is rejected
{
- usz codeSize = code->getCodeSize();
- if (!codeSize) [[unlikely]]
+ return m_pos.atomic_op([&](uchar*& pos) -> uchar* // The lambda receives the current position by reference and may update it
{
- *dst = nullptr;
- return asmjit::kErrorNoCodeGenerated;
- }
+ const auto r = reinterpret_cast(utils::align(uptr(pos), align)); // Candidate start: current position rounded by utils::align to `align`
- void* p = m_pos.fetch_add(utils::align(codeSize, 64));
- if (!p || m_pos > m_max) [[unlikely]]
- {
- *dst = nullptr;
- jit_log.fatal("Out of memory (static asmjit)");
- return asmjit::kErrorNoVirtualMemory;
- }
+ if (r >= pos && r + size > pos && r + size <= m_max) // Accept only if neither the rounding nor the size wrapped around and the end stays within m_max
+ {
+ pos = r + size; // Commit: the next allocation starts right after this one
+ return r;
+ }
- usz relocSize = code->relocate(p);
- if (!relocSize) [[unlikely]]
- {
- *dst = nullptr;
- return asmjit::kErrorInvalidState;
- }
-
- flush(p, relocSize);
- *dst = p;
-
- return asmjit::kErrorOk;
- }
-
- asmjit::Error _release(void*) noexcept override
- {
- return asmjit::kErrorOk;
+ return nullptr; // Rejected: the position is left unchanged
+ });
}
private:
- atomic_t m_pos{};
+ atomic_t m_pos{};
- std::byte* m_max{};
+ uchar* m_max{};
};
// Magic static
@@ -276,37 +248,17 @@ asmjit::Runtime& asmjit::get_global_runtime()
return g_rt;
}
-asmjit::Error asmjit::inline_runtime::_add(void** dst, asmjit::CodeHolder* code) noexcept
+asmjit::inline_runtime::inline_runtime(uchar* data, usz size) // Constructor moved out of the header; it only records the caller-provided buffer
+ : m_data(data)
+ , m_size(size)
{
- usz codeSize = code->getCodeSize();
- if (!codeSize) [[unlikely]]
- {
- *dst = nullptr;
- return asmjit::kErrorNoCodeGenerated;
- }
-
- if (utils::align(codeSize, 4096) > m_size) [[unlikely]]
- {
- *dst = nullptr;
- return asmjit::kErrorNoVirtualMemory;
- }
-
- usz relocSize = code->relocate(m_data);
- if (!relocSize) [[unlikely]]
- {
- *dst = nullptr;
- return asmjit::kErrorInvalidState;
- }
-
- flush(m_data, relocSize);
- *dst = m_data;
-
- return asmjit::kErrorOk;
}
-asmjit::Error asmjit::inline_runtime::_release(void*) noexcept
+uchar* asmjit::inline_runtime::_alloc(usz size, usz align) noexcept // Hands out the single caller-provided buffer; nothing is allocated here
{
- return asmjit::kErrorOk;
+ ensure(align <= 4096); // m_data is returned as-is without rounding - presumably the buffer is page-aligned; confirm at the construction sites
+
+ return size <= m_size ? m_data : nullptr; // nullptr when the request is larger than the buffer
}
asmjit::inline_runtime::~inline_runtime()
@@ -397,19 +349,19 @@ static u64 make_null_function(const std::string& name)
using namespace asmjit;
// Build a "null" function that contains its name
- const auto func = build_function_asm("NULL", [&](X86Assembler& c, auto& args)
+ const auto func = build_function_asm("NULL", [&](x86::Assembler& c, auto& args)
{
Label data = c.newLabel();
c.lea(args[0], x86::qword_ptr(data, 0));
- c.jmp(imm_ptr(&null));
- c.align(kAlignCode, 16);
+ c.jmp(Imm(&null));
+ c.align(AlignMode::kCode, 16);
c.bind(data);
// Copy function name bytes
for (char ch : name)
c.db(ch);
c.db(0);
- c.align(kAlignData, 16);
+ c.align(AlignMode::kData, 16);
});
func_ptr = reinterpret_cast(func);
diff --git a/Utilities/JIT.h b/Utilities/JIT.h
index 1b111eba0e..68e0c8bd30 100644
--- a/Utilities/JIT.h
+++ b/Utilities/JIT.h
@@ -4,7 +4,9 @@
// Include asmjit with warnings ignored
#define ASMJIT_EMBED
-#define ASMJIT_DEBUG
+#define ASMJIT_STATIC
+#define ASMJIT_BUILD_DEBUG
+#undef Bool
#ifdef _MSC_VER
#pragma warning(push, 0)
@@ -49,17 +51,27 @@ enum class jit_class
spu_data,
};
+struct jit_runtime_base // Base type that jit_runtime, asmjit::inline_runtime and the global custom_runtime now derive from instead of asmjit::HostRuntime
+{
+ jit_runtime_base() noexcept = default;
+ virtual ~jit_runtime_base() = default;
+
+ jit_runtime_base(const jit_runtime_base&) = delete; // Copy construction and copy assignment are disabled for all runtimes
+ jit_runtime_base& operator=(const jit_runtime_base&) = delete;
+
+ const asmjit::Environment& environment() const noexcept; // Environment handed to CodeHolder::init() by the function builders
+ void* _add(asmjit::CodeHolder* code) noexcept; // Copies the code held by `code` into memory from _alloc() and returns its address
+ virtual uchar* _alloc(usz size, usz align) noexcept = 0; // Per-runtime allocation hook called by _add()
+};
+
// ASMJIT runtime for emitting code in a single 2G region
-struct jit_runtime final : asmjit::HostRuntime
+struct jit_runtime final : jit_runtime_base
{
jit_runtime();
~jit_runtime() override;
// Allocate executable memory
- asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override;
-
- // Do nothing (deallocation is delayed)
- asmjit::Error _release(void* p) noexcept override;
+ uchar* _alloc(usz size, usz align) noexcept override;
// Allocate memory
static u8* alloc(usz size, uint align, bool exec = true) noexcept;
@@ -74,35 +86,25 @@ struct jit_runtime final : asmjit::HostRuntime
namespace asmjit
{
// Should only be used to build global functions
- asmjit::Runtime& get_global_runtime();
+ jit_runtime_base& get_global_runtime();
// Don't use directly
- class inline_runtime : public HostRuntime
+ class inline_runtime : public jit_runtime_base // jit_runtime_base already deletes copy construction/assignment, so they are no longer deleted here
{
uchar* m_data;
usz m_size;
public:
- inline_runtime(const inline_runtime&) = delete;
-
- inline_runtime& operator=(const inline_runtime&) = delete;
-
- inline_runtime(uchar* data, usz size)
- : m_data(data)
- , m_size(size)
- {
- }
-
- asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override;
-
- asmjit::Error _release(void*) noexcept override;
+ inline_runtime(uchar* data, usz size); // Defined in JIT.cpp; stores the buffer pointer and its size
~inline_runtime();
+
+ uchar* _alloc(usz size, usz align) noexcept override; // Returns m_data when `size` fits in m_size, nullptr otherwise
};
// Emit xbegin and adjacent loop, return label at xbegin (don't use xabort please)
template
- [[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, F func)
+ [[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::x86::Assembler& c, asmjit::Label fallback, F func)
{
Label fall = c.newLabel();
Label begin = c.newLabel();
@@ -117,7 +119,7 @@ namespace asmjit
func();
// Other bad statuses are ignored regardless of repeat flag (TODO)
- c.align(kAlignCode, 16);
+ c.align(AlignMode::kCode, 16);
c.bind(begin);
return fall;
@@ -125,7 +127,7 @@ namespace asmjit
}
// Helper to spill RDX (EDX) register for RDTSC
- inline void build_swap_rdx_with(asmjit::X86Assembler& c, std::array& args, const asmjit::X86Gp& with)
+ inline void build_swap_rdx_with(asmjit::x86::Assembler& c, std::array& args, const asmjit::x86::Gp& with)
{
#ifdef _WIN32
c.xchg(args[1], with);
@@ -137,7 +139,7 @@ namespace asmjit
}
// Get full RDTSC value into chosen register (clobbers rax/rdx or saves only rax with other target)
- inline void build_get_tsc(asmjit::X86Assembler& c, const asmjit::X86Gp& to = asmjit::x86::rax)
+ inline void build_get_tsc(asmjit::x86::Assembler& c, const asmjit::x86::Gp& to = asmjit::x86::rax)
{
if (&to != &x86::rax && &to != &x86::rdx)
{
@@ -164,6 +166,8 @@ namespace asmjit
c.or_(to.r64(), x86::rdx);
}
}
+
+ using imm_ptr = Imm;
}
// Build runtime function with asmjit::X86Assembler
@@ -175,10 +179,9 @@ inline FT build_function_asm(std::string_view name, F&& builder)
auto& rt = get_global_runtime();
CodeHolder code;
- code.init(rt.getCodeInfo());
- code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign;
+ code.init(rt.environment());
- std::array args;
+ std::array args;
#ifdef _WIN32
args[0] = x86::rcx;
args[1] = x86::rdx;
@@ -191,19 +194,12 @@ inline FT build_function_asm(std::string_view name, F&& builder)
args[3] = x86::rcx;
#endif
- X86Assembler compiler(&code);
+ x86::Assembler compiler(&code);
+ compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
builder(std::ref(compiler), args);
- ensure(compiler.getLastError() == 0);
-
- FT result;
-
- if (rt.add(&result, &code))
- {
- return nullptr;
- }
-
- jit_announce(result, code.getCodeSize(), name);
- return result;
+ const auto result = rt._add(&code);
+ jit_announce(result, code.codeSize(), name);
+ return reinterpret_cast(uptr(result));
}
#ifdef __APPLE__
@@ -253,10 +249,9 @@ public:
inline_runtime rt(m_data, Size);
CodeHolder code;
- code.init(rt.getCodeInfo());
- code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign;
+ code.init(rt.environment());
- std::array args;
+ std::array args;
#ifdef _WIN32
args[0] = x86::rcx;
args[1] = x86::rdx;
@@ -269,19 +264,10 @@ public:
args[3] = x86::rcx;
#endif
- X86Assembler compiler(&code);
+ x86::Assembler compiler(&code);
+ compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
builder(std::ref(compiler), args);
-
- FT result;
-
- if (compiler.getLastError() || rt.add(&result, &code))
- {
- ensure(false);
- }
- else
- {
- jit_announce(result, code.getCodeSize(), name);
- }
+ jit_announce(rt._add(&code), code.codeSize(), name);
}
operator FT() const noexcept
diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp
index 077464845c..eb05a235b5 100644
--- a/Utilities/Thread.cpp
+++ b/Utilities/Thread.cpp
@@ -2190,7 +2190,7 @@ thread_base::native_entry thread_base::finalize(u64 _self) noexcept
thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base* _base))
{
- return build_function_asm("thread_base_trampoline", [&](asmjit::X86Assembler& c, auto& args)
+ return build_function_asm("thread_base_trampoline", [&](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@@ -2203,7 +2203,7 @@ thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base*
// Call finalize, return if zero
c.mov(args[0], x86::rax);
- c.call(imm_ptr(finalize));
+ c.call(imm_ptr(static_cast(&finalize)));
c.test(x86::rax, x86::rax);
c.jz(_ret);
diff --git a/rpcs3/Emu/Cell/PPUFunction.cpp b/rpcs3/Emu/Cell/PPUFunction.cpp
index 8c12152836..853adcd0f0 100644
--- a/rpcs3/Emu/Cell/PPUFunction.cpp
+++ b/rpcs3/Emu/Cell/PPUFunction.cpp
@@ -1910,14 +1910,14 @@ std::vector& ppu_function_manager::access(bool ghc)
static std::vector list_ghc
{
- build_function_asm("ppu_unregistered", [](asmjit::X86Assembler& c, auto& args)
+ build_function_asm("ppu_unregistered", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
c.mov(args[0], x86::rbp);
c.jmp(imm_ptr(list[0]));
}),
- build_function_asm("ppu_return", [](asmjit::X86Assembler& c, auto& args)
+ build_function_asm("ppu_return", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@@ -1937,7 +1937,7 @@ u32 ppu_function_manager::add_function(ppu_function_t function)
list.push_back(function);
// Generate trampoline
- list2.push_back(build_function_asm("ppu_trampolinea", [&](asmjit::X86Assembler& c, auto& args)
+ list2.push_back(build_function_asm("ppu_trampolinea", [&](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 10fefd6a2a..ed7a265155 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -147,7 +147,7 @@ static bool ppu_break(ppu_thread& ppu, ppu_opcode_t op);
extern void do_cell_atomic_128_store(u32 addr, const void* to_write);
-const auto ppu_gateway = built_function("ppu_gateway", [](asmjit::X86Assembler& c, auto& args)
+const auto ppu_gateway = built_function("ppu_gateway", [](asmjit::x86::Assembler& c, auto& args)
{
// Gateway for PPU, converts from native to GHC calling convention, also saves RSP value for escape
using namespace asmjit;
@@ -248,7 +248,7 @@ const auto ppu_gateway = built_function("ppu_gateway", [](
c.ret();
});
-const extern auto ppu_escape = build_function_asm("ppu_escape", [](asmjit::X86Assembler& c, auto& args)
+const extern auto ppu_escape = build_function_asm("ppu_escape", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@@ -262,7 +262,7 @@ const extern auto ppu_escape = build_function_asm("ppu_esc
void ppu_recompiler_fallback(ppu_thread& ppu);
-const auto ppu_recompiler_fallback_ghc = build_function_asm("ppu_trampolineb", [](asmjit::X86Assembler& c, auto& args)
+const auto ppu_recompiler_fallback_ghc = build_function_asm("ppu_trampolineb", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@@ -1817,7 +1817,7 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr)
return ppu_load_acquire_reservation(ppu, addr);
}
-const auto ppu_stcx_accurate_tx = built_function("ppu_stcx_accurate_tx", [](asmjit::X86Assembler& c, auto& args)
+const auto ppu_stcx_accurate_tx = built_function("ppu_stcx_accurate_tx", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@@ -1859,10 +1859,10 @@ const auto ppu_stcx_accurate_tx = built_functionc = &compiler;
if (g_cfg.core.spu_debug && !add_loc->logged.exchange(1))
@@ -137,7 +137,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
this->qw1 = &x86::rcx;
#endif
- const std::array vec_vars
+ const std::array vec_vars
{
&x86::xmm0,
&x86::xmm1,
@@ -333,20 +333,20 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
});
c->setExtraReg(x86::k7);
- c->z().vmovdqa32(x86::zmm0, x86::zword_ptr(*qw1, j - ls_off));
+ c->z().vmovdqa32(x86::zmm0, x86::zmmword_ptr(*qw1, j - ls_off));
}
else
{
- c->vmovdqa32(x86::zmm0, x86::zword_ptr(*qw1, j - ls_off));
+ c->vmovdqa32(x86::zmm0, x86::zmmword_ptr(*qw1, j - ls_off));
}
if (first)
{
- c->vpcmpud(x86::k1, x86::zmm0, x86::zword_ptr(x86::rax, code_off), 4);
+ c->vpcmpud(x86::k1, x86::zmm0, x86::zmmword_ptr(x86::rax, code_off), 4);
}
else
{
- c->vpcmpud(x86::k3, x86::zmm0, x86::zword_ptr(x86::rax, code_off), 4);
+ c->vpcmpud(x86::k3, x86::zmm0, x86::zmmword_ptr(x86::rax, code_off), 4);
c->korw(x86::k1, x86::k3, x86::k1);
}
@@ -378,15 +378,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
if (cmask == 0xff)
{
- c->vmovdqa(x86::ymm0, x86::yword_ptr(*ls, starta));
+ c->vmovdqa(x86::ymm0, x86::ymmword_ptr(*ls, starta));
}
else
{
c->vpxor(x86::ymm0, x86::ymm0, x86::ymm0);
- c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask);
+ c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask);
}
- c->vpxor(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
+ c->vpxor(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
c->vptest(x86::ymm0, x86::ymm0);
c->jnz(label_diff);
@@ -401,9 +401,9 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
const u32 cmask1 = get_code_mask(starta + 32, enda);
c->vpxor(x86::ymm0, x86::ymm0, x86::ymm0);
- c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask0);
- c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta + 32), cmask1);
- c->vpxor(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
+ c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask0);
+ c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta + 32), cmask1);
+ c->vpxor(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
c->vptest(x86::ymm0, x86::ymm0);
c->jnz(label_diff);
@@ -453,21 +453,21 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
xmm2z = true;
}
- c->vpblendd(x86::ymm1, x86::ymm2, x86::yword_ptr(*qw1, j - ls_off), cmask);
+ c->vpblendd(x86::ymm1, x86::ymm2, x86::ymmword_ptr(*qw1, j - ls_off), cmask);
}
else
{
- c->vmovdqa32(x86::ymm1, x86::yword_ptr(*qw1, j - ls_off));
+ c->vmovdqa32(x86::ymm1, x86::ymmword_ptr(*qw1, j - ls_off));
}
// Perform bitwise comparison and accumulate
if (first)
{
- c->vpxor(x86::ymm0, x86::ymm1, x86::yword_ptr(x86::rax, code_off));
+ c->vpxor(x86::ymm0, x86::ymm1, x86::ymmword_ptr(x86::rax, code_off));
}
else
{
- c->vpternlogd(x86::ymm0, x86::ymm1, x86::yword_ptr(x86::rax, code_off), 0xf6 /* orAxorBC */);
+ c->vpternlogd(x86::ymm0, x86::ymm1, x86::ymmword_ptr(x86::rax, code_off), 0xf6 /* orAxorBC */);
}
for (u32 i = j; i < j + 32; i += 4)
@@ -500,15 +500,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
if (cmask == 0xff)
{
- c->vmovaps(x86::ymm0, x86::yword_ptr(*ls, starta));
+ c->vmovaps(x86::ymm0, x86::ymmword_ptr(*ls, starta));
}
else
{
c->vxorps(x86::ymm0, x86::ymm0, x86::ymm0);
- c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask);
+ c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask);
}
- c->vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
+ c->vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
c->vptest(x86::ymm0, x86::ymm0);
c->jnz(label_diff);
@@ -523,9 +523,9 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
const u32 cmask1 = get_code_mask(starta + 32, enda);
c->vxorps(x86::ymm0, x86::ymm0, x86::ymm0);
- c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask0);
- c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta + 32), cmask1);
- c->vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
+ c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask0);
+ c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta + 32), cmask1);
+ c->vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
c->vptest(x86::ymm0, x86::ymm0);
c->jnz(label_diff);
@@ -586,21 +586,21 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
xmm2z = true;
}
- c->vblendps(reg1, x86::ymm2, x86::yword_ptr(*ls, j - ls_off), cmask);
+ c->vblendps(reg1, x86::ymm2, x86::ymmword_ptr(*ls, j - ls_off), cmask);
}
else
{
- c->vmovaps(reg1, x86::yword_ptr(*ls, j - ls_off));
+ c->vmovaps(reg1, x86::ymmword_ptr(*ls, j - ls_off));
}
// Perform bitwise comparison and accumulate
if (!order++)
{
- c->vxorps(reg0, reg1, x86::yword_ptr(x86::rax, code_off));
+ c->vxorps(reg0, reg1, x86::ymmword_ptr(x86::rax, code_off));
}
else
{
- c->vxorps(reg1, reg1, x86::yword_ptr(x86::rax, code_off));
+ c->vxorps(reg1, reg1, x86::ymmword_ptr(x86::rax, code_off));
c->vorps(reg0, reg1, reg0);
}
@@ -800,7 +800,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
{
if (m_preds.count(pos))
{
- c->align(kAlignCode, 16);
+ c->align(AlignMode::kCode, 16);
}
c->bind(found->second);
@@ -832,7 +832,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
}
// Simply return
- c->align(kAlignCode, 16);
+ c->align(AlignMode::kCode, 16);
c->bind(label_stop);
c->add(x86::rsp, 0x28);
c->ret();
@@ -840,7 +840,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
if (g_cfg.core.spu_verification)
{
// Dispatch
- c->align(kAlignCode, 16);
+ c->align(AlignMode::kCode, 16);
c->bind(label_diff);
c->inc(SPU_OFF_64(block_failure));
c->add(x86::rsp, 0x28);
@@ -855,7 +855,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
// Build instruction dispatch table
if (instr_table.isValid())
{
- c->align(kAlignData, 8);
+ c->align(AlignMode::kData, 8);
c->bind(instr_table);
// Get actual instruction table bounds
@@ -877,7 +877,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
}
}
- c->align(kAlignData, words_align);
+ c->align(AlignMode::kData, words_align);
c->bind(label_code);
for (u32 d : words)
c->dd(d);
@@ -893,20 +893,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
xmm_consts.clear();
// Compile and get function address
- spu_function_t fn;
+ spu_function_t fn = reinterpret_cast(m_asmrt._add(&code));
- if (auto err = m_asmrt.add(&fn, &code))
+ if (!fn)
{
- if (err == asmjit::ErrorCode::kErrorNoVirtualMemory)
- {
- return nullptr;
- }
-
spu_log.fatal("Failed to build a function");
}
else
{
- jit_announce(fn, code.getCodeSize(), fmt::format("spu-b-%s", fmt::base57(be_t(m_hash_start))));
+ jit_announce(fn, code.codeSize(), fmt::format("spu-b-%s", fmt::base57(be_t(m_hash_start))));
}
// Install compiled function pointer
@@ -927,7 +922,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
{
// Add ASMJIT logs
fmt::append(log, "Address: %p\n\n", fn);
- log += logger.getString();
+ log.append(logger._content.data(), logger._content.size());
log += "\n\n\n";
// Append log file
@@ -962,7 +957,7 @@ spu_recompiler::XmmLink spu_recompiler::XmmGet(s8 reg, XmmType type) // get xmm
return result;
}
-inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data)
+inline asmjit::x86::Mem spu_recompiler::XmmConst(const v128& data)
{
// Find existing const
auto& xmm_label = xmm_consts[std::make_pair(data._u64[0], data._u64[1])];
@@ -973,7 +968,7 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data)
consts.emplace_back([=, this]
{
- c->align(asmjit::kAlignData, 16);
+ c->align(asmjit::AlignMode::kData, 16);
c->bind(xmm_label);
c->dq(data._u64[0]);
c->dq(data._u64[1]);
@@ -983,17 +978,17 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data)
return asmjit::x86::oword_ptr(xmm_label);
}
-inline asmjit::X86Mem spu_recompiler::XmmConst(const __m128& data)
+inline asmjit::x86::Mem spu_recompiler::XmmConst(const __m128& data)
{
return XmmConst(v128::fromF(data));
}
-inline asmjit::X86Mem spu_recompiler::XmmConst(const __m128i& data)
+inline asmjit::x86::Mem spu_recompiler::XmmConst(const __m128i& data)
{
return XmmConst(v128::fromV(data));
}
-inline asmjit::X86Mem spu_recompiler::get_pc(u32 addr)
+inline asmjit::x86::Mem spu_recompiler::get_pc(u32 addr)
{
return asmjit::x86::qword_ptr(*pc0, addr - m_base);
}
@@ -1108,7 +1103,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
c->mov(SPU_OFF_32(pc), *addr);
c->mov(*arg0, *cpu);
c->add(x86::rsp, 0x28);
- c->jmp(imm_ptr(_throw));
+ c->jmp(imm_ptr(+_throw));
// Save addr in srr0 and disable interrupts
c->bind(intr);
@@ -1123,7 +1118,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
c->test(*addr, 0xff80007f);
c->cmovnz(*addr, rip->r32());
c->shr(*addr, 5);
- c->align(kAlignCode, 16);
+ c->align(AlignMode::kCode, 16);
c->bind(no_intr);
}
@@ -1210,7 +1205,7 @@ void spu_recompiler::branch_set_link(u32 target)
after.emplace_back([=, this, target = local->second]
{
// Clear return info after use
- c->align(kAlignCode, 16);
+ c->align(AlignMode::kCode, 16);
c->bind(ret);
c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
c->and_(qw1->r32(), 0x3fff0);
@@ -1246,9 +1241,9 @@ void spu_recompiler::fall(spu_opcode_t op)
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
c->mov(arg1->r32(), op.opcode);
- c->mov(*qw0, asmjit::imm_ptr(asmjit::Internal::ptr_cast(g_spu_interpreter_fast.decode(op.opcode))));
+ c->mov(*qw0, asmjit::imm_ptr(g_spu_interpreter_fast.decode(op.opcode)));
c->mov(*arg0, *cpu);
- c->call(asmjit::imm_ptr(gate));
+ c->call(asmjit::imm_ptr(+gate));
}
void spu_recompiler::UNK(spu_opcode_t op)
@@ -1266,7 +1261,7 @@ void spu_recompiler::UNK(spu_opcode_t op)
c->mov(arg1->r32(), op.opcode);
c->mov(*arg0, *cpu);
c->add(asmjit::x86::rsp, 0x28);
- c->jmp(asmjit::imm_ptr(gate));
+ c->jmp(asmjit::imm_ptr(+gate));
m_pos = -1;
}
@@ -1295,7 +1290,7 @@ void spu_recompiler::STOP(spu_opcode_t op)
c->mov(arg1->r32(), op.opcode & 0x3fff);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_stop));
- c->align(kAlignCode, 16);
+ c->align(AlignMode::kCode, 16);
c->bind(ret);
c->add(SPU_OFF_32(pc), 4);
@@ -1362,14 +1357,14 @@ void spu_recompiler::RDCH(spu_opcode_t op)
{
using namespace asmjit;
- auto read_channel = [&](X86Mem channel_ptr, bool sync = true)
+ auto read_channel = [&](x86::Mem channel_ptr, bool sync = true)
{
Label wait = c->newLabel();
Label again = c->newLabel();
Label ret = c->newLabel();
c->mov(addr->r64(), channel_ptr);
c->xor_(qw0->r32(), qw0->r32());
- c->align(kAlignCode, 16);
+ c->align(AlignMode::kCode, 16);
c->bind(again);
c->bt(addr->r64(), spu_channel::off_count);
c->jnc(wait);
@@ -1380,7 +1375,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
- c->mov(arg1->r32(), op.ra);
+ c->mov(arg1->r32(), +op.ra);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_rdch));
c->jmp(ret);
@@ -1482,13 +1477,12 @@ void spu_recompiler::RDCH(spu_opcode_t op)
*_res = v128::from32r(out);
};
- using ftype = void (*)(spu_thread*, v128*);
c->lea(addr->r64(), get_pc(m_pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
c->lea(*arg1, SPU_OFF_128(gpr, op.rt));
c->mov(*arg0, *cpu);
- c->call(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr(sub1) : asmjit::imm_ptr(sub2));
+ c->call(asmjit::imm_ptr(g_cfg.core.spu_loop_detection ? +sub1 : +sub2));
return;
}
case SPU_RdEventMask:
@@ -1523,7 +1517,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(m_pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
- c->mov(arg1->r32(), op.ra);
+ c->mov(arg1->r32(), +op.ra);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_rdch));
c->movd(x86::xmm0, *addr);
@@ -1540,7 +1534,7 @@ void spu_recompiler::RCHCNT(spu_opcode_t op)
{
using namespace asmjit;
- auto ch_cnt = [&](X86Mem channel_ptr, bool inv = false)
+ auto ch_cnt = [&](x86::Mem channel_ptr, bool inv = false)
{
// Load channel count
const XmmLink& vr = XmmAlloc();
@@ -1631,7 +1625,7 @@ void spu_recompiler::RCHCNT(spu_opcode_t op)
c->lea(addr->r64(), get_pc(m_pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
- c->mov(arg1->r32(), op.ra);
+ c->mov(arg1->r32(), +op.ra);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_rchcnt));
break;
@@ -2320,7 +2314,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
Label ret = c->newLabel();
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(addr->r64(), SPU_OFF_64(ch_out_mbox));
- c->align(kAlignCode, 16);
+ c->align(AlignMode::kCode, 16);
c->bind(again);
c->mov(qw0->r32(), qw0->r32());
c->bt(addr->r64(), spu_channel::off_count);
@@ -2332,7 +2326,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
- c->mov(arg1->r32(), op.ra);
+ c->mov(arg1->r32(), +op.ra);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_wrch));
c->jmp(ret);
@@ -2359,7 +2353,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
- c->lea(arg1->r32(), MFC_WrTagMask);
+ c->mov(arg1->r32(), MFC_WrTagMask);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_wrch));
c->jmp(ret);
@@ -2383,7 +2377,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
- c->mov(arg1->r32(), op.ra);
+ c->mov(arg1->r32(), +op.ra);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_wrch));
c->jmp(ret);
@@ -2476,7 +2470,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
c->btr(SPU_OFF_32(ch_stall_mask), arg1->r32());
c->jnc(ret);
c->mov(*arg0, *cpu);
- c->call(imm_ptr(sub));
+ c->call(imm_ptr(+sub));
c->bind(ret);
return;
}
@@ -2488,7 +2482,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
};
c->mov(*arg0, *cpu);
- c->call(imm_ptr(sub));
+ c->call(imm_ptr(+sub));
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(SPU_OFF_32(ch_dec_value), qw0->r32());
return;
@@ -2515,7 +2509,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(m_pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
- c->mov(arg1->r32(), op.ra);
+ c->mov(arg1->r32(), +op.ra);
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_wrch));
@@ -2529,7 +2523,7 @@ void spu_recompiler::BIZ(spu_opcode_t op)
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
{
- c->align(asmjit::kAlignCode, 16);
+ c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
@@ -2545,7 +2539,7 @@ void spu_recompiler::BINZ(spu_opcode_t op)
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
{
- c->align(asmjit::kAlignCode, 16);
+ c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
@@ -2561,7 +2555,7 @@ void spu_recompiler::BIHZ(spu_opcode_t op)
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
{
- c->align(asmjit::kAlignCode, 16);
+ c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
@@ -2577,7 +2571,7 @@ void spu_recompiler::BIHNZ(spu_opcode_t op)
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
{
- c->align(asmjit::kAlignCode, 16);
+ c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
@@ -2669,13 +2663,13 @@ void spu_recompiler::BISLED(spu_opcode_t op)
asmjit::Label branch_label = c->newLabel();
c->mov(*arg0, *cpu);
- c->call(asmjit::imm_ptr(get_events));
+ c->call(asmjit::imm_ptr(+get_events));
c->test(*addr, 1);
c->jne(branch_label);
after.emplace_back([=, this]()
{
- c->align(asmjit::kAlignCode, 16);
+ c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
c->and_(*addr, 0x3fffc);
branch_indirect(op, true, false);
@@ -2895,7 +2889,7 @@ void spu_recompiler::CDX(spu_opcode_t op)
const XmmLink& vr = XmmAlloc();
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
- c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
+ c->mov(*qw0, asmjit::Imm(0x0001020304050607ull));
c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0);
}
@@ -3028,7 +3022,7 @@ void spu_recompiler::CBD(spu_opcode_t op)
//}
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
- if (op.i7) c->add(*addr, op.i7);
+ if (op.i7) c->add(*addr, +op.i7);
c->not_(*addr);
c->and_(*addr, 0xf);
@@ -3052,7 +3046,7 @@ void spu_recompiler::CHD(spu_opcode_t op)
//}
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
- if (op.i7) c->add(*addr, op.i7);
+ if (op.i7) c->add(*addr, +op.i7);
c->not_(*addr);
c->and_(*addr, 0xe);
@@ -3076,7 +3070,7 @@ void spu_recompiler::CWD(spu_opcode_t op)
//}
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
- if (op.i7) c->add(*addr, op.i7);
+ if (op.i7) c->add(*addr, +op.i7);
c->not_(*addr);
c->and_(*addr, 0xc);
@@ -3100,14 +3094,14 @@ void spu_recompiler::CDD(spu_opcode_t op)
//}
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
- if (op.i7) c->add(*addr, op.i7);
+ if (op.i7) c->add(*addr, +op.i7);
c->not_(*addr);
c->and_(*addr, 0x8);
const XmmLink& vr = XmmAlloc();
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
- c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
+ c->mov(*qw0, asmjit::Imm(0x0001020304050607ull));
c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0);
}
@@ -4048,7 +4042,7 @@ void spu_recompiler::BRZ(spu_opcode_t op)
after.emplace_back([=, this]()
{
- c->align(asmjit::kAlignCode, 16);
+ c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
branch_fixed(target);
});
@@ -4088,7 +4082,7 @@ void spu_recompiler::BRNZ(spu_opcode_t op)
after.emplace_back([=, this]()
{
- c->align(asmjit::kAlignCode, 16);
+ c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
branch_fixed(target);
});
@@ -4109,7 +4103,7 @@ void spu_recompiler::BRHZ(spu_opcode_t op)
after.emplace_back([=, this]()
{
- c->align(asmjit::kAlignCode, 16);
+ c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
branch_fixed(target);
});
@@ -4130,7 +4124,7 @@ void spu_recompiler::BRHNZ(spu_opcode_t op)
after.emplace_back([=, this]()
{
- c->align(asmjit::kAlignCode, 16);
+ c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
branch_fixed(target);
});
@@ -4459,7 +4453,7 @@ void spu_recompiler::CGTBI(spu_opcode_t op)
void spu_recompiler::HGTI(spu_opcode_t op)
{
- c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_s32, 3), op.si10);
+ c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_s32, 3), +op.si10);
asmjit::Label label = c->newLabel();
asmjit::Label ret = c->newLabel();
@@ -4503,7 +4497,7 @@ void spu_recompiler::CLGTBI(spu_opcode_t op)
void spu_recompiler::HLGTI(spu_opcode_t op)
{
- c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), op.si10);
+ c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), +op.si10);
asmjit::Label label = c->newLabel();
asmjit::Label ret = c->newLabel();
@@ -4565,7 +4559,7 @@ void spu_recompiler::CEQBI(spu_opcode_t op)
void spu_recompiler::HEQI(spu_opcode_t op)
{
- c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), op.si10);
+ c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), +op.si10);
asmjit::Label label = c->newLabel();
asmjit::Label ret = c->newLabel();
@@ -4636,12 +4630,12 @@ void spu_recompiler::SHUFB(spu_opcode_t op)
c->vpcmpub(asmjit::x86::k1, vc, XmmConst(_mm_set1_epi8(-0x40)), 5 /* GE */);
c->vpxor(vm, vc, XmmConst(_mm_set1_epi8(0xf)));
c->setExtraReg(asmjit::x86::k1);
- c->z().vblendmb(vc, vc, XmmConst(_mm_set1_epi8(-1))); // {k1}
+ c->z().vpblendmb(vc, vc, XmmConst(_mm_set1_epi8(-1))); // {k1}
c->vpcmpub(asmjit::x86::k2, vm, XmmConst(_mm_set1_epi8(-0x20)), 5 /* GE */);
c->vptestmb(asmjit::x86::k1, vm, XmmConst(_mm_set1_epi8(0x10)));
c->vpshufb(vt, va, vm);
c->setExtraReg(asmjit::x86::k2);
- c->z().vblendmb(va, va, XmmConst(_mm_set1_epi8(0x7f))); // {k2}
+ c->z().vpblendmb(va, va, XmmConst(_mm_set1_epi8(0x7f))); // {k2}
c->setExtraReg(asmjit::x86::k1);
c->vpshufb(vt, vb, vm); // {k1}
c->vpternlogd(vt, va, vc, 0xf6 /* orAxorBC */);
diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h
index 7d5073d02f..f221b33ab0 100644
--- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h
+++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h
@@ -24,23 +24,23 @@ private:
u32 m_base;
// emitter:
- asmjit::X86Assembler* c;
+ asmjit::x86::Assembler* c;
// arguments:
- const asmjit::X86Gp* cpu;
- const asmjit::X86Gp* ls;
- const asmjit::X86Gp* rip;
- const asmjit::X86Gp* pc0;
+ const asmjit::x86::Gp* cpu;
+ const asmjit::x86::Gp* ls;
+ const asmjit::x86::Gp* rip;
+ const asmjit::x86::Gp* pc0;
// Native args or temp variables:
- const asmjit::X86Gp* arg0;
- const asmjit::X86Gp* arg1;
- const asmjit::X86Gp* qw0;
- const asmjit::X86Gp* qw1;
+ const asmjit::x86::Gp* arg0;
+ const asmjit::x86::Gp* arg1;
+ const asmjit::x86::Gp* qw0;
+ const asmjit::x86::Gp* qw1;
// temporary:
- const asmjit::X86Gp* addr;
- std::array vec;
+ const asmjit::x86::Gp* addr;
+ std::array vec;
// workload for the end of function:
std::vector> after;
@@ -60,10 +60,10 @@ private:
class XmmLink
{
- const asmjit::X86Xmm* m_var;
+ const asmjit::x86::Xmm* m_var;
public:
- XmmLink(const asmjit::X86Xmm*& xmm_var)
+ XmmLink(const asmjit::x86::Xmm*& xmm_var)
: m_var(xmm_var)
{
xmm_var = nullptr;
@@ -71,7 +71,7 @@ private:
XmmLink(XmmLink&&) = default; // MoveConstructible + delete copy constructor and copy/move operators
- operator const asmjit::X86Xmm&() const
+ operator const asmjit::x86::Xmm&() const
{
return *m_var;
}
@@ -87,11 +87,11 @@ private:
XmmLink XmmAlloc();
XmmLink XmmGet(s8 reg, XmmType type);
- asmjit::X86Mem XmmConst(const v128& data);
- asmjit::X86Mem XmmConst(const __m128& data);
- asmjit::X86Mem XmmConst(const __m128i& data);
+ asmjit::x86::Mem XmmConst(const v128& data);
+ asmjit::x86::Mem XmmConst(const __m128& data);
+ asmjit::x86::Mem XmmConst(const __m128i& data);
- asmjit::X86Mem get_pc(u32 addr);
+ asmjit::x86::Mem get_pc(u32 addr);
void branch_fixed(u32 target, bool absolute = false);
void branch_indirect(spu_opcode_t op, bool jt = false, bool ret = true);
void branch_set_link(u32 target);
diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp
index 9b413421f1..0d4b72c508 100644
--- a/rpcs3/Emu/Cell/SPUInterpreter.cpp
+++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp
@@ -43,7 +43,7 @@ namespace asmjit
static constexpr spu_opcode_t s_op{};
template
- static void build_spu_gpr_load(X86Assembler& c, X86Xmm x, const bf_t&, bool store = false)
+ static void build_spu_gpr_load(x86::Assembler& c, x86::Xmm x, const bf_t&, bool store = false)
{
static_assert(N == 7, "Invalid bitfield");
@@ -87,7 +87,7 @@ namespace asmjit
}
template
- static void build_spu_gpr_store(X86Assembler& c, X86Xmm x, const bf_t&, bool store = true)
+ static void build_spu_gpr_store(x86::Assembler& c, x86::Xmm x, const bf_t&, bool store = true)
{
build_spu_gpr_load(c, x, bf_t{}, store);
}
@@ -1733,7 +1733,7 @@ bool spu_interpreter::SHUFB(spu_thread& spu, spu_opcode_t op)
return true;
}
-const spu_inter_func_t optimized_shufb = build_function_asm("spu_shufb", [](asmjit::X86Assembler& c, auto& /*args*/)
+const spu_inter_func_t optimized_shufb = build_function_asm("spu_shufb", [](asmjit::x86::Assembler& c, auto& /*args*/)
{
using namespace asmjit;
@@ -1793,7 +1793,7 @@ const spu_inter_func_t optimized_shufb = build_function_asm("s
c.mov(x86::eax, 1);
c.ret();
- c.align(kAlignData, 16);
+ c.align(AlignMode::kData, 16);
c.bind(xc0);
c.dq(0xc0c0c0c0c0c0c0c0);
c.dq(0xc0c0c0c0c0c0c0c0);
diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp
index 0eaf6c76f3..dddb48821f 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPURecompiler.cpp
@@ -160,7 +160,7 @@ DECLARE(spu_runtime::tr_all) = []
return reinterpret_cast(trptr);
}();
-DECLARE(spu_runtime::g_gateway) = built_function("spu_gateway", [](asmjit::X86Assembler& c, auto& args)
+DECLARE(spu_runtime::g_gateway) = built_function("spu_gateway", [](asmjit::x86::Assembler& c, auto& args)
{
// Gateway for SPU dispatcher, converts from native to GHC calling convention, also saves RSP value for spu_escape
using namespace asmjit;
@@ -249,7 +249,7 @@ DECLARE(spu_runtime::g_gateway) = built_function("spu_gateway",
c.ret();
});
-DECLARE(spu_runtime::g_escape) = build_function_asm("spu_escape", [](asmjit::X86Assembler& c, auto& args)
+DECLARE(spu_runtime::g_escape) = build_function_asm("spu_escape", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@@ -261,7 +261,7 @@ DECLARE(spu_runtime::g_escape) = build_function_asm("spu_e
c.ret();
});
-DECLARE(spu_runtime::g_tail_escape) = build_function_asm("spu_tail_escape", [](asmjit::X86Assembler& c, auto& args)
+DECLARE(spu_runtime::g_tail_escape) = build_function_asm("spu_tail_escape", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 9e0cf8d653..66d28ded23 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -405,7 +405,7 @@ std::array op_branch_targets(u32 pc, spu_opcode_t op)
return res;
}
-const auto spu_putllc_tx = built_function("spu_putllc_tx", [](asmjit::X86Assembler& c, auto& args)
+const auto spu_putllc_tx = built_function("spu_putllc_tx", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@@ -455,14 +455,14 @@ const auto spu_putllc_tx = built_function("spu_putlluc_tx", [](asmjit::X86Assembler& c, auto& args)
+const auto spu_putlluc_tx = built_function("spu_putlluc_tx", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@@ -694,10 +694,10 @@ const auto spu_putlluc_tx = built_function("spu_getllar_tx", [](asmjit::X86Assembler& c, auto& args)
+const auto spu_getllar_tx = built_function("spu_getllar_tx", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@@ -847,10 +847,10 @@ const auto spu_getllar_tx = built_function
- void build_copy_data_swap_u32_avx3(asmjit::X86Assembler& c, std::array& args, const RT& rmask, const RT& rload, const RT& rtest)
+ void build_copy_data_swap_u32_avx3(asmjit::x86::Assembler& c, std::array& args, const RT& rmask, const RT& rload, const RT& rtest)
{
using namespace asmjit;
@@ -173,26 +173,26 @@ namespace
c.and_(args[0], -Size * 4);
c.add(args[2].r32(), args[3].r32());
- c.k(x86::k1).z().vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u));
+ c.k(x86::k1).z().vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u));
c.vpshufb(rload, rload, rmask);
if (Compare)
- c.k(x86::k1).z().vpxord(rtest, rload, X86Mem(args[0], 0, Size * 4u));
- c.k(x86::k1).vmovdqa32(X86Mem(args[0], 0, Size * 4u), rload);
+ c.k(x86::k1).z().vpxord(rtest, rload, x86::Mem(args[0], 0, Size * 4u));
+ c.k(x86::k1).vmovdqa32(x86::Mem(args[0], 0, Size * 4u), rload);
c.lea(args[0], x86::qword_ptr(args[0], Size * 4));
c.lea(args[1], x86::qword_ptr(args[1], Size * 4));
c.sub(args[2].r32(), Size);
c.or_(x86::eax, -1);
- c.align(kAlignCode, 16);
+ c.align(AlignMode::kCode, 16);
c.bind(loop);
c.cmp(args[2].r32(), Size);
c.jbe(tail);
- c.vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u));
+ c.vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u));
c.vpshufb(rload, rload, rmask);
if (Compare)
- c.vpternlogd(rtest, rload, X86Mem(args[0], 0, Size * 4u), 0xf6); // orAxorBC
- c.vmovdqa32(X86Mem(args[0], 0, Size * 4u), rload);
+ c.vpternlogd(rtest, rload, x86::Mem(args[0], 0, Size * 4u), 0xf6); // orAxorBC
+ c.vmovdqa32(x86::Mem(args[0], 0, Size * 4u), rload);
c.lea(args[0], x86::qword_ptr(args[0], Size * 4));
c.lea(args[1], x86::qword_ptr(args[1], Size * 4));
c.sub(args[2].r32(), Size);
@@ -202,11 +202,11 @@ namespace
c.shlx(x86::eax, x86::eax, args[2].r32());
c.not_(x86::eax);
c.kmovw(x86::k1, x86::eax);
- c.k(x86::k1).z().vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u));
+ c.k(x86::k1).z().vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u));
c.vpshufb(rload, rload, rmask);
if (Compare)
- c.k(x86::k1).vpternlogd(rtest, rload, X86Mem(args[0], 0, Size * 4u), 0xf6);
- c.k(x86::k1).vmovdqu32(X86Mem(args[0], 0, Size * 4u), rload);
+ c.k(x86::k1).vpternlogd(rtest, rload, x86::Mem(args[0], 0, Size * 4u), 0xf6);
+ c.k(x86::k1).vmovdqu32(x86::Mem(args[0], 0, Size * 4u), rload);
if (Compare)
{
@@ -230,7 +230,7 @@ namespace
}
template
- void build_copy_data_swap_u32(asmjit::X86Assembler& c, std::array& args)
+ void build_copy_data_swap_u32(asmjit::x86::Assembler& c, std::array& args)
{
using namespace asmjit;