mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 10:42:36 +01:00
Initial Linux Aarch64 support
* Update asmjit dependency (aarch64 branch)
* Disable USE_DISCORD_RPC by default
* Dump some JIT objects in rpcs3 cache dir
* Add SIGILL handler for all platforms
* Fix resetting zeroing denormals in thread pool
* Refactor most v128:: utils into global gv_** functions
* Refactor PPU interpreter (incomplete), remove "precise"
* - Instruction specializations with multiple accuracy flags
* - Adjust calling convention for speed
* - Removed precise/fast setting, replaced with static
* - Started refactoring interpreters for building at runtime JIT
* (I got tired of poor compiler optimizations)
* - Expose some accuracy settings (SAT, NJ, VNAN, FPCC)
* - Add exec_bytes PPU thread variable (akin to cycle count)
* PPU LLVM: fix VCTUXS+VCTSXS instruction NaN results
* SPU interpreter: remove "precise" for now (extremely non-portable)
* - As with PPU, settings changed to static/dynamic for interpreters.
* - Precise options will be implemented later
* Fix termination after fatal error dialog
This commit is contained in:
parent
d6aa834b5f
commit
580bd2b25e
2
3rdparty/asmjit/asmjit
vendored
2
3rdparty/asmjit/asmjit
vendored
@ -1 +1 @@
|
||||
Subproject commit eae7197fce03fd52a6e71ca89207a88ce270fb1a
|
||||
Subproject commit fc2a5d82f7434d7d03161275a764c051f970f41c
|
2
3rdparty/discord-rpc/CMakeLists.txt
vendored
2
3rdparty/discord-rpc/CMakeLists.txt
vendored
@ -2,7 +2,7 @@
|
||||
add_library(3rdparty_discordRPC INTERFACE)
|
||||
|
||||
# We don't want Discord Rich Presence on the BSDs and other OSes
|
||||
if (USE_DISCORD_RPC AND (WIN32 OR CMAKE_SYSTEM MATCHES "Linux" OR APPLE))
|
||||
if (USE_DISCORD_RPC AND (WIN32 OR CMAKE_SYSTEM MATCHES "Linux" OR APPLE) AND COMPILER_X86)
|
||||
if (WIN32 AND NOT MSVC)
|
||||
ExternalProject_Add(discordRPC
|
||||
GIT_REPOSITORY https://github.com/discordapp/discord-rpc
|
||||
|
14
3rdparty/llvm.cmake
vendored
14
3rdparty/llvm.cmake
vendored
@ -1,8 +1,10 @@
|
||||
if(WITH_LLVM)
|
||||
CHECK_CXX_COMPILER_FLAG("-msse -msse2 -mcx16" COMPILER_X86)
|
||||
CHECK_CXX_COMPILER_FLAG("-march=armv8-a+lse" COMPILER_ARM)
|
||||
|
||||
if(BUILD_LLVM_SUBMODULE)
|
||||
message(STATUS "LLVM will be built from the submodule.")
|
||||
|
||||
set(LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "")
|
||||
option(LLVM_BUILD_RUNTIME OFF)
|
||||
option(LLVM_BUILD_TOOLS OFF)
|
||||
option(LLVM_INCLUDE_BENCHMARKS OFF)
|
||||
@ -61,7 +63,15 @@ if(WITH_LLVM)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(LLVM_LIBS LLVMMCJIT LLVMX86CodeGen LLVMX86AsmParser)
|
||||
set(LLVM_LIBS LLVMMCJIT)
|
||||
|
||||
# Append the architecture-specific LLVM backend libraries to LLVM_LIBS.
# x86 builds link the X86 code generator and assembly parser.
if(COMPILER_X86)
|
||||
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser)
|
||||
endif()
|
||||

||||
# ARM builds link the X86 libraries as well as the ARM ones.
# NOTE(review): LLVMARMCodeGen/LLVMARMAsmParser are LLVM's 32-bit ARM backend
# components; an AArch64 host presumably needs the AArch64 components instead,
# and LLVM_TARGETS_TO_BUILD is set to "X86" only for the submodule build, so
# these libraries may not exist there - confirm before relying on this branch.
if(COMPILER_ARM)
|
||||
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser LLVMARMCodeGen LLVMARMAsmParser)
|
||||
endif()
|
||||
|
||||
if(WIN32 OR CMAKE_SYSTEM MATCHES "Linux")
|
||||
set(LLVM_LIBS ${LLVM_LIBS} LLVMIntelJITEvents)
|
||||
|
@ -17,7 +17,7 @@ option(WITH_LLVM "Enable usage of LLVM library" ON)
|
||||
option(BUILD_LLVM_SUBMODULE "Build LLVM from git submodule" ON)
|
||||
option(USE_FAUDIO "FAudio audio backend" ON)
|
||||
option(USE_LIBEVDEV "libevdev-based joystick support" ON)
|
||||
option(USE_DISCORD_RPC "Discord rich presence integration" ON)
|
||||
option(USE_DISCORD_RPC "Discord rich presence integration" OFF)
|
||||
option(USE_SYSTEM_ZLIB "Prefer system ZLIB instead of the builtin one" ON)
|
||||
option(USE_VULKAN "Vulkan render backend" ON)
|
||||
option(USE_PRECOMPILED_HEADERS "Use precompiled headers" OFF)
|
||||
|
@ -18,6 +18,12 @@ LOG_CHANNEL(jit_log, "JIT");
|
||||
|
||||
void jit_announce(uptr func, usz size, std::string_view name)
|
||||
{
|
||||
if (!size)
|
||||
{
|
||||
jit_log.error("Empty function announced: %s (%p)", name, func);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
static const fs::file s_map(fmt::format("/tmp/perf-%d.map", getpid()), fs::rewrite + fs::append);
|
||||
|
||||
@ -124,15 +130,31 @@ void* jit_runtime_base::_add(asmjit::CodeHolder* code) noexcept
|
||||
{
|
||||
ensure(!code->flatten());
|
||||
ensure(!code->resolveUnresolvedLinks());
|
||||
usz codeSize = ensure(code->codeSize());
|
||||
usz codeSize = code->codeSize();
|
||||
if (!codeSize)
|
||||
return nullptr;
|
||||
|
||||
auto p = ensure(this->_alloc(codeSize, 64));
|
||||
ensure(!code->relocateToBase(uptr(p)));
|
||||
|
||||
asmjit::VirtMem::ProtectJitReadWriteScope rwScope(p, codeSize);
|
||||
|
||||
for (asmjit::Section* section : code->_sections)
|
||||
{
|
||||
std::memcpy(p + section->offset(), section->data(), section->bufferSize());
|
||||
asmjit::VirtMem::ProtectJitReadWriteScope rwScope(p, codeSize);
|
||||
|
||||
for (asmjit::Section* section : code->_sections)
|
||||
{
|
||||
std::memcpy(p + section->offset(), section->data(), section->bufferSize());
|
||||
}
|
||||
}
|
||||
|
||||
if (!dump_name.empty())
|
||||
{
|
||||
// If directory ASMJIT doesn't exist, nothing will be written
|
||||
fs::file dump(fmt::format("%s/ASMJIT/%s", fs::get_cache_dir(), dump_name), fs::rewrite);
|
||||
|
||||
if (dump)
|
||||
{
|
||||
dump.write(p, codeSize);
|
||||
}
|
||||
}
|
||||
|
||||
return p;
|
||||
@ -349,8 +371,9 @@ static u64 make_null_function(const std::string& name)
|
||||
using namespace asmjit;
|
||||
|
||||
// Build a "null" function that contains its name
|
||||
const auto func = build_function_asm<void (*)()>("NULL", [&](x86::Assembler& c, auto& args)
|
||||
const auto func = build_function_asm<void (*)()>("NULL", [&](native_asm& c, auto& args)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
Label data = c.newLabel();
|
||||
c.lea(args[0], x86::qword_ptr(data, 0));
|
||||
c.jmp(Imm(&null));
|
||||
@ -362,6 +385,7 @@ static u64 make_null_function(const std::string& name)
|
||||
c.db(ch);
|
||||
c.db(0);
|
||||
c.align(AlignMode::kData, 16);
|
||||
#endif
|
||||
});
|
||||
|
||||
func_ptr = reinterpret_cast<u64>(func);
|
||||
|
104
Utilities/JIT.h
104
Utilities/JIT.h
@ -22,10 +22,17 @@
|
||||
#pragma GCC diagnostic ignored "-Wredundant-decls"
|
||||
#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
|
||||
#pragma GCC diagnostic ignored "-Weffc++"
|
||||
#ifndef __clang__
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion"
|
||||
#pragma GCC diagnostic ignored "-Wcast-qual"
|
||||
#else
|
||||
#pragma GCC diagnostic ignored "-Wduplicated-branches"
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-enum-enum-conversion"
|
||||
#endif
|
||||
#include <asmjit/asmjit.h>
|
||||
#if defined(ARCH_ARM64)
|
||||
#include <asmjit/a64.h>
|
||||
#endif
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
@ -36,6 +43,14 @@
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
using native_asm = asmjit::x86::Assembler;
|
||||
using native_args = std::array<asmjit::x86::Gp, 4>;
|
||||
#elif defined(ARCH_ARM64)
|
||||
using native_asm = asmjit::a64::Assembler;
|
||||
using native_args = std::array<asmjit::a64::Gp, 4>;
|
||||
#endif
|
||||
|
||||
void jit_announce(uptr func, usz size, std::string_view name);
|
||||
|
||||
void jit_announce(auto* func, usz size, std::string_view name)
|
||||
@ -62,6 +77,8 @@ struct jit_runtime_base
|
||||
const asmjit::Environment& environment() const noexcept;
|
||||
void* _add(asmjit::CodeHolder* code) noexcept;
|
||||
virtual uchar* _alloc(usz size, usz align) noexcept = 0;
|
||||
|
||||
std::string_view dump_name;
|
||||
};
|
||||
|
||||
// ASMJIT runtime for emitting code in a single 2G region
|
||||
@ -167,11 +184,39 @@ namespace asmjit
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code that loads the four argument registers in `args` from fixed
// registers: args[0] <- r13, args[1] <- rbp, args[2] <- r12, args[3] <- rbx.
// Presumably these are the registers carrying arguments under the GHC calling
// convention (going by the function name) - TODO confirm.
// Only implemented for x86-64; on any other architecture nothing is emitted.
inline void build_init_args_from_ghc(native_asm& c, native_args& args)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
// TODO: handle case when args don't overlap with r13/rbp/r12/rbx
|
||||
c.mov(args[0], x86::r13);
|
||||
c.mov(args[1], x86::rbp);
|
||||
c.mov(args[2], x86::r12);
|
||||
c.mov(args[3], x86::rbx);
|
||||
#else
|
||||
// Not implemented for this architecture: the casts only silence unused-parameter warnings
static_cast<void>(c);
|
||||
static_cast<void>(args);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Emits code that stores the four argument registers in `args` into fixed
// registers: r13 <- args[0], rbp <- args[1], r12 <- args[2], rbx <- args[3].
// This is the reverse register mapping of build_init_args_from_ghc.
// Presumably prepares a call into code using the GHC calling convention
// (going by the function name) - TODO confirm.
// Only implemented for x86-64; on any other architecture nothing is emitted.
inline void build_init_ghc_args(native_asm& c, native_args& args)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
// TODO: handle case when args don't overlap with r13/rbp/r12/rbx
|
||||
c.mov(x86::r13, args[0]);
|
||||
c.mov(x86::rbp, args[1]);
|
||||
c.mov(x86::r12, args[2]);
|
||||
c.mov(x86::rbx, args[3]);
|
||||
#else
|
||||
// Not implemented for this architecture: the casts only silence unused-parameter warnings
static_cast<void>(c);
|
||||
static_cast<void>(args);
|
||||
#endif
|
||||
}
|
||||
|
||||
using imm_ptr = Imm;
|
||||
}
|
||||
|
||||
// Build runtime function with the native asmjit assembler (native_asm: asmjit::x86::Assembler or asmjit::a64::Assembler)
|
||||
template <typename FT, typename F>
|
||||
template <typename FT, typename Asm = native_asm, typename F>
|
||||
inline FT build_function_asm(std::string_view name, F&& builder)
|
||||
{
|
||||
using namespace asmjit;
|
||||
@ -181,7 +226,8 @@ inline FT build_function_asm(std::string_view name, F&& builder)
|
||||
CodeHolder code;
|
||||
code.init(rt.environment());
|
||||
|
||||
std::array<x86::Gp, 4> args;
|
||||
#if defined(ARCH_X64)
|
||||
native_args args;
|
||||
#ifdef _WIN32
|
||||
args[0] = x86::rcx;
|
||||
args[1] = x86::rdx;
|
||||
@ -193,16 +239,27 @@ inline FT build_function_asm(std::string_view name, F&& builder)
|
||||
args[2] = x86::rdx;
|
||||
args[3] = x86::rcx;
|
||||
#endif
|
||||
#elif defined(ARCH_ARM64)
|
||||
native_args args;
|
||||
args[0] = a64::x0;
|
||||
args[1] = a64::x1;
|
||||
args[2] = a64::x2;
|
||||
args[3] = a64::x3;
|
||||
#endif
|
||||
|
||||
x86::Assembler compiler(&code);
|
||||
Asm compiler(&code);
|
||||
compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
|
||||
builder(std::ref(compiler), args);
|
||||
if constexpr (std::is_invocable_v<F, Asm&, native_args&>)
|
||||
builder(compiler, args);
|
||||
else
|
||||
builder(compiler);
|
||||
rt.dump_name = name;
|
||||
const auto result = rt._add(&code);
|
||||
jit_announce(result, code.codeSize(), name);
|
||||
return reinterpret_cast<FT>(uptr(result));
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
#if !defined(ARCH_X64) || defined(__APPLE__)
|
||||
template <typename FT, usz = 4096>
|
||||
class built_function
|
||||
{
|
||||
@ -213,9 +270,23 @@ public:
|
||||
|
||||
built_function& operator=(const built_function&) = delete;
|
||||
|
||||
template <typename F>
|
||||
built_function(std::string_view name, F&& builder)
|
||||
: m_func(ensure(build_function_asm<FT>(name, std::forward<F>(builder))))
|
||||
template <typename F> requires (std::is_invocable_v<F, native_asm&, native_args&>)
|
||||
built_function(std::string_view name, F&& builder,
|
||||
u32 line = __builtin_LINE(),
|
||||
u32 col = __builtin_COLUMN(),
|
||||
const char* file = __builtin_FILE(),
|
||||
const char* func = __builtin_FUNCTION())
|
||||
: m_func(ensure(build_function_asm<FT>(name, std::forward<F>(builder)), const_str(), line, col, file, func))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename F> requires (std::is_invocable_v<F>)
|
||||
built_function(std::string_view, F&& getter,
|
||||
u32 line = __builtin_LINE(),
|
||||
u32 col = __builtin_COLUMN(),
|
||||
const char* file = __builtin_FILE(),
|
||||
const char* func = __builtin_FUNCTION())
|
||||
: m_func(ensure(getter(), const_str(), line, col, file, func))
|
||||
{
|
||||
}
|
||||
|
||||
@ -251,7 +322,8 @@ public:
|
||||
CodeHolder code;
|
||||
code.init(rt.environment());
|
||||
|
||||
std::array<x86::Gp, 4> args;
|
||||
#if defined(ARCH_X64)
|
||||
native_args args;
|
||||
#ifdef _WIN32
|
||||
args[0] = x86::rcx;
|
||||
args[1] = x86::rdx;
|
||||
@ -263,10 +335,18 @@ public:
|
||||
args[2] = x86::rdx;
|
||||
args[3] = x86::rcx;
|
||||
#endif
|
||||
#elif defined(ARCH_ARM64)
|
||||
native_args args;
|
||||
args[0] = a64::x0;
|
||||
args[1] = a64::x1;
|
||||
args[2] = a64::x2;
|
||||
args[3] = a64::x3;
|
||||
#endif
|
||||
|
||||
x86::Assembler compiler(&code);
|
||||
native_asm compiler(&code);
|
||||
compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
|
||||
builder(std::ref(compiler), args);
|
||||
builder(compiler, args);
|
||||
rt.dump_name = name;
|
||||
jit_announce(rt._add(&code), code.codeSize(), name);
|
||||
}
|
||||
|
||||
|
@ -77,7 +77,7 @@
|
||||
#include "util/logs.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/v128sse.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
#include "Emu/Memory/vm_locking.h"
|
||||
|
||||
@ -189,6 +189,7 @@ bool IsDebuggerPresent()
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
enum x64_reg_t : u32
|
||||
{
|
||||
X64R_RAX = 0,
|
||||
@ -839,6 +840,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, usz
|
||||
#ifdef _WIN32
|
||||
|
||||
typedef CONTEXT x64_context;
|
||||
typedef CONTEXT ucontext_t;
|
||||
|
||||
#define X64REG(context, reg) (&(&(context)->Rax)[reg])
|
||||
#define XMMREG(context, reg) (reinterpret_cast<v128*>(&(&(context)->Xmm0)[reg]))
|
||||
@ -1211,12 +1213,18 @@ usz get_x64_access_size(x64_context* context, x64_op_t op, x64_reg_t reg, usz d_
|
||||
return d_size;
|
||||
}
|
||||
|
||||
#elif defined(ARCH_ARM64)
|
||||
|
||||
#define RIP(context) ((context)->uc_mcontext.pc)
|
||||
|
||||
#endif /* ARCH_ */
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
extern std::function<bool(u32 addr, bool is_writing)> g_access_violation_handler;
|
||||
}
|
||||
|
||||
bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) noexcept
|
||||
bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noexcept
|
||||
{
|
||||
g_tls_fault_all++;
|
||||
|
||||
@ -1243,6 +1251,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) no
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
const u8* const code = reinterpret_cast<u8*>(RIP(context));
|
||||
|
||||
x64_op_t op;
|
||||
@ -1382,6 +1391,9 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) no
|
||||
g_tls_fault_spu++;
|
||||
return true;
|
||||
} while (0);
|
||||
#else
|
||||
static_cast<void>(context);
|
||||
#endif /* ARCH_ */
|
||||
|
||||
if (vm::check_addr(addr, is_writing ? vm::page_writable : vm::page_readable))
|
||||
{
|
||||
@ -1545,7 +1557,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) no
|
||||
if (!g_tls_access_violation_recovered)
|
||||
{
|
||||
vm_log.notice("\n%s", dump_useful_thread_info());
|
||||
vm_log.error("Access violation %s location 0x%x (%s) [type=u%u]", is_writing ? "writing" : "reading", addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory", d_size * 8);
|
||||
vm_log.error("Access violation %s location 0x%x (%s)", is_writing ? "writing" : "reading", addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory");
|
||||
}
|
||||
|
||||
// TODO:
|
||||
@ -1582,7 +1594,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) no
|
||||
// Do not log any further access violations in this case.
|
||||
if (!g_tls_access_violation_recovered)
|
||||
{
|
||||
vm_log.fatal("Access violation %s location 0x%x (%s) [type=u%u]", is_writing ? "writing" : (cpu && cpu->id_type() == 1 && cpu->get_pc() == addr ? "executing" : "reading"), addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory", d_size * 8);
|
||||
vm_log.fatal("Access violation %s location 0x%x (%s)", is_writing ? "writing" : (cpu && cpu->id_type() == 1 && cpu->get_pc() == addr ? "executing" : "reading"), addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory");
|
||||
}
|
||||
|
||||
while (Emu.IsPaused())
|
||||
@ -1754,8 +1766,9 @@ const bool s_exception_handler_set = []() -> bool
|
||||
|
||||
static void signal_handler(int /*sig*/, siginfo_t* info, void* uct) noexcept
|
||||
{
|
||||
x64_context* context = static_cast<ucontext_t*>(uct);
|
||||
ucontext_t* context = static_cast<ucontext_t*>(uct);
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
#ifdef __APPLE__
|
||||
const u64 err = context->uc_mcontext->__es.__err;
|
||||
#elif defined(__DragonFly__) || defined(__FreeBSD__)
|
||||
@ -1770,6 +1783,23 @@ static void signal_handler(int /*sig*/, siginfo_t* info, void* uct) noexcept
|
||||
|
||||
const bool is_executing = err & 0x10;
|
||||
const bool is_writing = err & 0x2;
|
||||
#elif defined(ARCH_ARM64)
|
||||
const bool is_executing = uptr(info->si_addr) == RIP(context);
|
||||
const u32 insn = is_executing ? 0 : *reinterpret_cast<u32*>(RIP(context));
|
||||
const bool is_writing = (insn & 0xbfff0000) == 0x0c000000
|
||||
|| (insn & 0xbfe00000) == 0x0c800000
|
||||
|| (insn & 0xbfdf0000) == 0x0d000000
|
||||
|| (insn & 0xbfc00000) == 0x0d800000
|
||||
|| (insn & 0x3f400000) == 0x08000000
|
||||
|| (insn & 0x3bc00000) == 0x39000000
|
||||
|| (insn & 0x3fc00000) == 0x3d800000
|
||||
|| (insn & 0x3bc00000) == 0x38000000
|
||||
|| (insn & 0x3fe00000) == 0x3c800000
|
||||
|| (insn & 0x3a400000) == 0x28000000;
|
||||
|
||||
#else
|
||||
#error "signal_handler not implemented"
|
||||
#endif
|
||||
|
||||
const u64 exec64 = (reinterpret_cast<u64>(info->si_addr) - reinterpret_cast<u64>(vm::g_exec_addr)) / 2;
|
||||
const auto cause = is_executing ? "executing" : is_writing ? "writing" : "reading";
|
||||
@ -1809,6 +1839,26 @@ static void signal_handler(int /*sig*/, siginfo_t* info, void* uct) noexcept
|
||||
thread_ctrl::emergency_exit(msg);
|
||||
}
|
||||
|
||||
// Signal handler for SIGILL (illegal instruction).
// Builds a message with the faulting address (info->si_addr) and the 16 bytes
// stored at that address, then appends the current thread name.
// With a debugger attached it logs the message and thread info, raises SIGTRAP
// and returns; otherwise it terminates through thread_ctrl::emergency_exit.
// The signal number and the ucontext argument are unused.
static void sigill_handler(int /*sig*/, siginfo_t* info, void* /*uct*/) noexcept
|
||||
{
|
||||
// The instruction bytes are read through be_t<u128> (presumably a big-endian view, so they print in memory order - confirm).
// NOTE(review): this 16-byte read is unconditional; if si_addr is within 16 bytes of an unmapped page it could fault inside the handler - confirm this is acceptable.
std::string msg = fmt::format("Illegal instruction at %p (%s).\n", info->si_addr, *reinterpret_cast<be_t<u128>*>(info->si_addr));
|
||||

||||
append_thread_name(msg);
|
||||

||||
if (IsDebuggerPresent())
|
||||
{
|
||||
sys_log.fatal("\n%s", msg);
|
||||

||||
sys_log.notice("\n%s", dump_useful_thread_info());
|
||||

||||
// Convert to SIGTRAP
|
||||
raise(SIGTRAP);
|
||||
return;
|
||||
}
|
||||

||||
// No debugger attached: report the message and exit
thread_ctrl::emergency_exit(msg);
|
||||
}
|
||||
|
||||
// SIGPIPE handler with an intentionally empty body: the signal is received and nothing is done.
// It is installed as sa.sa_handler via sigaction(SIGPIPE, ...) in the exception-handler setup.
void sigpipe_signaling_handler(int)
|
||||
{
|
||||
}
|
||||
@ -1834,6 +1884,13 @@ const bool s_exception_handler_set = []() -> bool
|
||||
}
|
||||
#endif
|
||||
|
||||
sa.sa_sigaction = sigill_handler;
|
||||
if (::sigaction(SIGILL, &sa, NULL) == -1)
|
||||
{
|
||||
std::fprintf(stderr, "sigaction(SIGILL) failed (%d).\n", errno);
|
||||
std::abort();
|
||||
}
|
||||
|
||||
sa.sa_handler = sigpipe_signaling_handler;
|
||||
if (::sigaction(SIGPIPE, &sa, NULL) == -1)
|
||||
{
|
||||
@ -1852,11 +1909,7 @@ const bool s_terminate_handler_set = []() -> bool
|
||||
std::set_terminate([]()
|
||||
{
|
||||
if (IsDebuggerPresent())
|
||||
#ifdef _MSC_VER
|
||||
__debugbreak();
|
||||
#else
|
||||
__asm("int3;");
|
||||
#endif
|
||||
utils::trap();
|
||||
|
||||
report_fatal_error("RPCS3 has abnormally terminated.");
|
||||
});
|
||||
@ -1935,7 +1988,7 @@ void thread_base::initialize(void (*error_cb)())
|
||||
{
|
||||
if (attempts == umax)
|
||||
{
|
||||
g_tls_wait_time += __rdtsc() - stamp0;
|
||||
g_tls_wait_time += utils::get_tsc() - stamp0;
|
||||
}
|
||||
else if (attempts > 1)
|
||||
{
|
||||
@ -2096,6 +2149,8 @@ thread_base::native_entry thread_base::finalize(u64 _self) noexcept
|
||||
|
||||
std::fesetround(FE_TONEAREST);
|
||||
|
||||
gv_unset_zeroing_denormals();
|
||||
|
||||
static constexpr u64 s_stop_bit = 0x8000'0000'0000'0000ull;
|
||||
|
||||
static atomic_t<u64> s_pool_ctr = []
|
||||
@ -2195,10 +2250,11 @@ thread_base::native_entry thread_base::finalize(u64 _self) noexcept
|
||||
|
||||
thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base* _base))
|
||||
{
|
||||
return build_function_asm<native_entry>("thread_base_trampoline", [&](asmjit::x86::Assembler& c, auto& args)
|
||||
return build_function_asm<native_entry>("thread_base_trampoline", [&](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
Label _ret = c.newLabel();
|
||||
c.push(x86::rbp);
|
||||
c.sub(x86::rsp, 0x20);
|
||||
@ -2222,6 +2278,7 @@ thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base*
|
||||
c.bind(_ret);
|
||||
c.add(x86::rsp, 0x28);
|
||||
c.ret();
|
||||
#endif
|
||||
});
|
||||
}
|
||||
|
||||
@ -2364,7 +2421,7 @@ bool thread_base::join(bool dtor) const
|
||||
// Hacked for too sleepy threads (1ms) TODO: make sure it's unneeded and remove
|
||||
const auto timeout = dtor && Emu.IsStopped() ? atomic_wait_timeout{1'000'000} : atomic_wait_timeout::inf;
|
||||
|
||||
auto stamp0 = __rdtsc();
|
||||
auto stamp0 = utils::get_tsc();
|
||||
|
||||
for (u64 i = 0; (m_sync & 3) <= 1; i++)
|
||||
{
|
||||
@ -2377,7 +2434,7 @@ bool thread_base::join(bool dtor) const
|
||||
|
||||
if (i >= 16 && !(i & (i - 1)) && timeout != atomic_wait_timeout::inf)
|
||||
{
|
||||
sig_log.error(u8"Thread [%s] is too sleepy. Waiting for it %.3fµs already!", *m_tname.load(), (__rdtsc() - stamp0) / (utils::get_tsc_freq() / 1000000.));
|
||||
sig_log.error(u8"Thread [%s] is too sleepy. Waiting for it %.3fµs already!", *m_tname.load(), (utils::get_tsc() - stamp0) / (utils::get_tsc_freq() / 1000000.));
|
||||
}
|
||||
}
|
||||
|
||||
@ -2522,17 +2579,8 @@ void thread_base::exec()
|
||||
|
||||
sig_log.fatal("Thread terminated due to fatal error: %s", reason);
|
||||
|
||||
#ifdef _WIN32
|
||||
if (IsDebuggerPresent())
|
||||
{
|
||||
__debugbreak();
|
||||
}
|
||||
#else
|
||||
if (IsDebuggerPresent())
|
||||
{
|
||||
__asm("int3;");
|
||||
}
|
||||
#endif
|
||||
utils::trap();
|
||||
|
||||
if (const auto _this = g_tls_this_thread)
|
||||
{
|
||||
|
@ -478,7 +478,19 @@ class named_thread final : public Context, result_storage<Context>, thread_base
|
||||
return thread::finalize(thread_state::finished);
|
||||
}
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
static inline thread::native_entry trampoline = thread::make_trampoline(entry_point);
|
||||
#else
|
||||
// Native thread entry used when ARCH_X64 is not defined (the x64 build uses
// the function built by thread::make_trampoline instead).
// Runs entry_point on the thread_base passed in `arg` and hands its result to
// thread_base::finalize. If finalize returns a non-null follow-up entry, that
// entry is called with the current thread and its result is returned;
// otherwise nullptr is returned.
static void* trampoline(void* arg)
|
||||
{
|
||||
if (const auto next = thread_base::finalize(entry_point(static_cast<thread_base*>(arg))))
|
||||
{
|
||||
return next(thread_ctrl::get_current());
|
||||
}
|
||||

||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
friend class thread_ctrl;
|
||||
|
||||
|
@ -20,11 +20,20 @@ else()
|
||||
# Some distros have the compilers set to use PIE by default, but RPCS3 doesn't work with PIE, so we need to disable it.
|
||||
CHECK_CXX_COMPILER_FLAG("-no-pie" HAS_NO_PIE)
|
||||
CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
|
||||
CHECK_CXX_COMPILER_FLAG("-msse -msse2 -mcx16" COMPILER_X86)
|
||||
CHECK_CXX_COMPILER_FLAG("-march=armv8.1-a" COMPILER_ARM)
|
||||
|
||||
add_compile_options(-Wall)
|
||||
add_compile_options(-fno-exceptions)
|
||||
add_compile_options(-fstack-protector)
|
||||
add_compile_options(-msse -msse2 -mcx16)
|
||||
|
||||
if (COMPILER_X86)
|
||||
add_compile_options(-msse -msse2 -mcx16)
|
||||
endif()
|
||||
|
||||
if (COMPILER_ARM)
|
||||
add_compile_options(-march=armv8.1-a)
|
||||
endif()
|
||||
|
||||
add_compile_options(-Werror=old-style-cast)
|
||||
add_compile_options(-Werror=sign-compare)
|
||||
|
@ -461,8 +461,10 @@ int aes_setkey_enc( aes_context *ctx, const unsigned char *key, unsigned int key
|
||||
|
||||
ctx->rk = RK = ctx->buf;
|
||||
|
||||
#if defined(__SSE2__) || defined(_M_X64)
|
||||
if( aesni_supports( POLARSSL_AESNI_AES ) )
|
||||
return( aesni_setkey_enc( reinterpret_cast<unsigned char*>(ctx->rk), key, keysize ) );
|
||||
#endif
|
||||
|
||||
for( i = 0; i < (keysize >> 5); i++ )
|
||||
{
|
||||
@ -564,12 +566,14 @@ int aes_setkey_dec( aes_context *ctx, const unsigned char *key, unsigned int key
|
||||
if( ret != 0 )
|
||||
return( ret );
|
||||
|
||||
#if defined(__SSE2__) || defined(_M_X64)
|
||||
if( aesni_supports( POLARSSL_AESNI_AES ) )
|
||||
{
|
||||
aesni_inverse_key( reinterpret_cast<unsigned char*>(ctx->rk),
|
||||
reinterpret_cast<const unsigned char*>(cty.rk), ctx->nr );
|
||||
goto done;
|
||||
}
|
||||
#endif
|
||||
|
||||
SK = cty.rk + cty.nr * 4;
|
||||
|
||||
@ -658,8 +662,10 @@ int aes_crypt_ecb( aes_context *ctx,
|
||||
int i;
|
||||
uint32_t *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3;
|
||||
|
||||
#if defined(__SSE2__) || defined(_M_X64)
|
||||
if( aesni_supports( POLARSSL_AESNI_AES ) )
|
||||
return( aesni_crypt_ecb( ctx, mode, input, output ) );
|
||||
#endif
|
||||
|
||||
RK = ctx->rk;
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
#if defined(__SSE2__) || defined(_M_X64)
|
||||
|
||||
/*
|
||||
* AES-NI support functions
|
||||
*
|
||||
@ -680,3 +682,5 @@ int aesni_setkey_enc( unsigned char *rk,
|
||||
|
||||
return( 0 );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -17,7 +17,9 @@
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
DECLARE(cpu_thread::g_threads_created){0};
|
||||
DECLARE(cpu_thread::g_threads_deleted){0};
|
||||
@ -410,20 +412,6 @@ void cpu_thread::operator()()
|
||||
{
|
||||
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(id_type() == 1 ? thread_class::ppu : thread_class::spu));
|
||||
}
|
||||
if (id_type() == 2)
|
||||
{
|
||||
// force input/output denormals to zero for SPU threads (FTZ/DAZ)
|
||||
_mm_setcsr( _mm_getcsr() | 0x8040 );
|
||||
|
||||
const volatile int a = 0x1fc00000;
|
||||
__m128 b = _mm_castsi128_ps(_mm_set1_epi32(a));
|
||||
int c = _mm_cvtsi128_si32(_mm_castps_si128(_mm_mul_ps(b,b)));
|
||||
|
||||
if (c != 0)
|
||||
{
|
||||
sys_log.fatal("Could not disable denormals.");
|
||||
}
|
||||
}
|
||||
|
||||
while (!g_fxo->is_init<cpu_profiler>())
|
||||
{
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include "CPUTranslator.h"
|
||||
|
||||
#include "util/v128.hpp"
|
||||
#include "util/v128sse.hpp"
|
||||
#include "util/simd.hpp"
|
||||
|
||||
llvm::LLVMContext g_llvm_ctx;
|
||||
|
||||
|
@ -2961,11 +2961,11 @@ public:
|
||||
}
|
||||
|
||||
// Call external function: provide name and function pointer
|
||||
template <typename RT, typename... FArgs, LLVMValue... Args>
|
||||
template <typename RetT = void, typename RT, typename... FArgs, LLVMValue... Args>
|
||||
llvm::CallInst* call(std::string_view lame, RT(*_func)(FArgs...), Args... args)
|
||||
{
|
||||
static_assert(sizeof...(FArgs) == sizeof...(Args), "spu_llvm_recompiler::call(): unexpected arg number");
|
||||
const auto type = llvm::FunctionType::get(get_type<RT>(), {args->getType()...}, false);
|
||||
const auto type = llvm::FunctionType::get(get_type<std::conditional_t<std::is_void_v<RetT>, RT, RetT>>(), {args->getType()...}, false);
|
||||
const auto func = llvm::cast<llvm::Function>(m_module->getOrInsertFunction({lame.data(), lame.size()}, type).getCallee());
|
||||
#ifdef _WIN32
|
||||
func->setCallingConv(llvm::CallingConv::Win64);
|
||||
@ -3680,31 +3680,4 @@ struct fmt_unveil<llvm::TypeSize, void>
|
||||
}
|
||||
};
|
||||
|
||||
#ifndef _MSC_VER
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
||||
#endif
|
||||
|
||||
template <>
|
||||
struct llvm_value_t<__m128> : llvm_value_t<f32[4]>
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
template <>
|
||||
struct llvm_value_t<__m128d> : llvm_value_t<f64[2]>
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
template <>
|
||||
struct llvm_value_t<__m128i> : llvm_value_t<u8[16]>
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
#ifndef _MSC_VER
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
8776
rpcs3/Emu/CPU/sse2neon.h
Normal file
8776
rpcs3/Emu/CPU/sse2neon.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -6,9 +6,12 @@
|
||||
#include "Emu/Cell/lv2/sys_event.h"
|
||||
#include "cellAudio.h"
|
||||
|
||||
#include "emmintrin.h"
|
||||
#include <cmath>
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
#include "emmintrin.h"
|
||||
#endif
|
||||
|
||||
LOG_CHANNEL(cellAudio);
|
||||
|
||||
vm::gvar<char, AUDIO_PORT_OFFSET * AUDIO_PORT_COUNT> g_audio_buffer;
|
||||
@ -1118,6 +1121,7 @@ void cell_audio_thread::mix(float *out_buffer, s32 offset)
|
||||
// 2x CVTPS2DQ (converts float to s32)
|
||||
// PACKSSDW (converts s32 to s16 with signed saturation)
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
for (usz i = 0; i < out_buffer_sz; i += 8)
|
||||
{
|
||||
const auto scale = _mm_set1_ps(0x8000);
|
||||
@ -1125,6 +1129,9 @@ void cell_audio_thread::mix(float *out_buffer, s32 offset)
|
||||
_mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(out_buffer + i), scale)),
|
||||
_mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(out_buffer + i + 4), scale)))));
|
||||
}
|
||||
#else
|
||||
fmt::throw_exception("Not supported");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/v128sse.hpp"
|
||||
#include "util/simd.hpp"
|
||||
|
||||
LOG_CHANNEL(cellSpurs);
|
||||
|
||||
@ -738,7 +738,7 @@ s32 _spurs::create_handler(vm::ptr<CellSpurs> spurs, u32 ppuPriority)
|
||||
|
||||
void non_task()
|
||||
{
|
||||
BIND_FUNC(_spurs::handler_entry)(*this);
|
||||
//BIND_FUNC(_spurs::handler_entry)(*this);
|
||||
}
|
||||
};
|
||||
|
||||
@ -933,7 +933,7 @@ s32 _spurs::create_event_helper(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 p
|
||||
|
||||
void non_task()
|
||||
{
|
||||
BIND_FUNC(_spurs::event_helper_entry)(*this);
|
||||
//BIND_FUNC(_spurs::event_helper_entry)(*this);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/v128sse.hpp"
|
||||
#include "util/simd.hpp"
|
||||
|
||||
LOG_CHANNEL(cellSpurs);
|
||||
|
||||
@ -1434,7 +1434,7 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
|
||||
|
||||
// Verify taskset state is valid
|
||||
if ((waiting & running) != v128{} || (ready & pready) != v128{} ||
|
||||
(v128::andnot(enabled, running | ready | pready | signalled | waiting) != v128{}))
|
||||
(gv_andn(enabled, running | ready | pready | signalled | waiting) != v128{}))
|
||||
{
|
||||
spu_log.error("Invalid taskset state");
|
||||
spursHalt(spu);
|
||||
@ -1442,7 +1442,7 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
|
||||
|
||||
// Find the number of tasks that have become ready since the last iteration
|
||||
{
|
||||
v128 newlyReadyTasks = v128::andnot(ready, signalled | pready);
|
||||
v128 newlyReadyTasks = gv_andn(ready, signalled | pready);
|
||||
|
||||
numNewlyReadyTasks = utils::popcnt128(newlyReadyTasks._u);
|
||||
}
|
||||
@ -1491,7 +1491,7 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
|
||||
}
|
||||
case SPURS_TASKSET_REQUEST_POLL:
|
||||
{
|
||||
readyButNotRunning = v128::andnot(running, ready0);
|
||||
readyButNotRunning = gv_andn(running, ready0);
|
||||
if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK)
|
||||
{
|
||||
readyButNotRunning._u &= ~(u128{1} << (~taskset->wkl_flag_wait_task & 127));
|
||||
@ -1526,7 +1526,7 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
|
||||
}
|
||||
case SPURS_TASKSET_REQUEST_SELECT_TASK:
|
||||
{
|
||||
readyButNotRunning = v128::andnot(running, ready0);
|
||||
readyButNotRunning = gv_andn(running, ready0);
|
||||
if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK)
|
||||
{
|
||||
readyButNotRunning._u &= ~(u128{1} << (~taskset->wkl_flag_wait_task & 127));
|
||||
|
@ -203,18 +203,31 @@ struct ppu_itype
|
||||
VCFSX,
|
||||
VCFUX,
|
||||
VCMPBFP,
|
||||
VCMPBFP_,
|
||||
VCMPEQFP,
|
||||
VCMPEQFP_,
|
||||
VCMPEQUB,
|
||||
VCMPEQUB_,
|
||||
VCMPEQUH,
|
||||
VCMPEQUH_,
|
||||
VCMPEQUW,
|
||||
VCMPEQUW_,
|
||||
VCMPGEFP,
|
||||
VCMPGEFP_,
|
||||
VCMPGTFP,
|
||||
VCMPGTFP_,
|
||||
VCMPGTSB,
|
||||
VCMPGTSB_,
|
||||
VCMPGTSH,
|
||||
VCMPGTSH_,
|
||||
VCMPGTSW,
|
||||
VCMPGTSW_,
|
||||
VCMPGTUB,
|
||||
VCMPGTUB_,
|
||||
VCMPGTUH,
|
||||
VCMPGTUH_,
|
||||
VCMPGTUW,
|
||||
VCMPGTUW_,
|
||||
VCTSXS,
|
||||
VCTUXS,
|
||||
VEXPTEFP,
|
||||
@ -367,7 +380,9 @@ struct ppu_itype
|
||||
LVSL,
|
||||
LVEBX,
|
||||
SUBFC,
|
||||
SUBFCO,
|
||||
ADDC,
|
||||
ADDCO,
|
||||
MULHDU,
|
||||
MULHWU,
|
||||
MFOCRF,
|
||||
@ -382,6 +397,7 @@ struct ppu_itype
|
||||
LVSR,
|
||||
LVEHX,
|
||||
SUBF,
|
||||
SUBFO,
|
||||
LDUX,
|
||||
DCBST,
|
||||
LWZUX,
|
||||
@ -396,11 +412,14 @@ struct ppu_itype
|
||||
LBZX,
|
||||
LVX,
|
||||
NEG,
|
||||
NEGO,
|
||||
LBZUX,
|
||||
NOR,
|
||||
STVEBX,
|
||||
SUBFE,
|
||||
SUBFEO,
|
||||
ADDE,
|
||||
ADDEO,
|
||||
MTOCRF,
|
||||
STDX,
|
||||
STWCX,
|
||||
@ -410,17 +429,24 @@ struct ppu_itype
|
||||
STWUX,
|
||||
STVEWX,
|
||||
SUBFZE,
|
||||
SUBFZEO,
|
||||
ADDZE,
|
||||
ADDZEO,
|
||||
STDCX,
|
||||
STBX,
|
||||
STVX,
|
||||
SUBFME,
|
||||
SUBFMEO,
|
||||
MULLD,
|
||||
MULLDO,
|
||||
ADDME,
|
||||
ADDMEO,
|
||||
MULLW,
|
||||
MULLWO,
|
||||
DCBTST,
|
||||
STBUX,
|
||||
ADD,
|
||||
ADDO,
|
||||
DCBT,
|
||||
LHZX,
|
||||
EQV,
|
||||
@ -442,13 +468,17 @@ struct ppu_itype
|
||||
STHUX,
|
||||
OR,
|
||||
DIVDU,
|
||||
DIVDUO,
|
||||
DIVWU,
|
||||
DIVWUO,
|
||||
MTSPR,
|
||||
DCBI,
|
||||
NAND,
|
||||
STVXL,
|
||||
DIVD,
|
||||
DIVDO,
|
||||
DIVW,
|
||||
DIVWO,
|
||||
LVLX,
|
||||
LDBRX,
|
||||
LSWX,
|
||||
@ -558,6 +588,112 @@ struct ppu_itype
|
||||
FCTID,
|
||||
FCTIDZ,
|
||||
FCFID,
|
||||
|
||||
SUBFCO_,
|
||||
ADDCO_,
|
||||
SUBFO_,
|
||||
NEGO_,
|
||||
SUBFEO_,
|
||||
ADDEO_,
|
||||
SUBFZEO_,
|
||||
ADDZEO_,
|
||||
SUBFMEO_,
|
||||
MULLDO_,
|
||||
ADDMEO_,
|
||||
MULLWO_,
|
||||
ADDO_,
|
||||
DIVDUO_,
|
||||
DIVWUO_,
|
||||
DIVDO_,
|
||||
DIVWO_,
|
||||
|
||||
RLWIMI_,
|
||||
RLWINM_,
|
||||
RLWNM_,
|
||||
RLDICL_,
|
||||
RLDICR_,
|
||||
RLDIC_,
|
||||
RLDIMI_,
|
||||
RLDCL_,
|
||||
RLDCR_,
|
||||
SUBFC_,
|
||||
MULHDU_,
|
||||
ADDC_,
|
||||
MULHWU_,
|
||||
SLW_,
|
||||
CNTLZW_,
|
||||
SLD_,
|
||||
AND_,
|
||||
SUBF_,
|
||||
CNTLZD_,
|
||||
ANDC_,
|
||||
MULHD_,
|
||||
MULHW_,
|
||||
NEG_,
|
||||
NOR_,
|
||||
SUBFE_,
|
||||
ADDE_,
|
||||
SUBFZE_,
|
||||
ADDZE_,
|
||||
MULLD_,
|
||||
SUBFME_,
|
||||
ADDME_,
|
||||
MULLW_,
|
||||
ADD_,
|
||||
EQV_,
|
||||
XOR_,
|
||||
ORC_,
|
||||
OR_,
|
||||
DIVDU_,
|
||||
DIVWU_,
|
||||
NAND_,
|
||||
DIVD_,
|
||||
DIVW_,
|
||||
SRW_,
|
||||
SRD_,
|
||||
SRAW_,
|
||||
SRAD_,
|
||||
SRAWI_,
|
||||
SRADI_,
|
||||
EXTSH_,
|
||||
EXTSB_,
|
||||
EXTSW_,
|
||||
FDIVS_,
|
||||
FSUBS_,
|
||||
FADDS_,
|
||||
FSQRTS_,
|
||||
FRES_,
|
||||
FMULS_,
|
||||
FMADDS_,
|
||||
FMSUBS_,
|
||||
FNMSUBS_,
|
||||
FNMADDS_,
|
||||
MTFSB1_,
|
||||
MTFSB0_,
|
||||
MTFSFI_,
|
||||
MFFS_,
|
||||
MTFSF_,
|
||||
FRSP_,
|
||||
FCTIW_,
|
||||
FCTIWZ_,
|
||||
FDIV_,
|
||||
FSUB_,
|
||||
FADD_,
|
||||
FSQRT_,
|
||||
FSEL_,
|
||||
FMUL_,
|
||||
FRSQRTE_,
|
||||
FMSUB_,
|
||||
FMADD_,
|
||||
FNMSUB_,
|
||||
FNMADD_,
|
||||
FNEG_,
|
||||
FMR_,
|
||||
FNABS_,
|
||||
FABS_,
|
||||
FCTID_,
|
||||
FCTIDZ_,
|
||||
FCFID_,
|
||||
};
|
||||
|
||||
// Enable address-of operator for ppu_decoder<>
|
||||
@ -570,6 +706,7 @@ struct ppu_itype
|
||||
struct ppu_iname
|
||||
{
|
||||
#define NAME(x) static constexpr const char& x = *#x;
|
||||
#define NAME_(x) static constexpr const char& x##_ = *#x ".";
|
||||
NAME(UNK)
|
||||
NAME(MFVSCR)
|
||||
NAME(MTVSCR)
|
||||
@ -595,18 +732,31 @@ struct ppu_iname
|
||||
NAME(VCFSX)
|
||||
NAME(VCFUX)
|
||||
NAME(VCMPBFP)
|
||||
NAME_(VCMPBFP)
|
||||
NAME(VCMPEQFP)
|
||||
NAME_(VCMPEQFP)
|
||||
NAME(VCMPEQUB)
|
||||
NAME_(VCMPEQUB)
|
||||
NAME(VCMPEQUH)
|
||||
NAME_(VCMPEQUH)
|
||||
NAME(VCMPEQUW)
|
||||
NAME_(VCMPEQUW)
|
||||
NAME(VCMPGEFP)
|
||||
NAME_(VCMPGEFP)
|
||||
NAME(VCMPGTFP)
|
||||
NAME_(VCMPGTFP)
|
||||
NAME(VCMPGTSB)
|
||||
NAME_(VCMPGTSB)
|
||||
NAME(VCMPGTSH)
|
||||
NAME_(VCMPGTSH)
|
||||
NAME(VCMPGTSW)
|
||||
NAME_(VCMPGTSW)
|
||||
NAME(VCMPGTUB)
|
||||
NAME_(VCMPGTUB)
|
||||
NAME(VCMPGTUH)
|
||||
NAME_(VCMPGTUH)
|
||||
NAME(VCMPGTUW)
|
||||
NAME_(VCMPGTUW)
|
||||
NAME(VCTSXS)
|
||||
NAME(VCTUXS)
|
||||
NAME(VEXPTEFP)
|
||||
@ -950,7 +1100,132 @@ struct ppu_iname
|
||||
NAME(FCTID)
|
||||
NAME(FCTIDZ)
|
||||
NAME(FCFID)
|
||||
|
||||
NAME(SUBFCO)
|
||||
NAME(ADDCO)
|
||||
NAME(SUBFO)
|
||||
NAME(NEGO)
|
||||
NAME(SUBFEO)
|
||||
NAME(ADDEO)
|
||||
NAME(SUBFZEO)
|
||||
NAME(ADDZEO)
|
||||
NAME(SUBFMEO)
|
||||
NAME(MULLDO)
|
||||
NAME(ADDMEO)
|
||||
NAME(MULLWO)
|
||||
NAME(ADDO)
|
||||
NAME(DIVDUO)
|
||||
NAME(DIVWUO)
|
||||
NAME(DIVDO)
|
||||
NAME(DIVWO)
|
||||
|
||||
NAME_(SUBFCO)
|
||||
NAME_(ADDCO)
|
||||
NAME_(SUBFO)
|
||||
NAME_(NEGO)
|
||||
NAME_(SUBFEO)
|
||||
NAME_(ADDEO)
|
||||
NAME_(SUBFZEO)
|
||||
NAME_(ADDZEO)
|
||||
NAME_(SUBFMEO)
|
||||
NAME_(MULLDO)
|
||||
NAME_(ADDMEO)
|
||||
NAME_(MULLWO)
|
||||
NAME_(ADDO)
|
||||
NAME_(DIVDUO)
|
||||
NAME_(DIVWUO)
|
||||
NAME_(DIVDO)
|
||||
NAME_(DIVWO)
|
||||
|
||||
NAME_(RLWIMI)
|
||||
NAME_(RLWINM)
|
||||
NAME_(RLWNM)
|
||||
NAME_(RLDICL)
|
||||
NAME_(RLDICR)
|
||||
NAME_(RLDIC)
|
||||
NAME_(RLDIMI)
|
||||
NAME_(RLDCL)
|
||||
NAME_(RLDCR)
|
||||
NAME_(SUBFC)
|
||||
NAME_(MULHDU)
|
||||
NAME_(ADDC)
|
||||
NAME_(MULHWU)
|
||||
NAME_(SLW)
|
||||
NAME_(CNTLZW)
|
||||
NAME_(SLD)
|
||||
NAME_(AND)
|
||||
NAME_(SUBF)
|
||||
NAME_(CNTLZD)
|
||||
NAME_(ANDC)
|
||||
NAME_(MULHD)
|
||||
NAME_(MULHW)
|
||||
NAME_(NEG)
|
||||
NAME_(NOR)
|
||||
NAME_(SUBFE)
|
||||
NAME_(ADDE)
|
||||
NAME_(SUBFZE)
|
||||
NAME_(ADDZE)
|
||||
NAME_(MULLD)
|
||||
NAME_(SUBFME)
|
||||
NAME_(ADDME)
|
||||
NAME_(MULLW)
|
||||
NAME_(ADD)
|
||||
NAME_(EQV)
|
||||
NAME_(XOR)
|
||||
NAME_(ORC)
|
||||
NAME_(OR)
|
||||
NAME_(DIVDU)
|
||||
NAME_(DIVWU)
|
||||
NAME_(NAND)
|
||||
NAME_(DIVD)
|
||||
NAME_(DIVW)
|
||||
NAME_(SRW)
|
||||
NAME_(SRD)
|
||||
NAME_(SRAW)
|
||||
NAME_(SRAD)
|
||||
NAME_(SRAWI)
|
||||
NAME_(SRADI)
|
||||
NAME_(EXTSH)
|
||||
NAME_(EXTSB)
|
||||
NAME_(EXTSW)
|
||||
NAME_(FDIVS)
|
||||
NAME_(FSUBS)
|
||||
NAME_(FADDS)
|
||||
NAME_(FSQRTS)
|
||||
NAME_(FRES)
|
||||
NAME_(FMULS)
|
||||
NAME_(FMADDS)
|
||||
NAME_(FMSUBS)
|
||||
NAME_(FNMSUBS)
|
||||
NAME_(FNMADDS)
|
||||
NAME_(MTFSB1)
|
||||
NAME_(MTFSB0)
|
||||
NAME_(MTFSFI)
|
||||
NAME_(MFFS)
|
||||
NAME_(MTFSF)
|
||||
NAME_(FRSP)
|
||||
NAME_(FCTIW)
|
||||
NAME_(FCTIWZ)
|
||||
NAME_(FDIV)
|
||||
NAME_(FSUB)
|
||||
NAME_(FADD)
|
||||
NAME_(FSQRT)
|
||||
NAME_(FSEL)
|
||||
NAME_(FMUL)
|
||||
NAME_(FRSQRTE)
|
||||
NAME_(FMSUB)
|
||||
NAME_(FMADD)
|
||||
NAME_(FNMSUB)
|
||||
NAME_(FNMADD)
|
||||
NAME_(FNEG)
|
||||
NAME_(FMR)
|
||||
NAME_(FNABS)
|
||||
NAME_(FABS)
|
||||
NAME_(FCTID)
|
||||
NAME_(FCTIDZ)
|
||||
NAME_(FCFID)
|
||||
#undef NAME
|
||||
#undef NAME_
|
||||
};
|
||||
|
||||
// PPU Analyser Context
|
||||
|
@ -351,18 +351,31 @@ public:
|
||||
void VCFSX(ppu_opcode_t op);
|
||||
void VCFUX(ppu_opcode_t op);
|
||||
void VCMPBFP(ppu_opcode_t op);
|
||||
void VCMPBFP_(ppu_opcode_t op) { return VCMPBFP(op); }
|
||||
void VCMPEQFP(ppu_opcode_t op);
|
||||
void VCMPEQFP_(ppu_opcode_t op) { return VCMPEQFP(op); }
|
||||
void VCMPEQUB(ppu_opcode_t op);
|
||||
void VCMPEQUB_(ppu_opcode_t op) { return VCMPEQUB(op); }
|
||||
void VCMPEQUH(ppu_opcode_t op);
|
||||
void VCMPEQUH_(ppu_opcode_t op) { return VCMPEQUH(op); }
|
||||
void VCMPEQUW(ppu_opcode_t op);
|
||||
void VCMPEQUW_(ppu_opcode_t op) { return VCMPEQUW(op); }
|
||||
void VCMPGEFP(ppu_opcode_t op);
|
||||
void VCMPGEFP_(ppu_opcode_t op) { return VCMPGEFP(op); }
|
||||
void VCMPGTFP(ppu_opcode_t op);
|
||||
void VCMPGTFP_(ppu_opcode_t op) { return VCMPGTFP(op); }
|
||||
void VCMPGTSB(ppu_opcode_t op);
|
||||
void VCMPGTSB_(ppu_opcode_t op) { return VCMPGTSB(op); }
|
||||
void VCMPGTSH(ppu_opcode_t op);
|
||||
void VCMPGTSH_(ppu_opcode_t op) { return VCMPGTSH(op); }
|
||||
void VCMPGTSW(ppu_opcode_t op);
|
||||
void VCMPGTSW_(ppu_opcode_t op) { return VCMPGTSW(op); }
|
||||
void VCMPGTUB(ppu_opcode_t op);
|
||||
void VCMPGTUB_(ppu_opcode_t op) { return VCMPGTUB(op); }
|
||||
void VCMPGTUH(ppu_opcode_t op);
|
||||
void VCMPGTUH_(ppu_opcode_t op) { return VCMPGTUH(op); }
|
||||
void VCMPGTUW(ppu_opcode_t op);
|
||||
void VCMPGTUW_(ppu_opcode_t op) { return VCMPGTUW(op); }
|
||||
void VCTSXS(ppu_opcode_t op);
|
||||
void VCTUXS(ppu_opcode_t op);
|
||||
void VEXPTEFP(ppu_opcode_t op);
|
||||
@ -708,4 +721,128 @@ public:
|
||||
void FCFID(ppu_opcode_t op);
|
||||
|
||||
void UNK(ppu_opcode_t op);
|
||||
|
||||
void SUBFCO(ppu_opcode_t op) { return SUBFC(op); }
|
||||
void ADDCO(ppu_opcode_t op) { return ADDC(op); }
|
||||
void SUBFO(ppu_opcode_t op) { return SUBF(op); }
|
||||
void NEGO(ppu_opcode_t op) { return NEG(op); }
|
||||
void SUBFEO(ppu_opcode_t op) { return SUBFE(op); }
|
||||
void ADDEO(ppu_opcode_t op) { return ADDE(op); }
|
||||
void SUBFZEO(ppu_opcode_t op) { return SUBFZE(op); }
|
||||
void ADDZEO(ppu_opcode_t op) { return ADDZE(op); }
|
||||
void SUBFMEO(ppu_opcode_t op) { return SUBFME(op); }
|
||||
void MULLDO(ppu_opcode_t op) { return MULLD(op); }
|
||||
void ADDMEO(ppu_opcode_t op) { return ADDME(op); }
|
||||
void MULLWO(ppu_opcode_t op) { return MULLW(op); }
|
||||
void ADDO(ppu_opcode_t op) { return ADD(op); }
|
||||
void DIVDUO(ppu_opcode_t op) { return DIVDU(op); }
|
||||
void DIVWUO(ppu_opcode_t op) { return DIVWU(op); }
|
||||
void DIVDO(ppu_opcode_t op) { return DIVD(op); }
|
||||
void DIVWO(ppu_opcode_t op) { return DIVW(op); }
|
||||
|
||||
void SUBFCO_(ppu_opcode_t op) { return SUBFC(op); }
|
||||
void ADDCO_(ppu_opcode_t op) { return ADDC(op); }
|
||||
void SUBFO_(ppu_opcode_t op) { return SUBF(op); }
|
||||
void NEGO_(ppu_opcode_t op) { return NEG(op); }
|
||||
void SUBFEO_(ppu_opcode_t op) { return SUBFE(op); }
|
||||
void ADDEO_(ppu_opcode_t op) { return ADDE(op); }
|
||||
void SUBFZEO_(ppu_opcode_t op) { return SUBFZE(op); }
|
||||
void ADDZEO_(ppu_opcode_t op) { return ADDZE(op); }
|
||||
void SUBFMEO_(ppu_opcode_t op) { return SUBFME(op); }
|
||||
void MULLDO_(ppu_opcode_t op) { return MULLD(op); }
|
||||
void ADDMEO_(ppu_opcode_t op) { return ADDME(op); }
|
||||
void MULLWO_(ppu_opcode_t op) { return MULLW(op); }
|
||||
void ADDO_(ppu_opcode_t op) { return ADD(op); }
|
||||
void DIVDUO_(ppu_opcode_t op) { return DIVDU(op); }
|
||||
void DIVWUO_(ppu_opcode_t op) { return DIVWU(op); }
|
||||
void DIVDO_(ppu_opcode_t op) { return DIVD(op); }
|
||||
void DIVWO_(ppu_opcode_t op) { return DIVW(op); }
|
||||
|
||||
void RLWIMI_(ppu_opcode_t op) { return RLWIMI(op); }
|
||||
void RLWINM_(ppu_opcode_t op) { return RLWINM(op); }
|
||||
void RLWNM_(ppu_opcode_t op) { return RLWNM(op); }
|
||||
void RLDICL_(ppu_opcode_t op) { return RLDICL(op); }
|
||||
void RLDICR_(ppu_opcode_t op) { return RLDICR(op); }
|
||||
void RLDIC_(ppu_opcode_t op) { return RLDIC(op); }
|
||||
void RLDIMI_(ppu_opcode_t op) { return RLDIMI(op); }
|
||||
void RLDCL_(ppu_opcode_t op) { return RLDCL(op); }
|
||||
void RLDCR_(ppu_opcode_t op) { return RLDCR(op); }
|
||||
void SUBFC_(ppu_opcode_t op) { return SUBFC(op); }
|
||||
void MULHDU_(ppu_opcode_t op) { return MULHDU(op); }
|
||||
void ADDC_(ppu_opcode_t op) { return ADDC(op); }
|
||||
void MULHWU_(ppu_opcode_t op) { return MULHWU(op); }
|
||||
void SLW_(ppu_opcode_t op) { return SLW(op); }
|
||||
void CNTLZW_(ppu_opcode_t op) { return CNTLZW(op); }
|
||||
void SLD_(ppu_opcode_t op) { return SLD(op); }
|
||||
void AND_(ppu_opcode_t op) { return AND(op); }
|
||||
void SUBF_(ppu_opcode_t op) { return SUBF(op); }
|
||||
void CNTLZD_(ppu_opcode_t op) { return CNTLZD(op); }
|
||||
void ANDC_(ppu_opcode_t op) { return ANDC(op); }
|
||||
void MULHD_(ppu_opcode_t op) { return MULHD(op); }
|
||||
void MULHW_(ppu_opcode_t op) { return MULHW(op); }
|
||||
void NEG_(ppu_opcode_t op) { return NEG(op); }
|
||||
void NOR_(ppu_opcode_t op) { return NOR(op); }
|
||||
void SUBFE_(ppu_opcode_t op) { return SUBFE(op); }
|
||||
void ADDE_(ppu_opcode_t op) { return ADDE(op); }
|
||||
void SUBFZE_(ppu_opcode_t op) { return SUBFZE(op); }
|
||||
void ADDZE_(ppu_opcode_t op) { return ADDZE(op); }
|
||||
void MULLD_(ppu_opcode_t op) { return MULLD(op); }
|
||||
void SUBFME_(ppu_opcode_t op) { return SUBFME(op); }
|
||||
void ADDME_(ppu_opcode_t op) { return ADDME(op); }
|
||||
void MULLW_(ppu_opcode_t op) { return MULLW(op); }
|
||||
void ADD_(ppu_opcode_t op) { return ADD(op); }
|
||||
void EQV_(ppu_opcode_t op) { return EQV(op); }
|
||||
void XOR_(ppu_opcode_t op) { return XOR(op); }
|
||||
void ORC_(ppu_opcode_t op) { return ORC(op); }
|
||||
void OR_(ppu_opcode_t op) { return OR(op); }
|
||||
void DIVDU_(ppu_opcode_t op) { return DIVDU(op); }
|
||||
void DIVWU_(ppu_opcode_t op) { return DIVWU(op); }
|
||||
void NAND_(ppu_opcode_t op) { return NAND(op); }
|
||||
void DIVD_(ppu_opcode_t op) { return DIVD(op); }
|
||||
void DIVW_(ppu_opcode_t op) { return DIVW(op); }
|
||||
void SRW_(ppu_opcode_t op) { return SRW(op); }
|
||||
void SRD_(ppu_opcode_t op) { return SRD(op); }
|
||||
void SRAW_(ppu_opcode_t op) { return SRAW(op); }
|
||||
void SRAD_(ppu_opcode_t op) { return SRAD(op); }
|
||||
void SRAWI_(ppu_opcode_t op) { return SRAWI(op); }
|
||||
void SRADI_(ppu_opcode_t op) { return SRADI(op); }
|
||||
void EXTSH_(ppu_opcode_t op) { return EXTSH(op); }
|
||||
void EXTSB_(ppu_opcode_t op) { return EXTSB(op); }
|
||||
void EXTSW_(ppu_opcode_t op) { return EXTSW(op); }
|
||||
void FDIVS_(ppu_opcode_t op) { return FDIVS(op); }
|
||||
void FSUBS_(ppu_opcode_t op) { return FSUBS(op); }
|
||||
void FADDS_(ppu_opcode_t op) { return FADDS(op); }
|
||||
void FSQRTS_(ppu_opcode_t op) { return FSQRTS(op); }
|
||||
void FRES_(ppu_opcode_t op) { return FRES(op); }
|
||||
void FMULS_(ppu_opcode_t op) { return FMULS(op); }
|
||||
void FMADDS_(ppu_opcode_t op) { return FMADDS(op); }
|
||||
void FMSUBS_(ppu_opcode_t op) { return FMSUBS(op); }
|
||||
void FNMSUBS_(ppu_opcode_t op) { return FNMSUBS(op); }
|
||||
void FNMADDS_(ppu_opcode_t op) { return FNMADDS(op); }
|
||||
void MTFSB1_(ppu_opcode_t op) { return MTFSB1(op); }
|
||||
void MTFSB0_(ppu_opcode_t op) { return MTFSB0(op); }
|
||||
void MTFSFI_(ppu_opcode_t op) { return MTFSFI(op); }
|
||||
void MFFS_(ppu_opcode_t op) { return MFFS(op); }
|
||||
void MTFSF_(ppu_opcode_t op) { return MTFSF(op); }
|
||||
void FRSP_(ppu_opcode_t op) { return FRSP(op); }
|
||||
void FCTIW_(ppu_opcode_t op) { return FCTIW(op); }
|
||||
void FCTIWZ_(ppu_opcode_t op) { return FCTIWZ(op); }
|
||||
void FDIV_(ppu_opcode_t op) { return FDIV(op); }
|
||||
void FSUB_(ppu_opcode_t op) { return FSUB(op); }
|
||||
void FADD_(ppu_opcode_t op) { return FADD(op); }
|
||||
void FSQRT_(ppu_opcode_t op) { return FSQRT(op); }
|
||||
void FSEL_(ppu_opcode_t op) { return FSEL(op); }
|
||||
void FMUL_(ppu_opcode_t op) { return FMUL(op); }
|
||||
void FRSQRTE_(ppu_opcode_t op) { return FRSQRTE(op); }
|
||||
void FMSUB_(ppu_opcode_t op) { return FMSUB(op); }
|
||||
void FMADD_(ppu_opcode_t op) { return FMADD(op); }
|
||||
void FNMSUB_(ppu_opcode_t op) { return FNMSUB(op); }
|
||||
void FNMADD_(ppu_opcode_t op) { return FNMADD(op); }
|
||||
void FNEG_(ppu_opcode_t op) { return FNEG(op); }
|
||||
void FMR_(ppu_opcode_t op) { return FMR(op); }
|
||||
void FNABS_(ppu_opcode_t op) { return FNABS(op); }
|
||||
void FABS_(ppu_opcode_t op) { return FABS(op); }
|
||||
void FCTID_(ppu_opcode_t op) { return FCTID(op); }
|
||||
void FCTIDZ_(ppu_opcode_t op) { return FCTIDZ(op); }
|
||||
void FCFID_(ppu_opcode_t op) { return FCFID(op); }
|
||||
};
|
||||
|
@ -1889,47 +1889,56 @@ extern std::string ppu_get_variable_name(const std::string& _module, u32 vnid)
|
||||
return fmt::format("0x%08X", vnid);
|
||||
}
|
||||
|
||||
std::vector<ppu_function_t>& ppu_function_manager::access(bool ghc)
|
||||
std::vector<ppu_intrp_func_t>& ppu_function_manager::access(bool ghc)
|
||||
{
|
||||
static std::vector<ppu_function_t> list
|
||||
static std::vector<ppu_intrp_func_t> list
|
||||
{
|
||||
[](ppu_thread& ppu) -> bool
|
||||
[](ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*)
|
||||
{
|
||||
ppu.cia = vm::get_addr(this_op);
|
||||
ppu_log.error("Unregistered function called (LR=0x%x)", ppu.lr);
|
||||
ppu.gpr[3] = 0;
|
||||
ppu.cia = static_cast<u32>(ppu.lr) & ~3;
|
||||
return false;
|
||||
},
|
||||
[](ppu_thread& ppu) -> bool
|
||||
[](ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*)
|
||||
{
|
||||
ppu.state += cpu_flag::ret;
|
||||
ppu.cia += 4;
|
||||
return false;
|
||||
ppu.cia = vm::get_addr(this_op) + 4;
|
||||
},
|
||||
};
|
||||
|
||||
static std::vector<ppu_function_t> list_ghc
|
||||
#if defined(ARCH_X64)
|
||||
static std::vector<ppu_intrp_func_t> list_ghc
|
||||
{
|
||||
build_function_asm<ppu_function_t>("ppu_unregistered", [](asmjit::x86::Assembler& c, auto& args)
|
||||
build_function_asm<ppu_intrp_func_t>("ppu_unregistered", [](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
// Take second ghc arg
|
||||
c.mov(args[0], x86::rbp);
|
||||
c.mov(args[2].r32(), x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)));
|
||||
c.add(args[2], x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
|
||||
c.jmp(imm_ptr(list[0]));
|
||||
}),
|
||||
build_function_asm<ppu_function_t>("ppu_return", [](asmjit::x86::Assembler& c, auto& args)
|
||||
build_function_asm<ppu_intrp_func_t>("ppu_return", [](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
// Take second ghc arg
|
||||
c.mov(args[0], x86::rbp);
|
||||
c.mov(args[2].r32(), x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)));
|
||||
c.add(args[2], x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
|
||||
c.jmp(imm_ptr(list[1]));
|
||||
}),
|
||||
};
|
||||
#elif defined(ARCH_ARM64)
|
||||
static std::vector<ppu_intrp_func_t> list_ghc(list);
|
||||
#endif
|
||||
|
||||
return ghc ? list_ghc : list;
|
||||
}
|
||||
|
||||
u32 ppu_function_manager::add_function(ppu_function_t function)
|
||||
u32 ppu_function_manager::add_function(ppu_intrp_func_t function)
|
||||
{
|
||||
auto& list = access();
|
||||
auto& list2 = access(true);
|
||||
@ -1937,13 +1946,22 @@ u32 ppu_function_manager::add_function(ppu_function_t function)
|
||||
list.push_back(function);
|
||||
|
||||
// Generate trampoline
|
||||
list2.push_back(build_function_asm<ppu_function_t>("ppu_trampolinea", [&](asmjit::x86::Assembler& c, auto& args)
|
||||
#if defined(ARCH_X64)
|
||||
list2.push_back(build_function_asm<ppu_intrp_func_t>("ppu_trampolinea", [&](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
// Take second ghc arg
|
||||
c.mov(args[0], x86::rbp);
|
||||
c.mov(args[2].r32(), x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)));
|
||||
c.add(args[2], x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
|
||||
c.jmp(imm_ptr(function));
|
||||
}));
|
||||
#elif defined(ARCH_ARM64)
|
||||
list2.push_back(function);
|
||||
#else
|
||||
#error "Not implemented"
|
||||
#endif
|
||||
|
||||
return ::size32(list) - 1;
|
||||
}
|
||||
|
@ -1,23 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#include "PPUThread.h"
|
||||
#include "PPUInterpreter.h"
|
||||
|
||||
#include "util/v128.hpp"
|
||||
|
||||
using ppu_function_t = bool(*)(ppu_thread&);
|
||||
|
||||
// BIND_FUNC macro "converts" any appropriate HLE function to ppu_function_t, binding it to PPU thread context.
|
||||
#define BIND_FUNC(func, ...) (static_cast<ppu_function_t>([](ppu_thread& ppu) -> bool {\
|
||||
// BIND_FUNC macro "converts" any appropriate HLE function to ppu_intrp_func_t, binding it to PPU thread context.
|
||||
#define BIND_FUNC(func, ...) (static_cast<ppu_intrp_func_t>([](ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*) {\
|
||||
const auto old_f = ppu.current_function;\
|
||||
if (!old_f) ppu.last_function = #func;\
|
||||
ppu.current_function = #func;\
|
||||
ppu.cia = vm::get_addr(this_op); \
|
||||
std::memcpy(ppu.syscall_args, ppu.gpr + 3, sizeof(ppu.syscall_args)); \
|
||||
ppu_func_detail::do_call(ppu, func);\
|
||||
static_cast<void>(ppu.test_stopped());\
|
||||
ppu.current_function = old_f;\
|
||||
ppu.cia += 4;\
|
||||
__VA_ARGS__;\
|
||||
return false;\
|
||||
}))
|
||||
|
||||
struct ppu_va_args_t
|
||||
@ -257,9 +256,9 @@ class ppu_function_manager
|
||||
};
|
||||
|
||||
// Access global function list
|
||||
static std::vector<ppu_function_t>& access(bool ghc = false);
|
||||
static std::vector<ppu_intrp_func_t>& access(bool ghc = false);
|
||||
|
||||
static u32 add_function(ppu_function_t function);
|
||||
static u32 add_function(ppu_intrp_func_t function);
|
||||
|
||||
public:
|
||||
ppu_function_manager() = default;
|
||||
@ -270,7 +269,7 @@ public:
|
||||
|
||||
// Register function (shall only be called during global initialization)
|
||||
template<typename T, T Func>
|
||||
static inline u32 register_function(ppu_function_t func)
|
||||
static inline u32 register_function(ppu_intrp_func_t func)
|
||||
{
|
||||
return registered<T, Func>::index = add_function(func);
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -4,454 +4,41 @@
|
||||
|
||||
class ppu_thread;
|
||||
|
||||
using ppu_inter_func_t = bool(*)(ppu_thread& ppu, ppu_opcode_t op);
|
||||
using ppu_intrp_func_t = void(*)(ppu_thread& ppu_, ppu_opcode_t op, be_t<u32>* this_op, struct ppu_intrp_func* next_fn);
|
||||
|
||||
struct ppu_interpreter
|
||||
struct ppu_intrp_func
|
||||
{
|
||||
static bool MFVSCR(ppu_thread&, ppu_opcode_t);
|
||||
static bool MTVSCR(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDCUW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDUBM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDUHM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDUWM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VAND(ppu_thread&, ppu_opcode_t);
|
||||
static bool VANDC(ppu_thread&, ppu_opcode_t);
|
||||
static bool VAVGSB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VAVGSH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VAVGSW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VAVGUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VAVGUH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VAVGUW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCFSX(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCFUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPBFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPEQFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPEQUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPEQUH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPEQUW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPGEFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPGTFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPGTSB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPGTSH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPGTSW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPGTUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPGTUH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCMPGTUW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VEXPTEFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VLOGEFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMAXFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMAXSB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMAXSH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMAXSW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMAXUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMAXUH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMAXUW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMINFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMINSB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMINSH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMINSW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMINUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMINUH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMINUW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMLADDUHM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMRGHB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMRGHH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMRGHW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMRGLB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMRGLH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMRGLW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMSUMMBM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMSUMSHM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMSUMUBM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMSUMUHM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMULESB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMULESH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMULEUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMULEUH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMULOSB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMULOSH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMULOUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMULOUH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VNOR(ppu_thread&, ppu_opcode_t);
|
||||
static bool VOR(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPERM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKPX(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKUHUM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKUWUM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VREFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VRFIM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VRFIN(ppu_thread&, ppu_opcode_t);
|
||||
static bool VRFIP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VRFIZ(ppu_thread&, ppu_opcode_t);
|
||||
static bool VRLB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VRLH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VRLW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VRSQRTEFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSEL(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSL(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSLB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSLDOI(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSLH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSLO(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSLW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSPLTB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSPLTH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSPLTISB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSPLTISH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSPLTISW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSPLTW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSR(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSRAB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSRAH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSRAW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSRB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSRH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSRO(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSRW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBCUW(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBUBM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBUHM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBUWM(ppu_thread&, ppu_opcode_t);
|
||||
static bool VUPKHPX(ppu_thread&, ppu_opcode_t);
|
||||
static bool VUPKHSB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VUPKHSH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VUPKLPX(ppu_thread&, ppu_opcode_t);
|
||||
static bool VUPKLSB(ppu_thread&, ppu_opcode_t);
|
||||
static bool VUPKLSH(ppu_thread&, ppu_opcode_t);
|
||||
static bool VXOR(ppu_thread&, ppu_opcode_t);
|
||||
static bool TDI(ppu_thread&, ppu_opcode_t);
|
||||
static bool TWI(ppu_thread&, ppu_opcode_t);
|
||||
static bool MULLI(ppu_thread&, ppu_opcode_t);
|
||||
static bool SUBFIC(ppu_thread&, ppu_opcode_t);
|
||||
static bool CMPLI(ppu_thread&, ppu_opcode_t);
|
||||
static bool CMPI(ppu_thread&, ppu_opcode_t);
|
||||
static bool ADDIC(ppu_thread&, ppu_opcode_t);
|
||||
static bool ADDI(ppu_thread&, ppu_opcode_t);
|
||||
static bool ADDIS(ppu_thread&, ppu_opcode_t);
|
||||
static bool BC(ppu_thread&, ppu_opcode_t);
|
||||
static bool SC(ppu_thread&, ppu_opcode_t);
|
||||
static bool B(ppu_thread&, ppu_opcode_t);
|
||||
static bool MCRF(ppu_thread&, ppu_opcode_t);
|
||||
static bool BCLR(ppu_thread&, ppu_opcode_t);
|
||||
static bool CRNOR(ppu_thread&, ppu_opcode_t);
|
||||
static bool CRANDC(ppu_thread&, ppu_opcode_t);
|
||||
static bool ISYNC(ppu_thread&, ppu_opcode_t);
|
||||
static bool CRXOR(ppu_thread&, ppu_opcode_t);
|
||||
static bool CRNAND(ppu_thread&, ppu_opcode_t);
|
||||
static bool CRAND(ppu_thread&, ppu_opcode_t);
|
||||
static bool CREQV(ppu_thread&, ppu_opcode_t);
|
||||
static bool CRORC(ppu_thread&, ppu_opcode_t);
|
||||
static bool CROR(ppu_thread&, ppu_opcode_t);
|
||||
static bool BCCTR(ppu_thread&, ppu_opcode_t);
|
||||
static bool RLWIMI(ppu_thread&, ppu_opcode_t);
|
||||
static bool RLWINM(ppu_thread&, ppu_opcode_t);
|
||||
static bool RLWNM(ppu_thread&, ppu_opcode_t);
|
||||
static bool ORI(ppu_thread&, ppu_opcode_t);
|
||||
static bool ORIS(ppu_thread&, ppu_opcode_t);
|
||||
static bool XORI(ppu_thread&, ppu_opcode_t);
|
||||
static bool XORIS(ppu_thread&, ppu_opcode_t);
|
||||
static bool ANDI(ppu_thread&, ppu_opcode_t);
|
||||
static bool ANDIS(ppu_thread&, ppu_opcode_t);
|
||||
static bool RLDICL(ppu_thread&, ppu_opcode_t);
|
||||
static bool RLDICR(ppu_thread&, ppu_opcode_t);
|
||||
static bool RLDIC(ppu_thread&, ppu_opcode_t);
|
||||
static bool RLDIMI(ppu_thread&, ppu_opcode_t);
|
||||
static bool RLDCL(ppu_thread&, ppu_opcode_t);
|
||||
static bool RLDCR(ppu_thread&, ppu_opcode_t);
|
||||
static bool CMP(ppu_thread&, ppu_opcode_t);
|
||||
static bool TW(ppu_thread&, ppu_opcode_t);
|
||||
static bool LVSL(ppu_thread&, ppu_opcode_t);
|
||||
static bool LVEBX(ppu_thread&, ppu_opcode_t);
|
||||
static bool SUBFC(ppu_thread&, ppu_opcode_t);
|
||||
static bool MULHDU(ppu_thread&, ppu_opcode_t);
|
||||
static bool ADDC(ppu_thread&, ppu_opcode_t);
|
||||
static bool MULHWU(ppu_thread&, ppu_opcode_t);
|
||||
static bool MFOCRF(ppu_thread&, ppu_opcode_t);
|
||||
static bool LWARX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LDX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LWZX(ppu_thread&, ppu_opcode_t);
|
||||
static bool SLW(ppu_thread&, ppu_opcode_t);
|
||||
static bool CNTLZW(ppu_thread&, ppu_opcode_t);
|
||||
static bool SLD(ppu_thread&, ppu_opcode_t);
|
||||
static bool AND(ppu_thread&, ppu_opcode_t);
|
||||
static bool CMPL(ppu_thread&, ppu_opcode_t);
|
||||
static bool LVSR(ppu_thread&, ppu_opcode_t);
|
||||
static bool LVEHX(ppu_thread&, ppu_opcode_t);
|
||||
static bool SUBF(ppu_thread&, ppu_opcode_t);
|
||||
static bool LDUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool DCBST(ppu_thread&, ppu_opcode_t);
|
||||
static bool LWZUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool CNTLZD(ppu_thread&, ppu_opcode_t);
|
||||
static bool ANDC(ppu_thread&, ppu_opcode_t);
|
||||
static bool TD(ppu_thread&, ppu_opcode_t);
|
||||
static bool LVEWX(ppu_thread&, ppu_opcode_t);
|
||||
static bool MULHD(ppu_thread&, ppu_opcode_t);
|
||||
static bool MULHW(ppu_thread&, ppu_opcode_t);
|
||||
static bool LDARX(ppu_thread&, ppu_opcode_t);
|
||||
static bool DCBF(ppu_thread&, ppu_opcode_t);
|
||||
static bool LBZX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LVX(ppu_thread&, ppu_opcode_t);
|
||||
static bool NEG(ppu_thread&, ppu_opcode_t);
|
||||
static bool LBZUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool NOR(ppu_thread&, ppu_opcode_t);
|
||||
static bool STVEBX(ppu_thread&, ppu_opcode_t);
|
||||
static bool SUBFE(ppu_thread&, ppu_opcode_t);
|
||||
static bool ADDE(ppu_thread&, ppu_opcode_t);
|
||||
static bool MTOCRF(ppu_thread&, ppu_opcode_t);
|
||||
static bool STDX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STWCX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STWX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STVEHX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STDUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STWUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STVEWX(ppu_thread&, ppu_opcode_t);
|
||||
static bool SUBFZE(ppu_thread&, ppu_opcode_t);
|
||||
static bool ADDZE(ppu_thread&, ppu_opcode_t);
|
||||
static bool STDCX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STBX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STVX(ppu_thread&, ppu_opcode_t);
|
||||
static bool MULLD(ppu_thread&, ppu_opcode_t);
|
||||
static bool SUBFME(ppu_thread&, ppu_opcode_t);
|
||||
static bool ADDME(ppu_thread&, ppu_opcode_t);
|
||||
static bool MULLW(ppu_thread&, ppu_opcode_t);
|
||||
static bool DCBTST(ppu_thread&, ppu_opcode_t);
|
||||
static bool STBUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool ADD(ppu_thread&, ppu_opcode_t);
|
||||
static bool DCBT(ppu_thread&, ppu_opcode_t);
|
||||
static bool LHZX(ppu_thread&, ppu_opcode_t);
|
||||
static bool EQV(ppu_thread&, ppu_opcode_t);
|
||||
static bool ECIWX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LHZUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool XOR(ppu_thread&, ppu_opcode_t);
|
||||
static bool MFSPR(ppu_thread&, ppu_opcode_t);
|
||||
static bool LWAX(ppu_thread&, ppu_opcode_t);
|
||||
static bool DST(ppu_thread&, ppu_opcode_t);
|
||||
static bool LHAX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LVXL(ppu_thread&, ppu_opcode_t);
|
||||
static bool MFTB(ppu_thread&, ppu_opcode_t);
|
||||
static bool LWAUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool DSTST(ppu_thread&, ppu_opcode_t);
|
||||
static bool LHAUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STHX(ppu_thread&, ppu_opcode_t);
|
||||
static bool ORC(ppu_thread&, ppu_opcode_t);
|
||||
static bool ECOWX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STHUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool OR(ppu_thread&, ppu_opcode_t);
|
||||
static bool DIVDU(ppu_thread&, ppu_opcode_t);
|
||||
static bool DIVWU(ppu_thread&, ppu_opcode_t);
|
||||
static bool MTSPR(ppu_thread&, ppu_opcode_t);
|
||||
static bool DCBI(ppu_thread&, ppu_opcode_t);
|
||||
static bool NAND(ppu_thread&, ppu_opcode_t);
|
||||
static bool STVXL(ppu_thread&, ppu_opcode_t);
|
||||
static bool DIVD(ppu_thread&, ppu_opcode_t);
|
||||
static bool DIVW(ppu_thread&, ppu_opcode_t);
|
||||
static bool LDBRX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LSWX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LWBRX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LFSX(ppu_thread&, ppu_opcode_t);
|
||||
static bool SRW(ppu_thread&, ppu_opcode_t);
|
||||
static bool SRD(ppu_thread&, ppu_opcode_t);
|
||||
static bool LSWI(ppu_thread&, ppu_opcode_t);
|
||||
static bool LFSUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool SYNC(ppu_thread&, ppu_opcode_t);
|
||||
static bool LFDX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LFDUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STDBRX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STSWX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STWBRX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STFSX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STFSUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STSWI(ppu_thread&, ppu_opcode_t);
|
||||
static bool STFDX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STFDUX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LHBRX(ppu_thread&, ppu_opcode_t);
|
||||
static bool SRAW(ppu_thread&, ppu_opcode_t);
|
||||
static bool SRAD(ppu_thread&, ppu_opcode_t);
|
||||
static bool DSS(ppu_thread&, ppu_opcode_t);
|
||||
static bool SRAWI(ppu_thread&, ppu_opcode_t);
|
||||
static bool SRADI(ppu_thread&, ppu_opcode_t);
|
||||
static bool EIEIO(ppu_thread&, ppu_opcode_t);
|
||||
static bool STHBRX(ppu_thread&, ppu_opcode_t);
|
||||
static bool EXTSH(ppu_thread&, ppu_opcode_t);
|
||||
static bool EXTSB(ppu_thread&, ppu_opcode_t);
|
||||
static bool STFIWX(ppu_thread&, ppu_opcode_t);
|
||||
static bool EXTSW(ppu_thread&, ppu_opcode_t);
|
||||
static bool ICBI(ppu_thread&, ppu_opcode_t);
|
||||
static bool DCBZ(ppu_thread&, ppu_opcode_t);
|
||||
static bool LWZ(ppu_thread&, ppu_opcode_t);
|
||||
static bool LWZU(ppu_thread&, ppu_opcode_t);
|
||||
static bool LBZ(ppu_thread&, ppu_opcode_t);
|
||||
static bool LBZU(ppu_thread&, ppu_opcode_t);
|
||||
static bool STW(ppu_thread&, ppu_opcode_t);
|
||||
static bool STWU(ppu_thread&, ppu_opcode_t);
|
||||
static bool STB(ppu_thread&, ppu_opcode_t);
|
||||
static bool STBU(ppu_thread&, ppu_opcode_t);
|
||||
static bool LHZ(ppu_thread&, ppu_opcode_t);
|
||||
static bool LHZU(ppu_thread&, ppu_opcode_t);
|
||||
static bool LHA(ppu_thread&, ppu_opcode_t);
|
||||
static bool LHAU(ppu_thread&, ppu_opcode_t);
|
||||
static bool STH(ppu_thread&, ppu_opcode_t);
|
||||
static bool STHU(ppu_thread&, ppu_opcode_t);
|
||||
static bool LMW(ppu_thread&, ppu_opcode_t);
|
||||
static bool STMW(ppu_thread&, ppu_opcode_t);
|
||||
static bool LFS(ppu_thread&, ppu_opcode_t);
|
||||
static bool LFSU(ppu_thread&, ppu_opcode_t);
|
||||
static bool LFD(ppu_thread&, ppu_opcode_t);
|
||||
static bool LFDU(ppu_thread&, ppu_opcode_t);
|
||||
static bool STFS(ppu_thread&, ppu_opcode_t);
|
||||
static bool STFSU(ppu_thread&, ppu_opcode_t);
|
||||
static bool STFD(ppu_thread&, ppu_opcode_t);
|
||||
static bool STFDU(ppu_thread&, ppu_opcode_t);
|
||||
static bool LD(ppu_thread&, ppu_opcode_t);
|
||||
static bool LDU(ppu_thread&, ppu_opcode_t);
|
||||
static bool LWA(ppu_thread&, ppu_opcode_t);
|
||||
static bool STD(ppu_thread&, ppu_opcode_t);
|
||||
static bool STDU(ppu_thread&, ppu_opcode_t);
|
||||
static bool MTFSB1(ppu_thread&, ppu_opcode_t);
|
||||
static bool MCRFS(ppu_thread&, ppu_opcode_t);
|
||||
static bool MTFSB0(ppu_thread&, ppu_opcode_t);
|
||||
static bool MTFSFI(ppu_thread&, ppu_opcode_t);
|
||||
static bool MFFS(ppu_thread&, ppu_opcode_t);
|
||||
static bool MTFSF(ppu_thread&, ppu_opcode_t);
|
||||
static bool FCMPU(ppu_thread&, ppu_opcode_t);
|
||||
static bool FCTIW(ppu_thread&, ppu_opcode_t);
|
||||
static bool FCTIWZ(ppu_thread&, ppu_opcode_t);
|
||||
static bool FSEL(ppu_thread&, ppu_opcode_t);
|
||||
static bool FCMPO(ppu_thread&, ppu_opcode_t);
|
||||
static bool FNEG(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMR(ppu_thread&, ppu_opcode_t);
|
||||
static bool FNABS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FABS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FCTID(ppu_thread&, ppu_opcode_t);
|
||||
static bool FCTIDZ(ppu_thread&, ppu_opcode_t);
|
||||
static bool FCFID(ppu_thread&, ppu_opcode_t);
|
||||
|
||||
static bool LVLX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LVLXL(ppu_thread&, ppu_opcode_t);
|
||||
static bool LVRX(ppu_thread&, ppu_opcode_t);
|
||||
static bool LVRXL(ppu_thread&, ppu_opcode_t);
|
||||
static bool STVLX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STVLXL(ppu_thread&, ppu_opcode_t);
|
||||
static bool STVRX(ppu_thread&, ppu_opcode_t);
|
||||
static bool STVRXL(ppu_thread&, ppu_opcode_t);
|
||||
|
||||
static bool UNK(ppu_thread&, ppu_opcode_t);
|
||||
ppu_intrp_func_t fn;
|
||||
};
|
||||
|
||||
struct ppu_interpreter_precise final : ppu_interpreter
|
||||
template <typename IT>
|
||||
struct ppu_interpreter_t;
|
||||
|
||||
namespace asmjit
|
||||
{
|
||||
static bool VPKSHSS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKSHUS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKSWSS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKSWUS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKUHUS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKUWUS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDSBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDSHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDSWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDUBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDUHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDUWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBSBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBSHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBSWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBUBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBUHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBUWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMHADDSHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMHRADDSHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMSUMSHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMSUMUHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUMSWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUM2SWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUM4SBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUM4SHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUM4UBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCTSXS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCTUXS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMADDFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VNMSUBFP(ppu_thread&, ppu_opcode_t);
|
||||
struct ppu_builder;
|
||||
}
|
||||
|
||||
static bool FDIVS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FSUBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FADDS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FSQRTS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FRES(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMULS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMADDS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMSUBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FNMSUBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FNMADDS(ppu_thread&, ppu_opcode_t);
|
||||
struct ppu_interpreter_rt_base
|
||||
{
|
||||
protected:
|
||||
std::unique_ptr<ppu_interpreter_t<ppu_intrp_func_t>> ptrs;
|
||||
|
||||
static bool FRSP(ppu_thread&, ppu_opcode_t);
|
||||
static bool FDIV(ppu_thread&, ppu_opcode_t);
|
||||
static bool FSUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool FADD(ppu_thread&, ppu_opcode_t);
|
||||
static bool FSQRT(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMUL(ppu_thread&, ppu_opcode_t);
|
||||
static bool FRSQRTE(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMSUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMADD(ppu_thread&, ppu_opcode_t);
|
||||
static bool FNMSUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool FNMADD(ppu_thread&, ppu_opcode_t);
|
||||
ppu_interpreter_rt_base() noexcept;
|
||||
|
||||
ppu_interpreter_rt_base(const ppu_interpreter_rt_base&) = delete;
|
||||
|
||||
ppu_interpreter_rt_base& operator=(const ppu_interpreter_rt_base&) = delete;
|
||||
|
||||
virtual ~ppu_interpreter_rt_base();
|
||||
};
|
||||
|
||||
struct ppu_interpreter_fast final : ppu_interpreter
|
||||
struct ppu_interpreter_rt : ppu_interpreter_rt_base
|
||||
{
|
||||
static bool VPKSHSS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKSHUS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKSWSS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKSWUS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKUHUS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VPKUWUS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDSBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDSHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDSWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDUBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDUHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VADDUWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBSBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBSHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBSWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBUBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBUHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUBUWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMHADDSHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMHRADDSHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMSUMSHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMSUMUHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUMSWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUM2SWS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUM4SBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUM4SHS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VSUM4UBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCTSXS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VCTUXS(ppu_thread&, ppu_opcode_t);
|
||||
static bool VMADDFP(ppu_thread&, ppu_opcode_t);
|
||||
static bool VNMSUBFP(ppu_thread&, ppu_opcode_t);
|
||||
ppu_interpreter_rt() noexcept;
|
||||
|
||||
static bool FDIVS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FSUBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FADDS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FSQRTS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FRES(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMULS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMADDS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMSUBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FNMSUBS(ppu_thread&, ppu_opcode_t);
|
||||
static bool FNMADDS(ppu_thread&, ppu_opcode_t);
|
||||
ppu_intrp_func_t decode(u32 op) const noexcept;
|
||||
|
||||
static bool FRSP(ppu_thread&, ppu_opcode_t);
|
||||
static bool FDIV(ppu_thread&, ppu_opcode_t);
|
||||
static bool FSUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool FADD(ppu_thread&, ppu_opcode_t);
|
||||
static bool FSQRT(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMUL(ppu_thread&, ppu_opcode_t);
|
||||
static bool FRSQRTE(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMSUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool FMADD(ppu_thread&, ppu_opcode_t);
|
||||
static bool FNMSUB(ppu_thread&, ppu_opcode_t);
|
||||
static bool FNMADD(ppu_thread&, ppu_opcode_t);
|
||||
private:
|
||||
ppu_decoder<ppu_interpreter_t<ppu_intrp_func_t>, ppu_intrp_func_t> table;
|
||||
};
|
||||
|
@ -30,7 +30,7 @@ LOG_CHANNEL(ppu_loader);
|
||||
extern std::string ppu_get_function_name(const std::string& _module, u32 fnid);
|
||||
extern std::string ppu_get_variable_name(const std::string& _module, u32 vnid);
|
||||
extern void ppu_register_range(u32 addr, u32 size);
|
||||
extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr);
|
||||
extern void ppu_register_function_at(u32 addr, u32 size, ppu_intrp_func_t ptr);
|
||||
|
||||
extern void sys_initialize_tls(ppu_thread&, u64, u32, u32, u32);
|
||||
|
||||
@ -275,7 +275,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link)
|
||||
};
|
||||
|
||||
// Initialize double-purpose fake OPD array for HLE functions
|
||||
const auto& hle_funcs = ppu_function_manager::get(g_cfg.core.ppu_decoder == ppu_decoder_type::llvm);
|
||||
const auto& hle_funcs = ppu_function_manager::get(g_cfg.core.ppu_decoder != ppu_decoder_type::_static);
|
||||
|
||||
u32& hle_funcs_addr = g_fxo->get<ppu_function_manager>().addr;
|
||||
|
||||
|
@ -123,7 +123,7 @@ public:
|
||||
static void initialize_modules();
|
||||
|
||||
template <auto* Func>
|
||||
static auto& register_static_function(const char* _module, const char* name, ppu_function_t func, u32 fnid)
|
||||
static auto& register_static_function(const char* _module, const char* name, ppu_intrp_func_t func, u32 fnid)
|
||||
{
|
||||
auto& info = access_static_function(_module, fnid);
|
||||
|
||||
|
@ -84,19 +84,22 @@ class ppu_decoder
|
||||
struct instruction_info
|
||||
{
|
||||
u32 value;
|
||||
T pointer;
|
||||
T ptr0;
|
||||
T ptr_rc;
|
||||
u32 magn; // Non-zero for "columns" (effectively, number of most significant bits "eaten")
|
||||
|
||||
constexpr instruction_info(u32 v, T p, u32 m = 0)
|
||||
constexpr instruction_info(u32 v, T p, T p_rc, u32 m = 0)
|
||||
: value(v)
|
||||
, pointer(p)
|
||||
, ptr0(p)
|
||||
, ptr_rc(p_rc)
|
||||
, magn(m)
|
||||
{
|
||||
}
|
||||
|
||||
constexpr instruction_info(u32 v, const T* p, u32 m = 0)
|
||||
constexpr instruction_info(u32 v, const T* p, const T* p_rc, u32 m = 0)
|
||||
: value(v)
|
||||
, pointer(*p)
|
||||
, ptr0(*p)
|
||||
, ptr_rc(*p_rc)
|
||||
, magn(m)
|
||||
{
|
||||
}
|
||||
@ -113,7 +116,8 @@ class ppu_decoder
|
||||
{
|
||||
for (u32 j = 0; j < 1u << sh; j++)
|
||||
{
|
||||
m_table.at((((((i << (count - v.magn)) | v.value) << sh) | j) << 6) | main_op) = v.pointer;
|
||||
const u32 k = (((i << (count - v.magn)) | v.value) << sh) | j;
|
||||
m_table.at((k << 6) | main_op) = k & 1 ? v.ptr_rc : v.ptr0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -125,454 +129,498 @@ class ppu_decoder
|
||||
{
|
||||
for (u32 i = 0; i < 1u << 11; i++)
|
||||
{
|
||||
m_table.at(i << 6 | v.value) = v.pointer;
|
||||
m_table.at(i << 6 | v.value) = i & 1 ? v.ptr_rc : v.ptr0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
ppu_decoder() noexcept
|
||||
// Helper
|
||||
static const D& _first(const D& arg)
|
||||
{
|
||||
return arg;
|
||||
}
|
||||
|
||||
public:
|
||||
template <typename... Args>
|
||||
ppu_decoder(const Args&... args) noexcept
|
||||
{
|
||||
// If an object is passed to the constructor, assign values from that object
|
||||
#define GET_(name) [&]{ if constexpr (sizeof...(Args) > 0) return _first(args...).name; else return &D::name; }()
|
||||
#define GET(name) GET_(name), GET_(name)
|
||||
#define GETRC(name) GET_(name), GET_(name##_)
|
||||
|
||||
static_assert(sizeof...(Args) <= 1);
|
||||
|
||||
for (auto& x : m_table)
|
||||
{
|
||||
x = &D::UNK;
|
||||
x = GET(UNK);
|
||||
}
|
||||
|
||||
// Main opcodes (field 0..5)
|
||||
fill_table(0x00, 6, -1,
|
||||
{
|
||||
{ 0x02, &D::TDI },
|
||||
{ 0x03, &D::TWI },
|
||||
{ 0x07, &D::MULLI },
|
||||
{ 0x08, &D::SUBFIC },
|
||||
{ 0x0a, &D::CMPLI },
|
||||
{ 0x0b, &D::CMPI },
|
||||
{ 0x0c, &D::ADDIC },
|
||||
{ 0x0d, &D::ADDIC },
|
||||
{ 0x0e, &D::ADDI },
|
||||
{ 0x0f, &D::ADDIS },
|
||||
{ 0x10, &D::BC },
|
||||
{ 0x11, &D::SC },
|
||||
{ 0x12, &D::B },
|
||||
{ 0x14, &D::RLWIMI },
|
||||
{ 0x15, &D::RLWINM },
|
||||
{ 0x17, &D::RLWNM },
|
||||
{ 0x18, &D::ORI },
|
||||
{ 0x19, &D::ORIS },
|
||||
{ 0x1a, &D::XORI },
|
||||
{ 0x1b, &D::XORIS },
|
||||
{ 0x1c, &D::ANDI },
|
||||
{ 0x1d, &D::ANDIS },
|
||||
{ 0x20, &D::LWZ },
|
||||
{ 0x21, &D::LWZU },
|
||||
{ 0x22, &D::LBZ },
|
||||
{ 0x23, &D::LBZU },
|
||||
{ 0x24, &D::STW },
|
||||
{ 0x25, &D::STWU },
|
||||
{ 0x26, &D::STB },
|
||||
{ 0x27, &D::STBU },
|
||||
{ 0x28, &D::LHZ },
|
||||
{ 0x29, &D::LHZU },
|
||||
{ 0x2a, &D::LHA },
|
||||
{ 0x2b, &D::LHAU },
|
||||
{ 0x2c, &D::STH },
|
||||
{ 0x2d, &D::STHU },
|
||||
{ 0x2e, &D::LMW },
|
||||
{ 0x2f, &D::STMW },
|
||||
{ 0x30, &D::LFS },
|
||||
{ 0x31, &D::LFSU },
|
||||
{ 0x32, &D::LFD },
|
||||
{ 0x33, &D::LFDU },
|
||||
{ 0x34, &D::STFS },
|
||||
{ 0x35, &D::STFSU },
|
||||
{ 0x36, &D::STFD },
|
||||
{ 0x37, &D::STFDU },
|
||||
{ 0x02, GET(TDI) },
|
||||
{ 0x03, GET(TWI) },
|
||||
{ 0x07, GET(MULLI) },
|
||||
{ 0x08, GET(SUBFIC) },
|
||||
{ 0x0a, GET(CMPLI) },
|
||||
{ 0x0b, GET(CMPI) },
|
||||
{ 0x0c, GET(ADDIC) },
|
||||
{ 0x0d, GET(ADDIC) },
|
||||
{ 0x0e, GET(ADDI) },
|
||||
{ 0x0f, GET(ADDIS) },
|
||||
{ 0x10, GET(BC) },
|
||||
{ 0x11, GET(SC) },
|
||||
{ 0x12, GET(B) },
|
||||
{ 0x14, GETRC(RLWIMI) },
|
||||
{ 0x15, GETRC(RLWINM) },
|
||||
{ 0x17, GETRC(RLWNM) },
|
||||
{ 0x18, GET(ORI) },
|
||||
{ 0x19, GET(ORIS) },
|
||||
{ 0x1a, GET(XORI) },
|
||||
{ 0x1b, GET(XORIS) },
|
||||
{ 0x1c, GET(ANDI) },
|
||||
{ 0x1d, GET(ANDIS) },
|
||||
{ 0x20, GET(LWZ) },
|
||||
{ 0x21, GET(LWZU) },
|
||||
{ 0x22, GET(LBZ) },
|
||||
{ 0x23, GET(LBZU) },
|
||||
{ 0x24, GET(STW) },
|
||||
{ 0x25, GET(STWU) },
|
||||
{ 0x26, GET(STB) },
|
||||
{ 0x27, GET(STBU) },
|
||||
{ 0x28, GET(LHZ) },
|
||||
{ 0x29, GET(LHZU) },
|
||||
{ 0x2a, GET(LHA) },
|
||||
{ 0x2b, GET(LHAU) },
|
||||
{ 0x2c, GET(STH) },
|
||||
{ 0x2d, GET(STHU) },
|
||||
{ 0x2e, GET(LMW) },
|
||||
{ 0x2f, GET(STMW) },
|
||||
{ 0x30, GET(LFS) },
|
||||
{ 0x31, GET(LFSU) },
|
||||
{ 0x32, GET(LFD) },
|
||||
{ 0x33, GET(LFDU) },
|
||||
{ 0x34, GET(STFS) },
|
||||
{ 0x35, GET(STFSU) },
|
||||
{ 0x36, GET(STFD) },
|
||||
{ 0x37, GET(STFDU) },
|
||||
});
|
||||
|
||||
// Group 0x04 opcodes (field 21..31)
|
||||
fill_table(0x04, 11, 0,
|
||||
{
|
||||
{ 0x0, &D::VADDUBM },
|
||||
{ 0x2, &D::VMAXUB },
|
||||
{ 0x4, &D::VRLB },
|
||||
{ 0x6, &D::VCMPEQUB, 1 },
|
||||
{ 0x8, &D::VMULOUB },
|
||||
{ 0xa, &D::VADDFP },
|
||||
{ 0xc, &D::VMRGHB },
|
||||
{ 0xe, &D::VPKUHUM },
|
||||
{ 0x0, GET(VADDUBM) },
|
||||
{ 0x2, GET(VMAXUB) },
|
||||
{ 0x4, GET(VRLB) },
|
||||
{ 0x006, GET(VCMPEQUB) },
|
||||
{ 0x406, GET(VCMPEQUB_) },
|
||||
{ 0x8, GET(VMULOUB) },
|
||||
{ 0xa, GET(VADDFP) },
|
||||
{ 0xc, GET(VMRGHB) },
|
||||
{ 0xe, GET(VPKUHUM) },
|
||||
|
||||
{ 0x20, &D::VMHADDSHS, 5 },
|
||||
{ 0x21, &D::VMHRADDSHS, 5 },
|
||||
{ 0x22, &D::VMLADDUHM, 5 },
|
||||
{ 0x24, &D::VMSUMUBM, 5 },
|
||||
{ 0x25, &D::VMSUMMBM, 5 },
|
||||
{ 0x26, &D::VMSUMUHM, 5 },
|
||||
{ 0x27, &D::VMSUMUHS, 5 },
|
||||
{ 0x28, &D::VMSUMSHM, 5 },
|
||||
{ 0x29, &D::VMSUMSHS, 5 },
|
||||
{ 0x2a, &D::VSEL, 5 },
|
||||
{ 0x2b, &D::VPERM, 5 },
|
||||
{ 0x2c, &D::VSLDOI, 5 },
|
||||
{ 0x2e, &D::VMADDFP, 5 },
|
||||
{ 0x2f, &D::VNMSUBFP, 5 },
|
||||
{ 0x20, GET(VMHADDSHS), 5 },
|
||||
{ 0x21, GET(VMHRADDSHS), 5 },
|
||||
{ 0x22, GET(VMLADDUHM), 5 },
|
||||
{ 0x24, GET(VMSUMUBM), 5 },
|
||||
{ 0x25, GET(VMSUMMBM), 5 },
|
||||
{ 0x26, GET(VMSUMUHM), 5 },
|
||||
{ 0x27, GET(VMSUMUHS), 5 },
|
||||
{ 0x28, GET(VMSUMSHM), 5 },
|
||||
{ 0x29, GET(VMSUMSHS), 5 },
|
||||
{ 0x2a, GET(VSEL), 5 },
|
||||
{ 0x2b, GET(VPERM), 5 },
|
||||
{ 0x2c, GET(VSLDOI), 5 },
|
||||
{ 0x2e, GET(VMADDFP), 5 },
|
||||
{ 0x2f, GET(VNMSUBFP), 5 },
|
||||
|
||||
{ 0x40, &D::VADDUHM },
|
||||
{ 0x42, &D::VMAXUH },
|
||||
{ 0x44, &D::VRLH },
|
||||
{ 0x46, &D::VCMPEQUH, 1 },
|
||||
{ 0x48, &D::VMULOUH },
|
||||
{ 0x4a, &D::VSUBFP },
|
||||
{ 0x4c, &D::VMRGHH },
|
||||
{ 0x4e, &D::VPKUWUM },
|
||||
{ 0x80, &D::VADDUWM },
|
||||
{ 0x82, &D::VMAXUW },
|
||||
{ 0x84, &D::VRLW },
|
||||
{ 0x86, &D::VCMPEQUW, 1 },
|
||||
{ 0x8c, &D::VMRGHW },
|
||||
{ 0x8e, &D::VPKUHUS },
|
||||
{ 0xc6, &D::VCMPEQFP, 1 },
|
||||
{ 0xce, &D::VPKUWUS },
|
||||
{ 0x40, GET(VADDUHM) },
|
||||
{ 0x42, GET(VMAXUH) },
|
||||
{ 0x44, GET(VRLH) },
|
||||
{ 0x046, GET(VCMPEQUH) },
|
||||
{ 0x446, GET(VCMPEQUH_) },
|
||||
{ 0x48, GET(VMULOUH) },
|
||||
{ 0x4a, GET(VSUBFP) },
|
||||
{ 0x4c, GET(VMRGHH) },
|
||||
{ 0x4e, GET(VPKUWUM) },
|
||||
{ 0x80, GET(VADDUWM) },
|
||||
{ 0x82, GET(VMAXUW) },
|
||||
{ 0x84, GET(VRLW) },
|
||||
{ 0x086, GET(VCMPEQUW) },
|
||||
{ 0x486, GET(VCMPEQUW_) },
|
||||
{ 0x8c, GET(VMRGHW) },
|
||||
{ 0x8e, GET(VPKUHUS) },
|
||||
{ 0x0c6, GET(VCMPEQFP) },
|
||||
{ 0x4c6, GET(VCMPEQFP_) },
|
||||
{ 0xce, GET(VPKUWUS) },
|
||||
|
||||
{ 0x102, &D::VMAXSB },
|
||||
{ 0x104, &D::VSLB },
|
||||
{ 0x108, &D::VMULOSB },
|
||||
{ 0x10a, &D::VREFP },
|
||||
{ 0x10c, &D::VMRGLB },
|
||||
{ 0x10e, &D::VPKSHUS },
|
||||
{ 0x142, &D::VMAXSH },
|
||||
{ 0x144, &D::VSLH },
|
||||
{ 0x148, &D::VMULOSH },
|
||||
{ 0x14a, &D::VRSQRTEFP },
|
||||
{ 0x14c, &D::VMRGLH },
|
||||
{ 0x14e, &D::VPKSWUS },
|
||||
{ 0x180, &D::VADDCUW },
|
||||
{ 0x182, &D::VMAXSW },
|
||||
{ 0x184, &D::VSLW },
|
||||
{ 0x18a, &D::VEXPTEFP },
|
||||
{ 0x18c, &D::VMRGLW },
|
||||
{ 0x18e, &D::VPKSHSS },
|
||||
{ 0x1c4, &D::VSL },
|
||||
{ 0x1c6, &D::VCMPGEFP, 1 },
|
||||
{ 0x1ca, &D::VLOGEFP },
|
||||
{ 0x1ce, &D::VPKSWSS },
|
||||
{ 0x200, &D::VADDUBS },
|
||||
{ 0x202, &D::VMINUB },
|
||||
{ 0x204, &D::VSRB },
|
||||
{ 0x206, &D::VCMPGTUB, 1 },
|
||||
{ 0x208, &D::VMULEUB },
|
||||
{ 0x20a, &D::VRFIN },
|
||||
{ 0x20c, &D::VSPLTB },
|
||||
{ 0x20e, &D::VUPKHSB },
|
||||
{ 0x240, &D::VADDUHS },
|
||||
{ 0x242, &D::VMINUH },
|
||||
{ 0x244, &D::VSRH },
|
||||
{ 0x246, &D::VCMPGTUH, 1 },
|
||||
{ 0x248, &D::VMULEUH },
|
||||
{ 0x24a, &D::VRFIZ },
|
||||
{ 0x24c, &D::VSPLTH },
|
||||
{ 0x24e, &D::VUPKHSH },
|
||||
{ 0x280, &D::VADDUWS },
|
||||
{ 0x282, &D::VMINUW },
|
||||
{ 0x284, &D::VSRW },
|
||||
{ 0x286, &D::VCMPGTUW, 1 },
|
||||
{ 0x28a, &D::VRFIP },
|
||||
{ 0x28c, &D::VSPLTW },
|
||||
{ 0x28e, &D::VUPKLSB },
|
||||
{ 0x2c4, &D::VSR },
|
||||
{ 0x2c6, &D::VCMPGTFP, 1 },
|
||||
{ 0x2ca, &D::VRFIM },
|
||||
{ 0x2ce, &D::VUPKLSH },
|
||||
{ 0x300, &D::VADDSBS },
|
||||
{ 0x302, &D::VMINSB },
|
||||
{ 0x304, &D::VSRAB },
|
||||
{ 0x306, &D::VCMPGTSB, 1 },
|
||||
{ 0x308, &D::VMULESB },
|
||||
{ 0x30a, &D::VCFUX },
|
||||
{ 0x30c, &D::VSPLTISB },
|
||||
{ 0x30e, &D::VPKPX },
|
||||
{ 0x340, &D::VADDSHS },
|
||||
{ 0x342, &D::VMINSH },
|
||||
{ 0x344, &D::VSRAH },
|
||||
{ 0x346, &D::VCMPGTSH, 1 },
|
||||
{ 0x348, &D::VMULESH },
|
||||
{ 0x34a, &D::VCFSX },
|
||||
{ 0x34c, &D::VSPLTISH },
|
||||
{ 0x34e, &D::VUPKHPX },
|
||||
{ 0x380, &D::VADDSWS },
|
||||
{ 0x382, &D::VMINSW },
|
||||
{ 0x384, &D::VSRAW },
|
||||
{ 0x386, &D::VCMPGTSW, 1 },
|
||||
{ 0x38a, &D::VCTUXS },
|
||||
{ 0x38c, &D::VSPLTISW },
|
||||
{ 0x3c6, &D::VCMPBFP, 1 },
|
||||
{ 0x3ca, &D::VCTSXS },
|
||||
{ 0x3ce, &D::VUPKLPX },
|
||||
{ 0x400, &D::VSUBUBM },
|
||||
{ 0x402, &D::VAVGUB },
|
||||
{ 0x404, &D::VAND },
|
||||
{ 0x40a, &D::VMAXFP },
|
||||
{ 0x40c, &D::VSLO },
|
||||
{ 0x440, &D::VSUBUHM },
|
||||
{ 0x442, &D::VAVGUH },
|
||||
{ 0x444, &D::VANDC },
|
||||
{ 0x44a, &D::VMINFP },
|
||||
{ 0x44c, &D::VSRO },
|
||||
{ 0x480, &D::VSUBUWM },
|
||||
{ 0x482, &D::VAVGUW },
|
||||
{ 0x484, &D::VOR },
|
||||
{ 0x4c4, &D::VXOR },
|
||||
{ 0x502, &D::VAVGSB },
|
||||
{ 0x504, &D::VNOR },
|
||||
{ 0x542, &D::VAVGSH },
|
||||
{ 0x580, &D::VSUBCUW },
|
||||
{ 0x582, &D::VAVGSW },
|
||||
{ 0x600, &D::VSUBUBS },
|
||||
{ 0x604, &D::MFVSCR },
|
||||
{ 0x608, &D::VSUM4UBS },
|
||||
{ 0x640, &D::VSUBUHS },
|
||||
{ 0x644, &D::MTVSCR },
|
||||
{ 0x648, &D::VSUM4SHS },
|
||||
{ 0x680, &D::VSUBUWS },
|
||||
{ 0x688, &D::VSUM2SWS },
|
||||
{ 0x700, &D::VSUBSBS },
|
||||
{ 0x708, &D::VSUM4SBS },
|
||||
{ 0x740, &D::VSUBSHS },
|
||||
{ 0x780, &D::VSUBSWS },
|
||||
{ 0x788, &D::VSUMSWS },
|
||||
{ 0x102, GET(VMAXSB) },
|
||||
{ 0x104, GET(VSLB) },
|
||||
{ 0x108, GET(VMULOSB) },
|
||||
{ 0x10a, GET(VREFP) },
|
||||
{ 0x10c, GET(VMRGLB) },
|
||||
{ 0x10e, GET(VPKSHUS) },
|
||||
{ 0x142, GET(VMAXSH) },
|
||||
{ 0x144, GET(VSLH) },
|
||||
{ 0x148, GET(VMULOSH) },
|
||||
{ 0x14a, GET(VRSQRTEFP) },
|
||||
{ 0x14c, GET(VMRGLH) },
|
||||
{ 0x14e, GET(VPKSWUS) },
|
||||
{ 0x180, GET(VADDCUW) },
|
||||
{ 0x182, GET(VMAXSW) },
|
||||
{ 0x184, GET(VSLW) },
|
||||
{ 0x18a, GET(VEXPTEFP) },
|
||||
{ 0x18c, GET(VMRGLW) },
|
||||
{ 0x18e, GET(VPKSHSS) },
|
||||
{ 0x1c4, GET(VSL) },
|
||||
{ 0x1c6, GET(VCMPGEFP) },
|
||||
{ 0x5c6, GET(VCMPGEFP_) },
|
||||
{ 0x1ca, GET(VLOGEFP) },
|
||||
{ 0x1ce, GET(VPKSWSS) },
|
||||
{ 0x200, GET(VADDUBS) },
|
||||
{ 0x202, GET(VMINUB) },
|
||||
{ 0x204, GET(VSRB) },
|
||||
{ 0x206, GET(VCMPGTUB) },
|
||||
{ 0x606, GET(VCMPGTUB_) },
|
||||
{ 0x208, GET(VMULEUB) },
|
||||
{ 0x20a, GET(VRFIN) },
|
||||
{ 0x20c, GET(VSPLTB) },
|
||||
{ 0x20e, GET(VUPKHSB) },
|
||||
{ 0x240, GET(VADDUHS) },
|
||||
{ 0x242, GET(VMINUH) },
|
||||
{ 0x244, GET(VSRH) },
|
||||
{ 0x246, GET(VCMPGTUH) },
|
||||
{ 0x646, GET(VCMPGTUH_) },
|
||||
{ 0x248, GET(VMULEUH) },
|
||||
{ 0x24a, GET(VRFIZ) },
|
||||
{ 0x24c, GET(VSPLTH) },
|
||||
{ 0x24e, GET(VUPKHSH) },
|
||||
{ 0x280, GET(VADDUWS) },
|
||||
{ 0x282, GET(VMINUW) },
|
||||
{ 0x284, GET(VSRW) },
|
||||
{ 0x286, GET(VCMPGTUW) },
|
||||
{ 0x686, GET(VCMPGTUW_) },
|
||||
{ 0x28a, GET(VRFIP) },
|
||||
{ 0x28c, GET(VSPLTW) },
|
||||
{ 0x28e, GET(VUPKLSB) },
|
||||
{ 0x2c4, GET(VSR) },
|
||||
{ 0x2c6, GET(VCMPGTFP) },
|
||||
{ 0x6c6, GET(VCMPGTFP_) },
|
||||
{ 0x2ca, GET(VRFIM) },
|
||||
{ 0x2ce, GET(VUPKLSH) },
|
||||
{ 0x300, GET(VADDSBS) },
|
||||
{ 0x302, GET(VMINSB) },
|
||||
{ 0x304, GET(VSRAB) },
|
||||
{ 0x306, GET(VCMPGTSB) },
|
||||
{ 0x706, GET(VCMPGTSB_) },
|
||||
{ 0x308, GET(VMULESB) },
|
||||
{ 0x30a, GET(VCFUX) },
|
||||
{ 0x30c, GET(VSPLTISB) },
|
||||
{ 0x30e, GET(VPKPX) },
|
||||
{ 0x340, GET(VADDSHS) },
|
||||
{ 0x342, GET(VMINSH) },
|
||||
{ 0x344, GET(VSRAH) },
|
||||
{ 0x346, GET(VCMPGTSH) },
|
||||
{ 0x746, GET(VCMPGTSH_) },
|
||||
{ 0x348, GET(VMULESH) },
|
||||
{ 0x34a, GET(VCFSX) },
|
||||
{ 0x34c, GET(VSPLTISH) },
|
||||
{ 0x34e, GET(VUPKHPX) },
|
||||
{ 0x380, GET(VADDSWS) },
|
||||
{ 0x382, GET(VMINSW) },
|
||||
{ 0x384, GET(VSRAW) },
|
||||
{ 0x386, GET(VCMPGTSW) },
|
||||
{ 0x786, GET(VCMPGTSW_) },
|
||||
{ 0x38a, GET(VCTUXS) },
|
||||
{ 0x38c, GET(VSPLTISW) },
|
||||
{ 0x3c6, GET(VCMPBFP) },
|
||||
{ 0x7c6, GET(VCMPBFP_) },
|
||||
{ 0x3ca, GET(VCTSXS) },
|
||||
{ 0x3ce, GET(VUPKLPX) },
|
||||
{ 0x400, GET(VSUBUBM) },
|
||||
{ 0x402, GET(VAVGUB) },
|
||||
{ 0x404, GET(VAND) },
|
||||
{ 0x40a, GET(VMAXFP) },
|
||||
{ 0x40c, GET(VSLO) },
|
||||
{ 0x440, GET(VSUBUHM) },
|
||||
{ 0x442, GET(VAVGUH) },
|
||||
{ 0x444, GET(VANDC) },
|
||||
{ 0x44a, GET(VMINFP) },
|
||||
{ 0x44c, GET(VSRO) },
|
||||
{ 0x480, GET(VSUBUWM) },
|
||||
{ 0x482, GET(VAVGUW) },
|
||||
{ 0x484, GET(VOR) },
|
||||
{ 0x4c4, GET(VXOR) },
|
||||
{ 0x502, GET(VAVGSB) },
|
||||
{ 0x504, GET(VNOR) },
|
||||
{ 0x542, GET(VAVGSH) },
|
||||
{ 0x580, GET(VSUBCUW) },
|
||||
{ 0x582, GET(VAVGSW) },
|
||||
{ 0x600, GET(VSUBUBS) },
|
||||
{ 0x604, GET(MFVSCR) },
|
||||
{ 0x608, GET(VSUM4UBS) },
|
||||
{ 0x640, GET(VSUBUHS) },
|
||||
{ 0x644, GET(MTVSCR) },
|
||||
{ 0x648, GET(VSUM4SHS) },
|
||||
{ 0x680, GET(VSUBUWS) },
|
||||
{ 0x688, GET(VSUM2SWS) },
|
||||
{ 0x700, GET(VSUBSBS) },
|
||||
{ 0x708, GET(VSUM4SBS) },
|
||||
{ 0x740, GET(VSUBSHS) },
|
||||
{ 0x780, GET(VSUBSWS) },
|
||||
{ 0x788, GET(VSUMSWS) },
|
||||
});
|
||||
|
||||
// Group 0x13 opcodes (field 21..30)
|
||||
fill_table(0x13, 10, 1,
|
||||
{
|
||||
{ 0x000, &D::MCRF },
|
||||
{ 0x010, &D::BCLR },
|
||||
{ 0x021, &D::CRNOR },
|
||||
{ 0x081, &D::CRANDC },
|
||||
{ 0x096, &D::ISYNC },
|
||||
{ 0x0c1, &D::CRXOR },
|
||||
{ 0x0e1, &D::CRNAND },
|
||||
{ 0x101, &D::CRAND },
|
||||
{ 0x121, &D::CREQV },
|
||||
{ 0x1a1, &D::CRORC },
|
||||
{ 0x1c1, &D::CROR },
|
||||
{ 0x210, &D::BCCTR },
|
||||
{ 0x000, GET(MCRF) },
|
||||
{ 0x010, GET(BCLR) },
|
||||
{ 0x021, GET(CRNOR) },
|
||||
{ 0x081, GET(CRANDC) },
|
||||
{ 0x096, GET(ISYNC) },
|
||||
{ 0x0c1, GET(CRXOR) },
|
||||
{ 0x0e1, GET(CRNAND) },
|
||||
{ 0x101, GET(CRAND) },
|
||||
{ 0x121, GET(CREQV) },
|
||||
{ 0x1a1, GET(CRORC) },
|
||||
{ 0x1c1, GET(CROR) },
|
||||
{ 0x210, GET(BCCTR) },
|
||||
});
|
||||
|
||||
// Group 0x1e opcodes (field 27..30)
|
||||
fill_table(0x1e, 4, 1,
|
||||
{
|
||||
{ 0x0, &D::RLDICL },
|
||||
{ 0x1, &D::RLDICL },
|
||||
{ 0x2, &D::RLDICR },
|
||||
{ 0x3, &D::RLDICR },
|
||||
{ 0x4, &D::RLDIC },
|
||||
{ 0x5, &D::RLDIC },
|
||||
{ 0x6, &D::RLDIMI },
|
||||
{ 0x7, &D::RLDIMI },
|
||||
{ 0x8, &D::RLDCL },
|
||||
{ 0x9, &D::RLDCR },
|
||||
{ 0x0, GETRC(RLDICL) },
|
||||
{ 0x1, GETRC(RLDICL) },
|
||||
{ 0x2, GETRC(RLDICR) },
|
||||
{ 0x3, GETRC(RLDICR) },
|
||||
{ 0x4, GETRC(RLDIC) },
|
||||
{ 0x5, GETRC(RLDIC) },
|
||||
{ 0x6, GETRC(RLDIMI) },
|
||||
{ 0x7, GETRC(RLDIMI) },
|
||||
{ 0x8, GETRC(RLDCL) },
|
||||
{ 0x9, GETRC(RLDCR) },
|
||||
});
|
||||
|
||||
// Group 0x1f opcodes (field 21..30)
|
||||
fill_table(0x1f, 10, 1,
|
||||
{
|
||||
{ 0x000, &D::CMP },
|
||||
{ 0x004, &D::TW },
|
||||
{ 0x006, &D::LVSL },
|
||||
{ 0x007, &D::LVEBX },
|
||||
{ 0x008, &D::SUBFC, 1 },
|
||||
{ 0x009, &D::MULHDU },
|
||||
{ 0x00a, &D::ADDC, 1 },
|
||||
{ 0x00b, &D::MULHWU },
|
||||
{ 0x013, &D::MFOCRF },
|
||||
{ 0x014, &D::LWARX },
|
||||
{ 0x015, &D::LDX },
|
||||
{ 0x017, &D::LWZX },
|
||||
{ 0x018, &D::SLW },
|
||||
{ 0x01a, &D::CNTLZW },
|
||||
{ 0x01b, &D::SLD },
|
||||
{ 0x01c, &D::AND },
|
||||
{ 0x020, &D::CMPL },
|
||||
{ 0x026, &D::LVSR },
|
||||
{ 0x027, &D::LVEHX },
|
||||
{ 0x028, &D::SUBF, 1 },
|
||||
{ 0x035, &D::LDUX },
|
||||
{ 0x036, &D::DCBST },
|
||||
{ 0x037, &D::LWZUX },
|
||||
{ 0x03a, &D::CNTLZD },
|
||||
{ 0x03c, &D::ANDC },
|
||||
{ 0x044, &D::TD },
|
||||
{ 0x047, &D::LVEWX },
|
||||
{ 0x049, &D::MULHD },
|
||||
{ 0x04b, &D::MULHW },
|
||||
{ 0x054, &D::LDARX },
|
||||
{ 0x056, &D::DCBF },
|
||||
{ 0x057, &D::LBZX },
|
||||
{ 0x067, &D::LVX },
|
||||
{ 0x068, &D::NEG, 1 },
|
||||
{ 0x077, &D::LBZUX },
|
||||
{ 0x07c, &D::NOR },
|
||||
{ 0x087, &D::STVEBX },
|
||||
{ 0x088, &D::SUBFE, 1 },
|
||||
{ 0x08a, &D::ADDE, 1 },
|
||||
{ 0x090, &D::MTOCRF },
|
||||
{ 0x095, &D::STDX },
|
||||
{ 0x096, &D::STWCX },
|
||||
{ 0x097, &D::STWX },
|
||||
{ 0x0a7, &D::STVEHX },
|
||||
{ 0x0b5, &D::STDUX },
|
||||
{ 0x0b7, &D::STWUX },
|
||||
{ 0x0c7, &D::STVEWX },
|
||||
{ 0x0c8, &D::SUBFZE, 1 },
|
||||
{ 0x0ca, &D::ADDZE, 1 },
|
||||
{ 0x0d6, &D::STDCX },
|
||||
{ 0x0d7, &D::STBX },
|
||||
{ 0x0e7, &D::STVX },
|
||||
{ 0x0e8, &D::SUBFME, 1 },
|
||||
{ 0x0e9, &D::MULLD, 1 },
|
||||
{ 0x0ea, &D::ADDME, 1 },
|
||||
{ 0x0eb, &D::MULLW, 1 },
|
||||
{ 0x0f6, &D::DCBTST },
|
||||
{ 0x0f7, &D::STBUX },
|
||||
{ 0x10a, &D::ADD, 1 },
|
||||
{ 0x116, &D::DCBT },
|
||||
{ 0x117, &D::LHZX },
|
||||
{ 0x11c, &D::EQV },
|
||||
{ 0x136, &D::ECIWX },
|
||||
{ 0x137, &D::LHZUX },
|
||||
{ 0x13c, &D::XOR },
|
||||
{ 0x153, &D::MFSPR },
|
||||
{ 0x155, &D::LWAX },
|
||||
{ 0x156, &D::DST },
|
||||
{ 0x157, &D::LHAX },
|
||||
{ 0x167, &D::LVXL },
|
||||
{ 0x173, &D::MFTB },
|
||||
{ 0x175, &D::LWAUX },
|
||||
{ 0x176, &D::DSTST },
|
||||
{ 0x177, &D::LHAUX },
|
||||
{ 0x197, &D::STHX },
|
||||
{ 0x19c, &D::ORC },
|
||||
{ 0x1b6, &D::ECOWX },
|
||||
{ 0x1b7, &D::STHUX },
|
||||
{ 0x1bc, &D::OR },
|
||||
{ 0x1c9, &D::DIVDU, 1 },
|
||||
{ 0x1cb, &D::DIVWU, 1 },
|
||||
{ 0x1d3, &D::MTSPR },
|
||||
{ 0x1d6, &D::DCBI },
|
||||
{ 0x1dc, &D::NAND },
|
||||
{ 0x1e7, &D::STVXL },
|
||||
{ 0x1e9, &D::DIVD, 1 },
|
||||
{ 0x1eb, &D::DIVW, 1 },
|
||||
{ 0x207, &D::LVLX },
|
||||
{ 0x214, &D::LDBRX },
|
||||
{ 0x215, &D::LSWX },
|
||||
{ 0x216, &D::LWBRX },
|
||||
{ 0x217, &D::LFSX },
|
||||
{ 0x218, &D::SRW },
|
||||
{ 0x21b, &D::SRD },
|
||||
{ 0x227, &D::LVRX },
|
||||
{ 0x237, &D::LFSUX },
|
||||
{ 0x255, &D::LSWI },
|
||||
{ 0x256, &D::SYNC },
|
||||
{ 0x257, &D::LFDX },
|
||||
{ 0x277, &D::LFDUX },
|
||||
{ 0x287, &D::STVLX },
|
||||
{ 0x294, &D::STDBRX },
|
||||
{ 0x295, &D::STSWX },
|
||||
{ 0x296, &D::STWBRX },
|
||||
{ 0x297, &D::STFSX },
|
||||
{ 0x2a7, &D::STVRX },
|
||||
{ 0x2b7, &D::STFSUX },
|
||||
{ 0x2d5, &D::STSWI },
|
||||
{ 0x2d7, &D::STFDX },
|
||||
{ 0x2f7, &D::STFDUX },
|
||||
{ 0x307, &D::LVLXL },
|
||||
{ 0x316, &D::LHBRX },
|
||||
{ 0x318, &D::SRAW },
|
||||
{ 0x31a, &D::SRAD },
|
||||
{ 0x327, &D::LVRXL },
|
||||
{ 0x336, &D::DSS },
|
||||
{ 0x338, &D::SRAWI },
|
||||
{ 0x33a, &D::SRADI },
|
||||
{ 0x33b, &D::SRADI },
|
||||
{ 0x356, &D::EIEIO },
|
||||
{ 0x387, &D::STVLXL },
|
||||
{ 0x396, &D::STHBRX },
|
||||
{ 0x39a, &D::EXTSH },
|
||||
{ 0x3a7, &D::STVRXL },
|
||||
{ 0x3ba, &D::EXTSB },
|
||||
{ 0x3d7, &D::STFIWX },
|
||||
{ 0x3da, &D::EXTSW },
|
||||
{ 0x3d6, &D::ICBI },
|
||||
{ 0x3f6, &D::DCBZ },
|
||||
{ 0x000, GET(CMP) },
|
||||
{ 0x004, GET(TW) },
|
||||
{ 0x006, GET(LVSL) },
|
||||
{ 0x007, GET(LVEBX) },
|
||||
{ 0x008, GETRC(SUBFC) },
|
||||
{ 0x208, GETRC(SUBFCO) },
|
||||
{ 0x009, GETRC(MULHDU) },
|
||||
{ 0x00a, GETRC(ADDC) },
|
||||
{ 0x20a, GETRC(ADDCO) },
|
||||
{ 0x00b, GETRC(MULHWU) },
|
||||
{ 0x013, GET(MFOCRF) },
|
||||
{ 0x014, GET(LWARX) },
|
||||
{ 0x015, GET(LDX) },
|
||||
{ 0x017, GET(LWZX) },
|
||||
{ 0x018, GETRC(SLW) },
|
||||
{ 0x01a, GETRC(CNTLZW) },
|
||||
{ 0x01b, GETRC(SLD) },
|
||||
{ 0x01c, GETRC(AND) },
|
||||
{ 0x020, GET(CMPL) },
|
||||
{ 0x026, GET(LVSR) },
|
||||
{ 0x027, GET(LVEHX) },
|
||||
{ 0x028, GETRC(SUBF) },
|
||||
{ 0x228, GETRC(SUBFO) },
|
||||
{ 0x035, GET(LDUX) },
|
||||
{ 0x036, GET(DCBST) },
|
||||
{ 0x037, GET(LWZUX) },
|
||||
{ 0x03a, GETRC(CNTLZD) },
|
||||
{ 0x03c, GETRC(ANDC) },
|
||||
{ 0x044, GET(TD) },
|
||||
{ 0x047, GET(LVEWX) },
|
||||
{ 0x049, GETRC(MULHD) },
|
||||
{ 0x04b, GETRC(MULHW) },
|
||||
{ 0x054, GET(LDARX) },
|
||||
{ 0x056, GET(DCBF) },
|
||||
{ 0x057, GET(LBZX) },
|
||||
{ 0x067, GET(LVX) },
|
||||
{ 0x068, GETRC(NEG) },
|
||||
{ 0x268, GETRC(NEGO) },
|
||||
{ 0x077, GET(LBZUX) },
|
||||
{ 0x07c, GETRC(NOR) },
|
||||
{ 0x087, GET(STVEBX) },
|
||||
{ 0x088, GETRC(SUBFE) },
|
||||
{ 0x288, GETRC(SUBFEO) },
|
||||
{ 0x08a, GETRC(ADDE) },
|
||||
{ 0x28a, GETRC(ADDEO) },
|
||||
{ 0x090, GET(MTOCRF) },
|
||||
{ 0x095, GET(STDX) },
|
||||
{ 0x096, GET(STWCX) },
|
||||
{ 0x097, GET(STWX) },
|
||||
{ 0x0a7, GET(STVEHX) },
|
||||
{ 0x0b5, GET(STDUX) },
|
||||
{ 0x0b7, GET(STWUX) },
|
||||
{ 0x0c7, GET(STVEWX) },
|
||||
{ 0x0c8, GETRC(SUBFZE) },
|
||||
{ 0x2c8, GETRC(SUBFZEO) },
|
||||
{ 0x0ca, GETRC(ADDZE) },
|
||||
{ 0x2ca, GETRC(ADDZEO) },
|
||||
{ 0x0d6, GET(STDCX) },
|
||||
{ 0x0d7, GET(STBX) },
|
||||
{ 0x0e7, GET(STVX) },
|
||||
{ 0x0e8, GETRC(SUBFME) },
|
||||
{ 0x2e8, GETRC(SUBFMEO) },
|
||||
{ 0x0e9, GETRC(MULLD) },
|
||||
{ 0x2e9, GETRC(MULLDO) },
|
||||
{ 0x0ea, GETRC(ADDME) },
|
||||
{ 0x2ea, GETRC(ADDMEO) },
|
||||
{ 0x0eb, GETRC(MULLW) },
|
||||
{ 0x2eb, GETRC(MULLWO) },
|
||||
{ 0x0f6, GET(DCBTST) },
|
||||
{ 0x0f7, GET(STBUX) },
|
||||
{ 0x10a, GETRC(ADD) },
|
||||
{ 0x30a, GETRC(ADDO) },
|
||||
{ 0x116, GET(DCBT) },
|
||||
{ 0x117, GET(LHZX) },
|
||||
{ 0x11c, GETRC(EQV) },
|
||||
{ 0x136, GET(ECIWX) },
|
||||
{ 0x137, GET(LHZUX) },
|
||||
{ 0x13c, GETRC(XOR) },
|
||||
{ 0x153, GET(MFSPR) },
|
||||
{ 0x155, GET(LWAX) },
|
||||
{ 0x156, GET(DST) },
|
||||
{ 0x157, GET(LHAX) },
|
||||
{ 0x167, GET(LVXL) },
|
||||
{ 0x173, GET(MFTB) },
|
||||
{ 0x175, GET(LWAUX) },
|
||||
{ 0x176, GET(DSTST) },
|
||||
{ 0x177, GET(LHAUX) },
|
||||
{ 0x197, GET(STHX) },
|
||||
{ 0x19c, GETRC(ORC) },
|
||||
{ 0x1b6, GET(ECOWX) },
|
||||
{ 0x1b7, GET(STHUX) },
|
||||
{ 0x1bc, GETRC(OR) },
|
||||
{ 0x1c9, GETRC(DIVDU) },
|
||||
{ 0x3c9, GETRC(DIVDUO) },
|
||||
{ 0x1cb, GETRC(DIVWU) },
|
||||
{ 0x3cb, GETRC(DIVWUO) },
|
||||
{ 0x1d3, GET(MTSPR) },
|
||||
{ 0x1d6, GET(DCBI) },
|
||||
{ 0x1dc, GETRC(NAND) },
|
||||
{ 0x1e7, GET(STVXL) },
|
||||
{ 0x1e9, GETRC(DIVD) },
|
||||
{ 0x3e9, GETRC(DIVDO) },
|
||||
{ 0x1eb, GETRC(DIVW) },
|
||||
{ 0x3eb, GETRC(DIVWO) },
|
||||
{ 0x207, GET(LVLX) },
|
||||
{ 0x214, GET(LDBRX) },
|
||||
{ 0x215, GET(LSWX) },
|
||||
{ 0x216, GET(LWBRX) },
|
||||
{ 0x217, GET(LFSX) },
|
||||
{ 0x218, GETRC(SRW) },
|
||||
{ 0x21b, GETRC(SRD) },
|
||||
{ 0x227, GET(LVRX) },
|
||||
{ 0x237, GET(LFSUX) },
|
||||
{ 0x255, GET(LSWI) },
|
||||
{ 0x256, GET(SYNC) },
|
||||
{ 0x257, GET(LFDX) },
|
||||
{ 0x277, GET(LFDUX) },
|
||||
{ 0x287, GET(STVLX) },
|
||||
{ 0x294, GET(STDBRX) },
|
||||
{ 0x295, GET(STSWX) },
|
||||
{ 0x296, GET(STWBRX) },
|
||||
{ 0x297, GET(STFSX) },
|
||||
{ 0x2a7, GET(STVRX) },
|
||||
{ 0x2b7, GET(STFSUX) },
|
||||
{ 0x2d5, GET(STSWI) },
|
||||
{ 0x2d7, GET(STFDX) },
|
||||
{ 0x2f7, GET(STFDUX) },
|
||||
{ 0x307, GET(LVLXL) },
|
||||
{ 0x316, GET(LHBRX) },
|
||||
{ 0x318, GETRC(SRAW) },
|
||||
{ 0x31a, GETRC(SRAD) },
|
||||
{ 0x327, GET(LVRXL) },
|
||||
{ 0x336, GET(DSS) },
|
||||
{ 0x338, GETRC(SRAWI) },
|
||||
{ 0x33a, GETRC(SRADI) },
|
||||
{ 0x33b, GETRC(SRADI) },
|
||||
{ 0x356, GET(EIEIO) },
|
||||
{ 0x387, GET(STVLXL) },
|
||||
{ 0x396, GET(STHBRX) },
|
||||
{ 0x39a, GETRC(EXTSH) },
|
||||
{ 0x3a7, GET(STVRXL) },
|
||||
{ 0x3ba, GETRC(EXTSB) },
|
||||
{ 0x3d7, GET(STFIWX) },
|
||||
{ 0x3da, GETRC(EXTSW) },
|
||||
{ 0x3d6, GET(ICBI) },
|
||||
{ 0x3f6, GET(DCBZ) },
|
||||
});
|
||||
|
||||
// Group 0x3a opcodes (field 30..31)
|
||||
fill_table(0x3a, 2, 0,
|
||||
{
|
||||
{ 0x0, &D::LD },
|
||||
{ 0x1, &D::LDU },
|
||||
{ 0x2, &D::LWA },
|
||||
{ 0x0, GET(LD) },
|
||||
{ 0x1, GET(LDU) },
|
||||
{ 0x2, GET(LWA) },
|
||||
});
|
||||
|
||||
// Group 0x3b opcodes (field 21..30)
|
||||
fill_table(0x3b, 10, 1,
|
||||
{
|
||||
{ 0x12, &D::FDIVS, 5 },
|
||||
{ 0x14, &D::FSUBS, 5 },
|
||||
{ 0x15, &D::FADDS, 5 },
|
||||
{ 0x16, &D::FSQRTS, 5 },
|
||||
{ 0x18, &D::FRES, 5 },
|
||||
{ 0x19, &D::FMULS, 5 },
|
||||
{ 0x1c, &D::FMSUBS, 5 },
|
||||
{ 0x1d, &D::FMADDS, 5 },
|
||||
{ 0x1e, &D::FNMSUBS, 5 },
|
||||
{ 0x1f, &D::FNMADDS, 5 },
|
||||
{ 0x12, GETRC(FDIVS), 5 },
|
||||
{ 0x14, GETRC(FSUBS), 5 },
|
||||
{ 0x15, GETRC(FADDS), 5 },
|
||||
{ 0x16, GETRC(FSQRTS), 5 },
|
||||
{ 0x18, GETRC(FRES), 5 },
|
||||
{ 0x19, GETRC(FMULS), 5 },
|
||||
{ 0x1c, GETRC(FMSUBS), 5 },
|
||||
{ 0x1d, GETRC(FMADDS), 5 },
|
||||
{ 0x1e, GETRC(FNMSUBS), 5 },
|
||||
{ 0x1f, GETRC(FNMADDS), 5 },
|
||||
});
|
||||
|
||||
// Group 0x3e opcodes (field 30..31)
|
||||
fill_table(0x3e, 2, 0,
|
||||
{
|
||||
{ 0x0, &D::STD },
|
||||
{ 0x1, &D::STDU },
|
||||
{ 0x0, GET(STD) },
|
||||
{ 0x1, GET(STDU) },
|
||||
});
|
||||
|
||||
// Group 0x3f opcodes (field 21..30)
|
||||
fill_table(0x3f, 10, 1,
|
||||
{
|
||||
{ 0x026, &D::MTFSB1 },
|
||||
{ 0x040, &D::MCRFS },
|
||||
{ 0x046, &D::MTFSB0 },
|
||||
{ 0x086, &D::MTFSFI },
|
||||
{ 0x247, &D::MFFS },
|
||||
{ 0x2c7, &D::MTFSF },
|
||||
{ 0x026, GETRC(MTFSB1) },
|
||||
{ 0x040, GET(MCRFS) },
|
||||
{ 0x046, GETRC(MTFSB0) },
|
||||
{ 0x086, GETRC(MTFSFI) },
|
||||
{ 0x247, GETRC(MFFS) },
|
||||
{ 0x2c7, GETRC(MTFSF) },
|
||||
|
||||
{ 0x000, &D::FCMPU },
|
||||
{ 0x00c, &D::FRSP },
|
||||
{ 0x00e, &D::FCTIW },
|
||||
{ 0x00f, &D::FCTIWZ },
|
||||
{ 0x000, GET(FCMPU) },
|
||||
{ 0x00c, GETRC(FRSP) },
|
||||
{ 0x00e, GETRC(FCTIW) },
|
||||
{ 0x00f, GETRC(FCTIWZ) },
|
||||
|
||||
{ 0x012, &D::FDIV, 5 },
|
||||
{ 0x014, &D::FSUB, 5 },
|
||||
{ 0x015, &D::FADD, 5 },
|
||||
{ 0x016, &D::FSQRT, 5 },
|
||||
{ 0x017, &D::FSEL, 5 },
|
||||
{ 0x019, &D::FMUL, 5 },
|
||||
{ 0x01a, &D::FRSQRTE, 5 },
|
||||
{ 0x01c, &D::FMSUB, 5 },
|
||||
{ 0x01d, &D::FMADD, 5 },
|
||||
{ 0x01e, &D::FNMSUB, 5 },
|
||||
{ 0x01f, &D::FNMADD, 5 },
|
||||
{ 0x012, GETRC(FDIV), 5 },
|
||||
{ 0x014, GETRC(FSUB), 5 },
|
||||
{ 0x015, GETRC(FADD), 5 },
|
||||
{ 0x016, GETRC(FSQRT), 5 },
|
||||
{ 0x017, GETRC(FSEL), 5 },
|
||||
{ 0x019, GETRC(FMUL), 5 },
|
||||
{ 0x01a, GETRC(FRSQRTE), 5 },
|
||||
{ 0x01c, GETRC(FMSUB), 5 },
|
||||
{ 0x01d, GETRC(FMADD), 5 },
|
||||
{ 0x01e, GETRC(FNMSUB), 5 },
|
||||
{ 0x01f, GETRC(FNMADD), 5 },
|
||||
|
||||
{ 0x020, &D::FCMPO },
|
||||
{ 0x028, &D::FNEG },
|
||||
{ 0x048, &D::FMR },
|
||||
{ 0x088, &D::FNABS },
|
||||
{ 0x108, &D::FABS },
|
||||
{ 0x32e, &D::FCTID },
|
||||
{ 0x32f, &D::FCTIDZ },
|
||||
{ 0x34e, &D::FCFID },
|
||||
{ 0x020, GET(FCMPO) },
|
||||
{ 0x028, GETRC(FNEG) },
|
||||
{ 0x048, GETRC(FMR) },
|
||||
{ 0x088, GETRC(FNABS) },
|
||||
{ 0x108, GETRC(FABS) },
|
||||
{ 0x32e, GETRC(FCTID) },
|
||||
{ 0x32f, GETRC(FCTIDZ) },
|
||||
{ 0x34e, GETRC(FCFID) },
|
||||
});
|
||||
}
|
||||
|
||||
@ -587,6 +635,10 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
#undef GET_
|
||||
#undef GET
|
||||
#undef GETRC
|
||||
|
||||
namespace ppu_instructions
|
||||
{
|
||||
namespace fields
|
||||
|
@ -62,7 +62,7 @@
|
||||
#include "util/asm.hpp"
|
||||
#include "util/vm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/v128sse.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
|
||||
extern atomic_t<u64> g_watchdog_hold_ctr;
|
||||
@ -131,9 +131,8 @@ void fmt_class_string<typename ppu_thread::call_history_t>::format(std::string&
|
||||
}
|
||||
}
|
||||
|
||||
const ppu_decoder<ppu_interpreter_precise> g_ppu_interpreter_precise;
|
||||
const ppu_decoder<ppu_interpreter_fast> g_ppu_interpreter_fast;
|
||||
const ppu_decoder<ppu_itype> g_ppu_itype;
|
||||
extern const ppu_decoder<ppu_itype> g_ppu_itype{};
|
||||
extern const ppu_decoder<ppu_iname> g_ppu_iname{};
|
||||
|
||||
extern void ppu_initialize();
|
||||
extern void ppu_finalize(const ppu_module& info);
|
||||
@ -143,15 +142,16 @@ extern std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const
|
||||
extern void ppu_unload_prx(const lv2_prx&);
|
||||
extern std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, const std::string&, s64 file_offset);
|
||||
extern void ppu_execute_syscall(ppu_thread& ppu, u64 code);
|
||||
static bool ppu_break(ppu_thread& ppu, ppu_opcode_t op);
|
||||
static void ppu_break(ppu_thread&, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*);
|
||||
|
||||
extern void do_cell_atomic_128_store(u32 addr, const void* to_write);
|
||||
|
||||
const auto ppu_gateway = built_function<void(*)(ppu_thread*)>("ppu_gateway", [](asmjit::x86::Assembler& c, auto& args)
|
||||
const auto ppu_gateway = built_function<void(*)(ppu_thread*)>("ppu_gateway", [](native_asm& c, auto& args)
|
||||
{
|
||||
// Gateway for PPU, converts from native to GHC calling convention, also saves RSP value for escape
|
||||
using namespace asmjit;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
#ifdef _WIN32
|
||||
c.push(x86::r15);
|
||||
c.push(x86::r14);
|
||||
@ -192,10 +192,10 @@ const auto ppu_gateway = built_function<void(*)(ppu_thread*)>("ppu_gateway", [](
|
||||
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::r13, x86::edx, 1, 0)); // Load call target
|
||||
c.mov(x86::rdx, x86::rax);
|
||||
c.shl(x86::rax, 17);
|
||||
c.shr(x86::rax, 17);
|
||||
c.shr(x86::rdx, 47);
|
||||
c.shl(x86::rdx, 12);
|
||||
c.shl(x86::rax, 16);
|
||||
c.shr(x86::rax, 16);
|
||||
c.shr(x86::rdx, 48);
|
||||
c.shl(x86::edx, 13);
|
||||
c.mov(x86::r12d, x86::edx); // Load relocation base
|
||||
|
||||
c.mov(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
|
||||
@ -246,116 +246,113 @@ const auto ppu_gateway = built_function<void(*)(ppu_thread*)>("ppu_gateway", [](
|
||||
#endif
|
||||
|
||||
c.ret();
|
||||
#else
|
||||
c.ret(a64::x30);
|
||||
#endif
|
||||
});
|
||||
|
||||
const extern auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>("ppu_escape", [](asmjit::x86::Assembler& c, auto& args)
|
||||
const extern auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>("ppu_escape", [](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
// Restore native stack pointer (longjmp emulation)
|
||||
c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&ppu_thread::saved_native_sp)));
|
||||
|
||||
// Return to the return location
|
||||
c.sub(x86::rsp, 8);
|
||||
c.ret();
|
||||
#endif
|
||||
});
|
||||
|
||||
void ppu_recompiler_fallback(ppu_thread& ppu);
|
||||
|
||||
const auto ppu_recompiler_fallback_ghc = build_function_asm<void(*)(ppu_thread& ppu)>("ppu_trampolineb", [](asmjit::x86::Assembler& c, auto& args)
|
||||
#if defined(ARCH_X64)
|
||||
const auto ppu_recompiler_fallback_ghc = build_function_asm<void(*)(ppu_thread& ppu)>("ppu_trampolineb", [](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
c.mov(args[0], x86::rbp);
|
||||
c.jmp(imm_ptr(ppu_recompiler_fallback));
|
||||
});
|
||||
#elif defined(ARCH_ARM64)
|
||||
const auto ppu_recompiler_fallback_ghc = &ppu_recompiler_fallback;
|
||||
#endif
|
||||
|
||||
// Get pointer to executable cache
|
||||
static u64& ppu_ref(u32 addr)
|
||||
static ppu_intrp_func_t& ppu_ref(u32 addr)
|
||||
{
|
||||
return *reinterpret_cast<u64*>(vm::g_exec_addr + u64{addr} * 2);
|
||||
return *reinterpret_cast<ppu_intrp_func_t*>(vm::g_exec_addr + u64{addr} * 2);
|
||||
}
|
||||
|
||||
// Get interpreter cache value
|
||||
static u64 ppu_cache(u32 addr)
|
||||
static ppu_intrp_func_t ppu_cache(u32 addr)
|
||||
{
|
||||
if (g_cfg.core.ppu_decoder > ppu_decoder_type::fast)
|
||||
if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static)
|
||||
{
|
||||
fmt::throw_exception("Invalid PPU decoder");
|
||||
}
|
||||
|
||||
// Select opcode table
|
||||
const auto& table = *(
|
||||
g_cfg.core.ppu_decoder == ppu_decoder_type::precise
|
||||
? &g_ppu_interpreter_precise.get_table()
|
||||
: &g_ppu_interpreter_fast.get_table());
|
||||
|
||||
return reinterpret_cast<uptr>(table[ppu_decode(vm::read32(addr))]);
|
||||
return g_fxo->get<ppu_interpreter_rt>().decode(vm::read32(addr));
|
||||
}
|
||||
|
||||
static bool ppu_fallback(ppu_thread& ppu, ppu_opcode_t op)
|
||||
static ppu_intrp_func ppu_ret = {[](ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*)
|
||||
{
|
||||
if (g_cfg.core.ppu_debug)
|
||||
{
|
||||
ppu_log.error("Unregistered instruction: 0x%08x", op.opcode);
|
||||
}
|
||||
// Fix PC and return (step execution)
|
||||
ppu.cia = vm::get_addr(this_op);
|
||||
return;
|
||||
}};
|
||||
|
||||
ppu_ref(ppu.cia) = ppu_cache(ppu.cia);
|
||||
return false;
|
||||
static void ppu_fallback(ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
|
||||
{
|
||||
const auto _pc = vm::get_addr(this_op);
|
||||
const auto _fn = ppu_cache(_pc);
|
||||
ppu_ref(_pc) = _fn;
|
||||
return _fn(ppu, op, this_op, next_fn);
|
||||
}
|
||||
|
||||
// TODO: Make this a dispatch call
|
||||
void ppu_recompiler_fallback(ppu_thread& ppu)
|
||||
{
|
||||
perf_meter<"PPUFALL1"_u64> perf0;
|
||||
|
||||
if (g_cfg.core.ppu_debug)
|
||||
{
|
||||
ppu_log.error("Unregistered PPU Function (LR=0x%llx)", ppu.lr);
|
||||
ppu_log.error("Unregistered PPU Function (LR=0x%x)", ppu.lr);
|
||||
}
|
||||
|
||||
const auto& table = g_ppu_interpreter_fast.get_table();
|
||||
|
||||
u64 ctr = 0;
|
||||
const auto& table = g_fxo->get<ppu_interpreter_rt>();
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (uptr func = ppu_ref(ppu.cia); (func << 17 >> 17) != reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
|
||||
if (uptr func = uptr(ppu_ref(ppu.cia)); (func << 16 >> 16) != reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
|
||||
{
|
||||
// We found a recompiler function at cia, return
|
||||
break;
|
||||
}
|
||||
|
||||
// Run instructions in interpreter
|
||||
if (const u32 op = vm::read32(ppu.cia); ctr++, table[ppu_decode(op)](ppu, {op})) [[likely]]
|
||||
{
|
||||
ppu.cia += 4;
|
||||
continue;
|
||||
}
|
||||
// Run one instruction in interpreter (TODO)
|
||||
const u32 op = vm::read32(ppu.cia);
|
||||
table.decode(op)(ppu, {op}, vm::_ptr<u32>(ppu.cia), &ppu_ret);
|
||||
|
||||
if (ppu.test_stopped())
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (g_cfg.core.ppu_debug)
|
||||
{
|
||||
ppu_log.warning("Exiting interpreter at 0x%x (executed %u functions)", ppu.cia, ctr);
|
||||
}
|
||||
}
|
||||
|
||||
void ppu_reservation_fallback(ppu_thread& ppu)
|
||||
{
|
||||
const auto& table = g_ppu_interpreter_fast.get_table();
|
||||
perf_meter<"PPUFALL2"_u64> perf0;
|
||||
|
||||
const auto& table = g_fxo->get<ppu_interpreter_rt>();
|
||||
|
||||
while (true)
|
||||
{
|
||||
// Run instructions in interpreter
|
||||
// Run one instruction in interpreter (TODO)
|
||||
const u32 op = vm::read32(ppu.cia);
|
||||
|
||||
if (table[ppu_decode(op)](ppu, {op})) [[likely]]
|
||||
{
|
||||
ppu.cia += 4;
|
||||
}
|
||||
table.decode(op)(ppu, {op}, vm::_ptr<u32>(ppu.cia), &ppu_ret);
|
||||
|
||||
if (!ppu.raddr || !ppu.use_full_rdata)
|
||||
{
|
||||
@ -372,7 +369,7 @@ void ppu_reservation_fallback(ppu_thread& ppu)
|
||||
|
||||
static std::unordered_map<u32, u32>* s_ppu_toc;
|
||||
|
||||
static bool ppu_check_toc(ppu_thread& ppu, ppu_opcode_t)
|
||||
static void ppu_check_toc(ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
|
||||
{
|
||||
// Compare TOC with expected value
|
||||
const auto found = s_ppu_toc->find(ppu.cia);
|
||||
@ -383,18 +380,12 @@ static bool ppu_check_toc(ppu_thread& ppu, ppu_opcode_t)
|
||||
|
||||
if (!ppu.state.test_and_set(cpu_flag::dbg_pause) && ppu.check_state())
|
||||
{
|
||||
return false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to the interpreter function
|
||||
const u64 val = ppu_cache(ppu.cia);
|
||||
if (reinterpret_cast<decltype(&ppu_interpreter::UNK)>(val & 0xffffffff)(ppu, {static_cast<u32>(val >> 32)}))
|
||||
{
|
||||
ppu.cia += 4;
|
||||
}
|
||||
|
||||
return false;
|
||||
return ppu_cache(ppu.cia)(ppu, op, this_op, next_fn);
|
||||
}
|
||||
|
||||
extern void ppu_register_range(u32 addr, u32 size)
|
||||
@ -417,7 +408,6 @@ extern void ppu_register_range(u32 addr, u32 size)
|
||||
utils::memory_commit(vm::g_stat_addr + addr, size);
|
||||
}
|
||||
|
||||
const u64 fallback = reinterpret_cast<uptr>(ppu_fallback);
|
||||
const u64 seg_base = addr;
|
||||
|
||||
while (size)
|
||||
@ -425,11 +415,11 @@ extern void ppu_register_range(u32 addr, u32 size)
|
||||
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
|
||||
{
|
||||
// Assume addr is the start of first segment of PRX
|
||||
ppu_ref(addr) = reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc) | (seg_base << (32 + 3));
|
||||
ppu_ref(addr) = reinterpret_cast<ppu_intrp_func_t>(reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc) | (seg_base << (32 + 3)));
|
||||
}
|
||||
else
|
||||
{
|
||||
ppu_ref(addr) = fallback;
|
||||
ppu_ref(addr) = ppu_fallback;
|
||||
}
|
||||
|
||||
addr += 4;
|
||||
@ -437,14 +427,14 @@ extern void ppu_register_range(u32 addr, u32 size)
|
||||
}
|
||||
}
|
||||
|
||||
static bool ppu_far_jump(ppu_thread& ppu);
|
||||
static void ppu_far_jump(ppu_thread&, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*);
|
||||
|
||||
extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr = nullptr)
|
||||
extern void ppu_register_function_at(u32 addr, u32 size, ppu_intrp_func_t ptr = nullptr)
|
||||
{
|
||||
// Initialize specific function
|
||||
if (ptr)
|
||||
{
|
||||
ppu_ref(addr) = (reinterpret_cast<uptr>(ptr) & 0x7fff'ffff'ffffu) | (ppu_ref(addr) & ~0x7fff'ffff'ffffu);
|
||||
ppu_ref(addr) = reinterpret_cast<ppu_intrp_func_t>((reinterpret_cast<uptr>(ptr) & 0xffff'ffff'ffffu) | (uptr(ppu_ref(addr)) & ~0xffff'ffff'ffffu));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -464,12 +454,9 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr = nu
|
||||
}
|
||||
|
||||
// Initialize interpreter cache
|
||||
const u64 _break = reinterpret_cast<uptr>(ppu_break);
|
||||
const u64 far_jump = reinterpret_cast<uptr>(ppu_far_jump);
|
||||
|
||||
while (size)
|
||||
{
|
||||
if (ppu_ref(addr) != _break && ppu_ref(addr) != far_jump)
|
||||
if (ppu_ref(addr) != ppu_break && ppu_ref(addr) != ppu_far_jump)
|
||||
{
|
||||
ppu_ref(addr) = ppu_cache(addr);
|
||||
}
|
||||
@ -481,12 +468,12 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr = nu
|
||||
|
||||
extern void ppu_register_function_at(u32 addr, u32 size, u64 ptr)
|
||||
{
|
||||
return ppu_register_function_at(addr, size, reinterpret_cast<ppu_function_t>(ptr));
|
||||
return ppu_register_function_at(addr, size, reinterpret_cast<ppu_intrp_func_t>(ptr));
|
||||
}
|
||||
|
||||
u32 ppu_get_exported_func_addr(u32 fnid, const std::string& module_name);
|
||||
|
||||
bool ppu_return_from_far_jump(ppu_thread& ppu)
|
||||
void ppu_return_from_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*)
|
||||
{
|
||||
auto& calls_info = ppu.hle_func_calls_with_toc_info;
|
||||
ensure(!calls_info.empty());
|
||||
@ -498,7 +485,6 @@ bool ppu_return_from_far_jump(ppu_thread& ppu)
|
||||
ppu.gpr[2] = restore_info->saved_r2;
|
||||
|
||||
calls_info.pop_back();
|
||||
return false;
|
||||
}
|
||||
|
||||
static const bool s_init_return_far_jump_func = []
|
||||
@ -586,9 +572,9 @@ u32 ppu_get_far_jump(u32 pc)
|
||||
return g_fxo->get<ppu_far_jumps_t>().get_target(pc);
|
||||
}
|
||||
|
||||
static bool ppu_far_jump(ppu_thread& ppu)
|
||||
static void ppu_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*)
|
||||
{
|
||||
const u32 cia = g_fxo->get<ppu_far_jumps_t>().get_target(ppu.cia, &ppu);
|
||||
const u32 cia = g_fxo->get<ppu_far_jumps_t>().get_target(vm::get_addr(this_op), &ppu);
|
||||
|
||||
if (!vm::check_addr(cia, vm::page_executable))
|
||||
{
|
||||
@ -596,7 +582,6 @@ static bool ppu_far_jump(ppu_thread& ppu)
|
||||
}
|
||||
|
||||
ppu.cia = cia;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc, std::string module_name)
|
||||
@ -658,7 +643,7 @@ bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc, st
|
||||
auto& jumps = g_fxo->get<ppu_far_jumps_t>();
|
||||
|
||||
std::lock_guard lock(jumps.mutex);
|
||||
jumps.vals.insert_or_assign(entry, std::type_identity_t<typename ppu_far_jumps_t::all_info_t>{target, link, with_toc, std::move(module_name)});
|
||||
jumps.vals.insert_or_assign(entry, ppu_far_jumps_t::all_info_t{target, link, with_toc, std::move(module_name)});
|
||||
ppu_register_function_at(entry, 4, &ppu_far_jump);
|
||||
|
||||
return true;
|
||||
@ -702,10 +687,13 @@ void ppu_remove_hle_instructions(u32 addr, u32 size)
|
||||
atomic_t<bool> g_debugger_pause_all_threads_on_bp = true;
|
||||
|
||||
// Breakpoint entry point
|
||||
static bool ppu_break(ppu_thread& ppu, ppu_opcode_t)
|
||||
static void ppu_break(ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func* next_fn)
|
||||
{
|
||||
const bool pause_all = g_debugger_pause_all_threads_on_bp;
|
||||
|
||||
const u32 old_cia = vm::get_addr(this_op);
|
||||
ppu.cia = old_cia;
|
||||
|
||||
// Pause
|
||||
ppu.state.atomic_op([&](bs_t<cpu_flag>& state)
|
||||
{
|
||||
@ -719,19 +707,14 @@ static bool ppu_break(ppu_thread& ppu, ppu_opcode_t)
|
||||
Emu.CallAfter([]() { Emu.Pause(); });
|
||||
}
|
||||
|
||||
if (ppu.check_state())
|
||||
if (ppu.check_state() || old_cia != atomic_storage<u32>::load(ppu.cia))
|
||||
{
|
||||
return false;
|
||||
// Do not execute if PC changed
|
||||
return;
|
||||
}
|
||||
|
||||
// Fallback to the interpreter function
|
||||
const u64 val = ppu_cache(ppu.cia);
|
||||
if (reinterpret_cast<decltype(&ppu_interpreter::UNK)>(val)(ppu, {vm::read32(ppu.cia).get()}))
|
||||
{
|
||||
ppu.cia += 4;
|
||||
}
|
||||
|
||||
return false;
|
||||
return ppu_cache(ppu.cia)(ppu, {*this_op}, this_op, next_fn);
|
||||
}
|
||||
|
||||
// Set or remove breakpoint
|
||||
@ -742,11 +725,9 @@ extern bool ppu_breakpoint(u32 addr, bool is_adding)
|
||||
return false;
|
||||
}
|
||||
|
||||
const u64 _break = reinterpret_cast<uptr>(&ppu_break);
|
||||
|
||||
// Remove breakpoint parameters
|
||||
u64 to_set = 0;
|
||||
u64 expected = _break;
|
||||
ppu_intrp_func_t to_set = 0;
|
||||
ppu_intrp_func_t expected = &ppu_break;
|
||||
|
||||
if (u32 hle_addr{}; g_fxo->is_init<ppu_function_manager>() && (hle_addr = g_fxo->get<ppu_function_manager>().addr))
|
||||
{
|
||||
@ -756,7 +737,7 @@ extern bool ppu_breakpoint(u32 addr, bool is_adding)
|
||||
if (addr % 8 == 4 && index < ppu_function_manager::get().size())
|
||||
{
|
||||
// HLE function placement
|
||||
to_set = reinterpret_cast<uptr>(ppu_function_manager::get()[index]);
|
||||
to_set = ppu_function_manager::get()[index];
|
||||
}
|
||||
}
|
||||
|
||||
@ -766,23 +747,21 @@ extern bool ppu_breakpoint(u32 addr, bool is_adding)
|
||||
to_set = ppu_cache(addr);
|
||||
}
|
||||
|
||||
u64& _ref = ppu_ref(addr);
|
||||
ppu_intrp_func_t& _ref = ppu_ref(addr);
|
||||
|
||||
if (is_adding)
|
||||
{
|
||||
// Swap if adding
|
||||
std::swap(to_set, expected);
|
||||
|
||||
const u64 _fall = reinterpret_cast<uptr>(&ppu_fallback);
|
||||
|
||||
if (_ref == _fall)
|
||||
if (_ref == &ppu_fallback)
|
||||
{
|
||||
ppu_log.error("Unregistered instruction replaced with a breakpoint at 0x%08x", addr);
|
||||
expected = _fall;
|
||||
expected = ppu_fallback;
|
||||
}
|
||||
}
|
||||
|
||||
return atomic_storage<u64>::compare_exchange(_ref, expected, to_set);
|
||||
return atomic_storage<ppu_intrp_func_t>::compare_exchange(_ref, expected, to_set);
|
||||
}
|
||||
|
||||
extern bool ppu_patch(u32 addr, u32 value)
|
||||
@ -812,12 +791,9 @@ extern bool ppu_patch(u32 addr, u32 value)
|
||||
|
||||
*vm::get_super_ptr<u32>(addr) = value;
|
||||
|
||||
const u64 _break = reinterpret_cast<uptr>(&ppu_break);
|
||||
const u64 fallback = reinterpret_cast<uptr>(&ppu_fallback);
|
||||
|
||||
if (is_exec)
|
||||
{
|
||||
if (ppu_ref(addr) != _break && ppu_ref(addr) != fallback)
|
||||
if (ppu_ref(addr) != ppu_break && ppu_ref(addr) != ppu_fallback)
|
||||
{
|
||||
ppu_ref(addr) = ppu_cache(addr);
|
||||
}
|
||||
@ -1182,10 +1158,13 @@ void ppu_thread::cpu_task()
|
||||
{
|
||||
std::fesetround(FE_TONEAREST);
|
||||
|
||||
if (g_cfg.core.set_daz_and_ftz && g_cfg.core.ppu_decoder != ppu_decoder_type::precise)
|
||||
if (g_cfg.core.set_daz_and_ftz)
|
||||
{
|
||||
// Set DAZ and FTZ
|
||||
_mm_setcsr(_mm_getcsr() | 0x8840);
|
||||
gv_set_zeroing_denormals();
|
||||
}
|
||||
else
|
||||
{
|
||||
gv_unset_zeroing_denormals();
|
||||
}
|
||||
|
||||
// Execute cmd_queue
|
||||
@ -1197,9 +1176,7 @@ void ppu_thread::cpu_task()
|
||||
{
|
||||
case ppu_cmd::opcode:
|
||||
{
|
||||
cmd_pop(), g_cfg.core.ppu_decoder == ppu_decoder_type::precise
|
||||
? g_ppu_interpreter_precise.decode(arg)(*this, {arg})
|
||||
: g_ppu_interpreter_fast.decode(arg)(*this, {arg});
|
||||
cmd_pop(), g_fxo->get<ppu_interpreter_rt>().decode(arg)(*this, {arg}, vm::_ptr<u32>(cia - 4), &ppu_ret);
|
||||
break;
|
||||
}
|
||||
case ppu_cmd::set_gpr:
|
||||
@ -1236,7 +1213,7 @@ void ppu_thread::cpu_task()
|
||||
}
|
||||
case ppu_cmd::hle_call:
|
||||
{
|
||||
cmd_pop(), ppu_function_manager::get().at(arg)(*this);
|
||||
cmd_pop(), ppu_function_manager::get().at(arg)(*this, {arg}, vm::_ptr<u32>(cia - 4), &ppu_ret);
|
||||
break;
|
||||
}
|
||||
case ppu_cmd::opd_call:
|
||||
@ -1247,8 +1224,8 @@ void ppu_thread::cpu_task()
|
||||
}
|
||||
case ppu_cmd::ptr_call:
|
||||
{
|
||||
const ppu_function_t func = cmd_get(1).as<ppu_function_t>();
|
||||
cmd_pop(1), func(*this);
|
||||
const ppu_intrp_func_t func = cmd_get(1).as<ppu_intrp_func_t>();
|
||||
cmd_pop(1), func(*this, {}, vm::_ptr<u32>(cia - 4), &ppu_ret);
|
||||
break;
|
||||
}
|
||||
case ppu_cmd::initialize:
|
||||
@ -1323,7 +1300,7 @@ void ppu_thread::cpu_on_stop()
|
||||
|
||||
void ppu_thread::exec_task()
|
||||
{
|
||||
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
|
||||
if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
@ -1340,79 +1317,28 @@ void ppu_thread::exec_task()
|
||||
}
|
||||
|
||||
const auto cache = vm::g_exec_addr;
|
||||
using func_t = decltype(&ppu_interpreter::UNK);
|
||||
const auto mem_ = vm::g_base_addr;
|
||||
|
||||
while (true)
|
||||
{
|
||||
const auto exec_op = [this](u64 op)
|
||||
if (test_stopped()) [[unlikely]]
|
||||
{
|
||||
return reinterpret_cast<func_t>(op)(*this, {vm::read32(cia).get()});
|
||||
};
|
||||
|
||||
if (cia % 8 || state) [[unlikely]]
|
||||
{
|
||||
if (test_stopped()) return;
|
||||
|
||||
// Decode single instruction (may be step)
|
||||
if (exec_op(*reinterpret_cast<u64*>(cache + u64{cia} * 2))) { cia += 4; }
|
||||
continue;
|
||||
return;
|
||||
}
|
||||
|
||||
u64 op0, op1, op2, op3;
|
||||
u64 _pos = u64{cia} * 2;
|
||||
gv_zeroupper();
|
||||
|
||||
// Reinitialize
|
||||
{
|
||||
const v128 _op0 = *reinterpret_cast<const v128*>(cache + _pos);
|
||||
const v128 _op1 = *reinterpret_cast<const v128*>(cache + _pos + 16);
|
||||
op0 = _op0._u64[0];
|
||||
op1 = _op0._u64[1];
|
||||
op2 = _op1._u64[0];
|
||||
op3 = _op1._u64[1];
|
||||
}
|
||||
|
||||
while (exec_op(op0)) [[likely]]
|
||||
{
|
||||
cia += 4;
|
||||
|
||||
if (exec_op(op1)) [[likely]]
|
||||
{
|
||||
cia += 4;
|
||||
|
||||
if (exec_op(op2)) [[likely]]
|
||||
{
|
||||
cia += 4;
|
||||
|
||||
if (exec_op(op3)) [[likely]]
|
||||
{
|
||||
cia += 4;
|
||||
|
||||
if (state) [[unlikely]]
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
_pos += 32;
|
||||
const v128 _op0 = *reinterpret_cast<const v128*>(cache + _pos);
|
||||
const v128 _op1 = *reinterpret_cast<const v128*>(cache + _pos + 16);
|
||||
op0 = _op0._u64[0];
|
||||
op1 = _op0._u64[1];
|
||||
op2 = _op1._u64[0];
|
||||
op3 = _op1._u64[1];
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Execute instruction (may be step; execute only one instruction if state)
|
||||
const auto op = reinterpret_cast<be_t<u32>*>(mem_ + u64{cia});
|
||||
const auto fn = reinterpret_cast<ppu_intrp_func*>(cache + u64{cia} * 2);
|
||||
fn->fn(*this, {*op}, op, state ? &ppu_ret : fn + 1);
|
||||
}
|
||||
}
|
||||
|
||||
ppu_thread::~ppu_thread()
|
||||
{
|
||||
perf_log.notice("Perf stats for STCX reload: successs %u, failure %u", last_succ, last_fail);
|
||||
perf_log.notice("Perf stats for instructions: total %u", exec_bytes / 4);
|
||||
}
|
||||
|
||||
ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 prio, int detached)
|
||||
@ -1638,7 +1564,7 @@ void ppu_thread::stack_pop_verbose(u32 addr, u32 size) noexcept
|
||||
ppu_log.error("Invalid thread");
|
||||
}
|
||||
|
||||
extern ppu_function_t ppu_get_syscall(u64 code);
|
||||
extern ppu_intrp_func_t ppu_get_syscall(u64 code);
|
||||
|
||||
void ppu_trap(ppu_thread& ppu, u64 addr)
|
||||
{
|
||||
@ -1728,7 +1654,7 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
|
||||
{
|
||||
const auto _inst = v128::loadu(inst + i) & mask_vec;
|
||||
|
||||
if (_mm_movemask_epi8(v128::eq32(_inst, store_vec).vi))
|
||||
if (!gv_testz(gv_eq32(_inst, store_vec)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@ -1817,10 +1743,11 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr)
|
||||
return ppu_load_acquire_reservation<u64>(ppu, addr);
|
||||
}
|
||||
|
||||
const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, const void* _old, u64 _new)>("ppu_stcx_accurate_tx", [](asmjit::x86::Assembler& c, auto& args)
|
||||
const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, const void* _old, u64 _new)>("ppu_stcx_accurate_tx", [](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
Label fall = c.newLabel();
|
||||
Label fail = c.newLabel();
|
||||
Label _ret = c.newLabel();
|
||||
@ -2024,6 +1951,9 @@ const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, co
|
||||
c.bind(ret2);
|
||||
#endif
|
||||
c.ret();
|
||||
#else
|
||||
c.ret(a64::x30);
|
||||
#endif
|
||||
});
|
||||
|
||||
template <typename T>
|
||||
@ -2147,7 +2077,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
|
||||
utils::prefetch_read(ppu.rdata + 64);
|
||||
ppu.last_faddr = addr;
|
||||
ppu.last_ftime = res.load() & -128;
|
||||
ppu.last_ftsc = __rdtsc();
|
||||
ppu.last_ftsc = utils::get_tsc();
|
||||
return false;
|
||||
}
|
||||
default:
|
||||
@ -2249,7 +2179,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
|
||||
|
||||
ppu.last_faddr = addr;
|
||||
ppu.last_ftime = old_rtime & -128;
|
||||
ppu.last_ftsc = __rdtsc();
|
||||
ppu.last_ftsc = utils::get_tsc();
|
||||
std::memcpy(&ppu.rdata[addr & 0x78], &old_data, 8);
|
||||
}
|
||||
|
||||
@ -2286,7 +2216,7 @@ namespace
|
||||
// Compiled PPU module info
|
||||
struct jit_module
|
||||
{
|
||||
std::vector<ppu_function_t> funcs;
|
||||
std::vector<ppu_intrp_func_t> funcs;
|
||||
std::shared_ptr<jit_compiler> pjit;
|
||||
bool init = false;
|
||||
};
|
||||
@ -2829,7 +2759,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
if (g_cfg.core.ppu_debug && func.size && func.toc != umax)
|
||||
{
|
||||
s_ppu_toc->emplace(func.addr, func.toc);
|
||||
ppu_ref(func.addr) = reinterpret_cast<uptr>(&ppu_check_toc);
|
||||
ppu_ref(func.addr) = &ppu_check_toc;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3022,7 +2952,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
// Fixup some information
|
||||
entry.name = fmt::format("__0x%x", entry.addr - reloc);
|
||||
|
||||
if (has_mfvscr)
|
||||
if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
|
||||
{
|
||||
// TODO
|
||||
entry.attr += ppu_attr::has_mfvscr;
|
||||
@ -3139,13 +3069,15 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
enum class ppu_settings : u32
|
||||
{
|
||||
non_win32,
|
||||
accurate_fma,
|
||||
accurate_ppu_vector_nan,
|
||||
java_mode_handling,
|
||||
accurate_dfma,
|
||||
fixup_vnan,
|
||||
accurate_jm,
|
||||
accurate_cache_line_stores,
|
||||
reservations_128_byte,
|
||||
greedy_mode,
|
||||
has_mfvscr,
|
||||
accurate_sat,
|
||||
accurate_fpcc,
|
||||
accurate_vnan,
|
||||
|
||||
__bitset_enum_max
|
||||
};
|
||||
@ -3155,20 +3087,24 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
#ifndef _WIN32
|
||||
settings += ppu_settings::non_win32;
|
||||
#endif
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
settings += ppu_settings::accurate_fma;
|
||||
if (g_cfg.core.llvm_ppu_accurate_vector_nan)
|
||||
settings += ppu_settings::accurate_ppu_vector_nan;
|
||||
if (g_cfg.core.llvm_ppu_jm_handling)
|
||||
settings += ppu_settings::java_mode_handling;
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
settings += ppu_settings::accurate_dfma;
|
||||
if (g_cfg.core.ppu_fix_vnan)
|
||||
settings += ppu_settings::fixup_vnan;
|
||||
if (g_cfg.core.ppu_use_nj_bit)
|
||||
settings += ppu_settings::accurate_jm;
|
||||
if (has_dcbz == 2)
|
||||
settings += ppu_settings::accurate_cache_line_stores;
|
||||
if (g_cfg.core.ppu_128_reservations_loop_max_length)
|
||||
settings += ppu_settings::reservations_128_byte;
|
||||
if (g_cfg.core.ppu_llvm_greedy_mode)
|
||||
settings += ppu_settings::greedy_mode;
|
||||
if (has_mfvscr)
|
||||
settings += ppu_settings::has_mfvscr;
|
||||
if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
|
||||
settings += ppu_settings::accurate_sat;
|
||||
if (g_cfg.core.ppu_set_fpcc)
|
||||
settings += ppu_settings::accurate_fpcc, fmt::throw_exception("FPCC Not implemented");
|
||||
if (g_cfg.core.ppu_set_vnan)
|
||||
settings += ppu_settings::accurate_vnan, fmt::throw_exception("VNAN Not implemented");
|
||||
|
||||
// Write version, hash, CPU, settings
|
||||
fmt::append(obj_name, "v5-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
|
||||
@ -3319,10 +3255,10 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
if (!func.size) continue;
|
||||
|
||||
const auto name = fmt::format("__0x%x", func.addr - reloc);
|
||||
const auto addr = ensure(reinterpret_cast<ppu_function_t>(jit->get(name)));
|
||||
const auto addr = ensure(reinterpret_cast<ppu_intrp_func_t>(jit->get(name)));
|
||||
jit_mod.funcs.emplace_back(addr);
|
||||
|
||||
if (ppu_ref(func.addr) != reinterpret_cast<u64>(ppu_far_jump))
|
||||
if (ppu_ref(func.addr) != ppu_far_jump)
|
||||
ppu_register_function_at(func.addr, 4, addr);
|
||||
|
||||
if (g_cfg.core.ppu_debug)
|
||||
@ -3342,7 +3278,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
|
||||
const u64 addr = reinterpret_cast<uptr>(ensure(jit_mod.funcs[index++]));
|
||||
|
||||
if (ppu_ref(func.addr) != reinterpret_cast<u64>(ppu_far_jump))
|
||||
if (ppu_ref(func.addr) != ppu_far_jump)
|
||||
ppu_register_function_at(func.addr, 4, addr);
|
||||
|
||||
if (g_cfg.core.ppu_debug)
|
||||
|
@ -276,6 +276,7 @@ public:
|
||||
u32 last_faddr = 0;
|
||||
u64 last_fail = 0;
|
||||
u64 last_succ = 0;
|
||||
u64 exec_bytes = 0; // Amount of "bytes" executed (4 for each instruction)
|
||||
|
||||
u32 dbg_step_pc = 0;
|
||||
|
||||
|
@ -3,20 +3,19 @@
|
||||
#include "Emu/system_config.h"
|
||||
#include "PPUTranslator.h"
|
||||
#include "PPUThread.h"
|
||||
#include "PPUInterpreter.h"
|
||||
|
||||
#include "util/types.hpp"
|
||||
#include "util/endian.hpp"
|
||||
#include "util/logs.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/v128sse.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include <algorithm>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
const ppu_decoder<PPUTranslator> s_ppu_decoder;
|
||||
const ppu_decoder<ppu_itype> s_ppu_itype;
|
||||
const ppu_decoder<ppu_iname> s_ppu_iname;
|
||||
extern const ppu_decoder<ppu_itype> g_ppu_itype;
|
||||
extern const ppu_decoder<ppu_iname> g_ppu_iname;
|
||||
|
||||
PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_module& info, ExecutionEngine& engine)
|
||||
: cpu_translator(_module, false)
|
||||
@ -151,7 +150,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
||||
{
|
||||
const u32 op = vm::read32(vm::cast(addr + base));
|
||||
|
||||
switch (s_ppu_itype.decode(op))
|
||||
switch (g_ppu_itype.decode(op))
|
||||
{
|
||||
case ppu_itype::UNK:
|
||||
case ppu_itype::ECIWX:
|
||||
@ -251,7 +250,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
||||
if (m_rel)
|
||||
{
|
||||
// This is very bad. m_rel is normally set to nullptr after a relocation is handled (so it wasn't)
|
||||
ppu_log.error("LLVM: [0x%x] Unsupported relocation(%u) in '%s' (opcode=0x%x '%s'). Please report.", rel_found->first, m_rel->type, m_info.name, op, s_ppu_iname.decode(op));
|
||||
ppu_log.error("LLVM: [0x%x] Unsupported relocation(%u) in '%s' (opcode=0x%x '%s'). Please report.", rel_found->first, m_rel->type, m_info.name, op, g_ppu_iname.decode(op));
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
@ -291,8 +290,8 @@ Value* PPUTranslator::VecHandleDenormal(Value* val)
|
||||
|
||||
Value* PPUTranslator::VecHandleResult(Value* val)
|
||||
{
|
||||
val = g_cfg.core.llvm_ppu_accurate_vector_nan ? VecHandleNan(val) : val;
|
||||
val = g_cfg.core.llvm_ppu_jm_handling ? VecHandleDenormal(val) : val;
|
||||
val = g_cfg.core.ppu_fix_vnan ? VecHandleNan(val) : val;
|
||||
val = g_cfg.core.ppu_use_nj_bit ? VecHandleDenormal(val) : val;
|
||||
return val;
|
||||
}
|
||||
|
||||
@ -391,10 +390,10 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
|
||||
const auto pos = m_ir->CreateShl(indirect, 1);
|
||||
const auto ptr = m_ir->CreateGEP(m_exec, pos);
|
||||
const auto val = m_ir->CreateLoad(m_ir->CreateBitCast(ptr, get_type<u64*>()));
|
||||
callee = FunctionCallee(type, m_ir->CreateIntToPtr(m_ir->CreateAnd(val, 0x7fff'ffff'ffff), type->getPointerTo()));
|
||||
callee = FunctionCallee(type, m_ir->CreateIntToPtr(m_ir->CreateAnd(val, 0xffff'ffff'ffff), type->getPointerTo()));
|
||||
|
||||
// Load new segment address
|
||||
seg0 = m_ir->CreateShl(m_ir->CreateLShr(val, 47), 12);
|
||||
seg0 = m_ir->CreateShl(m_ir->CreateLShr(val, 48), 13);
|
||||
}
|
||||
|
||||
m_ir->SetInsertPoint(block);
|
||||
@ -640,7 +639,8 @@ void PPUTranslator::CompilationError(const std::string& error)
|
||||
|
||||
void PPUTranslator::MFVSCR(ppu_opcode_t op)
|
||||
{
|
||||
const auto vscr = m_ir->CreateOr(ZExt(IsNotZero(RegLoad(m_sat)), GetType<u32>()), m_ir->CreateShl(ZExt(RegLoad(m_nj), GetType<u32>()), 16));
|
||||
const auto vsat = g_cfg.core.ppu_set_sat_bit ? ZExt(IsNotZero(RegLoad(m_sat)), GetType<u32>()) : m_ir->getInt32(0);
|
||||
const auto vscr = m_ir->CreateOr(vsat, m_ir->CreateShl(ZExt(RegLoad(m_nj), GetType<u32>()), 16));
|
||||
SetVr(op.vd, m_ir->CreateInsertElement(ConstantAggregateZero::get(GetType<u32[4]>()), vscr, m_ir->getInt32(m_is_be ? 3 : 0)));
|
||||
}
|
||||
|
||||
@ -649,8 +649,10 @@ void PPUTranslator::MTVSCR(ppu_opcode_t op)
|
||||
const auto vscr = m_ir->CreateExtractElement(GetVr(op.vb, VrType::vi32), m_ir->getInt32(m_is_be ? 3 : 0));
|
||||
const auto nj = Trunc(m_ir->CreateLShr(vscr, 16), GetType<bool>());
|
||||
RegStore(nj, m_nj);
|
||||
if (g_cfg.core.llvm_ppu_jm_handling) RegStore(m_ir->CreateSelect(nj, m_ir->getInt32(0x7f80'0000), m_ir->getInt32(0x7fff'ffff)), m_jm_mask);
|
||||
RegStore(m_ir->CreateInsertElement(ConstantAggregateZero::get(GetType<u32[4]>()), m_ir->CreateAnd(vscr, 1), m_ir->getInt32(0)), m_sat);
|
||||
if (g_cfg.core.ppu_use_nj_bit)
|
||||
RegStore(m_ir->CreateSelect(nj, m_ir->getInt32(0x7f80'0000), m_ir->getInt32(0x7fff'ffff)), m_jm_mask);
|
||||
if (g_cfg.core.ppu_set_sat_bit)
|
||||
RegStore(m_ir->CreateInsertElement(ConstantAggregateZero::get(GetType<u32[4]>()), m_ir->CreateAnd(vscr, 1), m_ir->getInt32(0)), m_sat);
|
||||
}
|
||||
|
||||
void PPUTranslator::VADDCUW(ppu_opcode_t op)
|
||||
@ -902,10 +904,12 @@ void PPUTranslator::VCTSXS(ppu_opcode_t op)
|
||||
const auto b = get_vr<f32[4]>(op.vb);
|
||||
const auto scaled = b * fsplat<f32[4]>(std::pow(2, 0 + op.vuimm));
|
||||
const auto const1 = fsplat<f32[4]>(-std::pow(2, 31));
|
||||
//const auto is_nan = fcmp_uno(b == b); // NaN -> 0.0
|
||||
const auto sat_l = fcmp_ord(scaled < const1); // TODO ???
|
||||
const auto is_nan = fcmp_uno(b != b);
|
||||
const auto sat_l = fcmp_ord(scaled < const1);
|
||||
const auto sat_h = fcmp_ord(scaled >= fsplat<f32[4]>(std::pow(2, 31)));
|
||||
const auto converted = fpcast<s32[4]>(select(sat_l, const1, scaled));
|
||||
value_t<s32[4]> converted = eval(fpcast<s32[4]>(select(sat_l, const1, scaled)));
|
||||
if (g_cfg.core.ppu_fix_vnan)
|
||||
converted = eval(select(is_nan, splat<s32[4]>(0), converted)); // NaN -> 0
|
||||
set_vr(op.vd, select(sat_h, splat<s32[4]>(0x7fff'ffff), converted));
|
||||
set_sat(sext<s32[4]>(sat_l) | sext<s32[4]>(sat_h));
|
||||
}
|
||||
@ -915,10 +919,12 @@ void PPUTranslator::VCTUXS(ppu_opcode_t op)
|
||||
const auto b = get_vr<f32[4]>(op.vb);
|
||||
const auto scaled = b * fsplat<f32[4]>(std::pow(2, 0 + op.vuimm));
|
||||
const auto const0 = fsplat<f32[4]>(0.);
|
||||
//const auto is_nan = fcmp_uno(b == b); // NaN -> 0.0
|
||||
const auto is_nan = fcmp_uno(b != b);
|
||||
const auto sat_l = fcmp_ord(scaled < const0);
|
||||
const auto sat_h = fcmp_ord(scaled >= fsplat<f32[4]>(std::pow(2, 32))); // TODO ???
|
||||
const auto converted = fpcast<u32[4]>(select(sat_l, const0, scaled));
|
||||
const auto sat_h = fcmp_ord(scaled >= fsplat<f32[4]>(std::pow(2, 32)));
|
||||
value_t<u32[4]> converted = eval(fpcast<u32[4]>(select(sat_l, const0, scaled)));
|
||||
if (g_cfg.core.ppu_fix_vnan)
|
||||
converted = eval(select(is_nan, splat<u32[4]>(0), converted)); // NaN -> 0
|
||||
set_vr(op.vd, select(sat_h, splat<u32[4]>(0xffff'ffff), converted));
|
||||
set_sat(sext<s32[4]>(sat_l) | sext<s32[4]>(sat_h));
|
||||
}
|
||||
@ -1334,7 +1340,7 @@ void PPUTranslator::VPKSHSS(ppu_opcode_t op)
|
||||
const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
const auto r = trunc<u8[16]>(min(max(ab, splat<s16[16]>(-0x80)), splat<s16[16]>(0x7f)));
|
||||
set_vr(op.vd, r);
|
||||
set_sat(((a + 0x80) | (b + 0x80)) >> 8);
|
||||
set_sat(bitcast<u16[8]>((a + 0x80) | (b + 0x80)) >> 8);
|
||||
}
|
||||
|
||||
void PPUTranslator::VPKSHUS(ppu_opcode_t op)
|
||||
@ -1344,7 +1350,7 @@ void PPUTranslator::VPKSHUS(ppu_opcode_t op)
|
||||
const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
const auto r = trunc<u8[16]>(min(max(ab, splat<s16[16]>(0)), splat<s16[16]>(0xff)));
|
||||
set_vr(op.vd, r);
|
||||
set_sat((a | b) >> 8);
|
||||
set_sat(bitcast<u16[8]>(a | b) >> 8);
|
||||
}
|
||||
|
||||
void PPUTranslator::VPKSWSS(ppu_opcode_t op)
|
||||
@ -1354,7 +1360,7 @@ void PPUTranslator::VPKSWSS(ppu_opcode_t op)
|
||||
const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
const auto r = trunc<u16[8]>(min(max(ab, splat<s32[8]>(-0x8000)), splat<s32[8]>(0x7fff)));
|
||||
set_vr(op.vd, r);
|
||||
set_sat(((a + 0x8000) | (b + 0x8000)) >> 16);
|
||||
set_sat(bitcast<u32[4]>((a + 0x8000) | (b + 0x8000)) >> 16);
|
||||
}
|
||||
|
||||
void PPUTranslator::VPKSWUS(ppu_opcode_t op)
|
||||
@ -1364,7 +1370,7 @@ void PPUTranslator::VPKSWUS(ppu_opcode_t op)
|
||||
const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
const auto r = trunc<u16[8]>(min(max(ab, splat<s32[8]>(0)), splat<s32[8]>(0xffff)));
|
||||
set_vr(op.vd, r);
|
||||
set_sat((a | b) >> 16);
|
||||
set_sat(bitcast<u32[4]>(a | b) >> 16);
|
||||
}
|
||||
|
||||
void PPUTranslator::VPKUHUM(ppu_opcode_t op)
|
||||
@ -1741,7 +1747,7 @@ void PPUTranslator::VSUMSWS(ppu_opcode_t op)
|
||||
const auto s = eval(x + y + z);
|
||||
const auto r = min(max(zshuffle(s, 0, 2) + zshuffle(s, 1, 2), splat<s64[2]>(-0x8000'0000ll)), splat<s64[2]>(0x7fff'ffff));
|
||||
set_vr(op.vd, zshuffle(bitcast<u32[4]>(r), 0, 4, 4, 4));
|
||||
set_sat((r + 0x8000'0000) >> 32);
|
||||
set_sat(bitcast<u64[2]>(r + 0x8000'0000) >> 32);
|
||||
}
|
||||
|
||||
void PPUTranslator::VSUM2SWS(ppu_opcode_t op)
|
||||
@ -1752,18 +1758,15 @@ void PPUTranslator::VSUM2SWS(ppu_opcode_t op)
|
||||
const auto z = b >> 32;
|
||||
const auto r = min(max(x + y + z, splat<s64[2]>(-0x8000'0000ll)), splat<s64[2]>(0x7fff'ffff));
|
||||
set_vr(op.vd, zshuffle(bitcast<u32[4]>(r), 0, 4, 2, 4));
|
||||
set_sat((r + 0x8000'0000) >> 32);
|
||||
set_sat(bitcast<u64[2]>(r + 0x8000'0000) >> 32);
|
||||
}
|
||||
|
||||
void PPUTranslator::VSUM4SBS(ppu_opcode_t op)
|
||||
{
|
||||
const auto a = get_vr<s32[4]>(op.va);
|
||||
const auto a = get_vr<s16[8]>(op.va);
|
||||
const auto b = get_vr<s32[4]>(op.vb);
|
||||
const auto x = a << 24 >> 24;
|
||||
const auto y = a << 16 >> 24;
|
||||
const auto z = a << 8 >> 24;
|
||||
const auto w = a >> 24;
|
||||
const auto s = eval(x + y + z + w); // Can't overflow
|
||||
const auto x = eval(bitcast<s32[4]>((a << 8 >> 8) + (a >> 8)));
|
||||
const auto s = eval((x << 16 >> 16) + (x >> 16));
|
||||
const auto r = add_sat(s, b);
|
||||
set_vr(op.vd, r);
|
||||
set_sat(r ^ (s + b));
|
||||
@ -1773,9 +1776,7 @@ void PPUTranslator::VSUM4SHS(ppu_opcode_t op)
|
||||
{
|
||||
const auto a = get_vr<s32[4]>(op.va);
|
||||
const auto b = get_vr<s32[4]>(op.vb);
|
||||
const auto x = a << 16 >> 16;
|
||||
const auto y = a >> 16;
|
||||
const auto s = eval(x + y); // Can't overflow
|
||||
const auto s = eval((a << 16 >> 16) + (a >> 16));
|
||||
const auto r = add_sat(s, b);
|
||||
set_vr(op.vd, r);
|
||||
set_sat(r ^ (s + b));
|
||||
@ -1783,13 +1784,10 @@ void PPUTranslator::VSUM4SHS(ppu_opcode_t op)
|
||||
|
||||
void PPUTranslator::VSUM4UBS(ppu_opcode_t op)
|
||||
{
|
||||
const auto a = get_vr<u32[4]>(op.va);
|
||||
const auto a = get_vr<u16[8]>(op.va);
|
||||
const auto b = get_vr<u32[4]>(op.vb);
|
||||
const auto x = a & 0xff;
|
||||
const auto y = a << 16 >> 24;
|
||||
const auto z = a << 8 >> 24;
|
||||
const auto w = a >> 24;
|
||||
const auto s = eval(x + y + z + w); // Can't overflow
|
||||
const auto x = eval(bitcast<u32[4]>((a & 0xff) + (a >> 8)));
|
||||
const auto s = eval((x & 0xffff) + (x >> 16));
|
||||
const auto r = add_sat(s, b);
|
||||
set_vr(op.vd, r);
|
||||
set_sat(r ^ (s + b));
|
||||
@ -4047,7 +4045,7 @@ void PPUTranslator::FMADDS(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
|
||||
}
|
||||
@ -4075,7 +4073,7 @@ void PPUTranslator::FMSUBS(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
|
||||
}
|
||||
@ -4103,7 +4101,7 @@ void PPUTranslator::FNMSUBS(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
|
||||
}
|
||||
@ -4131,7 +4129,7 @@ void PPUTranslator::FNMADDS(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
|
||||
}
|
||||
@ -4384,7 +4382,7 @@ void PPUTranslator::FMSUB(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
|
||||
}
|
||||
@ -4412,7 +4410,7 @@ void PPUTranslator::FMADD(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), { a, c, b });
|
||||
}
|
||||
@ -4440,7 +4438,7 @@ void PPUTranslator::FNMSUB(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
|
||||
}
|
||||
@ -4468,7 +4466,7 @@ void PPUTranslator::FNMADD(ppu_opcode_t op)
|
||||
const auto c = GetFpr(op.frc);
|
||||
|
||||
llvm::Value* result;
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
{
|
||||
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
|
||||
}
|
||||
|
@ -358,18 +358,31 @@ public:
|
||||
void VCFSX(ppu_opcode_t op);
|
||||
void VCFUX(ppu_opcode_t op);
|
||||
void VCMPBFP(ppu_opcode_t op);
|
||||
void VCMPBFP_(ppu_opcode_t op) { return VCMPBFP(op); }
|
||||
void VCMPEQFP(ppu_opcode_t op);
|
||||
void VCMPEQFP_(ppu_opcode_t op) { return VCMPEQFP(op); }
|
||||
void VCMPEQUB(ppu_opcode_t op);
|
||||
void VCMPEQUB_(ppu_opcode_t op) { return VCMPEQUB(op); }
|
||||
void VCMPEQUH(ppu_opcode_t op);
|
||||
void VCMPEQUH_(ppu_opcode_t op) { return VCMPEQUH(op); }
|
||||
void VCMPEQUW(ppu_opcode_t op);
|
||||
void VCMPEQUW_(ppu_opcode_t op) { return VCMPEQUW(op); }
|
||||
void VCMPGEFP(ppu_opcode_t op);
|
||||
void VCMPGEFP_(ppu_opcode_t op) { return VCMPGEFP(op); }
|
||||
void VCMPGTFP(ppu_opcode_t op);
|
||||
void VCMPGTFP_(ppu_opcode_t op) { return VCMPGTFP(op); }
|
||||
void VCMPGTSB(ppu_opcode_t op);
|
||||
void VCMPGTSB_(ppu_opcode_t op) { return VCMPGTSB(op); }
|
||||
void VCMPGTSH(ppu_opcode_t op);
|
||||
void VCMPGTSH_(ppu_opcode_t op) { return VCMPGTSH(op); }
|
||||
void VCMPGTSW(ppu_opcode_t op);
|
||||
void VCMPGTSW_(ppu_opcode_t op) { return VCMPGTSW(op); }
|
||||
void VCMPGTUB(ppu_opcode_t op);
|
||||
void VCMPGTUB_(ppu_opcode_t op) { return VCMPGTUB(op); }
|
||||
void VCMPGTUH(ppu_opcode_t op);
|
||||
void VCMPGTUH_(ppu_opcode_t op) { return VCMPGTUH(op); }
|
||||
void VCMPGTUW(ppu_opcode_t op);
|
||||
void VCMPGTUW_(ppu_opcode_t op) { return VCMPGTUW(op); }
|
||||
void VCTSXS(ppu_opcode_t op);
|
||||
void VCTUXS(ppu_opcode_t op);
|
||||
void VEXPTEFP(ppu_opcode_t op);
|
||||
@ -717,6 +730,130 @@ public:
|
||||
void FCFID(ppu_opcode_t op);
|
||||
|
||||
void UNK(ppu_opcode_t op);
|
||||
|
||||
void SUBFCO(ppu_opcode_t op) { return SUBFC(op); }
|
||||
void ADDCO(ppu_opcode_t op) { return ADDC(op); }
|
||||
void SUBFO(ppu_opcode_t op) { return SUBF(op); }
|
||||
void NEGO(ppu_opcode_t op) { return NEG(op); }
|
||||
void SUBFEO(ppu_opcode_t op) { return SUBFE(op); }
|
||||
void ADDEO(ppu_opcode_t op) { return ADDE(op); }
|
||||
void SUBFZEO(ppu_opcode_t op) { return SUBFZE(op); }
|
||||
void ADDZEO(ppu_opcode_t op) { return ADDZE(op); }
|
||||
void SUBFMEO(ppu_opcode_t op) { return SUBFME(op); }
|
||||
void MULLDO(ppu_opcode_t op) { return MULLD(op); }
|
||||
void ADDMEO(ppu_opcode_t op) { return ADDME(op); }
|
||||
void MULLWO(ppu_opcode_t op) { return MULLW(op); }
|
||||
void ADDO(ppu_opcode_t op) { return ADD(op); }
|
||||
void DIVDUO(ppu_opcode_t op) { return DIVDU(op); }
|
||||
void DIVWUO(ppu_opcode_t op) { return DIVWU(op); }
|
||||
void DIVDO(ppu_opcode_t op) { return DIVD(op); }
|
||||
void DIVWO(ppu_opcode_t op) { return DIVW(op); }
|
||||
|
||||
void SUBFCO_(ppu_opcode_t op) { return SUBFC(op); }
|
||||
void ADDCO_(ppu_opcode_t op) { return ADDC(op); }
|
||||
void SUBFO_(ppu_opcode_t op) { return SUBF(op); }
|
||||
void NEGO_(ppu_opcode_t op) { return NEG(op); }
|
||||
void SUBFEO_(ppu_opcode_t op) { return SUBFE(op); }
|
||||
void ADDEO_(ppu_opcode_t op) { return ADDE(op); }
|
||||
void SUBFZEO_(ppu_opcode_t op) { return SUBFZE(op); }
|
||||
void ADDZEO_(ppu_opcode_t op) { return ADDZE(op); }
|
||||
void SUBFMEO_(ppu_opcode_t op) { return SUBFME(op); }
|
||||
void MULLDO_(ppu_opcode_t op) { return MULLD(op); }
|
||||
void ADDMEO_(ppu_opcode_t op) { return ADDME(op); }
|
||||
void MULLWO_(ppu_opcode_t op) { return MULLW(op); }
|
||||
void ADDO_(ppu_opcode_t op) { return ADD(op); }
|
||||
void DIVDUO_(ppu_opcode_t op) { return DIVDU(op); }
|
||||
void DIVWUO_(ppu_opcode_t op) { return DIVWU(op); }
|
||||
void DIVDO_(ppu_opcode_t op) { return DIVD(op); }
|
||||
void DIVWO_(ppu_opcode_t op) { return DIVW(op); }
|
||||
|
||||
void RLWIMI_(ppu_opcode_t op) { return RLWIMI(op); }
|
||||
void RLWINM_(ppu_opcode_t op) { return RLWINM(op); }
|
||||
void RLWNM_(ppu_opcode_t op) { return RLWNM(op); }
|
||||
void RLDICL_(ppu_opcode_t op) { return RLDICL(op); }
|
||||
void RLDICR_(ppu_opcode_t op) { return RLDICR(op); }
|
||||
void RLDIC_(ppu_opcode_t op) { return RLDIC(op); }
|
||||
void RLDIMI_(ppu_opcode_t op) { return RLDIMI(op); }
|
||||
void RLDCL_(ppu_opcode_t op) { return RLDCL(op); }
|
||||
void RLDCR_(ppu_opcode_t op) { return RLDCR(op); }
|
||||
void SUBFC_(ppu_opcode_t op) { return SUBFC(op); }
|
||||
void MULHDU_(ppu_opcode_t op) { return MULHDU(op); }
|
||||
void ADDC_(ppu_opcode_t op) { return ADDC(op); }
|
||||
void MULHWU_(ppu_opcode_t op) { return MULHWU(op); }
|
||||
void SLW_(ppu_opcode_t op) { return SLW(op); }
|
||||
void CNTLZW_(ppu_opcode_t op) { return CNTLZW(op); }
|
||||
void SLD_(ppu_opcode_t op) { return SLD(op); }
|
||||
void AND_(ppu_opcode_t op) { return AND(op); }
|
||||
void SUBF_(ppu_opcode_t op) { return SUBF(op); }
|
||||
void CNTLZD_(ppu_opcode_t op) { return CNTLZD(op); }
|
||||
void ANDC_(ppu_opcode_t op) { return ANDC(op); }
|
||||
void MULHD_(ppu_opcode_t op) { return MULHD(op); }
|
||||
void MULHW_(ppu_opcode_t op) { return MULHW(op); }
|
||||
void NEG_(ppu_opcode_t op) { return NEG(op); }
|
||||
void NOR_(ppu_opcode_t op) { return NOR(op); }
|
||||
void SUBFE_(ppu_opcode_t op) { return SUBFE(op); }
|
||||
void ADDE_(ppu_opcode_t op) { return ADDE(op); }
|
||||
void SUBFZE_(ppu_opcode_t op) { return SUBFZE(op); }
|
||||
void ADDZE_(ppu_opcode_t op) { return ADDZE(op); }
|
||||
void MULLD_(ppu_opcode_t op) { return MULLD(op); }
|
||||
void SUBFME_(ppu_opcode_t op) { return SUBFME(op); }
|
||||
void ADDME_(ppu_opcode_t op) { return ADDME(op); }
|
||||
void MULLW_(ppu_opcode_t op) { return MULLW(op); }
|
||||
void ADD_(ppu_opcode_t op) { return ADD(op); }
|
||||
void EQV_(ppu_opcode_t op) { return EQV(op); }
|
||||
void XOR_(ppu_opcode_t op) { return XOR(op); }
|
||||
void ORC_(ppu_opcode_t op) { return ORC(op); }
|
||||
void OR_(ppu_opcode_t op) { return OR(op); }
|
||||
void DIVDU_(ppu_opcode_t op) { return DIVDU(op); }
|
||||
void DIVWU_(ppu_opcode_t op) { return DIVWU(op); }
|
||||
void NAND_(ppu_opcode_t op) { return NAND(op); }
|
||||
void DIVD_(ppu_opcode_t op) { return DIVD(op); }
|
||||
void DIVW_(ppu_opcode_t op) { return DIVW(op); }
|
||||
void SRW_(ppu_opcode_t op) { return SRW(op); }
|
||||
void SRD_(ppu_opcode_t op) { return SRD(op); }
|
||||
void SRAW_(ppu_opcode_t op) { return SRAW(op); }
|
||||
void SRAD_(ppu_opcode_t op) { return SRAD(op); }
|
||||
void SRAWI_(ppu_opcode_t op) { return SRAWI(op); }
|
||||
void SRADI_(ppu_opcode_t op) { return SRADI(op); }
|
||||
void EXTSH_(ppu_opcode_t op) { return EXTSH(op); }
|
||||
void EXTSB_(ppu_opcode_t op) { return EXTSB(op); }
|
||||
void EXTSW_(ppu_opcode_t op) { return EXTSW(op); }
|
||||
void FDIVS_(ppu_opcode_t op) { return FDIVS(op); }
|
||||
void FSUBS_(ppu_opcode_t op) { return FSUBS(op); }
|
||||
void FADDS_(ppu_opcode_t op) { return FADDS(op); }
|
||||
void FSQRTS_(ppu_opcode_t op) { return FSQRTS(op); }
|
||||
void FRES_(ppu_opcode_t op) { return FRES(op); }
|
||||
void FMULS_(ppu_opcode_t op) { return FMULS(op); }
|
||||
void FMADDS_(ppu_opcode_t op) { return FMADDS(op); }
|
||||
void FMSUBS_(ppu_opcode_t op) { return FMSUBS(op); }
|
||||
void FNMSUBS_(ppu_opcode_t op) { return FNMSUBS(op); }
|
||||
void FNMADDS_(ppu_opcode_t op) { return FNMADDS(op); }
|
||||
void MTFSB1_(ppu_opcode_t op) { return MTFSB1(op); }
|
||||
void MTFSB0_(ppu_opcode_t op) { return MTFSB0(op); }
|
||||
void MTFSFI_(ppu_opcode_t op) { return MTFSFI(op); }
|
||||
void MFFS_(ppu_opcode_t op) { return MFFS(op); }
|
||||
void MTFSF_(ppu_opcode_t op) { return MTFSF(op); }
|
||||
void FRSP_(ppu_opcode_t op) { return FRSP(op); }
|
||||
void FCTIW_(ppu_opcode_t op) { return FCTIW(op); }
|
||||
void FCTIWZ_(ppu_opcode_t op) { return FCTIWZ(op); }
|
||||
void FDIV_(ppu_opcode_t op) { return FDIV(op); }
|
||||
void FSUB_(ppu_opcode_t op) { return FSUB(op); }
|
||||
void FADD_(ppu_opcode_t op) { return FADD(op); }
|
||||
void FSQRT_(ppu_opcode_t op) { return FSQRT(op); }
|
||||
void FSEL_(ppu_opcode_t op) { return FSEL(op); }
|
||||
void FMUL_(ppu_opcode_t op) { return FMUL(op); }
|
||||
void FRSQRTE_(ppu_opcode_t op) { return FRSQRTE(op); }
|
||||
void FMSUB_(ppu_opcode_t op) { return FMSUB(op); }
|
||||
void FMADD_(ppu_opcode_t op) { return FMADD(op); }
|
||||
void FNMSUB_(ppu_opcode_t op) { return FNMSUB(op); }
|
||||
void FNMADD_(ppu_opcode_t op) { return FNMADD(op); }
|
||||
void FNEG_(ppu_opcode_t op) { return FNEG(op); }
|
||||
void FMR_(ppu_opcode_t op) { return FMR(op); }
|
||||
void FNABS_(ppu_opcode_t op) { return FNABS(op); }
|
||||
void FABS_(ppu_opcode_t op) { return FABS(op); }
|
||||
void FCTID_(ppu_opcode_t op) { return FCTID(op); }
|
||||
void FCTIDZ_(ppu_opcode_t op) { return FCTIDZ(op); }
|
||||
void FCFID_(ppu_opcode_t op) { return FCFID(op); }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -88,8 +88,6 @@ private:
|
||||
XmmLink XmmGet(s8 reg, XmmType type);
|
||||
|
||||
asmjit::x86::Mem XmmConst(const v128& data);
|
||||
asmjit::x86::Mem XmmConst(const __m128& data);
|
||||
asmjit::x86::Mem XmmConst(const __m128i& data);
|
||||
|
||||
asmjit::x86::Mem get_pc(u32 addr);
|
||||
void branch_fixed(u32 target, bool absolute = false);
|
||||
|
@ -1,2 +1,7 @@
|
||||
#include "stdafx.h"
|
||||
#include "SPUAnalyser.h"
|
||||
#include "SPUOpcodes.h"
|
||||
|
||||
const extern spu_decoder<spu_itype> g_spu_itype{};
|
||||
const extern spu_decoder<spu_iname> g_spu_iname{};
|
||||
const extern spu_decoder<spu_iflag> g_spu_iflag{};
|
||||
|
@ -4,11 +4,12 @@
|
||||
#include "SPUThread.h"
|
||||
|
||||
const spu_decoder<SPUDisAsm> s_spu_disasm;
|
||||
const spu_decoder<spu_itype> s_spu_itype;
|
||||
const spu_decoder<spu_iflag> s_spu_iflag;
|
||||
const extern spu_decoder<spu_itype> g_spu_itype;
|
||||
const extern spu_decoder<spu_iname> g_spu_iname;
|
||||
const extern spu_decoder<spu_iflag> g_spu_iflag;
|
||||
|
||||
#include "util/v128.hpp"
|
||||
#include "util/v128sse.hpp"
|
||||
#include "util/simd.hpp"
|
||||
|
||||
u32 SPUDisAsm::disasm(u32 pc)
|
||||
{
|
||||
@ -68,7 +69,7 @@ std::pair<bool, v128> SPUDisAsm::try_get_const_value(u32 reg, u32 pc, u32 TTL) c
|
||||
const u32 opcode = *reinterpret_cast<const be_t<u32>*>(m_offset + i);
|
||||
const spu_opcode_t op0{ opcode };
|
||||
|
||||
const auto type = s_spu_itype.decode(opcode);
|
||||
const auto type = g_spu_itype.decode(opcode);
|
||||
|
||||
if (type & spu_itype::branch || type == spu_itype::UNK || !opcode)
|
||||
{
|
||||
@ -101,7 +102,7 @@ std::pair<bool, v128> SPUDisAsm::try_get_const_value(u32 reg, u32 pc, u32 TTL) c
|
||||
var = value;\
|
||||
} void() /*<- Require a semicolon*/
|
||||
|
||||
//const auto flag = s_spu_iflag.decode(opcode);
|
||||
//const auto flag = g_spu_iflag.decode(opcode);
|
||||
|
||||
// TODO: It detects spurious register modifications
|
||||
if (u32 dst = type & spu_itype::_quadrop ? +op0.rt4 : +op0.rt; dst == reg)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -4,246 +4,39 @@
|
||||
|
||||
class spu_thread;
|
||||
|
||||
using spu_inter_func_t = bool(*)(spu_thread& spu, spu_opcode_t op);
|
||||
using spu_intrp_func_t = bool(*)(spu_thread& spu, spu_opcode_t op);
|
||||
|
||||
template <typename IT>
|
||||
struct spu_interpreter_t;
|
||||
|
||||
struct spu_interpreter
|
||||
{
|
||||
static bool UNK(spu_thread&, spu_opcode_t);
|
||||
static void set_interrupt_status(spu_thread&, spu_opcode_t);
|
||||
|
||||
static bool STOP(spu_thread&, spu_opcode_t);
|
||||
static bool LNOP(spu_thread&, spu_opcode_t);
|
||||
static bool SYNC(spu_thread&, spu_opcode_t);
|
||||
static bool DSYNC(spu_thread&, spu_opcode_t);
|
||||
static bool MFSPR(spu_thread&, spu_opcode_t);
|
||||
static bool RDCH(spu_thread&, spu_opcode_t);
|
||||
static bool RCHCNT(spu_thread&, spu_opcode_t);
|
||||
static bool SF(spu_thread&, spu_opcode_t);
|
||||
static bool OR(spu_thread&, spu_opcode_t);
|
||||
static bool BG(spu_thread&, spu_opcode_t);
|
||||
static bool SFH(spu_thread&, spu_opcode_t);
|
||||
static bool NOR(spu_thread&, spu_opcode_t);
|
||||
static bool ABSDB(spu_thread&, spu_opcode_t);
|
||||
static bool ROT(spu_thread&, spu_opcode_t);
|
||||
static bool ROTM(spu_thread&, spu_opcode_t);
|
||||
static bool ROTMA(spu_thread&, spu_opcode_t);
|
||||
static bool SHL(spu_thread&, spu_opcode_t);
|
||||
static bool ROTH(spu_thread&, spu_opcode_t);
|
||||
static bool ROTHM(spu_thread&, spu_opcode_t);
|
||||
static bool ROTMAH(spu_thread&, spu_opcode_t);
|
||||
static bool SHLH(spu_thread&, spu_opcode_t);
|
||||
static bool ROTI(spu_thread&, spu_opcode_t);
|
||||
static bool ROTMI(spu_thread&, spu_opcode_t);
|
||||
static bool ROTMAI(spu_thread&, spu_opcode_t);
|
||||
static bool SHLI(spu_thread&, spu_opcode_t);
|
||||
static bool ROTHI(spu_thread&, spu_opcode_t);
|
||||
static bool ROTHMI(spu_thread&, spu_opcode_t);
|
||||
static bool ROTMAHI(spu_thread&, spu_opcode_t);
|
||||
static bool SHLHI(spu_thread&, spu_opcode_t);
|
||||
static bool A(spu_thread&, spu_opcode_t);
|
||||
static bool AND(spu_thread&, spu_opcode_t);
|
||||
static bool CG(spu_thread&, spu_opcode_t);
|
||||
static bool AH(spu_thread&, spu_opcode_t);
|
||||
static bool NAND(spu_thread&, spu_opcode_t);
|
||||
static bool AVGB(spu_thread&, spu_opcode_t);
|
||||
static bool MTSPR(spu_thread&, spu_opcode_t);
|
||||
static bool WRCH(spu_thread&, spu_opcode_t);
|
||||
static bool BIZ(spu_thread&, spu_opcode_t);
|
||||
static bool BINZ(spu_thread&, spu_opcode_t);
|
||||
static bool BIHZ(spu_thread&, spu_opcode_t);
|
||||
static bool BIHNZ(spu_thread&, spu_opcode_t);
|
||||
static bool STOPD(spu_thread&, spu_opcode_t);
|
||||
static bool STQX(spu_thread&, spu_opcode_t);
|
||||
static bool BI(spu_thread&, spu_opcode_t);
|
||||
static bool BISL(spu_thread&, spu_opcode_t);
|
||||
static bool IRET(spu_thread&, spu_opcode_t);
|
||||
static bool BISLED(spu_thread&, spu_opcode_t);
|
||||
static bool HBR(spu_thread&, spu_opcode_t);
|
||||
static bool GB(spu_thread&, spu_opcode_t);
|
||||
static bool GBH(spu_thread&, spu_opcode_t);
|
||||
static bool GBB(spu_thread&, spu_opcode_t);
|
||||
static bool FSM(spu_thread&, spu_opcode_t);
|
||||
static bool FSMH(spu_thread&, spu_opcode_t);
|
||||
static bool FSMB(spu_thread&, spu_opcode_t);
|
||||
static bool LQX(spu_thread&, spu_opcode_t);
|
||||
static bool ROTQBYBI(spu_thread&, spu_opcode_t);
|
||||
static bool ROTQMBYBI(spu_thread&, spu_opcode_t);
|
||||
static bool SHLQBYBI(spu_thread&, spu_opcode_t);
|
||||
static bool CBX(spu_thread&, spu_opcode_t);
|
||||
static bool CHX(spu_thread&, spu_opcode_t);
|
||||
static bool CWX(spu_thread&, spu_opcode_t);
|
||||
static bool CDX(spu_thread&, spu_opcode_t);
|
||||
static bool ROTQBI(spu_thread&, spu_opcode_t);
|
||||
static bool ROTQMBI(spu_thread&, spu_opcode_t);
|
||||
static bool SHLQBI(spu_thread&, spu_opcode_t);
|
||||
static bool ROTQBY(spu_thread&, spu_opcode_t);
|
||||
static bool ROTQMBY(spu_thread&, spu_opcode_t);
|
||||
static bool SHLQBY(spu_thread&, spu_opcode_t);
|
||||
static bool ORX(spu_thread&, spu_opcode_t);
|
||||
static bool CBD(spu_thread&, spu_opcode_t);
|
||||
static bool CHD(spu_thread&, spu_opcode_t);
|
||||
static bool CWD(spu_thread&, spu_opcode_t);
|
||||
static bool CDD(spu_thread&, spu_opcode_t);
|
||||
static bool ROTQBII(spu_thread&, spu_opcode_t);
|
||||
static bool ROTQMBII(spu_thread&, spu_opcode_t);
|
||||
static bool SHLQBII(spu_thread&, spu_opcode_t);
|
||||
static bool ROTQBYI(spu_thread&, spu_opcode_t);
|
||||
static bool ROTQMBYI(spu_thread&, spu_opcode_t);
|
||||
static bool SHLQBYI(spu_thread&, spu_opcode_t);
|
||||
static bool NOP(spu_thread&, spu_opcode_t);
|
||||
static bool CGT(spu_thread&, spu_opcode_t);
|
||||
static bool XOR(spu_thread&, spu_opcode_t);
|
||||
static bool CGTH(spu_thread&, spu_opcode_t);
|
||||
static bool EQV(spu_thread&, spu_opcode_t);
|
||||
static bool CGTB(spu_thread&, spu_opcode_t);
|
||||
static bool SUMB(spu_thread&, spu_opcode_t);
|
||||
static bool HGT(spu_thread&, spu_opcode_t);
|
||||
static bool CLZ(spu_thread&, spu_opcode_t);
|
||||
static bool XSWD(spu_thread&, spu_opcode_t);
|
||||
static bool XSHW(spu_thread&, spu_opcode_t);
|
||||
static bool CNTB(spu_thread&, spu_opcode_t);
|
||||
static bool XSBH(spu_thread&, spu_opcode_t);
|
||||
static bool CLGT(spu_thread&, spu_opcode_t);
|
||||
static bool ANDC(spu_thread&, spu_opcode_t);
|
||||
static bool CLGTH(spu_thread&, spu_opcode_t);
|
||||
static bool ORC(spu_thread&, spu_opcode_t);
|
||||
static bool CLGTB(spu_thread&, spu_opcode_t);
|
||||
static bool HLGT(spu_thread&, spu_opcode_t);
|
||||
static bool CEQ(spu_thread&, spu_opcode_t);
|
||||
static bool MPYHHU(spu_thread&, spu_opcode_t);
|
||||
static bool ADDX(spu_thread&, spu_opcode_t);
|
||||
static bool SFX(spu_thread&, spu_opcode_t);
|
||||
static bool CGX(spu_thread&, spu_opcode_t);
|
||||
static bool BGX(spu_thread&, spu_opcode_t);
|
||||
static bool MPYHHA(spu_thread&, spu_opcode_t);
|
||||
static bool MPYHHAU(spu_thread&, spu_opcode_t);
|
||||
static bool MPY(spu_thread&, spu_opcode_t);
|
||||
static bool MPYH(spu_thread&, spu_opcode_t);
|
||||
static bool MPYHH(spu_thread&, spu_opcode_t);
|
||||
static bool MPYS(spu_thread&, spu_opcode_t);
|
||||
static bool CEQH(spu_thread&, spu_opcode_t);
|
||||
static bool MPYU(spu_thread&, spu_opcode_t);
|
||||
static bool CEQB(spu_thread&, spu_opcode_t);
|
||||
static bool HEQ(spu_thread&, spu_opcode_t);
|
||||
static bool BRZ(spu_thread&, spu_opcode_t);
|
||||
static bool STQA(spu_thread&, spu_opcode_t);
|
||||
static bool BRNZ(spu_thread&, spu_opcode_t);
|
||||
static bool BRHZ(spu_thread&, spu_opcode_t);
|
||||
static bool BRHNZ(spu_thread&, spu_opcode_t);
|
||||
static bool STQR(spu_thread&, spu_opcode_t);
|
||||
static bool BRA(spu_thread&, spu_opcode_t);
|
||||
static bool LQA(spu_thread&, spu_opcode_t);
|
||||
static bool BRASL(spu_thread&, spu_opcode_t);
|
||||
static bool BR(spu_thread&, spu_opcode_t);
|
||||
static bool FSMBI(spu_thread&, spu_opcode_t);
|
||||
static bool BRSL(spu_thread&, spu_opcode_t);
|
||||
static bool LQR(spu_thread&, spu_opcode_t);
|
||||
static bool IL(spu_thread&, spu_opcode_t);
|
||||
static bool ILHU(spu_thread&, spu_opcode_t);
|
||||
static bool ILH(spu_thread&, spu_opcode_t);
|
||||
static bool IOHL(spu_thread&, spu_opcode_t);
|
||||
static bool ORI(spu_thread&, spu_opcode_t);
|
||||
static bool ORHI(spu_thread&, spu_opcode_t);
|
||||
static bool ORBI(spu_thread&, spu_opcode_t);
|
||||
static bool SFI(spu_thread&, spu_opcode_t);
|
||||
static bool SFHI(spu_thread&, spu_opcode_t);
|
||||
static bool ANDI(spu_thread&, spu_opcode_t);
|
||||
static bool ANDHI(spu_thread&, spu_opcode_t);
|
||||
static bool ANDBI(spu_thread&, spu_opcode_t);
|
||||
static bool AI(spu_thread&, spu_opcode_t);
|
||||
static bool AHI(spu_thread&, spu_opcode_t);
|
||||
static bool STQD(spu_thread&, spu_opcode_t);
|
||||
static bool LQD(spu_thread&, spu_opcode_t);
|
||||
static bool XORI(spu_thread&, spu_opcode_t);
|
||||
static bool XORHI(spu_thread&, spu_opcode_t);
|
||||
static bool XORBI(spu_thread&, spu_opcode_t);
|
||||
static bool CGTI(spu_thread&, spu_opcode_t);
|
||||
static bool CGTHI(spu_thread&, spu_opcode_t);
|
||||
static bool CGTBI(spu_thread&, spu_opcode_t);
|
||||
static bool HGTI(spu_thread&, spu_opcode_t);
|
||||
static bool CLGTI(spu_thread&, spu_opcode_t);
|
||||
static bool CLGTHI(spu_thread&, spu_opcode_t);
|
||||
static bool CLGTBI(spu_thread&, spu_opcode_t);
|
||||
static bool HLGTI(spu_thread&, spu_opcode_t);
|
||||
static bool MPYI(spu_thread&, spu_opcode_t);
|
||||
static bool MPYUI(spu_thread&, spu_opcode_t);
|
||||
static bool CEQI(spu_thread&, spu_opcode_t);
|
||||
static bool CEQHI(spu_thread&, spu_opcode_t);
|
||||
static bool CEQBI(spu_thread&, spu_opcode_t);
|
||||
static bool HEQI(spu_thread&, spu_opcode_t);
|
||||
static bool HBRA(spu_thread&, spu_opcode_t);
|
||||
static bool HBRR(spu_thread&, spu_opcode_t);
|
||||
static bool ILA(spu_thread&, spu_opcode_t);
|
||||
static bool SELB(spu_thread&, spu_opcode_t);
|
||||
static bool SHUFB(spu_thread&, spu_opcode_t);
|
||||
static bool MPYA(spu_thread&, spu_opcode_t);
|
||||
static bool DFCGT(spu_thread&, spu_opcode_t);
|
||||
static bool DFCMGT(spu_thread&, spu_opcode_t);
|
||||
static bool DFTSV(spu_thread&, spu_opcode_t);
|
||||
static bool DFCEQ(spu_thread&, spu_opcode_t);
|
||||
static bool DFCMEQ(spu_thread&, spu_opcode_t);
|
||||
};
|
||||
|
||||
struct spu_interpreter_fast final : spu_interpreter
|
||||
struct spu_interpreter_rt_base
|
||||
{
|
||||
static bool FREST(spu_thread&, spu_opcode_t);
|
||||
static bool FRSQEST(spu_thread&, spu_opcode_t);
|
||||
static bool FCGT(spu_thread&, spu_opcode_t);
|
||||
static bool FA(spu_thread&, spu_opcode_t);
|
||||
static bool FS(spu_thread&, spu_opcode_t);
|
||||
static bool FM(spu_thread&, spu_opcode_t);
|
||||
static bool FCMGT(spu_thread&, spu_opcode_t);
|
||||
static bool DFA(spu_thread&, spu_opcode_t);
|
||||
static bool DFS(spu_thread&, spu_opcode_t);
|
||||
static bool DFM(spu_thread&, spu_opcode_t);
|
||||
static bool DFMA(spu_thread&, spu_opcode_t);
|
||||
static bool DFMS(spu_thread&, spu_opcode_t);
|
||||
static bool DFNMS(spu_thread&, spu_opcode_t);
|
||||
static bool DFNMA(spu_thread&, spu_opcode_t);
|
||||
static bool FSCRRD(spu_thread&, spu_opcode_t);
|
||||
static bool FESD(spu_thread&, spu_opcode_t);
|
||||
static bool FRDS(spu_thread&, spu_opcode_t);
|
||||
static bool FSCRWR(spu_thread&, spu_opcode_t);
|
||||
static bool FCEQ(spu_thread&, spu_opcode_t);
|
||||
static bool FCMEQ(spu_thread&, spu_opcode_t);
|
||||
static bool FI(spu_thread&, spu_opcode_t);
|
||||
static bool CFLTS(spu_thread&, spu_opcode_t);
|
||||
static bool CFLTU(spu_thread&, spu_opcode_t);
|
||||
static bool CSFLT(spu_thread&, spu_opcode_t);
|
||||
static bool CUFLT(spu_thread&, spu_opcode_t);
|
||||
static bool FNMS(spu_thread&, spu_opcode_t);
|
||||
static bool FMA(spu_thread&, spu_opcode_t);
|
||||
static bool FMS(spu_thread&, spu_opcode_t);
|
||||
protected:
|
||||
std::unique_ptr<spu_interpreter_t<spu_intrp_func_t>> ptrs;
|
||||
|
||||
spu_interpreter_rt_base() noexcept;
|
||||
|
||||
spu_interpreter_rt_base(const spu_interpreter_rt_base&) = delete;
|
||||
|
||||
spu_interpreter_rt_base& operator=(const spu_interpreter_rt_base&) = delete;
|
||||
|
||||
virtual ~spu_interpreter_rt_base();
|
||||
};
|
||||
|
||||
struct spu_interpreter_precise final : spu_interpreter
|
||||
struct spu_interpreter_rt : spu_interpreter_rt_base
|
||||
{
|
||||
static bool FREST(spu_thread&, spu_opcode_t);
|
||||
static bool FRSQEST(spu_thread&, spu_opcode_t);
|
||||
static bool FCGT(spu_thread&, spu_opcode_t);
|
||||
static bool FA(spu_thread&, spu_opcode_t);
|
||||
static bool FS(spu_thread&, spu_opcode_t);
|
||||
static bool FM(spu_thread&, spu_opcode_t);
|
||||
static bool FCMGT(spu_thread&, spu_opcode_t);
|
||||
static bool DFA(spu_thread&, spu_opcode_t);
|
||||
static bool DFS(spu_thread&, spu_opcode_t);
|
||||
static bool DFM(spu_thread&, spu_opcode_t);
|
||||
static bool DFMA(spu_thread&, spu_opcode_t);
|
||||
static bool DFMS(spu_thread&, spu_opcode_t);
|
||||
static bool DFNMS(spu_thread&, spu_opcode_t);
|
||||
static bool DFNMA(spu_thread&, spu_opcode_t);
|
||||
static bool FSCRRD(spu_thread&, spu_opcode_t);
|
||||
static bool FESD(spu_thread&, spu_opcode_t);
|
||||
static bool FRDS(spu_thread&, spu_opcode_t);
|
||||
static bool FSCRWR(spu_thread&, spu_opcode_t);
|
||||
static bool FCEQ(spu_thread&, spu_opcode_t);
|
||||
static bool FCMEQ(spu_thread&, spu_opcode_t);
|
||||
static bool FI(spu_thread&, spu_opcode_t);
|
||||
static bool CFLTS(spu_thread&, spu_opcode_t);
|
||||
static bool CFLTU(spu_thread&, spu_opcode_t);
|
||||
static bool CSFLT(spu_thread&, spu_opcode_t);
|
||||
static bool CUFLT(spu_thread&, spu_opcode_t);
|
||||
static bool FNMS(spu_thread&, spu_opcode_t);
|
||||
static bool FMA(spu_thread&, spu_opcode_t);
|
||||
static bool FMS(spu_thread&, spu_opcode_t);
|
||||
spu_interpreter_rt() noexcept;
|
||||
|
||||
// Returns the interpreter function that the decoder table maps to opcode `op`
spu_intrp_func_t decode(u32 op) const noexcept
|
||||
{
|
||||
return table.decode(op);
|
||||
}
|
||||
|
||||
private:
|
||||
spu_decoder<spu_interpreter_t<spu_intrp_func_t>, spu_intrp_func_t> table;
|
||||
};
|
||||
|
@ -71,215 +71,227 @@ class spu_decoder
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
spu_decoder() noexcept
|
||||
// Helper: identity function; GET() calls it as _first(args...) to obtain the single object passed to the constructor
|
||||
static const D& _first(const D& arg)
|
||||
{
|
||||
return arg;
|
||||
}
|
||||
|
||||
public:
|
||||
template <typename... Args>
|
||||
spu_decoder(const Args&... args) noexcept
|
||||
{
|
||||
// If an object is passed to the constructor, assign values from that object
|
||||
#define GET(name) [&]{ if constexpr (sizeof...(Args) > 0) return _first(args...).name; else return &D::name; }()
|
||||
|
||||
static_assert(sizeof...(Args) <= 1);
|
||||
|
||||
const std::initializer_list<instruction_info> instructions
|
||||
{
|
||||
{ 0, 0x0, &D::STOP },
|
||||
{ 0, 0x1, &D::LNOP },
|
||||
{ 0, 0x2, &D::SYNC },
|
||||
{ 0, 0x3, &D::DSYNC },
|
||||
{ 0, 0xc, &D::MFSPR },
|
||||
{ 0, 0xd, &D::RDCH },
|
||||
{ 0, 0xf, &D::RCHCNT },
|
||||
{ 0, 0x40, &D::SF },
|
||||
{ 0, 0x41, &D::OR },
|
||||
{ 0, 0x42, &D::BG },
|
||||
{ 0, 0x48, &D::SFH },
|
||||
{ 0, 0x49, &D::NOR },
|
||||
{ 0, 0x53, &D::ABSDB },
|
||||
{ 0, 0x58, &D::ROT },
|
||||
{ 0, 0x59, &D::ROTM },
|
||||
{ 0, 0x5a, &D::ROTMA },
|
||||
{ 0, 0x5b, &D::SHL },
|
||||
{ 0, 0x5c, &D::ROTH },
|
||||
{ 0, 0x5d, &D::ROTHM },
|
||||
{ 0, 0x5e, &D::ROTMAH },
|
||||
{ 0, 0x5f, &D::SHLH },
|
||||
{ 0, 0x78, &D::ROTI },
|
||||
{ 0, 0x79, &D::ROTMI },
|
||||
{ 0, 0x7a, &D::ROTMAI },
|
||||
{ 0, 0x7b, &D::SHLI },
|
||||
{ 0, 0x7c, &D::ROTHI },
|
||||
{ 0, 0x7d, &D::ROTHMI },
|
||||
{ 0, 0x7e, &D::ROTMAHI },
|
||||
{ 0, 0x7f, &D::SHLHI },
|
||||
{ 0, 0xc0, &D::A },
|
||||
{ 0, 0xc1, &D::AND },
|
||||
{ 0, 0xc2, &D::CG },
|
||||
{ 0, 0xc8, &D::AH },
|
||||
{ 0, 0xc9, &D::NAND },
|
||||
{ 0, 0xd3, &D::AVGB },
|
||||
{ 0, 0x10c, &D::MTSPR },
|
||||
{ 0, 0x10d, &D::WRCH },
|
||||
{ 0, 0x128, &D::BIZ },
|
||||
{ 0, 0x129, &D::BINZ },
|
||||
{ 0, 0x12a, &D::BIHZ },
|
||||
{ 0, 0x12b, &D::BIHNZ },
|
||||
{ 0, 0x140, &D::STOPD },
|
||||
{ 0, 0x144, &D::STQX },
|
||||
{ 0, 0x1a8, &D::BI },
|
||||
{ 0, 0x1a9, &D::BISL },
|
||||
{ 0, 0x1aa, &D::IRET },
|
||||
{ 0, 0x1ab, &D::BISLED },
|
||||
{ 0, 0x1ac, &D::HBR },
|
||||
{ 0, 0x1b0, &D::GB },
|
||||
{ 0, 0x1b1, &D::GBH },
|
||||
{ 0, 0x1b2, &D::GBB },
|
||||
{ 0, 0x1b4, &D::FSM },
|
||||
{ 0, 0x1b5, &D::FSMH },
|
||||
{ 0, 0x1b6, &D::FSMB },
|
||||
{ 0, 0x1b8, &D::FREST },
|
||||
{ 0, 0x1b9, &D::FRSQEST },
|
||||
{ 0, 0x1c4, &D::LQX },
|
||||
{ 0, 0x1cc, &D::ROTQBYBI },
|
||||
{ 0, 0x1cd, &D::ROTQMBYBI },
|
||||
{ 0, 0x1cf, &D::SHLQBYBI },
|
||||
{ 0, 0x1d4, &D::CBX },
|
||||
{ 0, 0x1d5, &D::CHX },
|
||||
{ 0, 0x1d6, &D::CWX },
|
||||
{ 0, 0x1d7, &D::CDX },
|
||||
{ 0, 0x1d8, &D::ROTQBI },
|
||||
{ 0, 0x1d9, &D::ROTQMBI },
|
||||
{ 0, 0x1db, &D::SHLQBI },
|
||||
{ 0, 0x1dc, &D::ROTQBY },
|
||||
{ 0, 0x1dd, &D::ROTQMBY },
|
||||
{ 0, 0x1df, &D::SHLQBY },
|
||||
{ 0, 0x1f0, &D::ORX },
|
||||
{ 0, 0x1f4, &D::CBD },
|
||||
{ 0, 0x1f5, &D::CHD },
|
||||
{ 0, 0x1f6, &D::CWD },
|
||||
{ 0, 0x1f7, &D::CDD },
|
||||
{ 0, 0x1f8, &D::ROTQBII },
|
||||
{ 0, 0x1f9, &D::ROTQMBII },
|
||||
{ 0, 0x1fb, &D::SHLQBII },
|
||||
{ 0, 0x1fc, &D::ROTQBYI },
|
||||
{ 0, 0x1fd, &D::ROTQMBYI },
|
||||
{ 0, 0x1ff, &D::SHLQBYI },
|
||||
{ 0, 0x201, &D::NOP },
|
||||
{ 0, 0x240, &D::CGT },
|
||||
{ 0, 0x241, &D::XOR },
|
||||
{ 0, 0x248, &D::CGTH },
|
||||
{ 0, 0x249, &D::EQV },
|
||||
{ 0, 0x250, &D::CGTB },
|
||||
{ 0, 0x253, &D::SUMB },
|
||||
{ 0, 0x258, &D::HGT },
|
||||
{ 0, 0x2a5, &D::CLZ },
|
||||
{ 0, 0x2a6, &D::XSWD },
|
||||
{ 0, 0x2ae, &D::XSHW },
|
||||
{ 0, 0x2b4, &D::CNTB },
|
||||
{ 0, 0x2b6, &D::XSBH },
|
||||
{ 0, 0x2c0, &D::CLGT },
|
||||
{ 0, 0x2c1, &D::ANDC },
|
||||
{ 0, 0x2c2, &D::FCGT },
|
||||
{ 0, 0x2c3, &D::DFCGT },
|
||||
{ 0, 0x2c4, &D::FA },
|
||||
{ 0, 0x2c5, &D::FS },
|
||||
{ 0, 0x2c6, &D::FM },
|
||||
{ 0, 0x2c8, &D::CLGTH },
|
||||
{ 0, 0x2c9, &D::ORC },
|
||||
{ 0, 0x2ca, &D::FCMGT },
|
||||
{ 0, 0x2cb, &D::DFCMGT },
|
||||
{ 0, 0x2cc, &D::DFA },
|
||||
{ 0, 0x2cd, &D::DFS },
|
||||
{ 0, 0x2ce, &D::DFM },
|
||||
{ 0, 0x2d0, &D::CLGTB },
|
||||
{ 0, 0x2d8, &D::HLGT },
|
||||
{ 0, 0x35c, &D::DFMA },
|
||||
{ 0, 0x35d, &D::DFMS },
|
||||
{ 0, 0x35e, &D::DFNMS },
|
||||
{ 0, 0x35f, &D::DFNMA },
|
||||
{ 0, 0x3c0, &D::CEQ },
|
||||
{ 0, 0x3ce, &D::MPYHHU },
|
||||
{ 0, 0x340, &D::ADDX },
|
||||
{ 0, 0x341, &D::SFX },
|
||||
{ 0, 0x342, &D::CGX },
|
||||
{ 0, 0x343, &D::BGX },
|
||||
{ 0, 0x346, &D::MPYHHA },
|
||||
{ 0, 0x34e, &D::MPYHHAU },
|
||||
{ 0, 0x398, &D::FSCRRD },
|
||||
{ 0, 0x3b8, &D::FESD },
|
||||
{ 0, 0x3b9, &D::FRDS },
|
||||
{ 0, 0x3ba, &D::FSCRWR },
|
||||
{ 0, 0x3bf, &D::DFTSV },
|
||||
{ 0, 0x3c2, &D::FCEQ },
|
||||
{ 0, 0x3c3, &D::DFCEQ },
|
||||
{ 0, 0x3c4, &D::MPY },
|
||||
{ 0, 0x3c5, &D::MPYH },
|
||||
{ 0, 0x3c6, &D::MPYHH },
|
||||
{ 0, 0x3c7, &D::MPYS },
|
||||
{ 0, 0x3c8, &D::CEQH },
|
||||
{ 0, 0x3ca, &D::FCMEQ },
|
||||
{ 0, 0x3cb, &D::DFCMEQ },
|
||||
{ 0, 0x3cc, &D::MPYU },
|
||||
{ 0, 0x3d0, &D::CEQB },
|
||||
{ 0, 0x3d4, &D::FI },
|
||||
{ 0, 0x3d8, &D::HEQ },
|
||||
{ 1, 0x1d8, &D::CFLTS },
|
||||
{ 1, 0x1d9, &D::CFLTU },
|
||||
{ 1, 0x1da, &D::CSFLT },
|
||||
{ 1, 0x1db, &D::CUFLT },
|
||||
{ 2, 0x40, &D::BRZ },
|
||||
{ 2, 0x41, &D::STQA },
|
||||
{ 2, 0x42, &D::BRNZ },
|
||||
{ 2, 0x44, &D::BRHZ },
|
||||
{ 2, 0x46, &D::BRHNZ },
|
||||
{ 2, 0x47, &D::STQR },
|
||||
{ 2, 0x60, &D::BRA },
|
||||
{ 2, 0x61, &D::LQA },
|
||||
{ 2, 0x62, &D::BRASL },
|
||||
{ 2, 0x64, &D::BR },
|
||||
{ 2, 0x65, &D::FSMBI },
|
||||
{ 2, 0x66, &D::BRSL },
|
||||
{ 2, 0x67, &D::LQR },
|
||||
{ 2, 0x81, &D::IL },
|
||||
{ 2, 0x82, &D::ILHU },
|
||||
{ 2, 0x83, &D::ILH },
|
||||
{ 2, 0xc1, &D::IOHL },
|
||||
{ 3, 0x4, &D::ORI },
|
||||
{ 3, 0x5, &D::ORHI },
|
||||
{ 3, 0x6, &D::ORBI },
|
||||
{ 3, 0xc, &D::SFI },
|
||||
{ 3, 0xd, &D::SFHI },
|
||||
{ 3, 0x14, &D::ANDI },
|
||||
{ 3, 0x15, &D::ANDHI },
|
||||
{ 3, 0x16, &D::ANDBI },
|
||||
{ 3, 0x1c, &D::AI },
|
||||
{ 3, 0x1d, &D::AHI },
|
||||
{ 3, 0x24, &D::STQD },
|
||||
{ 3, 0x34, &D::LQD },
|
||||
{ 3, 0x44, &D::XORI },
|
||||
{ 3, 0x45, &D::XORHI },
|
||||
{ 3, 0x46, &D::XORBI },
|
||||
{ 3, 0x4c, &D::CGTI },
|
||||
{ 3, 0x4d, &D::CGTHI },
|
||||
{ 3, 0x4e, &D::CGTBI },
|
||||
{ 3, 0x4f, &D::HGTI },
|
||||
{ 3, 0x5c, &D::CLGTI },
|
||||
{ 3, 0x5d, &D::CLGTHI },
|
||||
{ 3, 0x5e, &D::CLGTBI },
|
||||
{ 3, 0x5f, &D::HLGTI },
|
||||
{ 3, 0x74, &D::MPYI },
|
||||
{ 3, 0x75, &D::MPYUI },
|
||||
{ 3, 0x7c, &D::CEQI },
|
||||
{ 3, 0x7d, &D::CEQHI },
|
||||
{ 3, 0x7e, &D::CEQBI },
|
||||
{ 3, 0x7f, &D::HEQI },
|
||||
{ 4, 0x8, &D::HBRA },
|
||||
{ 4, 0x9, &D::HBRR },
|
||||
{ 4, 0x21, &D::ILA },
|
||||
{ 7, 0x8, &D::SELB },
|
||||
{ 7, 0xb, &D::SHUFB },
|
||||
{ 7, 0xc, &D::MPYA },
|
||||
{ 7, 0xd, &D::FNMS },
|
||||
{ 7, 0xe, &D::FMA },
|
||||
{ 7, 0xf, &D::FMS },
|
||||
{ 0, 0x0, GET(STOP) },
|
||||
{ 0, 0x1, GET(LNOP) },
|
||||
{ 0, 0x2, GET(SYNC) },
|
||||
{ 0, 0x3, GET(DSYNC) },
|
||||
{ 0, 0xc, GET(MFSPR) },
|
||||
{ 0, 0xd, GET(RDCH) },
|
||||
{ 0, 0xf, GET(RCHCNT) },
|
||||
{ 0, 0x40, GET(SF) },
|
||||
{ 0, 0x41, GET(OR) },
|
||||
{ 0, 0x42, GET(BG) },
|
||||
{ 0, 0x48, GET(SFH) },
|
||||
{ 0, 0x49, GET(NOR) },
|
||||
{ 0, 0x53, GET(ABSDB) },
|
||||
{ 0, 0x58, GET(ROT) },
|
||||
{ 0, 0x59, GET(ROTM) },
|
||||
{ 0, 0x5a, GET(ROTMA) },
|
||||
{ 0, 0x5b, GET(SHL) },
|
||||
{ 0, 0x5c, GET(ROTH) },
|
||||
{ 0, 0x5d, GET(ROTHM) },
|
||||
{ 0, 0x5e, GET(ROTMAH) },
|
||||
{ 0, 0x5f, GET(SHLH) },
|
||||
{ 0, 0x78, GET(ROTI) },
|
||||
{ 0, 0x79, GET(ROTMI) },
|
||||
{ 0, 0x7a, GET(ROTMAI) },
|
||||
{ 0, 0x7b, GET(SHLI) },
|
||||
{ 0, 0x7c, GET(ROTHI) },
|
||||
{ 0, 0x7d, GET(ROTHMI) },
|
||||
{ 0, 0x7e, GET(ROTMAHI) },
|
||||
{ 0, 0x7f, GET(SHLHI) },
|
||||
{ 0, 0xc0, GET(A) },
|
||||
{ 0, 0xc1, GET(AND) },
|
||||
{ 0, 0xc2, GET(CG) },
|
||||
{ 0, 0xc8, GET(AH) },
|
||||
{ 0, 0xc9, GET(NAND) },
|
||||
{ 0, 0xd3, GET(AVGB) },
|
||||
{ 0, 0x10c, GET(MTSPR) },
|
||||
{ 0, 0x10d, GET(WRCH) },
|
||||
{ 0, 0x128, GET(BIZ) },
|
||||
{ 0, 0x129, GET(BINZ) },
|
||||
{ 0, 0x12a, GET(BIHZ) },
|
||||
{ 0, 0x12b, GET(BIHNZ) },
|
||||
{ 0, 0x140, GET(STOPD) },
|
||||
{ 0, 0x144, GET(STQX) },
|
||||
{ 0, 0x1a8, GET(BI) },
|
||||
{ 0, 0x1a9, GET(BISL) },
|
||||
{ 0, 0x1aa, GET(IRET) },
|
||||
{ 0, 0x1ab, GET(BISLED) },
|
||||
{ 0, 0x1ac, GET(HBR) },
|
||||
{ 0, 0x1b0, GET(GB) },
|
||||
{ 0, 0x1b1, GET(GBH) },
|
||||
{ 0, 0x1b2, GET(GBB) },
|
||||
{ 0, 0x1b4, GET(FSM) },
|
||||
{ 0, 0x1b5, GET(FSMH) },
|
||||
{ 0, 0x1b6, GET(FSMB) },
|
||||
{ 0, 0x1b8, GET(FREST) },
|
||||
{ 0, 0x1b9, GET(FRSQEST) },
|
||||
{ 0, 0x1c4, GET(LQX) },
|
||||
{ 0, 0x1cc, GET(ROTQBYBI) },
|
||||
{ 0, 0x1cd, GET(ROTQMBYBI) },
|
||||
{ 0, 0x1cf, GET(SHLQBYBI) },
|
||||
{ 0, 0x1d4, GET(CBX) },
|
||||
{ 0, 0x1d5, GET(CHX) },
|
||||
{ 0, 0x1d6, GET(CWX) },
|
||||
{ 0, 0x1d7, GET(CDX) },
|
||||
{ 0, 0x1d8, GET(ROTQBI) },
|
||||
{ 0, 0x1d9, GET(ROTQMBI) },
|
||||
{ 0, 0x1db, GET(SHLQBI) },
|
||||
{ 0, 0x1dc, GET(ROTQBY) },
|
||||
{ 0, 0x1dd, GET(ROTQMBY) },
|
||||
{ 0, 0x1df, GET(SHLQBY) },
|
||||
{ 0, 0x1f0, GET(ORX) },
|
||||
{ 0, 0x1f4, GET(CBD) },
|
||||
{ 0, 0x1f5, GET(CHD) },
|
||||
{ 0, 0x1f6, GET(CWD) },
|
||||
{ 0, 0x1f7, GET(CDD) },
|
||||
{ 0, 0x1f8, GET(ROTQBII) },
|
||||
{ 0, 0x1f9, GET(ROTQMBII) },
|
||||
{ 0, 0x1fb, GET(SHLQBII) },
|
||||
{ 0, 0x1fc, GET(ROTQBYI) },
|
||||
{ 0, 0x1fd, GET(ROTQMBYI) },
|
||||
{ 0, 0x1ff, GET(SHLQBYI) },
|
||||
{ 0, 0x201, GET(NOP) },
|
||||
{ 0, 0x240, GET(CGT) },
|
||||
{ 0, 0x241, GET(XOR) },
|
||||
{ 0, 0x248, GET(CGTH) },
|
||||
{ 0, 0x249, GET(EQV) },
|
||||
{ 0, 0x250, GET(CGTB) },
|
||||
{ 0, 0x253, GET(SUMB) },
|
||||
{ 0, 0x258, GET(HGT) },
|
||||
{ 0, 0x2a5, GET(CLZ) },
|
||||
{ 0, 0x2a6, GET(XSWD) },
|
||||
{ 0, 0x2ae, GET(XSHW) },
|
||||
{ 0, 0x2b4, GET(CNTB) },
|
||||
{ 0, 0x2b6, GET(XSBH) },
|
||||
{ 0, 0x2c0, GET(CLGT) },
|
||||
{ 0, 0x2c1, GET(ANDC) },
|
||||
{ 0, 0x2c2, GET(FCGT) },
|
||||
{ 0, 0x2c3, GET(DFCGT) },
|
||||
{ 0, 0x2c4, GET(FA) },
|
||||
{ 0, 0x2c5, GET(FS) },
|
||||
{ 0, 0x2c6, GET(FM) },
|
||||
{ 0, 0x2c8, GET(CLGTH) },
|
||||
{ 0, 0x2c9, GET(ORC) },
|
||||
{ 0, 0x2ca, GET(FCMGT) },
|
||||
{ 0, 0x2cb, GET(DFCMGT) },
|
||||
{ 0, 0x2cc, GET(DFA) },
|
||||
{ 0, 0x2cd, GET(DFS) },
|
||||
{ 0, 0x2ce, GET(DFM) },
|
||||
{ 0, 0x2d0, GET(CLGTB) },
|
||||
{ 0, 0x2d8, GET(HLGT) },
|
||||
{ 0, 0x35c, GET(DFMA) },
|
||||
{ 0, 0x35d, GET(DFMS) },
|
||||
{ 0, 0x35e, GET(DFNMS) },
|
||||
{ 0, 0x35f, GET(DFNMA) },
|
||||
{ 0, 0x3c0, GET(CEQ) },
|
||||
{ 0, 0x3ce, GET(MPYHHU) },
|
||||
{ 0, 0x340, GET(ADDX) },
|
||||
{ 0, 0x341, GET(SFX) },
|
||||
{ 0, 0x342, GET(CGX) },
|
||||
{ 0, 0x343, GET(BGX) },
|
||||
{ 0, 0x346, GET(MPYHHA) },
|
||||
{ 0, 0x34e, GET(MPYHHAU) },
|
||||
{ 0, 0x398, GET(FSCRRD) },
|
||||
{ 0, 0x3b8, GET(FESD) },
|
||||
{ 0, 0x3b9, GET(FRDS) },
|
||||
{ 0, 0x3ba, GET(FSCRWR) },
|
||||
{ 0, 0x3bf, GET(DFTSV) },
|
||||
{ 0, 0x3c2, GET(FCEQ) },
|
||||
{ 0, 0x3c3, GET(DFCEQ) },
|
||||
{ 0, 0x3c4, GET(MPY) },
|
||||
{ 0, 0x3c5, GET(MPYH) },
|
||||
{ 0, 0x3c6, GET(MPYHH) },
|
||||
{ 0, 0x3c7, GET(MPYS) },
|
||||
{ 0, 0x3c8, GET(CEQH) },
|
||||
{ 0, 0x3ca, GET(FCMEQ) },
|
||||
{ 0, 0x3cb, GET(DFCMEQ) },
|
||||
{ 0, 0x3cc, GET(MPYU) },
|
||||
{ 0, 0x3d0, GET(CEQB) },
|
||||
{ 0, 0x3d4, GET(FI) },
|
||||
{ 0, 0x3d8, GET(HEQ) },
|
||||
{ 1, 0x1d8, GET(CFLTS) },
|
||||
{ 1, 0x1d9, GET(CFLTU) },
|
||||
{ 1, 0x1da, GET(CSFLT) },
|
||||
{ 1, 0x1db, GET(CUFLT) },
|
||||
{ 2, 0x40, GET(BRZ) },
|
||||
{ 2, 0x41, GET(STQA) },
|
||||
{ 2, 0x42, GET(BRNZ) },
|
||||
{ 2, 0x44, GET(BRHZ) },
|
||||
{ 2, 0x46, GET(BRHNZ) },
|
||||
{ 2, 0x47, GET(STQR) },
|
||||
{ 2, 0x60, GET(BRA) },
|
||||
{ 2, 0x61, GET(LQA) },
|
||||
{ 2, 0x62, GET(BRASL) },
|
||||
{ 2, 0x64, GET(BR) },
|
||||
{ 2, 0x65, GET(FSMBI) },
|
||||
{ 2, 0x66, GET(BRSL) },
|
||||
{ 2, 0x67, GET(LQR) },
|
||||
{ 2, 0x81, GET(IL) },
|
||||
{ 2, 0x82, GET(ILHU) },
|
||||
{ 2, 0x83, GET(ILH) },
|
||||
{ 2, 0xc1, GET(IOHL) },
|
||||
{ 3, 0x4, GET(ORI) },
|
||||
{ 3, 0x5, GET(ORHI) },
|
||||
{ 3, 0x6, GET(ORBI) },
|
||||
{ 3, 0xc, GET(SFI) },
|
||||
{ 3, 0xd, GET(SFHI) },
|
||||
{ 3, 0x14, GET(ANDI) },
|
||||
{ 3, 0x15, GET(ANDHI) },
|
||||
{ 3, 0x16, GET(ANDBI) },
|
||||
{ 3, 0x1c, GET(AI) },
|
||||
{ 3, 0x1d, GET(AHI) },
|
||||
{ 3, 0x24, GET(STQD) },
|
||||
{ 3, 0x34, GET(LQD) },
|
||||
{ 3, 0x44, GET(XORI) },
|
||||
{ 3, 0x45, GET(XORHI) },
|
||||
{ 3, 0x46, GET(XORBI) },
|
||||
{ 3, 0x4c, GET(CGTI) },
|
||||
{ 3, 0x4d, GET(CGTHI) },
|
||||
{ 3, 0x4e, GET(CGTBI) },
|
||||
{ 3, 0x4f, GET(HGTI) },
|
||||
{ 3, 0x5c, GET(CLGTI) },
|
||||
{ 3, 0x5d, GET(CLGTHI) },
|
||||
{ 3, 0x5e, GET(CLGTBI) },
|
||||
{ 3, 0x5f, GET(HLGTI) },
|
||||
{ 3, 0x74, GET(MPYI) },
|
||||
{ 3, 0x75, GET(MPYUI) },
|
||||
{ 3, 0x7c, GET(CEQI) },
|
||||
{ 3, 0x7d, GET(CEQHI) },
|
||||
{ 3, 0x7e, GET(CEQBI) },
|
||||
{ 3, 0x7f, GET(HEQI) },
|
||||
{ 4, 0x8, GET(HBRA) },
|
||||
{ 4, 0x9, GET(HBRR) },
|
||||
{ 4, 0x21, GET(ILA) },
|
||||
{ 7, 0x8, GET(SELB) },
|
||||
{ 7, 0xb, GET(SHUFB) },
|
||||
{ 7, 0xc, GET(MPYA) },
|
||||
{ 7, 0xd, GET(FNMS) },
|
||||
{ 7, 0xe, GET(FMA) },
|
||||
{ 7, 0xf, GET(FMS) },
|
||||
};
|
||||
|
||||
for (auto& x : m_table)
|
||||
{
|
||||
x = &D::UNK;
|
||||
x = GET(UNK);
|
||||
}
|
||||
|
||||
for (auto& entry : instructions)
|
||||
@ -301,3 +313,5 @@ public:
|
||||
return m_table[spu_decode(inst)];
|
||||
}
|
||||
};
|
||||
|
||||
#undef GET
|
||||
|
@ -24,15 +24,12 @@
|
||||
#include <unordered_set>
|
||||
|
||||
#include "util/v128.hpp"
|
||||
#include "util/v128sse.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
|
||||
const spu_decoder<spu_itype> s_spu_itype;
|
||||
const spu_decoder<spu_iname> s_spu_iname;
|
||||
const spu_decoder<spu_iflag> s_spu_iflag;
|
||||
|
||||
extern const spu_decoder<spu_interpreter_precise> g_spu_interpreter_precise{};
|
||||
extern const spu_decoder<spu_interpreter_fast> g_spu_interpreter_fast;
|
||||
const extern spu_decoder<spu_itype> g_spu_itype;
|
||||
const extern spu_decoder<spu_iname> g_spu_iname;
|
||||
const extern spu_decoder<spu_iflag> g_spu_iflag;
|
||||
|
||||
// Move 4 args for calling native function from a GHC calling convention function
|
||||
static u8* move_args_ghc_to_native(u8* raw)
|
||||
@ -160,11 +157,12 @@ DECLARE(spu_runtime::tr_all) = []
|
||||
return reinterpret_cast<spu_function_t>(trptr);
|
||||
}();
|
||||
|
||||
DECLARE(spu_runtime::g_gateway) = built_function<spu_function_t>("spu_gateway", [](asmjit::x86::Assembler& c, auto& args)
|
||||
DECLARE(spu_runtime::g_gateway) = built_function<spu_function_t>("spu_gateway", [](native_asm& c, auto& args)
|
||||
{
|
||||
// Gateway for SPU dispatcher, converts from native to GHC calling convention, also saves RSP value for spu_escape
|
||||
using namespace asmjit;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
#ifdef _WIN32
|
||||
c.push(x86::r15);
|
||||
c.push(x86::r14);
|
||||
@ -247,24 +245,30 @@ DECLARE(spu_runtime::g_gateway) = built_function<spu_function_t>("spu_gateway",
|
||||
#endif
|
||||
|
||||
c.ret();
|
||||
#else
|
||||
c.ret(a64::x30);
|
||||
#endif
|
||||
});
|
||||
|
||||
// spu_escape: JIT-built stub taking a spu_thread*; reloads the stack pointer stored in
// spu_thread::saved_native_sp and returns from that stack position.
DECLARE(spu_runtime::g_escape) = build_function_asm<void(*)(spu_thread*)>("spu_escape", [](asmjit::x86::Assembler& c, auto& args)
|
||||
DECLARE(spu_runtime::g_escape) = build_function_asm<void(*)(spu_thread*)>("spu_escape", [](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
// Only the x86-64 path emits instructions; on other targets this lambda emits nothing
#if defined(ARCH_X64)
|
||||
// Restore native stack pointer (longjmp emulation)
|
||||
c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&spu_thread::saved_native_sp)));
|
||||
|
||||
// Return to the return location: lower RSP by 8 so that ret pops the qword just below
// the saved pointer (presumably the return address pushed when the gateway was called - confirm)
|
||||
c.sub(x86::rsp, 8);
|
||||
c.ret();
|
||||
#endif
|
||||
});
|
||||
|
||||
DECLARE(spu_runtime::g_tail_escape) = build_function_asm<void(*)(spu_thread*, spu_function_t, u8*)>("spu_tail_escape", [](asmjit::x86::Assembler& c, auto& args)
|
||||
DECLARE(spu_runtime::g_tail_escape) = build_function_asm<void(*)(spu_thread*, spu_function_t, u8*)>("spu_tail_escape", [](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
// Restore native stack pointer (longjmp emulation)
|
||||
c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&spu_thread::saved_native_sp)));
|
||||
|
||||
@ -278,6 +282,7 @@ DECLARE(spu_runtime::g_tail_escape) = build_function_asm<void(*)(spu_thread*, sp
|
||||
c.xor_(x86::ebx, x86::ebx);
|
||||
c.mov(x86::qword_ptr(x86::rsp), args[1]);
|
||||
c.ret();
|
||||
#endif
|
||||
});
|
||||
|
||||
DECLARE(spu_runtime::g_interpreter_table) = {};
|
||||
@ -364,7 +369,7 @@ void spu_cache::initialize()
|
||||
{
|
||||
spu_runtime::g_interpreter = spu_runtime::g_gateway;
|
||||
|
||||
if (g_cfg.core.spu_decoder == spu_decoder_type::precise || g_cfg.core.spu_decoder == spu_decoder_type::fast)
|
||||
if (g_cfg.core.spu_decoder == spu_decoder_type::_static || g_cfg.core.spu_decoder == spu_decoder_type::dynamic)
|
||||
{
|
||||
for (auto& x : *spu_runtime::g_dispatcher)
|
||||
{
|
||||
@ -395,7 +400,7 @@ void spu_cache::initialize()
|
||||
atomic_t<usz> fnext{};
|
||||
atomic_t<u8> fail_flag{0};
|
||||
|
||||
if (g_cfg.core.spu_decoder == spu_decoder_type::fast || g_cfg.core.spu_decoder == spu_decoder_type::llvm)
|
||||
if (g_cfg.core.spu_decoder == spu_decoder_type::dynamic || g_cfg.core.spu_decoder == spu_decoder_type::llvm)
|
||||
{
|
||||
if (auto compiler = spu_recompiler_base::make_llvm_recompiler(11))
|
||||
{
|
||||
@ -634,7 +639,7 @@ void spu_cache::initialize()
|
||||
|
||||
for (u32 i = 0; i < f->data.size(); i++)
|
||||
{
|
||||
fmt::append(dump, "%-10s", s_spu_iname.decode(std::bit_cast<be_t<u32>>(f->data[i])));
|
||||
fmt::append(dump, "%-10s", g_spu_iname.decode(std::bit_cast<be_t<u32>>(f->data[i])));
|
||||
}
|
||||
|
||||
n_max = std::max(n_max, ::size32(depth_n));
|
||||
@ -1289,15 +1294,13 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
|
||||
|
||||
void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* /*rip*/)
|
||||
{
|
||||
if (g_cfg.core.spu_decoder > spu_decoder_type::fast)
|
||||
if (g_cfg.core.spu_decoder != spu_decoder_type::_static)
|
||||
{
|
||||
fmt::throw_exception("Invalid SPU decoder");
|
||||
}
|
||||
|
||||
// Select opcode table
|
||||
const auto& table = *(g_cfg.core.spu_decoder == spu_decoder_type::precise
|
||||
? &g_spu_interpreter_precise.get_table()
|
||||
: &g_spu_interpreter_fast.get_table());
|
||||
const auto& table = g_fxo->get<spu_interpreter_rt>();
|
||||
|
||||
// LS pointer
|
||||
const auto base = static_cast<const u8*>(ls);
|
||||
@ -1311,7 +1314,7 @@ void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* /*rip*/
|
||||
}
|
||||
|
||||
const u32 op = *reinterpret_cast<const be_t<u32>*>(base + spu.pc);
|
||||
if (table[spu_decode(op)](spu, {op}))
|
||||
if (table.decode(op)(spu, {op}))
|
||||
spu.pc += 4;
|
||||
}
|
||||
}
|
||||
@ -1430,7 +1433,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
|
||||
m_targets.erase(pos);
|
||||
|
||||
// Fill register access info
|
||||
if (auto iflags = s_spu_iflag.decode(data))
|
||||
if (auto iflags = g_spu_iflag.decode(data))
|
||||
{
|
||||
if (+iflags & +spu_iflag::use_ra)
|
||||
m_use_ra[pos / 4] = op.ra;
|
||||
@ -1441,7 +1444,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
|
||||
}
|
||||
|
||||
// Analyse instruction
|
||||
switch (const auto type = s_spu_itype.decode(data))
|
||||
switch (const auto type = g_spu_itype.decode(data))
|
||||
{
|
||||
case spu_itype::UNK:
|
||||
case spu_itype::DFCEQ:
|
||||
@ -2297,7 +2300,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
|
||||
// Decode instruction
|
||||
const spu_opcode_t op{std::bit_cast<be_t<u32>>(result.data[(ia - lsa) / 4])};
|
||||
|
||||
const auto type = s_spu_itype.decode(op.opcode);
|
||||
const auto type = g_spu_itype.decode(op.opcode);
|
||||
|
||||
u8 reg_save = 255;
|
||||
|
||||
@ -2790,7 +2793,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
|
||||
{
|
||||
// Decode instruction again
|
||||
// result.data holds 32-bit instruction words, so the byte offset from lsa is divided by 4
op.opcode = std::bit_cast<be_t<u32>>(result.data[(ia - lsa) / 4]);
|
||||
last_inst = s_spu_itype.decode(op.opcode);
|
||||
last_inst = g_spu_itype.decode(op.opcode);
|
||||
|
||||
// Propagate some constants
|
||||
switch (last_inst)
|
||||
@ -5035,7 +5038,7 @@ public:
|
||||
|
||||
// Execute interpreter instruction
|
||||
const u32 op = *reinterpret_cast<const be_t<u32>*>(_spu->_ptr<u8>(0) + _spu->pc);
|
||||
if (!g_spu_interpreter_fast.decode(op)(*_spu, {op}))
|
||||
if (!g_fxo->get<spu_interpreter_rt>().decode(op)(*_spu, {op}))
|
||||
spu_log.fatal("Bad instruction");
|
||||
|
||||
// Swap state
|
||||
@ -5151,10 +5154,10 @@ public:
|
||||
const u32 op = i << (32u - m_interp_magn);
|
||||
|
||||
// Instruction type
|
||||
const auto itype = s_spu_itype.decode(op);
|
||||
const auto itype = g_spu_itype.decode(op);
|
||||
|
||||
// Function name
|
||||
std::string fname = fmt::format("spu_%s", s_spu_iname.decode(op));
|
||||
std::string fname = fmt::format("spu_%s", g_spu_iname.decode(op));
|
||||
|
||||
if (last_itype != itype)
|
||||
{
|
||||
@ -5460,7 +5463,7 @@ public:
|
||||
return _spu->check_state();
|
||||
}
|
||||
|
||||
template <spu_inter_func_t F>
|
||||
template <spu_intrp_func_t F>
|
||||
static void exec_fall(spu_thread* _spu, spu_opcode_t op)
|
||||
{
|
||||
if (F(*_spu, op))
|
||||
@ -5469,10 +5472,10 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
template <spu_inter_func_t F>
|
||||
template <spu_intrp_func_t F>
|
||||
void fall(spu_opcode_t op)
|
||||
{
|
||||
std::string name = fmt::format("spu_%s", s_spu_iname.decode(op.opcode));
|
||||
std::string name = fmt::format("spu_%s", g_spu_iname.decode(op.opcode));
|
||||
|
||||
if (m_interp_magn)
|
||||
{
|
||||
@ -6808,11 +6811,21 @@ public:
|
||||
set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b));
|
||||
}
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
// Byte-wise rotation helper for a 128-bit value (x64 variant).
// Only the low 4 bits of `b` are used as the rotation amount n; the result is the
// 16-byte window of the doubled buffer {a, a} that starts at byte offset 16 - n.
static __m128i exec_rotqby(__m128i a, u8 b)
|
||||
{
|
||||
// Two consecutive copies of `a`, so every offset in 1..16 yields a full 16-byte window
alignas(32) const __m128i buf[2]{a, a};
|
||||
// Unaligned load: the start address is not 16-byte aligned unless n == 0
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(reinterpret_cast<const u8*>(buf) + (16 - (b & 0xf))));
|
||||
}
|
||||
#else
|
||||
// Byte-wise rotation helper for a 128-bit value (portable variant, no SSE intrinsics).
// Only the low 4 bits of `b` are used as the rotation amount n; the result is the
// 16-byte window of the doubled buffer {a, a} that starts at byte offset 16 - n.
// NOTE(review): assumes sizeof(v128) == 16 so the window stays inside buf - confirm.
static v128 exec_rotqby(v128 a, u8 b)
|
||||
{
|
||||
// Two consecutive copies of `a`, so every offset in 1..16 yields a full 16-byte window
alignas(32) const v128 buf[2]{a, a};
|
||||
alignas(16) v128 res;
|
||||
// memcpy tolerates the unaligned source address
std::memcpy(&res, reinterpret_cast<const u8*>(buf) + (16 - (b & 0xf)), 16);
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
void ROTQBY(spu_opcode_t op)
|
||||
{
|
||||
@ -6822,7 +6835,7 @@ public:
|
||||
if (!m_use_ssse3)
|
||||
{
|
||||
value_t<u8[16]> r;
|
||||
r.value = call("spu_rotqby", &exec_rotqby, a.value, eval(extract(b, 12)).value);
|
||||
r.value = call<u8[16]>("spu_rotqby", &exec_rotqby, a.value, eval(extract(b, 12)).value);
|
||||
set_vr(op.rt, r);
|
||||
return;
|
||||
}
|
||||
@ -7805,7 +7818,7 @@ public:
|
||||
{
|
||||
const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
|
||||
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
set_vr(op.rt, fmuladd(a, b, c, true));
|
||||
else
|
||||
set_vr(op.rt, a * b + c);
|
||||
@ -7815,7 +7828,7 @@ public:
|
||||
{
|
||||
const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
|
||||
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
set_vr(op.rt, fmuladd(a, b, -c, true));
|
||||
else
|
||||
set_vr(op.rt, a * b - c);
|
||||
@ -7825,7 +7838,7 @@ public:
|
||||
{
|
||||
const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
|
||||
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
set_vr(op.rt, fmuladd(-a, b, c, true));
|
||||
else
|
||||
set_vr(op.rt, c - (a * b));
|
||||
@ -7835,7 +7848,7 @@ public:
|
||||
{
|
||||
const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);
|
||||
|
||||
if (g_cfg.core.llvm_accurate_dfma)
|
||||
if (g_cfg.core.use_accurate_dfma)
|
||||
set_vr(op.rt, -fmuladd(a, b, c, true));
|
||||
else
|
||||
set_vr(op.rt, -(a * b + c));
|
||||
@ -9894,11 +9907,11 @@ std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_llvm_recompiler(u
|
||||
return std::make_unique<spu_llvm_recompiler>(magn);
|
||||
}
|
||||
|
||||
const spu_decoder<spu_llvm_recompiler> g_spu_llvm_decoder;
|
||||
const spu_decoder<spu_llvm_recompiler> s_spu_llvm_decoder;
|
||||
|
||||
decltype(&spu_llvm_recompiler::UNK) spu_llvm_recompiler::decode(u32 op)
|
||||
{
|
||||
return g_spu_llvm_decoder.decode(op);
|
||||
return s_spu_llvm_decoder.decode(op);
|
||||
}
|
||||
|
||||
#else
|
||||
@ -10025,6 +10038,11 @@ struct spu_llvm
|
||||
|
||||
void operator()()
|
||||
{
|
||||
if (g_cfg.core.spu_decoder != spu_decoder_type::llvm)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// To compile (hash -> item)
|
||||
std::unordered_multimap<u64, spu_item*, value_hash<u64>> enqueued;
|
||||
|
||||
@ -10345,7 +10363,7 @@ struct spu_fast : public spu_recompiler_base
|
||||
// Fix endianness
|
||||
const spu_opcode_t op{std::bit_cast<be_t<u32>>(func.data[i])};
|
||||
|
||||
switch (auto type = s_spu_itype.decode(op.opcode))
|
||||
switch (auto type = g_spu_itype.decode(op.opcode))
|
||||
{
|
||||
case spu_itype::BRZ:
|
||||
case spu_itype::BRHZ:
|
||||
|
@ -30,7 +30,7 @@
|
||||
#include "util/vm.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/v128sse.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
|
||||
using spu_rdata_t = decltype(spu_thread::rdata);
|
||||
@ -87,14 +87,13 @@ void fmt_class_string<spu_type>::format(std::string& out, u64 arg)
|
||||
// Verify AVX availability for TSX transactions
|
||||
static const bool s_tsx_avx = utils::has_avx();
|
||||
|
||||
// For special case
|
||||
static const bool s_tsx_haswell = utils::has_rtm() && !utils::has_mpx();
|
||||
|
||||
// Threshold for when rep movsb is expected to outperform simd copies
|
||||
// The threshold will be 0xFFFFFFFF when the performance of rep movsb is expected to be bad
|
||||
static const u32 s_rep_movsb_threshold = utils::get_rep_movsb_threshold();
|
||||
|
||||
#ifndef _MSC_VER
|
||||
#if defined(_M_X64)
|
||||
extern "C" void __movsb(uchar*, const uchar*, size_t);
|
||||
#elif defined(ARCH_X64)
|
||||
static FORCE_INLINE void __movsb(unsigned char * Dst, const unsigned char * Src, size_t Size)
|
||||
{
|
||||
__asm__ __volatile__
|
||||
@ -104,8 +103,12 @@ static FORCE_INLINE void __movsb(unsigned char * Dst, const unsigned char * Src,
|
||||
"[Dst]" (Dst), "[Src]" (Src), "[Size]" (Size)
|
||||
);
|
||||
}
|
||||
#else
|
||||
#define s_rep_movsb_threshold umax
|
||||
#define __movsb std::memcpy
|
||||
#endif
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
static FORCE_INLINE bool cmp_rdata_avx(const __m256i* lhs, const __m256i* rhs)
|
||||
{
|
||||
#if defined(_MSC_VER) || defined(__AVX__)
|
||||
@ -145,18 +148,21 @@ static FORCE_INLINE bool cmp_rdata_avx(const __m256i* lhs, const __m256i* rhs)
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
__forceinline
|
||||
#endif
|
||||
extern bool cmp_rdata(const spu_rdata_t& _lhs, const spu_rdata_t& _rhs)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
#ifndef __AVX__
|
||||
if (s_tsx_avx) [[likely]]
|
||||
#endif
|
||||
{
|
||||
return cmp_rdata_avx(reinterpret_cast<const __m256i*>(_lhs), reinterpret_cast<const __m256i*>(_rhs));
|
||||
}
|
||||
#endif
|
||||
|
||||
const auto lhs = reinterpret_cast<const v128*>(_lhs);
|
||||
const auto rhs = reinterpret_cast<const v128*>(_rhs);
|
||||
@ -165,9 +171,10 @@ extern bool cmp_rdata(const spu_rdata_t& _lhs, const spu_rdata_t& _rhs)
|
||||
const v128 c = (lhs[4] ^ rhs[4]) | (lhs[5] ^ rhs[5]);
|
||||
const v128 d = (lhs[6] ^ rhs[6]) | (lhs[7] ^ rhs[7]);
|
||||
const v128 r = (a | b) | (c | d);
|
||||
return r == v128{};
|
||||
return gv_testz(r);
|
||||
}
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
static FORCE_INLINE void mov_rdata_avx(__m256i* dst, const __m256i* src)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
@ -199,12 +206,14 @@ static FORCE_INLINE void mov_rdata_avx(__m256i* dst, const __m256i* src)
|
||||
);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
__forceinline
|
||||
#endif
|
||||
extern void mov_rdata(spu_rdata_t& _dst, const spu_rdata_t& _src)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
#ifndef __AVX__
|
||||
if (s_tsx_avx) [[likely]]
|
||||
#endif
|
||||
@ -232,8 +241,12 @@ extern void mov_rdata(spu_rdata_t& _dst, const spu_rdata_t& _src)
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 80), v1);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 96), v2);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 112), v3);
|
||||
#else
|
||||
std::memcpy(_dst, _src, 128);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
static FORCE_INLINE void mov_rdata_nt_avx(__m256i* dst, const __m256i* src)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
@ -265,9 +278,11 @@ static FORCE_INLINE void mov_rdata_nt_avx(__m256i* dst, const __m256i* src)
|
||||
);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
extern void mov_rdata_nt(spu_rdata_t& _dst, const spu_rdata_t& _src)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
#ifndef __AVX__
|
||||
if (s_tsx_avx) [[likely]]
|
||||
#endif
|
||||
@ -295,6 +310,9 @@ extern void mov_rdata_nt(spu_rdata_t& _dst, const spu_rdata_t& _src)
|
||||
_mm_stream_si128(reinterpret_cast<__m128i*>(_dst + 80), v1);
|
||||
_mm_stream_si128(reinterpret_cast<__m128i*>(_dst + 96), v2);
|
||||
_mm_stream_si128(reinterpret_cast<__m128i*>(_dst + 112), v3);
|
||||
#else
|
||||
std::memcpy(_dst, _src, 128);
|
||||
#endif
|
||||
}
|
||||
|
||||
void do_cell_atomic_128_store(u32 addr, const void* to_write);
|
||||
@ -421,10 +439,11 @@ std::array<u32, 2> op_branch_targets(u32 pc, spu_opcode_t op)
|
||||
return res;
|
||||
}
|
||||
|
||||
const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>("spu_putllc_tx", [](asmjit::x86::Assembler& c, auto& args)
|
||||
const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>("spu_putllc_tx", [](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
Label fall = c.newLabel();
|
||||
Label fail = c.newLabel();
|
||||
Label _ret = c.newLabel();
|
||||
@ -677,12 +696,16 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
||||
c.bind(ret2);
|
||||
#endif
|
||||
c.ret();
|
||||
#else
|
||||
c.ret(a64::x30);
|
||||
#endif
|
||||
});
|
||||
|
||||
const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata, u64* _stx, u64* _ftx)>("spu_putlluc_tx", [](asmjit::x86::Assembler& c, auto& args)
|
||||
const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata, u64* _stx, u64* _ftx)>("spu_putlluc_tx", [](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
Label fall = c.newLabel();
|
||||
Label _ret = c.newLabel();
|
||||
|
||||
@ -803,12 +826,16 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
|
||||
c.bind(ret2);
|
||||
#endif
|
||||
c.ret();
|
||||
#else
|
||||
c.ret(a64::x30);
|
||||
#endif
|
||||
});
|
||||
|
||||
const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>("spu_getllar_tx", [](asmjit::x86::Assembler& c, auto& args)
|
||||
const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>("spu_getllar_tx", [](native_asm& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
Label fall = c.newLabel();
|
||||
Label _ret = c.newLabel();
|
||||
|
||||
@ -938,6 +965,9 @@ const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_th
|
||||
c.bind(ret2);
|
||||
#endif
|
||||
c.ret();
|
||||
#else
|
||||
c.ret(a64::x30);
|
||||
#endif
|
||||
});
|
||||
|
||||
void spu_int_ctrl_t::set(u64 ints)
|
||||
@ -967,7 +997,7 @@ spu_imm_table_t::scale_table_t::scale_table_t()
|
||||
{
|
||||
for (s32 i = -155; i < 174; i++)
|
||||
{
|
||||
m_data[i + 155].vf = _mm_set1_ps(static_cast<float>(std::exp2(i)));
|
||||
m_data[i + 155] = v128::fromf32p(static_cast<float>(std::exp2(i)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1385,6 +1415,8 @@ void spu_thread::cpu_task()
|
||||
|
||||
std::fesetround(FE_TOWARDZERO);
|
||||
|
||||
gv_set_zeroing_denormals();
|
||||
|
||||
g_tls_log_prefix = []
|
||||
{
|
||||
const auto cpu = static_cast<spu_thread*>(get_current_cpu_thread());
|
||||
@ -1622,7 +1654,7 @@ spu_thread::spu_thread(lv2_spu_group* group, u32 index, std::string_view name, u
|
||||
jit = spu_recompiler_base::make_fast_llvm_recompiler();
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_decoder != spu_decoder_type::fast && g_cfg.core.spu_decoder != spu_decoder_type::precise)
|
||||
if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit || g_cfg.core.spu_decoder == spu_decoder_type::llvm)
|
||||
{
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
||||
{
|
||||
@ -2640,7 +2672,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
||||
return false;
|
||||
});
|
||||
|
||||
const u64 count2 = __rdtsc() - perf2.get();
|
||||
const u64 count2 = utils::get_tsc() - perf2.get();
|
||||
|
||||
if (count2 > 20000 && g_cfg.core.perf_report) [[unlikely]]
|
||||
{
|
||||
@ -2672,7 +2704,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
||||
utils::prefetch_read(rdata + 64);
|
||||
last_faddr = addr;
|
||||
last_ftime = res.load() & -128;
|
||||
last_ftsc = __rdtsc();
|
||||
last_ftsc = utils::get_tsc();
|
||||
return false;
|
||||
}
|
||||
default:
|
||||
@ -2854,7 +2886,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
||||
});
|
||||
|
||||
vm::reservation_acquire(addr) += 32;
|
||||
result = __rdtsc() - perf0.get();
|
||||
result = utils::get_tsc() - perf0.get();
|
||||
}
|
||||
|
||||
if (result > 20000 && g_cfg.core.perf_report) [[unlikely]]
|
||||
@ -3007,7 +3039,7 @@ bool spu_thread::do_mfc(bool can_escape, bool must_finish)
|
||||
{
|
||||
// Get commands' execution mask
|
||||
// Mask bits are always set when mfc_transfers_shuffling is 0
|
||||
return static_cast<u16>((0 - (1u << std::min<u32>(g_cfg.core.mfc_transfers_shuffling, size))) | __rdtsc());
|
||||
return static_cast<u16>((0 - (1u << std::min<u32>(g_cfg.core.mfc_transfers_shuffling, size))) | utils::get_tsc());
|
||||
};
|
||||
|
||||
// Process enqueued commands
|
||||
@ -3684,9 +3716,9 @@ void spu_thread::set_interrupt_status(bool enable)
|
||||
// Detect enabling interrupts with events masked
|
||||
if (auto mask = ch_events.load().mask; mask & SPU_EVENT_INTR_BUSY_CHECK)
|
||||
{
|
||||
if (g_cfg.core.spu_decoder != spu_decoder_type::precise && g_cfg.core.spu_decoder != spu_decoder_type::fast)
|
||||
if (g_cfg.core.spu_decoder != spu_decoder_type::_static)
|
||||
{
|
||||
fmt::throw_exception("SPU Interrupts not implemented (mask=0x%x): Use interpreterts", mask);
|
||||
fmt::throw_exception("SPU Interrupts not implemented (mask=0x%x): Use static interpreter", mask);
|
||||
}
|
||||
|
||||
spu_log.trace("SPU Interrupts (mask=0x%x) are using CPU busy checking mode", mask);
|
||||
|
@ -503,9 +503,9 @@ struct spu_imm_table_t
|
||||
public:
|
||||
scale_table_t();
|
||||
|
||||
FORCE_INLINE const auto& operator [](s32 scale) const
|
||||
FORCE_INLINE const v128& operator [](s32 scale) const
|
||||
{
|
||||
return m_data[scale + 155].vf;
|
||||
return m_data[scale + 155];
|
||||
}
|
||||
}
|
||||
const scale;
|
||||
|
@ -75,30 +75,28 @@ void fmt_class_string<lv2_protocol>::format(std::string& out, u64 arg)
|
||||
});
|
||||
}
|
||||
|
||||
static bool null_func_(ppu_thread& ppu)
|
||||
static void null_func_(ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*)
|
||||
{
|
||||
ppu_log.todo("Unimplemented syscall %s -> CELL_OK (r3=0x%llx, r4=0x%llx, r5=0x%llx, r6=0x%llx, r7=0x%llx, r8=0x%llx, r9=0x%llx, r10=0x%llx)", ppu_syscall_code(ppu.gpr[11]),
|
||||
ppu.gpr[3], ppu.gpr[4], ppu.gpr[5], ppu.gpr[6], ppu.gpr[7], ppu.gpr[8], ppu.gpr[9], ppu.gpr[10]);
|
||||
|
||||
ppu.gpr[3] = 0;
|
||||
ppu.cia += 4;
|
||||
return false;
|
||||
ppu.cia = vm::get_addr(this_op) + 4;
|
||||
}
|
||||
|
||||
static bool uns_func_(ppu_thread& ppu)
|
||||
static void uns_func_(ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*)
|
||||
{
|
||||
ppu_log.trace("Unused syscall %d -> ENOSYS", ppu.gpr[11]);
|
||||
ppu.gpr[3] = CELL_ENOSYS;
|
||||
ppu.cia += 4;
|
||||
return false;
|
||||
ppu.cia = vm::get_addr(this_op) + 4;
|
||||
}
|
||||
|
||||
// Bind Syscall
|
||||
#define BIND_SYSC(func) {BIND_FUNC(func), #func}
|
||||
#define NULL_FUNC(name) {null_func_, #name}
|
||||
|
||||
constexpr std::pair<ppu_function_t, std::string_view> null_func{null_func_, ""};
|
||||
constexpr std::pair<ppu_function_t, std::string_view> uns_func{uns_func_, ""};
|
||||
constexpr std::pair<ppu_intrp_func_t, std::string_view> null_func{null_func_, ""};
|
||||
constexpr std::pair<ppu_intrp_func_t, std::string_view> uns_func{uns_func_, ""};
|
||||
|
||||
// UNS = Unused
|
||||
// ROOT = Root
|
||||
@ -106,7 +104,7 @@ constexpr std::pair<ppu_function_t, std::string_view> uns_func{uns_func_, ""};
|
||||
// DEX..DECR = Unavailable on retail consoles
|
||||
// PM = Product Mode
|
||||
// AuthID = Authentication ID
|
||||
const std::array<std::pair<ppu_function_t, std::string_view>, 1024> g_ppu_syscall_table
|
||||
const std::array<std::pair<ppu_intrp_func_t, std::string_view>, 1024> g_ppu_syscall_table
|
||||
{
|
||||
null_func,
|
||||
BIND_SYSC(sys_process_getpid), //1 (0x001)
|
||||
@ -1151,7 +1149,7 @@ extern void ppu_execute_syscall(ppu_thread& ppu, u64 code)
|
||||
|
||||
if (const auto func = g_ppu_syscall_table[code].first)
|
||||
{
|
||||
func(ppu);
|
||||
func(ppu, {}, vm::_ptr<u32>(ppu.cia), nullptr);
|
||||
ppu_log.trace("Syscall '%s' (%llu) finished, r3=0x%llx", ppu_syscall_code(code), code, ppu.gpr[3]);
|
||||
return;
|
||||
}
|
||||
@ -1160,7 +1158,7 @@ extern void ppu_execute_syscall(ppu_thread& ppu, u64 code)
|
||||
fmt::throw_exception("Invalid syscall number (%llu)", code);
|
||||
}
|
||||
|
||||
extern ppu_function_t ppu_get_syscall(u64 code)
|
||||
extern ppu_intrp_func_t ppu_get_syscall(u64 code)
|
||||
{
|
||||
if (code < g_ppu_syscall_table.size())
|
||||
{
|
||||
|
@ -11,6 +11,10 @@
|
||||
#include <winsock2.h>
|
||||
#include <WS2tcpip.h>
|
||||
#else
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#endif
|
||||
#include <errno.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
@ -22,6 +26,9 @@
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <poll.h>
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "Emu/NP/np_handler.h"
|
||||
|
@ -317,7 +317,7 @@ void usb_handler_thread::operator()()
|
||||
{
|
||||
timeval lusb_tv{0, 200};
|
||||
|
||||
while (thread_ctrl::state() != thread_state::aborting)
|
||||
while (ctx && thread_ctrl::state() != thread_state::aborting)
|
||||
{
|
||||
// Todo: Hotplug here?
|
||||
|
||||
|
@ -15,6 +15,10 @@
|
||||
#include <WS2tcpip.h>
|
||||
#include <afunix.h> // sockaddr_un
|
||||
#else
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#endif
|
||||
#include <errno.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
@ -25,6 +29,9 @@
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/un.h> // sockaddr_un
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <charconv>
|
||||
|
@ -684,7 +684,7 @@ namespace vm
|
||||
// 1. To simplify range_lock logic
|
||||
// 2. To make sure it never overlaps with 32-bit addresses
|
||||
// Also check that it's aligned (lowest 16 bits)
|
||||
ensure((shm_self & 0xffff'8000'0000'ffff) == range_locked);
|
||||
ensure((shm_self & 0xffff'0000'0000'ffff) == range_locked);
|
||||
|
||||
// Find another mirror and map it as shareable too
|
||||
for (auto& ploc : g_locations)
|
||||
@ -714,7 +714,7 @@ namespace vm
|
||||
u64 shm_self = reinterpret_cast<u64>(shm->get()) ^ range_locked;
|
||||
|
||||
// Check (see above)
|
||||
ensure((shm_self & 0xffff'8000'0000'ffff) == range_locked);
|
||||
ensure((shm_self & 0xffff'0000'0000'ffff) == range_locked);
|
||||
|
||||
// Map range as shareable
|
||||
for (u32 i = addr / 65536; i < addr / 65536 + size / 65536; i++)
|
||||
@ -1129,13 +1129,16 @@ namespace vm
|
||||
{
|
||||
auto fill64 = [](u8* ptr, u64 data, usz count)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
__stosq(reinterpret_cast<u64*>(ptr), data, count);
|
||||
#else
|
||||
#elif defined(ARCH_X64)
|
||||
__asm__ ("mov %0, %%rdi; mov %1, %%rax; mov %2, %%rcx; rep stosq;"
|
||||
:
|
||||
: "r" (ptr), "r" (data), "r" (count)
|
||||
: "rdi", "rax", "rcx", "memory");
|
||||
#else
|
||||
for (usz i = 0; i < count; i++)
|
||||
reinterpret_cast<u64*>(ptr)[i] = data;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -200,16 +200,10 @@ namespace vm
|
||||
return {};
|
||||
}
|
||||
|
||||
// Unsafe convert host ptr to PS3 VM address (clamp with 4GiB alignment assumption)
|
||||
inline vm::addr_t get_addr(const void* ptr)
|
||||
{
|
||||
const auto [addr, ok] = try_get_addr(ptr);
|
||||
|
||||
if (!ok)
|
||||
{
|
||||
fmt::throw_exception("Not a virtual memory pointer (%p)", ptr);
|
||||
}
|
||||
|
||||
return addr;
|
||||
return vm::addr_t{static_cast<u32>(uptr(ptr))};
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "vm.h"
|
||||
#include "vm_locking.h"
|
||||
#include "util/atomic.hpp"
|
||||
#include "util/tsc.hpp"
|
||||
#include <functional>
|
||||
|
||||
extern bool g_use_rtm;
|
||||
@ -11,7 +12,6 @@ extern u64 g_rtm_tx_limit2;
|
||||
#ifdef _MSC_VER
|
||||
extern "C"
|
||||
{
|
||||
u64 __rdtsc();
|
||||
u32 _xbegin();
|
||||
void _xend();
|
||||
}
|
||||
@ -19,15 +19,6 @@ extern "C"
|
||||
|
||||
namespace vm
|
||||
{
|
||||
inline u64 get_tsc()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return __rdtsc();
|
||||
#else
|
||||
return __builtin_ia32_rdtsc();
|
||||
#endif
|
||||
}
|
||||
|
||||
enum : u64
|
||||
{
|
||||
rsrv_lock_mask = 127,
|
||||
@ -108,13 +99,14 @@ namespace vm
|
||||
auto& res = vm::reservation_acquire(addr);
|
||||
//_m_prefetchw(&res);
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
if (g_use_rtm)
|
||||
{
|
||||
// Stage 1: single optimistic transaction attempt
|
||||
unsigned status = -1;
|
||||
u64 _old = 0;
|
||||
|
||||
auto stamp0 = get_tsc(), stamp1 = stamp0, stamp2 = stamp0;
|
||||
auto stamp0 = utils::get_tsc(), stamp1 = stamp0, stamp2 = stamp0;
|
||||
|
||||
#ifndef _MSC_VER
|
||||
__asm__ goto ("xbegin %l[stage2];" ::: "memory" : stage2);
|
||||
@ -176,16 +168,16 @@ namespace vm
|
||||
#ifndef _MSC_VER
|
||||
__asm__ volatile ("mov %%eax, %0;" : "=r" (status) :: "memory");
|
||||
#endif
|
||||
stamp1 = get_tsc();
|
||||
stamp1 = utils::get_tsc();
|
||||
|
||||
// Stage 2: try to lock reservation first
|
||||
_old = res.fetch_add(1);
|
||||
|
||||
// Compute stamps excluding memory touch
|
||||
stamp2 = get_tsc() - (stamp1 - stamp0);
|
||||
stamp2 = utils::get_tsc() - (stamp1 - stamp0);
|
||||
|
||||
// Start lightened transaction
|
||||
for (; !(_old & vm::rsrv_unique_lock) && stamp2 - stamp0 <= g_rtm_tx_limit2; stamp2 = get_tsc())
|
||||
for (; !(_old & vm::rsrv_unique_lock) && stamp2 - stamp0 <= g_rtm_tx_limit2; stamp2 = utils::get_tsc())
|
||||
{
|
||||
if (cpu.has_pause_flag())
|
||||
{
|
||||
@ -285,6 +277,9 @@ namespace vm
|
||||
return result;
|
||||
}
|
||||
}
|
||||
#else
|
||||
static_cast<void>(cpu);
|
||||
#endif /* ARCH_X64 */
|
||||
|
||||
// Lock reservation and perform heavyweight lock
|
||||
reservation_shared_lock_internal(res);
|
||||
|
@ -8,8 +8,15 @@
|
||||
#ifdef _WIN32
|
||||
#include <WS2tcpip.h>
|
||||
#else
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#endif
|
||||
#include <sys/socket.h>
|
||||
#include <arpa/inet.h>
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
||||
LOG_CHANNEL(dnshook_log, "DnsHook");
|
||||
|
@ -19,12 +19,19 @@
|
||||
#include <WS2tcpip.h>
|
||||
#include <iphlpapi.h>
|
||||
#else
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#endif
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <net/if.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <netdb.h>
|
||||
#include <unistd.h>
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__FreeBSD__) || defined(__APPLE__)
|
||||
|
@ -21,6 +21,10 @@
|
||||
#include <winsock2.h>
|
||||
#include <WS2tcpip.h>
|
||||
#else
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#endif
|
||||
#include <errno.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
@ -32,6 +36,9 @@
|
||||
#include <fcntl.h>
|
||||
#include <poll.h>
|
||||
#include <netdb.h>
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
||||
LOG_CHANNEL(rpcn_log, "rpcn");
|
||||
|
@ -11,9 +11,16 @@
|
||||
#ifdef _WIN32
|
||||
#include <winsock2.h>
|
||||
#else
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#endif
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "Emu/Cell/Modules/sceNp.h"
|
||||
|
@ -7,15 +7,25 @@
|
||||
#include "util/sysinfo.hpp"
|
||||
#include "util/asm.hpp"
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
#include "emmintrin.h"
|
||||
#include "immintrin.h"
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) && defined(__clang__)
|
||||
#if !defined(_MSC_VER)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#ifdef ARCH_ARM64
|
||||
#if !defined(_MSC_VER)
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
#endif
|
||||
#undef FORCE_INLINE
|
||||
#include "Emu/CPU/sse2neon.h"
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) || !defined(__SSE2__)
|
||||
#define PLAIN_FUNC
|
||||
#define SSSE3_FUNC
|
||||
#define SSE4_1_FUNC
|
||||
@ -57,7 +67,7 @@ constexpr bool s_use_ssse3 = true;
|
||||
constexpr bool s_use_sse4_1 = true;
|
||||
constexpr bool s_use_avx2 = true;
|
||||
constexpr bool s_use_avx3 = false;
|
||||
#elif defined(__SSE41__)
|
||||
#elif defined(__SSE4_1__)
|
||||
constexpr bool s_use_ssse3 = true;
|
||||
constexpr bool s_use_sse4_1 = true;
|
||||
constexpr bool s_use_avx2 = false;
|
||||
@ -67,11 +77,16 @@ constexpr bool s_use_ssse3 = true;
|
||||
constexpr bool s_use_sse4_1 = false;
|
||||
constexpr bool s_use_avx2 = false;
|
||||
constexpr bool s_use_avx3 = false;
|
||||
#else
|
||||
#elif defined(ARCH_X64)
|
||||
const bool s_use_ssse3 = utils::has_ssse3();
|
||||
const bool s_use_sse4_1 = utils::has_sse41();
|
||||
const bool s_use_avx2 = utils::has_avx2();
|
||||
const bool s_use_avx3 = utils::has_avx512();
|
||||
#else
|
||||
constexpr bool s_use_ssse3 = true; // Non x86
|
||||
constexpr bool s_use_sse4_1 = true; // Non x86
|
||||
constexpr bool s_use_avx2 = false;
|
||||
constexpr bool s_use_avx3 = false;
|
||||
#endif
|
||||
|
||||
const __m128i s_bswap_u32_mask = _mm_set_epi8(
|
||||
@ -98,7 +113,7 @@ namespace utils
|
||||
namespace
|
||||
{
|
||||
template <bool Compare>
|
||||
PLAIN_FUNC bool copy_data_swap_u32_naive(u32* dst, const u32* src, u32 count)
|
||||
PLAIN_FUNC auto copy_data_swap_u32_naive(u32* dst, const u32* src, u32 count)
|
||||
{
|
||||
u32 result = 0;
|
||||
|
||||
@ -117,11 +132,14 @@ namespace
|
||||
dst[i] = data;
|
||||
}
|
||||
|
||||
return static_cast<bool>(result);
|
||||
if constexpr (Compare)
|
||||
{
|
||||
return static_cast<bool>(result);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool Compare>
|
||||
SSSE3_FUNC bool copy_data_swap_u32_ssse3(u32* dst, const u32* src, u32 count)
|
||||
SSSE3_FUNC auto copy_data_swap_u32_ssse3(u32* dst, const u32* src, u32 count)
|
||||
{
|
||||
u32 result = 0;
|
||||
|
||||
@ -140,9 +158,13 @@ namespace
|
||||
dst[i] = data;
|
||||
}
|
||||
|
||||
return static_cast<bool>(result);
|
||||
if constexpr (Compare)
|
||||
{
|
||||
return static_cast<bool>(result);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
template <bool Compare, int Size, typename RT>
|
||||
void build_copy_data_swap_u32_avx3(asmjit::x86::Assembler& c, std::array<asmjit::x86::Gp, 4>& args, const RT& rmask, const RT& rload, const RT& rtest)
|
||||
{
|
||||
@ -199,8 +221,7 @@ namespace
|
||||
c.jmp(loop);
|
||||
|
||||
c.bind(tail);
|
||||
c.shlx(x86::eax, x86::eax, args[2].r32());
|
||||
c.not_(x86::eax);
|
||||
c.bzhi(x86::eax, x86::eax, args[2].r32());
|
||||
c.kmovw(x86::k1, x86::eax);
|
||||
c.k(x86::k1).z().vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u));
|
||||
c.vpshufb(rload, rload, rmask);
|
||||
@ -230,7 +251,7 @@ namespace
|
||||
}
|
||||
|
||||
template <bool Compare>
|
||||
void build_copy_data_swap_u32(asmjit::x86::Assembler& c, std::array<asmjit::x86::Gp, 4>& args)
|
||||
void build_copy_data_swap_u32(native_asm& c, native_args& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -254,11 +275,18 @@ namespace
|
||||
|
||||
// Jump to the plain C++ implementation
c.jmp(asmjit::imm_ptr(&copy_data_swap_u32_naive<Compare>));
|
||||
}
|
||||
#else
|
||||
template <bool Compare>
|
||||
constexpr auto build_copy_data_swap_u32()
|
||||
{
|
||||
// Non-x64 builds do not JIT this routine: hand back the address of the plain C++ implementation
return &copy_data_swap_u32_naive<Compare>;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
built_function<void(*)(void*, const void*, u32)> copy_data_swap_u32("copy_data_swap_u32", &build_copy_data_swap_u32<false>);
|
||||
built_function<void(*)(u32*, const u32*, u32)> copy_data_swap_u32("copy_data_swap_u32", &build_copy_data_swap_u32<false>);
|
||||
|
||||
built_function<bool(*)(void*, const void*, u32)> copy_data_swap_u32_cmp("copy_data_swap_u32_cmp", &build_copy_data_swap_u32<true>);
|
||||
built_function<bool(*)(u32*, const u32*, u32)> copy_data_swap_u32_cmp("copy_data_swap_u32_cmp", &build_copy_data_swap_u32<true>);
|
||||
|
||||
namespace
|
||||
{
|
||||
@ -390,6 +418,7 @@ namespace
|
||||
|
||||
struct primitive_restart_impl
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
AVX2_FUNC
|
||||
static
|
||||
std::tuple<u16, u16> upload_u16_swapped_avx2(const void *src, void *dst, u32 iterations, u16 restart_index)
|
||||
@ -428,6 +457,7 @@ namespace
|
||||
|
||||
return std::make_tuple(min_index, max_index);
|
||||
}
|
||||
#endif
|
||||
|
||||
SSE4_1_FUNC
|
||||
static
|
||||
@ -512,9 +542,11 @@ namespace
|
||||
{
|
||||
if (s_use_avx2)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
u32 iterations = length >> 4;
|
||||
written = length & ~0xF;
|
||||
std::tie(min_index, max_index) = upload_u16_swapped_avx2(src.data(), dst.data(), iterations, restart_index);
|
||||
#endif
|
||||
}
|
||||
else if (s_use_sse4_1)
|
||||
{
|
||||
|
@ -51,7 +51,7 @@ void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w);
|
||||
void stream_vector_from_memory(void *dst, void *src);
|
||||
|
||||
// Copy and swap data in 32-bit units
|
||||
extern built_function<void(*)(void*, const void*, u32)> copy_data_swap_u32;
|
||||
extern built_function<void(*)(u32*, const u32*, u32)> copy_data_swap_u32;
|
||||
|
||||
// Copy and swap data in 32-bit units, return true if changed
|
||||
extern built_function<bool(*)(void*, const void*, u32)> copy_data_swap_u32_cmp;
|
||||
extern built_function<bool(*)(u32*, const u32*, u32)> copy_data_swap_u32_cmp;
|
||||
|
@ -38,16 +38,16 @@ namespace gl
|
||||
ensure(real_pitch == (width * 4));
|
||||
if (rsx_pitch == real_pitch) [[likely]]
|
||||
{
|
||||
copy_data_swap_u32(dst, dst, valid_length / 4);
|
||||
copy_data_swap_u32(static_cast<u32*>(dst), static_cast<u32*>(dst), valid_length / 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
const u32 num_rows = utils::align(valid_length, rsx_pitch) / rsx_pitch;
|
||||
u8* data = static_cast<u8*>(dst);
|
||||
u32* data = static_cast<u32*>(dst);
|
||||
for (u32 row = 0; row < num_rows; ++row)
|
||||
{
|
||||
copy_data_swap_u32(data, data, width);
|
||||
data += rsx_pitch;
|
||||
data += rsx_pitch / 4;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -2,9 +2,12 @@
|
||||
|
||||
#include "ProgramStateCache.h"
|
||||
|
||||
#include "emmintrin.h"
|
||||
#include "util/asm.hpp"
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
#include "emmintrin.h"
|
||||
#endif
|
||||
|
||||
template <typename Traits>
|
||||
void program_state_cache<Traits>::fill_fragment_constants_buffer(std::span<f32> dst_buffer, const RSXFragmentProgram &fragment_program, bool sanitize) const
|
||||
{
|
||||
@ -19,12 +22,23 @@ void program_state_cache<Traits>::fill_fragment_constants_buffer(std::span<f32>
|
||||
for (usz offset_in_fragment_program : I->second.FragmentConstantOffsetCache)
|
||||
{
|
||||
char* data = static_cast<char*>(fragment_program.get_data()) + offset_in_fragment_program;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data));
|
||||
const __m128i shuffled_vector = _mm_or_si128(_mm_slli_epi16(vector, 8), _mm_srli_epi16(vector, 8));
|
||||
#else
|
||||
for (u32 i = 0; i < 4; i++)
|
||||
{
|
||||
const u32 value = reinterpret_cast<u32*>(data)[i];
|
||||
tmp[i] = std::bit_cast<f32, u32>(((value >> 8) & 0xff00ff) | ((value << 8) & 0xff00ff00));
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!patch_table.is_empty())
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
_mm_store_ps(tmp, _mm_castsi128_ps(shuffled_vector));
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
@ -47,15 +61,29 @@ void program_state_cache<Traits>::fill_fragment_constants_buffer(std::span<f32>
|
||||
}
|
||||
else if (sanitize)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
//Convert NaNs and Infs to 0
|
||||
const auto masked = _mm_and_si128(shuffled_vector, _mm_set1_epi32(0x7fffffff));
|
||||
const auto valid = _mm_cmplt_epi32(masked, _mm_set1_epi32(0x7f800000));
|
||||
const auto result = _mm_and_si128(shuffled_vector, valid);
|
||||
_mm_stream_si128(utils::bless<__m128i>(dst), result);
|
||||
#else
|
||||
for (u32 i = 0; i < 4; i++)
|
||||
{
|
||||
const u32 value = std::bit_cast<u32>(tmp[i]);
|
||||
tmp[i] = (value & 0x7fffffff) < 0x7f800000 ? value : 0;
|
||||
}
|
||||
|
||||
std::memcpy(dst, tmp, 16);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
_mm_stream_si128(utils::bless<__m128i>(dst), shuffled_vector);
|
||||
#else
|
||||
std::memcpy(dst, tmp, 16);
|
||||
#endif
|
||||
}
|
||||
|
||||
dst += 4;
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "VKShaderInterpreter.h"
|
||||
#include "VKQueryPool.h"
|
||||
#include "../GCM.h"
|
||||
#include "util/asm.hpp"
|
||||
|
||||
#include <thread>
|
||||
#include <optional>
|
||||
@ -310,11 +311,7 @@ namespace vk
|
||||
{
|
||||
while (num_waiters.load() != 0)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
_mm_pause();
|
||||
#else
|
||||
__builtin_ia32_pause();
|
||||
#endif
|
||||
utils::pause();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -452,6 +452,18 @@ namespace vk
|
||||
enabled_features.shaderStorageImageWriteWithoutFormat = VK_FALSE;
|
||||
}
|
||||
|
||||
if (!pgpu->features.shaderClipDistance)
|
||||
{
|
||||
rsx_log.error("Your GPU does not support shader clip distance. Graphics will not render correctly.");
|
||||
enabled_features.shaderClipDistance = VK_FALSE;
|
||||
}
|
||||
|
||||
if (!pgpu->features.shaderStorageBufferArrayDynamicIndexing)
|
||||
{
|
||||
rsx_log.error("Your GPU does not support shader storage buffer array dynamic indexing. Graphics will not render correctly.");
|
||||
enabled_features.shaderStorageBufferArrayDynamicIndexing = VK_FALSE;
|
||||
}
|
||||
|
||||
if (!pgpu->features.samplerAnisotropy)
|
||||
{
|
||||
rsx_log.error("Your GPU does not support anisotropic filtering. Graphics may not render correctly.");
|
||||
|
@ -12,10 +12,6 @@
|
||||
|
||||
namespace vk
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
extern "C" void _mm_pause();
|
||||
#endif
|
||||
|
||||
fence::fence(VkDevice dev)
|
||||
{
|
||||
owner = dev;
|
||||
@ -48,11 +44,7 @@ namespace vk
|
||||
{
|
||||
while (!flushed)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
_mm_pause();
|
||||
#else
|
||||
__builtin_ia32_pause();
|
||||
#endif
|
||||
utils::pause();
|
||||
}
|
||||
}
|
||||
|
||||
@ -218,11 +210,7 @@ namespace vk
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
_mm_pause();
|
||||
#else
|
||||
__builtin_ia32_pause();
|
||||
#endif
|
||||
utils::pause();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -42,8 +42,10 @@ namespace rsx
|
||||
{
|
||||
rsx->sync();
|
||||
|
||||
// Write ref+get atomically (get will be written again with the same value at command end)
|
||||
vm::_ref<atomic_be_t<u64>>(rsx->dma_address + ::offset32(&RsxDmaControl::get)).store(u64{rsx->fifo_ctrl->get_pos()} << 32 | arg);
|
||||
// Write ref+get (get will be written again with the same value at command end)
|
||||
auto& dma = vm::_ref<RsxDmaControl>(rsx->dma_address);
|
||||
dma.get.release(rsx->fifo_ctrl->get_pos());
|
||||
dma.ref.store(arg);
|
||||
}
|
||||
|
||||
void semaphore_acquire(thread* rsx, u32 /*reg*/, u32 arg)
|
||||
@ -436,11 +438,11 @@ namespace rsx
|
||||
if (rsx->m_graphics_state & rsx::pipeline_state::transform_constants_dirty)
|
||||
{
|
||||
// Minor optimization: don't compare values if we already know we need invalidation
|
||||
copy_data_swap_u32(values, vm::base(rsx->fifo_ctrl->get_current_arg_ptr()), rcount);
|
||||
copy_data_swap_u32(values, static_cast<u32*>(vm::base(rsx->fifo_ctrl->get_current_arg_ptr())), rcount);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (copy_data_swap_u32_cmp(values, vm::base(rsx->fifo_ctrl->get_current_arg_ptr()), rcount))
|
||||
if (copy_data_swap_u32_cmp(values, static_cast<u32*>(vm::base(rsx->fifo_ctrl->get_current_arg_ptr())), rcount))
|
||||
{
|
||||
// Transform constants invalidation is expensive (~8k bytes per update)
|
||||
rsx->m_graphics_state |= rsx::pipeline_state::transform_constants_dirty;
|
||||
@ -472,7 +474,7 @@ namespace rsx
|
||||
rcount -= max - (max_vertex_program_instructions * 4);
|
||||
}
|
||||
|
||||
copy_data_swap_u32(&rsx::method_registers.transform_program[load_pos * 4 + index % 4], vm::base(rsx->fifo_ctrl->get_current_arg_ptr()), rcount);
|
||||
copy_data_swap_u32(&rsx::method_registers.transform_program[load_pos * 4 + index % 4], static_cast<u32*>(vm::base(rsx->fifo_ctrl->get_current_arg_ptr())), rcount);
|
||||
|
||||
rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty;
|
||||
rsx::method_registers.transform_program_load_set(load_pos + ((rcount + index % 4) / 4));
|
||||
|
@ -2,6 +2,8 @@
|
||||
#include "perf_meter.hpp"
|
||||
|
||||
#include "util/sysinfo.hpp"
|
||||
#include "util/fence.hpp"
|
||||
#include "util/tsc.hpp"
|
||||
#include "Utilities/Thread.h"
|
||||
|
||||
#include <map>
|
||||
@ -68,18 +70,10 @@ void perf_stat_base::print(const char* name) const noexcept
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
extern "C" void _mm_lfence();
|
||||
#endif
|
||||
|
||||
SAFE_BUFFERS(void) perf_stat_base::push(u64 data[66], u64 start_time, const char* name) noexcept
|
||||
{
|
||||
// Event end
|
||||
#ifdef _MSC_VER
|
||||
const u64 end_time = (_mm_lfence(), get_tsc());
|
||||
#else
|
||||
const u64 end_time = (__builtin_ia32_lfence(), get_tsc());
|
||||
#endif
|
||||
const u64 end_time = (utils::lfence(), utils::get_tsc());
|
||||
|
||||
// Compute difference in seconds
|
||||
const f64 diff = (end_time - start_time) * 1. / utils::get_tsc_freq();
|
||||
|
@ -2,26 +2,13 @@
|
||||
|
||||
#include "util/types.hpp"
|
||||
#include "util/logs.hpp"
|
||||
#include "util/tsc.hpp"
|
||||
#include "system_config.h"
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
|
||||
LOG_CHANNEL(perf_log, "PERF");
|
||||
|
||||
#ifdef _MSC_VER
|
||||
extern "C" u64 __rdtsc();
|
||||
|
||||
inline u64 get_tsc()
|
||||
{
|
||||
return __rdtsc();
|
||||
}
|
||||
#else
|
||||
inline u64 get_tsc()
|
||||
{
|
||||
return __builtin_ia32_rdtsc();
|
||||
}
|
||||
#endif
|
||||
|
||||
// TODO: constexpr with the help of bitcast
|
||||
template <auto Name>
|
||||
inline const auto perf_name = []
|
||||
@ -145,7 +132,7 @@ public:
|
||||
if constexpr (std::array<bool, sizeof...(SubEvents)>{(SubEvents == Event)...}[Index])
|
||||
{
|
||||
// Push actual timestamp into an array
|
||||
m_timestamps[Index + 1] = get_tsc();
|
||||
m_timestamps[Index + 1] = utils::get_tsc();
|
||||
}
|
||||
else if constexpr (Index < sizeof...(SubEvents))
|
||||
{
|
||||
@ -169,7 +156,7 @@ public:
|
||||
// Re-initialize first timestamp
|
||||
FORCE_INLINE SAFE_BUFFERS(void) restart() noexcept
|
||||
{
|
||||
m_timestamps[0] = get_tsc();
|
||||
m_timestamps[0] = utils::get_tsc();
|
||||
std::memset(m_timestamps + 1, 0, sizeof(m_timestamps) - sizeof(u64));
|
||||
}
|
||||
|
||||
|
@ -52,12 +52,15 @@ struct cfg_root : cfg::node
|
||||
cfg::_enum<tsx_usage> enable_TSX{ this, "Enable TSX", enable_tsx_by_default() ? tsx_usage::enabled : tsx_usage::disabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully
|
||||
cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false };
|
||||
cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
|
||||
cfg::_bool llvm_accurate_dfma{ this, "LLVM Accurate DFMA", true }; // Enable accurate double-precision FMA for CPUs which do not support it natively
|
||||
cfg::_bool llvm_ppu_jm_handling{ this, "PPU LLVM Java Mode Handling", true }; // Respect current Java Mode for alti-vec ops by PPU LLVM
|
||||
cfg::_int<-1, 14> ppu_128_reservations_loop_max_length{ this, "Accurate PPU 128-byte Reservation Op Max Length", 0, true }; // -1: Always accurate, 0: Never accurate, 1-14: max accurate loop length
|
||||
cfg::_bool llvm_ppu_accurate_vector_nan{ this, "PPU LLVM Accurate Vector NaN values", false };
|
||||
cfg::_int<-64, 64> stub_ppu_traps{ this, "Stub PPU Traps", 0, true }; // Hack, skip PPU traps for rare cases where the trap is continuable (specify relative instructions to skip)
|
||||
cfg::_bool full_width_avx512{ this, "Full Width AVX-512", false};
|
||||
cfg::_bool full_width_avx512{ this, "Full Width AVX-512", false };
|
||||
cfg::_bool use_accurate_dfma{ this, "Use Accurate DFMA", true }; // Enable accurate double-precision FMA for CPUs which do not support it natively
|
||||
cfg::_bool ppu_set_sat_bit{ this, "PPU Set Saturation Bit", false }; // Accuracy. If unset, completely disable saturation flag handling.
|
||||
cfg::_bool ppu_use_nj_bit{ this, "PPU Use Non-Java Mode Bit", false }; // Accuracy. If unset, ignore NJ flag completely.
|
||||
cfg::_bool ppu_fix_vnan{ this, "PPU Fixup Vector NaN Values", false }; // Accuracy. Partial.
|
||||
cfg::_bool ppu_set_vnan{ this, "PPU Accurate Vector NaN Values", false }; // Accuracy. Implies ppu_fix_vnan.
|
||||
cfg::_bool ppu_set_fpcc{ this, "PPU Set FPCC Bits", false }; // Accuracy.
|
||||
|
||||
cfg::_bool debug_console_mode{ this, "Debug Console Mode", false }; // Debug console emulation, not recommended
|
||||
cfg::_bool hook_functions{ this, "Hook static functions" };
|
||||
|
@ -256,8 +256,8 @@ void fmt_class_string<spu_decoder_type>::format(std::string& out, u64 arg)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case spu_decoder_type::precise: return "Interpreter (precise)";
|
||||
case spu_decoder_type::fast: return "Interpreter (fast)";
|
||||
case spu_decoder_type::_static: return "Interpreter (static)";
|
||||
case spu_decoder_type::dynamic: return "Interpreter (dynamic)";
|
||||
case spu_decoder_type::asmjit: return "Recompiler (ASMJIT)";
|
||||
case spu_decoder_type::llvm: return "Recompiler (LLVM)";
|
||||
}
|
||||
@ -440,8 +440,8 @@ void fmt_class_string<ppu_decoder_type>::format(std::string& out, u64 arg)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case ppu_decoder_type::precise: return "Interpreter (precise)";
|
||||
case ppu_decoder_type::fast: return "Interpreter (fast)";
|
||||
case ppu_decoder_type::_static: return "Interpreter (static)";
|
||||
case ppu_decoder_type::dynamic: return "Interpreter (dynamic)";
|
||||
case ppu_decoder_type::llvm: return "Recompiler (LLVM)";
|
||||
}
|
||||
|
||||
|
@ -2,15 +2,15 @@
|
||||
|
||||
enum class ppu_decoder_type : unsigned
|
||||
{
|
||||
precise = 0, // Don't change (0)
|
||||
fast, // Don't change (1)
|
||||
_static,
|
||||
dynamic,
|
||||
llvm,
|
||||
};
|
||||
|
||||
enum class spu_decoder_type : unsigned
|
||||
{
|
||||
precise = 0, // Don't change (0)
|
||||
fast, // Don't change (1)
|
||||
_static,
|
||||
dynamic,
|
||||
asmjit,
|
||||
llvm,
|
||||
};
|
||||
|
@ -509,7 +509,7 @@
|
||||
<ClInclude Include="util\media_utils.h" />
|
||||
<ClInclude Include="util\serialization.hpp" />
|
||||
<ClInclude Include="util\v128.hpp" />
|
||||
<ClInclude Include="util\v128sse.hpp" />
|
||||
<ClInclude Include="util\simd.hpp" />
|
||||
<ClInclude Include="util\to_endian.hpp" />
|
||||
<ClInclude Include="..\Utilities\bin_patch.h" />
|
||||
<ClInclude Include="..\Utilities\BitField.h" />
|
||||
|
@ -1122,7 +1122,7 @@
|
||||
<ClInclude Include="util\v128.hpp">
|
||||
<Filter>Utilities</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="util\v128sse.hpp">
|
||||
<ClInclude Include="util\simd.hpp">
|
||||
<Filter>Utilities</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="util\to_endian.hpp">
|
||||
|
@ -42,6 +42,7 @@ DYNAMIC_IMPORT("ntdll.dll", NtSetTimerResolution, NTSTATUS(ULONG DesiredResoluti
|
||||
#include <spawn.h>
|
||||
#include <sys/wait.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
@ -49,7 +50,7 @@ DYNAMIC_IMPORT("ntdll.dll", NtSetTimerResolution, NTSTATUS(ULONG DesiredResoluti
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) && defined(BLOCKS) // BLOCKS is required for dispatch_sync, but GCC-11 does not support it
|
||||
#if defined(__APPLE__)
|
||||
#include <dispatch/dispatch.h>
|
||||
#endif
|
||||
|
||||
@ -96,7 +97,7 @@ LOG_CHANNEL(q_debug, "QDEBUG");
|
||||
fmt::append(buf, "\nThread id = %s.", std::this_thread::get_id());
|
||||
}
|
||||
|
||||
const std::string_view text = buf.empty() ? _text : buf;
|
||||
std::string_view text = buf.empty() ? _text : buf;
|
||||
|
||||
if (s_headless)
|
||||
{
|
||||
@ -124,18 +125,16 @@ LOG_CHANNEL(q_debug, "QDEBUG");
|
||||
std::cerr << fmt::format("RPCS3: %s\n", text);
|
||||
}
|
||||
|
||||
auto show_report = [](std::string_view text)
|
||||
static auto show_report = [](std::string_view text)
|
||||
{
|
||||
fatal_error_dialog dlg(text);
|
||||
dlg.exec();
|
||||
};
|
||||
|
||||
#if defined(__APPLE__) && defined(BLOCKS) // BLOCKS is required for dispatch_sync, but GCC-11 does not support it
|
||||
// Cocoa access is not allowed outside of the main thread
|
||||
// Prevents crash dialogs from freezing the program
|
||||
#if defined(__APPLE__)
|
||||
if (!pthread_main_np())
|
||||
{
|
||||
dispatch_sync(dispatch_get_main_queue(), ^ { show_report(text); });
|
||||
dispatch_sync_f(dispatch_get_main_queue(), &text, [](void* text){ show_report(*static_cast<std::string_view*>(text)); });
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@ -143,9 +142,12 @@ LOG_CHANNEL(q_debug, "QDEBUG");
|
||||
// If Qt is already initialized, spawn a new RPCS3 process with an --error argument
|
||||
if (local)
|
||||
{
|
||||
// Since we only show an error, we can hope for a graceful exit
|
||||
show_report(text);
|
||||
std::exit(0);
|
||||
#ifdef _WIN32
|
||||
ExitProcess(0);
|
||||
#else
|
||||
kill(getpid(), SIGKILL);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
|
@ -408,7 +408,7 @@ bool cheat_engine::set_value(const u32 offset, const T value)
|
||||
|
||||
if (exec_code_at_end || exec_code_at_start)
|
||||
{
|
||||
extern void ppu_register_function_at(u32, u32, ppu_function_t);
|
||||
extern void ppu_register_function_at(u32, u32, ppu_intrp_func_t);
|
||||
|
||||
u32 addr = offset, size = sizeof(T);
|
||||
|
||||
|
@ -45,7 +45,7 @@ extern bool is_using_interpreter(u32 id_type)
|
||||
switch (id_type)
|
||||
{
|
||||
case 1: return g_cfg.core.ppu_decoder != ppu_decoder_type::llvm;
|
||||
case 2: return g_cfg.core.spu_decoder == spu_decoder_type::fast || g_cfg.core.spu_decoder == spu_decoder_type::precise;
|
||||
case 2: return g_cfg.core.spu_decoder != spu_decoder_type::asmjit && g_cfg.core.spu_decoder != spu_decoder_type::llvm;
|
||||
default: return true;
|
||||
}
|
||||
}
|
||||
|
@ -1104,16 +1104,16 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
|
||||
case emu_settings_type::PPUDecoder:
|
||||
switch (static_cast<ppu_decoder_type>(index))
|
||||
{
|
||||
case ppu_decoder_type::precise: return tr("Interpreter (precise)", "PPU decoder");
|
||||
case ppu_decoder_type::fast: return tr("Interpreter (fast)", "PPU decoder");
|
||||
case ppu_decoder_type::_static: return tr("Interpreter (static)", "PPU decoder");
|
||||
case ppu_decoder_type::dynamic: return tr("Interpreter (dynamic)", "PPU decoder");
|
||||
case ppu_decoder_type::llvm: return tr("Recompiler (LLVM)", "PPU decoder");
|
||||
}
|
||||
break;
|
||||
case emu_settings_type::SPUDecoder:
|
||||
switch (static_cast<spu_decoder_type>(index))
|
||||
{
|
||||
case spu_decoder_type::precise: return tr("Interpreter (precise)", "SPU decoder");
|
||||
case spu_decoder_type::fast: return tr("Interpreter (fast)", "SPU decoder");
|
||||
case spu_decoder_type::_static: return tr("Interpreter (static)", "SPU decoder");
|
||||
case spu_decoder_type::dynamic: return tr("Interpreter (dynamic)", "SPU decoder");
|
||||
case spu_decoder_type::asmjit: return tr("Recompiler (ASMJIT)", "SPU decoder");
|
||||
case spu_decoder_type::llvm: return tr("Recompiler (LLVM)", "SPU decoder");
|
||||
}
|
||||
|
@ -23,8 +23,6 @@ enum class emu_settings_type
|
||||
AccurateGETLLAR,
|
||||
AccurateSpuDMA,
|
||||
AccurateClineStores,
|
||||
AccurateLLVMdfma,
|
||||
AccurateVectorNaN,
|
||||
AccurateRSXAccess,
|
||||
AccurateXFloat,
|
||||
AccuratePPU128Loop,
|
||||
@ -40,7 +38,12 @@ enum class emu_settings_type
|
||||
ClocksScale,
|
||||
PerformanceReport,
|
||||
FullWidthAVX512,
|
||||
PPULLVMJavaModeHandling,
|
||||
AccurateDFMA,
|
||||
AccuratePPUSAT,
|
||||
AccuratePPUNJ,
|
||||
FixupPPUVNAN,
|
||||
AccuratePPUVNAN,
|
||||
AccuratePPUFPCC,
|
||||
|
||||
// Graphics
|
||||
Renderer,
|
||||
@ -178,8 +181,6 @@ inline static const QMap<emu_settings_type, cfg_location> settings_location =
|
||||
{ emu_settings_type::AccurateGETLLAR, { "Core", "Accurate GETLLAR"}},
|
||||
{ emu_settings_type::AccurateSpuDMA, { "Core", "Accurate SPU DMA"}},
|
||||
{ emu_settings_type::AccurateClineStores, { "Core", "Accurate Cache Line Stores"}},
|
||||
{ emu_settings_type::AccurateLLVMdfma, { "Core", "LLVM Accurate DFMA"}},
|
||||
{ emu_settings_type::AccurateVectorNaN, { "Core", "PPU LLVM Accurate Vector NaN values"}},
|
||||
{ emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}},
|
||||
{ emu_settings_type::AccurateXFloat, { "Core", "Accurate xfloat"}},
|
||||
{ emu_settings_type::MFCCommandsShuffling, { "Core", "MFC Commands Shuffling Limit"}},
|
||||
@ -194,7 +195,12 @@ inline static const QMap<emu_settings_type, cfg_location> settings_location =
|
||||
{ emu_settings_type::PerformanceReport, { "Core", "Enable Performance Report"}},
|
||||
{ emu_settings_type::FullWidthAVX512, { "Core", "Full Width AVX-512"}},
|
||||
{ emu_settings_type::NumPPUThreads, { "Core", "PPU Threads"}},
|
||||
{ emu_settings_type::PPULLVMJavaModeHandling, { "Core", "PPU LLVM Java Mode Handling"}},
|
||||
{ emu_settings_type::AccurateDFMA, { "Core", "Use Accurate DFMA"}},
|
||||
{ emu_settings_type::AccuratePPUSAT, { "Core", "PPU Set Saturation Bit"}},
|
||||
{ emu_settings_type::AccuratePPUNJ, { "Core", "PPU Use Non-Java Mode Bit"}},
|
||||
{ emu_settings_type::FixupPPUVNAN, { "Core", "PPU Fixup Vector NaN Values"}},
|
||||
{ emu_settings_type::AccuratePPUVNAN, { "Core", "PPU Accurate Vector NaN Values"}},
|
||||
{ emu_settings_type::AccuratePPUFPCC, { "Core", "PPU Set FPCC Bits"}},
|
||||
|
||||
// Graphics Tab
|
||||
{ emu_settings_type::Renderer, { "Video", "Renderer"}},
|
||||
|
@ -319,26 +319,26 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
|
||||
}
|
||||
|
||||
// PPU tool tips
|
||||
SubscribeTooltip(ui->ppu_precise, tooltips.settings.ppu_precise);
|
||||
SubscribeTooltip(ui->ppu_fast, tooltips.settings.ppu_fast);
|
||||
SubscribeTooltip(ui->ppu__static, tooltips.settings.ppu__static);
|
||||
SubscribeTooltip(ui->ppu_dynamic, tooltips.settings.ppu_dynamic);
|
||||
SubscribeTooltip(ui->ppu_llvm, tooltips.settings.ppu_llvm);
|
||||
|
||||
QButtonGroup *ppu_bg = new QButtonGroup(this);
|
||||
ppu_bg->addButton(ui->ppu_precise, static_cast<int>(ppu_decoder_type::precise));
|
||||
ppu_bg->addButton(ui->ppu_fast, static_cast<int>(ppu_decoder_type::fast));
|
||||
ppu_bg->addButton(ui->ppu__static, static_cast<int>(ppu_decoder_type::_static));
|
||||
ppu_bg->addButton(ui->ppu_dynamic, static_cast<int>(ppu_decoder_type::dynamic));
|
||||
ppu_bg->addButton(ui->ppu_llvm, static_cast<int>(ppu_decoder_type::llvm));
|
||||
|
||||
m_emu_settings->EnhanceRadioButton(ppu_bg, emu_settings_type::PPUDecoder);
|
||||
|
||||
// SPU tool tips
|
||||
SubscribeTooltip(ui->spu_precise, tooltips.settings.spu_precise);
|
||||
SubscribeTooltip(ui->spu_fast, tooltips.settings.spu_fast);
|
||||
SubscribeTooltip(ui->spu__static, tooltips.settings.spu__static);
|
||||
SubscribeTooltip(ui->spu_dynamic, tooltips.settings.spu_dynamic);
|
||||
SubscribeTooltip(ui->spu_asmjit, tooltips.settings.spu_asmjit);
|
||||
SubscribeTooltip(ui->spu_llvm, tooltips.settings.spu_llvm);
|
||||
|
||||
QButtonGroup *spu_bg = new QButtonGroup(this);
|
||||
spu_bg->addButton(ui->spu_precise, static_cast<int>(spu_decoder_type::precise));
|
||||
spu_bg->addButton(ui->spu_fast, static_cast<int>(spu_decoder_type::fast));
|
||||
spu_bg->addButton(ui->spu__static, static_cast<int>(spu_decoder_type::_static));
|
||||
spu_bg->addButton(ui->spu_dynamic, static_cast<int>(spu_decoder_type::dynamic));
|
||||
spu_bg->addButton(ui->spu_asmjit, static_cast<int>(spu_decoder_type::asmjit));
|
||||
spu_bg->addButton(ui->spu_llvm, static_cast<int>(spu_decoder_type::llvm));
|
||||
|
||||
@ -349,17 +349,24 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
|
||||
ui->accurateXFloat->setEnabled(checked);
|
||||
});
|
||||
|
||||
connect(ui->spu_fast, &QAbstractButton::toggled, [this](bool checked)
|
||||
connect(ui->spu__static, &QAbstractButton::toggled, [this](bool checked)
|
||||
{
|
||||
ui->accurateXFloat->setEnabled(checked);
|
||||
});
|
||||
|
||||
ui->accurateXFloat->setEnabled(ui->spu_llvm->isChecked() || ui->spu_fast->isChecked());
|
||||
connect(ui->spu_dynamic, &QAbstractButton::toggled, [this](bool checked)
|
||||
{
|
||||
ui->accurateXFloat->setEnabled(checked);
|
||||
});
|
||||
|
||||
ui->accurateXFloat->setEnabled(ui->spu_llvm->isChecked() || ui->spu_dynamic->isChecked());
|
||||
|
||||
#ifndef LLVM_AVAILABLE
|
||||
ui->ppu_llvm->setEnabled(false);
|
||||
ui->spu_llvm->setEnabled(false);
|
||||
ui->spu_dynamic->setEnabled(false);
|
||||
#endif
|
||||
ui->ppu_dynamic->setEnabled(false);
|
||||
|
||||
// _____ _____ _ _ _______ _
|
||||
// / ____| __ \| | | | |__ __| | |
|
||||
@ -1138,12 +1145,24 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
|
||||
m_emu_settings->EnhanceCheckBox(ui->debugConsoleMode, emu_settings_type::DebugConsoleMode);
|
||||
SubscribeTooltip(ui->debugConsoleMode, tooltips.settings.debug_console_mode);
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->accurateLLVMdfma, emu_settings_type::AccurateLLVMdfma);
|
||||
SubscribeTooltip(ui->accurateLLVMdfma, tooltips.settings.accurate_llvm_dfma);
|
||||
ui->accurateLLVMdfma->setDisabled(utils::has_fma3() || utils::has_fma4());
|
||||
m_emu_settings->EnhanceCheckBox(ui->accurateDFMA, emu_settings_type::AccurateDFMA);
|
||||
SubscribeTooltip(ui->accurateDFMA, tooltips.settings.accurate_dfma);
|
||||
ui->accurateDFMA->setDisabled(utils::has_fma3() || utils::has_fma4());
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->AccurateVectorNaN, emu_settings_type::AccurateVectorNaN);
|
||||
SubscribeTooltip(ui->AccurateVectorNaN, tooltips.settings.accurate_vector_nan);
|
||||
m_emu_settings->EnhanceCheckBox(ui->accuratePPUSAT, emu_settings_type::AccuratePPUSAT);
|
||||
SubscribeTooltip(ui->accuratePPUSAT, tooltips.settings.accurate_ppusat);
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->accuratePPUNJ, emu_settings_type::AccuratePPUNJ);
|
||||
SubscribeTooltip(ui->accuratePPUNJ, tooltips.settings.accurate_ppunj);
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->fixupPPUVNAN, emu_settings_type::FixupPPUVNAN);
|
||||
SubscribeTooltip(ui->fixupPPUVNAN, tooltips.settings.fixup_ppuvnan);
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->accuratePPUVNAN, emu_settings_type::AccuratePPUVNAN);
|
||||
SubscribeTooltip(ui->accuratePPUVNAN, tooltips.settings.accurate_ppuvnan);
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->accuratePPUFPCC, emu_settings_type::AccuratePPUFPCC);
|
||||
SubscribeTooltip(ui->accuratePPUFPCC, tooltips.settings.accurate_ppufpcc);
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->silenceAllLogs, emu_settings_type::SilenceAllLogs);
|
||||
SubscribeTooltip(ui->silenceAllLogs, tooltips.settings.silence_all_logs);
|
||||
@ -1927,9 +1946,6 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
|
||||
m_emu_settings->EnhanceCheckBox(ui->accurateRSXAccess, emu_settings_type::AccurateRSXAccess);
|
||||
SubscribeTooltip(ui->accurateRSXAccess, tooltips.settings.accurate_rsx_access);
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->ppuLlvmJavaModeHandling, emu_settings_type::PPULLVMJavaModeHandling);
|
||||
SubscribeTooltip(ui->ppuLlvmJavaModeHandling, tooltips.settings.ppu_llvm_java_mode_handling);
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->ppuPrecompilation, emu_settings_type::PPULLVMPrecompilation);
|
||||
SubscribeTooltip(ui->ppuPrecompilation, tooltips.settings.ppu_precompilation);
|
||||
|
||||
|
@ -57,16 +57,16 @@
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="ppu_layout">
|
||||
<item>
|
||||
<widget class="QRadioButton" name="ppu_precise">
|
||||
<widget class="QRadioButton" name="ppu__static">
|
||||
<property name="text">
|
||||
<string notr="true">Interpreter (precise)</string>
|
||||
<string notr="true">Interpreter (static)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QRadioButton" name="ppu_fast">
|
||||
<widget class="QRadioButton" name="ppu_dynamic">
|
||||
<property name="text">
|
||||
<string notr="true">Interpreter (fast)</string>
|
||||
<string notr="true">Interpreter (dynamic)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
@ -87,16 +87,16 @@
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="spu_layout">
|
||||
<item>
|
||||
<widget class="QRadioButton" name="spu_precise">
|
||||
<widget class="QRadioButton" name="spu__static">
|
||||
<property name="text">
|
||||
<string notr="true">Interpreter (precise)</string>
|
||||
<string notr="true">Interpreter (static)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QRadioButton" name="spu_fast">
|
||||
<widget class="QRadioButton" name="spu_dynamic">
|
||||
<property name="text">
|
||||
<string notr="true">Interpreter (fast)</string>
|
||||
<string notr="true">Interpreter (dynamic)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
@ -2050,9 +2050,9 @@
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="accurateLLVMdfma">
|
||||
<widget class="QCheckBox" name="accurateDFMA">
|
||||
<property name="text">
|
||||
<string>Accurate LLVM DFMA</string>
|
||||
<string>Accurate DFMA</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
@ -2064,16 +2064,37 @@
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="AccurateVectorNaN">
|
||||
<widget class="QCheckBox" name="accuratePPUSAT">
|
||||
<property name="text">
|
||||
<string>PPU LLVM Accurate Vector NaNs</string>
|
||||
<string>Accurate PPU Saturation Bit</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="ppuLlvmJavaModeHandling">
|
||||
<widget class="QCheckBox" name="accuratePPUNJ">
|
||||
<property name="text">
|
||||
<string>PPU LLVM Java Mode Handling</string>
|
||||
<string>Accurate PPU Non-Java Mode</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="fixupPPUVNAN">
|
||||
<property name="text">
|
||||
<string>PPU Vector NaN Fixup</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="accuratePPUVNAN">
|
||||
<property name="text">
|
||||
<string>Accurate PPU Vector NaN Handling</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="accuratePPUFPCC">
|
||||
<property name="text">
|
||||
<string>Accurate PPU Float Condition Control</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
@ -55,12 +55,12 @@ public:
|
||||
|
||||
// cpu
|
||||
|
||||
const QString ppu_precise = tr("Interprets PPU code with absolute accuracy.\nThis is the most accurate Interpreter, but very slow to play games with.\nYou may try this as a last resort if you encounter odd bugs or crashes.\nIf unsure, use PPU Interpreter Fast or PPU Recompiler (LLVM).");
|
||||
const QString ppu_fast = tr("Interprets PPU code with sacrificed accuracy in order to achieve better performance.\nThis is the fastest interpreter.\nIt very rarely breaks games even in comparison to the Precise option.\nTry this if PPU Recompiler (LLVM) fails.");
|
||||
const QString ppu__static = tr("Interpreter (slow). Try this if PPU Recompiler (LLVM) doesn't work.");
|
||||
const QString ppu_dynamic = tr("Alternative interpreter (slow). May be faster than static interpreter. Try this if PPU Recompiler (LLVM) doesn't work.");
|
||||
const QString ppu_llvm = tr("Recompiles and caches the game's PPU code using the LLVM Recompiler once before running it for the first time.\nThis is by far the fastest option and should always be used.\nShould you face compatibility issues, fall back to one of the Interpreters and retry.\nIf unsure, use this option.");
|
||||
const QString ppu_precompilation = tr("Searches the game's directory and precompiles extra PPU modules during boot.\nIf disabled, these modules will only be compiled when needed. Depending on the game, this might interrupt the gameplay unexpectedly and possibly frequently.\nOnly disable this if you want to get ingame more quickly.");
|
||||
const QString spu_precise = tr("Interprets SPU code with absolute accuracy.\nThis is extremely slow but may fix broken graphics in some games.");
|
||||
const QString spu_fast = tr("Interprets SPU code with sacrificed accuracy in order to achieve better performance.\nThis is slower than the SPU Recompiler but significantly faster than the precise interpreter.\nHowever, games rarely need this.");
|
||||
const QString spu__static = tr("Interpreter (slow). Try this if SPU Recompiler (LLVM) doesn't work.");
|
||||
const QString spu_dynamic = tr("Alternative interpreter (slow). May be faster than static interpreter. Try this if SPU Recompiler (LLVM) doesn't work.");
|
||||
const QString spu_asmjit = tr("Recompiles the game's SPU code using the ASMJIT Recompiler.\nThis is the fast option with very good compatibility.\nIf unsure, use this option.");
|
||||
const QString spu_llvm = tr("Recompiles and caches the game's SPU code using the LLVM Recompiler before running which adds extra start-up time.\nThis is the fastest option with very good compatibility.\nIf you experience issues, use the ASMJIT Recompiler.");
|
||||
const QString accurate_xfloat = tr("Adds extra accuracy to SPU float vectors processing.\nFixes bugs in various games at the cost of performance.\nThis setting is only applied when SPU Decoder is set to Fast or LLVM.");
|
||||
@ -70,6 +70,12 @@ public:
|
||||
const QString spu_block_size = tr("This option controls the SPU analyser, particularly the size of compiled units. The Mega and Giga modes may improve performance by tying smaller units together, decreasing the number of compiled units but increasing their size.\nUse the Safe mode for maximum compatibility.");
|
||||
const QString preferred_spu_threads = tr("Some SPU stages are sensitive to race conditions and allowing a limited number at a time helps alleviate performance stalls.\nSetting this to a smaller value might improve performance and reduce stuttering in some games.\nLeave this on auto if performance is negatively affected when setting a small value.");
|
||||
const QString full_width_avx512 = tr("Enables the use of code with full width AVX-512.\nThis code can be executed much faster, but may cause a loss in performance if your CPU model experiences downclocking on wide AVX-512 loads.\nNote that AVX-512 instructions will be used regardless of this option, just at 128 and 256 bit width.");
|
||||
const QString accurate_dfma = tr("Use accurate double-precision FMA instructions in PPU and SPU backends.\nWhile disabling it might give a decent performance boost if your CPU doesn't support FMA, it may also introduce subtle bugs that otherwise do not occur.\nYou shouldn't disable it if your CPU supports FMA.");
|
||||
const QString accurate_ppusat = tr("Accurately set Saturation Bit values in PPU backends.\nIf unsure, do not modify this setting.");
|
||||
const QString accurate_ppunj = tr("Respect Non-Java Mode Bit values for vector ops in PPU backends.\nIf unsure, do not modify this setting.");
|
||||
const QString fixup_ppuvnan = tr("Fixup NaN results in vector instructions in PPU backends.\nIf unsure, do not modify this setting.");
|
||||
const QString accurate_ppuvnan = tr("Accurately set NaN results in vector instructions in PPU backends.\nIf unsure, do not modify this setting.");
|
||||
const QString accurate_ppufpcc = tr("Accurately set FPCC Bits in PPU backends.\nIf unsure, do not modify this setting.");
|
||||
|
||||
// debug
|
||||
|
||||
@ -80,8 +86,6 @@ public:
|
||||
const QString accurate_getllar = tr("Accurately processes SPU MFC_GETLLAR operation.");
|
||||
const QString accurate_spu_dma = tr("Accurately processes SPU DMA operations.");
|
||||
const QString accurate_cache_line_stores = tr("Accurately processes PPU DCBZ instruction.\nIn addition, when combined with Accurate SPU DMA, SPU PUT cache line accesses will be processed atomically.");
|
||||
const QString accurate_llvm_dfma = tr("Provides extra accuracy on FMA instructions at the cost of performance.\nWhile disabling it might give a decent performance boost if your CPU doesn't support FMA, it may also introduce subtle bugs that otherwise do not occur.\nYou can't disable it if your CPU supports FMA.");
|
||||
const QString accurate_vector_nan = tr("Forces the floating point NaN (Not A Number) values outputted from PPU vector instructions to be accurate to the real hardware. (0x7FC00000)");
|
||||
const QString accurate_rsx_access = tr("Forces RSX pauses on SPU MFC_GETLLAR and SPU MFC_PUTLLUC operations.");
|
||||
const QString mfc_delay_command = tr("Forces delaying any odd MFC command, waits for at least 2 pending commands to execute them in a random order.\nMust be used with either SPU interpreters currently.\nSeverely degrades performance! If unsure, don't use this option.");
|
||||
const QString hook_static_functions = tr("Allows to hook some functions like 'memcpy' replacing them with high-level implementations. May do nothing or break things. Experimental.");
|
||||
@ -101,7 +105,6 @@ public:
|
||||
const QString accurate_ppu_128_loop = tr("When enabled, PPU atomic operations will operate on entire cache line data, as opposed to a single 64bit block of memory when disabled.\nNumerical values control whether or not to enable the accurate version based on the atomic operation's length.");
|
||||
const QString enable_performance_report = tr("Measure certain events and print a chart after the emulator is stopped. Don't enable if not asked to.");
|
||||
const QString num_ppu_threads = tr("Affects maximum amount of PPU threads running concurrently, the value of 1 has very low compatibility with games.\n2 is the default, if unsure do not modify this setting.");
|
||||
const QString ppu_llvm_java_mode_handling = tr("Respect current Java Mode for alti-vec ops by PPU LLVM.\nIf unsure, do not modify this setting.");
|
||||
|
||||
// emulator
|
||||
|
||||
|
@ -1,15 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include "util/types.hpp"
|
||||
#include "util/tsc.hpp"
|
||||
#include <functional>
|
||||
|
||||
extern bool g_use_rtm;
|
||||
extern u64 g_rtm_tx_limit1;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
extern "C"
|
||||
{
|
||||
u64 __rdtsc();
|
||||
u32 _xbegin();
|
||||
void _xend();
|
||||
void _mm_pause();
|
||||
@ -27,24 +27,17 @@ extern "C"
|
||||
|
||||
s64 _div128(s64, s64, s64, s64*);
|
||||
u64 _udiv128(u64, u64, u64, u64*);
|
||||
void __debugbreak();
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace utils
|
||||
{
|
||||
inline u64 get_tsc()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return __rdtsc();
|
||||
#else
|
||||
return __builtin_ia32_rdtsc();
|
||||
#endif
|
||||
}
|
||||
|
||||
// Transaction helper (result = pair of success and op result, or just bool)
|
||||
template <typename F, typename R = std::invoke_result_t<F>>
|
||||
inline auto tx_start(F op)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
uint status = -1;
|
||||
|
||||
for (auto stamp0 = get_tsc(), stamp1 = stamp0; g_use_rtm && stamp1 - stamp0 <= g_rtm_tx_limit1; stamp1 = get_tsc())
|
||||
@ -90,6 +83,9 @@ namespace utils
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
static_cast<void>(op);
|
||||
#endif
|
||||
|
||||
if constexpr (std::is_void_v<R>)
|
||||
{
|
||||
@ -113,7 +109,7 @@ namespace utils
|
||||
const u64 value = reinterpret_cast<u64>(func);
|
||||
const void* ptr = reinterpret_cast<const void*>(value);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
return _mm_prefetch(static_cast<const char*>(ptr), 2);
|
||||
#else
|
||||
return __builtin_prefetch(ptr, 0, 2);
|
||||
@ -128,7 +124,7 @@ namespace utils
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
return _mm_prefetch(static_cast<const char*>(ptr), 3);
|
||||
#else
|
||||
return __builtin_prefetch(ptr, 0, 3);
|
||||
@ -142,7 +138,7 @@ namespace utils
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
return _m_prefetchw(ptr);
|
||||
#else
|
||||
return __builtin_prefetch(ptr, 1, 0);
|
||||
@ -160,8 +156,10 @@ namespace utils
|
||||
return _rotl8(x, n);
|
||||
#elif defined(__clang__)
|
||||
return __builtin_rotateleft8(x, n);
|
||||
#else
|
||||
#elif defined(ARCH_X64)
|
||||
return __builtin_ia32_rolqi(x, n);
|
||||
#else
|
||||
return (x << (n & 7)) | (x >> ((-n & 7)));
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -176,8 +174,10 @@ namespace utils
|
||||
return _rotl16(x, static_cast<uchar>(n));
|
||||
#elif defined(__clang__)
|
||||
return __builtin_rotateleft16(x, n);
|
||||
#else
|
||||
#elif defined(ARCH_X64)
|
||||
return __builtin_ia32_rolhi(x, n);
|
||||
#else
|
||||
return (x << (n & 15)) | (x >> ((-n & 15)));
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -344,10 +344,14 @@ namespace utils
|
||||
|
||||
inline void pause()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#if defined(ARCH_ARM64)
|
||||
__asm__ volatile("yield");
|
||||
#elif defined(_M_X64)
|
||||
_mm_pause();
|
||||
#else
|
||||
#elif defined(ARCH_X64)
|
||||
__builtin_ia32_pause();
|
||||
#else
|
||||
#error "Missing utils::pause() implementation"
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -391,10 +395,27 @@ namespace utils
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return (T*)ptr;
|
||||
#else
|
||||
#elif defined(ARCH_X64)
|
||||
T* result;
|
||||
__asm__("movq %1, %0;" : "=r" (result) : "r" (ptr) : "memory");
|
||||
return result;
|
||||
#elif defined(ARCH_ARM64)
|
||||
T* result;
|
||||
__asm__("mov %0, %1" : "=r" (result) : "r" (ptr) : "memory");
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Emits a breakpoint/trap instruction at the call site.
// Implementation is selected at compile time:
//   _M_X64     -> __debugbreak() intrinsic
//   ARCH_X64   -> inline "int3"
//   ARCH_ARM64 -> inline "brk 0x42"
// Any other target is rejected with #error rather than silently doing nothing.
inline void trap()
|
||||
{
|
||||
#ifdef _M_X64
|
||||
__debugbreak();
|
||||
#elif defined(ARCH_X64)
|
||||
__asm__ volatile("int3");
|
||||
#elif defined(ARCH_ARM64)
|
||||
// NOTE(review): 0x42 is the immediate encoded in the brk instruction; its meaning to a debugger/handler is not shown here
__asm__ volatile("brk 0x42");
|
||||
#else
|
||||
#error "Missing utils::trap() implementation"
|
||||
#endif
|
||||
}
|
||||
} // namespace utils
|
||||
|
@ -35,6 +35,7 @@ namespace utils
|
||||
|
||||
#include "asm.hpp"
|
||||
#include "endian.hpp"
|
||||
#include "tsc.hpp"
|
||||
|
||||
// Total number of entries.
|
||||
static constexpr usz s_hashtable_size = 1u << 17;
|
||||
@ -804,17 +805,9 @@ namespace
|
||||
};
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
extern "C" u64 __rdtsc();
|
||||
#endif
|
||||
|
||||
u64 utils::get_unique_tsc()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
const u64 stamp0 = __rdtsc();
|
||||
#else
|
||||
const u64 stamp0 = __builtin_ia32_rdtsc();
|
||||
#endif
|
||||
const u64 stamp0 = utils::get_tsc();
|
||||
|
||||
return s_min_tsc.atomic_op([&](u64& tsc)
|
||||
{
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable: 4996)
|
||||
|
||||
@ -67,7 +67,7 @@ namespace utils
|
||||
|
||||
FORCE_INLINE void atomic_fence_consume()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
_ReadWriteBarrier();
|
||||
#else
|
||||
__atomic_thread_fence(__ATOMIC_CONSUME);
|
||||
@ -76,7 +76,7 @@ FORCE_INLINE void atomic_fence_consume()
|
||||
|
||||
FORCE_INLINE void atomic_fence_acquire()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
_ReadWriteBarrier();
|
||||
#else
|
||||
__atomic_thread_fence(__ATOMIC_ACQUIRE);
|
||||
@ -85,7 +85,7 @@ FORCE_INLINE void atomic_fence_acquire()
|
||||
|
||||
FORCE_INLINE void atomic_fence_release()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
_ReadWriteBarrier();
|
||||
#else
|
||||
__atomic_thread_fence(__ATOMIC_RELEASE);
|
||||
@ -94,7 +94,7 @@ FORCE_INLINE void atomic_fence_release()
|
||||
|
||||
FORCE_INLINE void atomic_fence_acq_rel()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
_ReadWriteBarrier();
|
||||
#else
|
||||
__atomic_thread_fence(__ATOMIC_ACQ_REL);
|
||||
@ -103,16 +103,18 @@ FORCE_INLINE void atomic_fence_acq_rel()
|
||||
|
||||
FORCE_INLINE void atomic_fence_seq_cst()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
_ReadWriteBarrier();
|
||||
_InterlockedOr(static_cast<long*>(_AddressOfReturnAddress()), 0);
|
||||
_ReadWriteBarrier();
|
||||
#else
|
||||
#elif defined(ARCH_X64)
|
||||
__asm__ volatile ("lock orl $0, 0(%%rsp);" ::: "cc", "memory");
|
||||
#else
|
||||
__atomic_thread_fence(__ATOMIC_SEQ_CST);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
@ -342,7 +344,7 @@ struct atomic_storage
|
||||
|
||||
using type = get_uint_t<sizeof(T)>;
|
||||
|
||||
#ifndef _MSC_VER
|
||||
#ifndef _M_X64
|
||||
|
||||
#if defined(__ATOMIC_HLE_ACQUIRE) && defined(__ATOMIC_HLE_RELEASE)
|
||||
static constexpr int s_hle_ack = __ATOMIC_SEQ_CST | __ATOMIC_HLE_ACQUIRE;
|
||||
@ -472,7 +474,7 @@ struct atomic_storage
|
||||
|
||||
/* Second part: MSVC-specific */
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
static inline T add_fetch(T& dest, T value)
|
||||
{
|
||||
return atomic_storage<T>::fetch_add(dest, value) + value;
|
||||
@ -529,6 +531,7 @@ struct atomic_storage
|
||||
|
||||
static inline bool bts(T& dest, uint bit)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
uchar* dst = reinterpret_cast<uchar*>(&dest);
|
||||
|
||||
if constexpr (sizeof(T) < 4)
|
||||
@ -539,18 +542,23 @@ struct atomic_storage
|
||||
bit = bit + (ptr & 3) * 8;
|
||||
dst = reinterpret_cast<T*>(ptr & -4);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
return _interlockedbittestandset((long*)dst, bit) != 0;
|
||||
#else
|
||||
#elif defined(ARCH_X64)
|
||||
bool result;
|
||||
__asm__ volatile ("lock btsl %2, 0(%1)\n" : "=@ccc" (result) : "r" (dst), "Ir" (bit) : "cc", "memory");
|
||||
return result;
|
||||
#else
|
||||
const T value = static_cast<T>(1) << bit;
|
||||
return (__atomic_fetch_or(&dest, value, __ATOMIC_SEQ_CST) & value) != 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool btr(T& dest, uint bit)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
uchar* dst = reinterpret_cast<uchar*>(&dest);
|
||||
|
||||
if constexpr (sizeof(T) < 4)
|
||||
@ -561,18 +569,23 @@ struct atomic_storage
|
||||
bit = bit + (ptr & 3) * 8;
|
||||
dst = reinterpret_cast<T*>(ptr & -4);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
return _interlockedbittestandreset((long*)dst, bit) != 0;
|
||||
#else
|
||||
#elif defined(ARCH_X64)
|
||||
bool result;
|
||||
__asm__ volatile ("lock btrl %2, 0(%1)\n" : "=@ccc" (result) : "r" (dst), "Ir" (bit) : "cc", "memory");
|
||||
return result;
|
||||
#else
|
||||
const T value = static_cast<T>(1) << bit;
|
||||
return (__atomic_fetch_and(&dest, ~value, __ATOMIC_SEQ_CST) & value) != 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool btc(T& dest, uint bit)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
uchar* dst = reinterpret_cast<uchar*>(&dest);
|
||||
|
||||
if constexpr (sizeof(T) < 4)
|
||||
@ -583,8 +596,9 @@ struct atomic_storage
|
||||
bit = bit + (ptr & 3) * 8;
|
||||
dst = reinterpret_cast<T*>(ptr & -4);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
while (true)
|
||||
{
|
||||
// Keep trying until we actually invert desired bit
|
||||
@ -593,10 +607,13 @@ struct atomic_storage
|
||||
if (_interlockedbittestandreset((long*)dst, bit))
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
#elif defined(ARCH_X64)
|
||||
bool result;
|
||||
__asm__ volatile ("lock btcl %2, 0(%1)\n" : "=@ccc" (result) : "r" (dst), "Ir" (bit) : "cc", "memory");
|
||||
return result;
|
||||
#else
|
||||
const T value = static_cast<T>(1) << bit;
|
||||
return (__atomic_fetch_xor(&dest, value, __ATOMIC_SEQ_CST) & value) != 0;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
@ -606,7 +623,7 @@ struct atomic_storage
|
||||
template <typename T>
|
||||
struct atomic_storage<T, 1> : atomic_storage<T, 0>
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
static inline bool compare_exchange(T& dest, T& comp, T exch)
|
||||
{
|
||||
const char v = std::bit_cast<char>(comp);
|
||||
@ -676,7 +693,7 @@ struct atomic_storage<T, 1> : atomic_storage<T, 0>
|
||||
template <typename T>
|
||||
struct atomic_storage<T, 2> : atomic_storage<T, 0>
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
static inline bool compare_exchange(T& dest, T& comp, T exch)
|
||||
{
|
||||
const short v = std::bit_cast<short>(comp);
|
||||
@ -758,7 +775,7 @@ struct atomic_storage<T, 2> : atomic_storage<T, 0>
|
||||
template <typename T>
|
||||
struct atomic_storage<T, 4> : atomic_storage<T, 0>
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
static inline bool compare_exchange(T& dest, T& comp, T exch)
|
||||
{
|
||||
const long v = std::bit_cast<long>(comp);
|
||||
@ -854,7 +871,7 @@ struct atomic_storage<T, 4> : atomic_storage<T, 0>
|
||||
template <typename T>
|
||||
struct atomic_storage<T, 8> : atomic_storage<T, 0>
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
static inline bool compare_exchange(T& dest, T& comp, T exch)
|
||||
{
|
||||
const llong v = std::bit_cast<llong>(comp);
|
||||
@ -950,7 +967,7 @@ struct atomic_storage<T, 8> : atomic_storage<T, 0>
|
||||
template <typename T>
|
||||
struct atomic_storage<T, 16> : atomic_storage<T, 0>
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_X64
|
||||
static inline T load(const T& dest)
|
||||
{
|
||||
atomic_fence_acquire();
|
||||
@ -995,7 +1012,7 @@ struct atomic_storage<T, 16> : atomic_storage<T, 0>
|
||||
utils::atomic_store16(&dest, std::bit_cast<u128>(value));
|
||||
atomic_fence_release();
|
||||
}
|
||||
#else
|
||||
#elif defined(ARCH_X64)
|
||||
static inline T load(const T& dest)
|
||||
{
|
||||
alignas(16) T r;
|
||||
@ -1078,6 +1095,91 @@ struct atomic_storage<T, 16> : atomic_storage<T, 0>
|
||||
__asm__ volatile("movdqa %0, %1;" :: "x" (val), "m" (dest) : "memory");
|
||||
#endif
|
||||
}
|
||||
#elif defined(ARCH_ARM64)
|
||||
// ARM64 16-byte load.
// Reads both 64-bit halves of `dest` with an exclusive load-acquire pair (ldaxp), then
// writes the very same halves back with an exclusive store-release pair (stlxp) and
// retries while the store reports a non-zero status in `tmp`.
// Returns the 16 bytes that were read, copied into a T.
// NOTE(review): although `dest` is taken by const reference and listed as an asm input,
// the stlxp does store to it, so the memory must be writable — confirm no caller passes
// read-only storage.
static inline T load(const T& dest)
|
||||
{
|
||||
u32 tmp;
|
||||
u64 data[2];
|
||||
// Label 1 is the retry point; cbnz loops back while the exclusive store failed.
__asm__ volatile("1:\n"
|
||||
"ldaxp %x[data0], %x[data1], %[dest]\n"
|
||||
"stlxp %w[tmp], %x[data0], %x[data1], %[dest]\n"
|
||||
"cbnz %w[tmp], 1b\n"
|
||||
: [tmp] "=&r" (tmp), [data0] "=&r" (data[0]), [data1] "=&r" (data[1])
|
||||
: [dest] "Q" (dest)
|
||||
: "memory"
|
||||
);
|
||||
T result;
|
||||
// Reassemble the two 64-bit registers into the caller's type
std::memcpy(&result, data, 16);
|
||||
return result;
|
||||
}
|
||||
|
||||
// ARM64 16-byte observe: currently just forwards to load(dest) and returns its result.
static inline T observe(const T& dest)
|
||||
{
|
||||
// TODO: no dedicated implementation yet; reuses the full load() sequence
|
||||
return load(dest);
|
||||
}
|
||||
|
||||
// ARM64 16-byte compare-and-exchange.
// dest: storage to update; comp: expected value (in/out); exch: replacement value.
// Returns true if `dest` held `comp` and was replaced by `exch`.
// Returns false otherwise, after copying the value observed in `dest` into `comp`.
static inline bool compare_exchange(T& dest, T& comp, T exch)
|
||||
{
|
||||
bool result;
|
||||
// Split the expected and replacement values into 64-bit halves for the register pairs
u64 cmp[2];
|
||||
std::memcpy(cmp, &comp, 16);
|
||||
u64 data[2];
|
||||
std::memcpy(data, &exch, 16);
|
||||
u64 prev[2];
|
||||
// 1: exclusive-load both halves into prev, compare them with cmp (cmp + ccmp);
//    on mismatch jump to 2, otherwise exclusive-store data and retry from 1
//    while the store status in `result` is non-zero.
// 2: cset turns the `eq` condition left by the comparison into result = 1/0.
__asm__ volatile("1:\n"
|
||||
"ldaxp %x[prev0], %x[prev1], %[storage]\n"
|
||||
"cmp %x[prev0], %x[cmp0]\n"
|
||||
"ccmp %x[prev1], %x[cmp1], #0, eq\n"
|
||||
"b.ne 2f\n"
|
||||
"stlxp %w[result], %x[data0], %x[data1], %[storage]\n"
|
||||
"cbnz %w[result], 1b\n"
|
||||
"2:\n"
|
||||
"cset %w[result], eq\n"
|
||||
: [result] "=&r" (result), [storage] "+Q" (dest), [prev0] "=&r" (prev[0]), [prev1] "=&r" (prev[1])
|
||||
: [data0] "r" (data[0]), [data1] "r" (data[1]), [cmp0] "r" (cmp[0]), [cmp1] "r" (cmp[1])
|
||||
: "cc", "memory"
|
||||
);
|
||||
|
||||
if (result)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// Mismatch: report the value actually found so the caller can retry with it
std::memcpy(&comp, prev, 16);
|
||||
return false;
|
||||
}
|
||||
|
||||
// ARM64 16-byte exchange.
// Replaces the contents of `dest` with `value` and returns the 16 bytes that were
// read from `dest` by the exclusive load of the iteration whose store succeeded.
static inline T exchange(T& dest, T value)
|
||||
{
|
||||
u32 tmp;
|
||||
u64 src[2];
|
||||
u64 data[2];
|
||||
// Split the new value into two 64-bit halves for the store pair
std::memcpy(src, &value, 16);
|
||||
// Exclusive load of the old halves, exclusive store of the new ones;
// cbnz retries from label 1 while the store status in `tmp` is non-zero.
__asm__ volatile("1:\n"
|
||||
"ldaxp %x[data0], %x[data1], %[dest]\n"
|
||||
"stlxp %w[tmp], %x[src0], %x[src1], %[dest]\n"
|
||||
"cbnz %w[tmp], 1b\n"
|
||||
: [tmp] "=&r" (tmp), [dest] "+Q" (dest), [data0] "=&r" (data[0]), [data1] "=&r" (data[1])
|
||||
: [src0] "r" (src[0]), [src1] "r" (src[1])
|
||||
: "memory"
|
||||
);
|
||||
T result;
|
||||
std::memcpy(&result, data, 16);
|
||||
return result;
|
||||
}
|
||||
|
||||
// ARM64 16-byte store: implemented as exchange(dest, value) with the old value discarded.
static inline void store(T& dest, T value)
|
||||
{
|
||||
// TODO: no dedicated store sequence yet; pays for the load half of exchange()
|
||||
exchange(dest, value);
|
||||
}
|
||||
|
||||
// ARM64 16-byte release-store: implemented as exchange(dest, value) with the old value discarded.
static inline void release(T& dest, T value)
|
||||
{
|
||||
// TODO: no dedicated release sequence yet; identical to store() for now
|
||||
exchange(dest, value);
|
||||
}
|
||||
#endif
|
||||
|
||||
// TODO
|
||||
@ -1562,17 +1664,50 @@ public:
|
||||
|
||||
// Atomically sets bit number `bit` (taken modulo the bit width of T) and
// returns whether that bit was already set beforehand.
bool bit_test_set(uint bit)
{
    // Keep the index inside the value, for both code paths
    const uint index = static_cast<uint>(bit & (sizeof(T) * 8 - 1));

    if constexpr (std::is_integral<type>::value)
    {
        return atomic_storage<type>::bts(m_data, index);
    }

    // Generic path: the requested bit must be captured, otherwise every call
    // would operate on the most significant bit regardless of the argument.
    return atomic_op([index](type& v)
    {
        const auto old = v;
        const auto mask = type(1) << index;
        v |= mask;
        return !!(old & mask);
    });
}
|
||||
|
||||
// Atomically clears bit number `bit` (taken modulo the bit width of T) and
// returns whether that bit was set beforehand.
bool bit_test_reset(uint bit)
{
    // Keep the index inside the value, for both code paths
    const uint index = static_cast<uint>(bit & (sizeof(T) * 8 - 1));

    if constexpr (std::is_integral<type>::value)
    {
        return atomic_storage<type>::btr(m_data, index);
    }

    // Generic path: the requested bit must be captured, otherwise every call
    // would operate on the most significant bit regardless of the argument.
    return atomic_op([index](type& v)
    {
        const auto old = v;
        const auto mask = type(1) << index;
        v &= ~mask;
        return !!(old & mask);
    });
}
|
||||
|
||||
// Atomically flips bit number `bit` (taken modulo the bit width of T) and
// returns whether that bit was set beforehand.
bool bit_test_invert(uint bit)
{
    // Keep the index inside the value, for both code paths
    const uint index = static_cast<uint>(bit & (sizeof(T) * 8 - 1));

    if constexpr (std::is_integral<type>::value)
    {
        return atomic_storage<type>::btc(m_data, index);
    }

    // Generic path: the requested bit must be captured, otherwise every call
    // would operate on the most significant bit regardless of the argument.
    return atomic_op([index](type& v)
    {
        const auto old = v;
        const auto mask = type(1) << index;
        v ^= mask;
        return !!(old & mask);
    });
}
|
||||
|
||||
// Timeout is discouraged
|
||||
|
24
rpcs3/util/fence.hpp
Normal file
24
rpcs3/util/fence.hpp
Normal file
@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include "util/types.hpp"
|
||||
|
||||
#ifdef _M_X64
|
||||
extern "C" void _mm_lfence();
|
||||
#endif
|
||||
|
||||
namespace utils
|
||||
{
|
||||
// Issues a load fence using whatever the target provides:
//   _M_X64     -> _mm_lfence() intrinsic (declared extern "C" at the top of this header)
//   ARCH_X64   -> __builtin_ia32_lfence()
//   ARCH_ARM64 -> inline "isb" (stand-in, see TODO below)
// Any other target fails the build with #error.
inline void lfence()
|
||||
{
|
||||
#ifdef _M_X64
|
||||
_mm_lfence();
|
||||
#elif defined(ARCH_X64)
|
||||
__builtin_ia32_lfence();
|
||||
#elif defined(ARCH_ARM64)
|
||||
// TODO: "isb" is used in place of an x86 LFENCE; confirm it gives callers the ordering they rely on
|
||||
__asm__ volatile("isb");
|
||||
#else
|
||||
#error "Missing lfence() implementation"
|
||||
#endif
|
||||
}
|
||||
}
|
@ -19,10 +19,10 @@ namespace stx
|
||||
class atomic_ptr;
|
||||
|
||||
// Basic assumption of userspace pointer size
|
||||
constexpr uint c_ptr_size = 47;
|
||||
constexpr uint c_ptr_size = 48;
|
||||
|
||||
// Use lower 17 bits as atomic_ptr internal counter of borrowed refs (pointer itself is shifted)
|
||||
constexpr uint c_ref_mask = 0x1ffff, c_ref_size = 17;
|
||||
constexpr uint c_ref_mask = 0xffff, c_ref_size = 16;
|
||||
|
||||
// Remaining pointer bits
|
||||
constexpr uptr c_ptr_mask = static_cast<uptr>(-1) << c_ref_size;
|
||||
|
2143
rpcs3/util/simd.hpp
Normal file
2143
rpcs3/util/simd.hpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -19,15 +19,14 @@
|
||||
#endif
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "util/fence.hpp"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
extern "C"
|
||||
{
|
||||
u64 _xgetbv(u32);
|
||||
}
|
||||
#ifdef _M_X64
|
||||
extern "C" u64 _xgetbv(u32);
|
||||
#endif
|
||||
|
||||
inline std::array<u32, 4> utils::get_cpuid(u32 func, u32 subfunc)
|
||||
#if defined(ARCH_X64)
|
||||
static inline std::array<u32, 4> get_cpuid(u32 func, u32 subfunc)
|
||||
{
|
||||
int regs[4];
|
||||
#ifdef _MSC_VER
|
||||
@ -38,7 +37,7 @@ inline std::array<u32, 4> utils::get_cpuid(u32 func, u32 subfunc)
|
||||
return {0u+regs[0], 0u+regs[1], 0u+regs[2], 0u+regs[3]};
|
||||
}
|
||||
|
||||
inline u64 utils::get_xgetbv(u32 xcr)
|
||||
static inline u64 get_xgetbv(u32 xcr)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return _xgetbv(xcr);
|
||||
@ -48,6 +47,7 @@ inline u64 utils::get_xgetbv(u32 xcr)
|
||||
return eax | (u64(edx) << 32);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
// sysinfo_darwin.mm
|
||||
@ -61,113 +61,192 @@ namespace Darwin_Version
|
||||
|
||||
// Returns whether the CPU reports SSSE3. Always false on non-x64 builds.
bool utils::has_ssse3()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once: leaf 1 must exist, then mask 0x200 of result word 2 is tested
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x1)
        {
            return false;
        }

        return (get_cpuid(1, 0)[2] & 0x200) != 0;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether the CPU reports SSE4.1. Always false on non-x64 builds.
bool utils::has_sse41()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once: leaf 1 must exist, then mask 0x80000 of result word 2 is tested
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x1)
        {
            return false;
        }

        return (get_cpuid(1, 0)[2] & 0x80000) != 0;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether AVX is usable. Always false on non-x64 builds.
bool utils::has_avx()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once. Checks run in order and stop at the first failure, so
    // get_xgetbv() is only reached after the CPUID checks have passed.
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x1)
        {
            return false;
        }

        if (!(get_cpuid(1, 0)[2] & 0x10000000))
        {
            return false;
        }

        if ((get_cpuid(1, 0)[2] & 0x0C000000) != 0x0C000000)
        {
            return false;
        }

        return (get_xgetbv(0) & 0x6) == 0x6;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether AVX2 is usable. Always false on non-x64 builds.
bool utils::has_avx2()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once. Checks run in order and stop at the first failure, so
    // get_xgetbv() is only reached after the CPUID checks have passed.
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x7)
        {
            return false;
        }

        if (!(get_cpuid(7, 0)[1] & 0x20))
        {
            return false;
        }

        if ((get_cpuid(1, 0)[2] & 0x0C000000) != 0x0C000000)
        {
            return false;
        }

        return (get_xgetbv(0) & 0x6) == 0x6;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether the CPU reports RTM (mask 0x800 of result word 1 of CPUID leaf 7).
// The x64 result is computed once and cached; every other architecture reports false,
// so the function always returns a value regardless of the build target.
bool utils::has_rtm()
{
#if defined(ARCH_X64)
    static const bool g_value = get_cpuid(0, 0)[0] >= 0x7 && (get_cpuid(7, 0)[1] & 0x800) == 0x800;
    return g_value;
#else
    return false;
#endif
}
|
||||
|
||||
// Returns whether the CPU reports the TSX force-abort bit. Always false on non-x64 builds.
bool utils::has_tsx_force_abort()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once: leaf 7 must exist, then mask 0x2000 of result word 3 is tested
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x7)
        {
            return false;
        }

        return (get_cpuid(7, 0)[3] & 0x2000) != 0;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether the CPU reports the RTM always-abort bit. Always false on non-x64 builds.
bool utils::has_rtm_always_abort()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once: leaf 7 must exist, then mask 0x800 of result word 3 is tested
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x7)
        {
            return false;
        }

        return (get_cpuid(7, 0)[3] & 0x800) != 0;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether the CPU reports MPX. Always false on non-x64 builds.
bool utils::has_mpx()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once: leaf 7 must exist, then mask 0x4000 of result word 1 is tested
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x7)
        {
            return false;
        }

        return (get_cpuid(7, 0)[1] & 0x4000) != 0;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether the AVX-512 baseline used by the emulator is usable.
// Always false on non-x64 builds.
bool utils::has_avx512()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Requires AVX512F, AVX512CD, AVX512DQ, AVX512BW and AVX512VL together (Skylake-X level support).
    // Evaluated once; get_xgetbv() is only reached after the CPUID checks have passed.
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x7)
        {
            return false;
        }

        if ((get_cpuid(7, 0)[1] & 0xd0030000) != 0xd0030000)
        {
            return false;
        }

        if ((get_cpuid(1, 0)[2] & 0x0C000000) != 0x0C000000)
        {
            return false;
        }

        return (get_xgetbv(0) & 0xe6) == 0xe6;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether the Icelake-client AVX-512 extension set is usable on top of has_avx512().
// Always false on non-x64 builds.
bool utils::has_avx512_icl()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Requires AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512VPOPCNTDQ, AVX512BITALG, AVX512VNNI,
    // AVX512VPCLMULQDQ, AVX512GFNI and AVX512VAES (Icelake-client level support). Evaluated once.
    static const bool s_present = []() -> bool
    {
        if (!has_avx512())
        {
            return false;
        }

        if ((get_cpuid(7, 0)[1] & 0x00200000) != 0x00200000)
        {
            return false;
        }

        return (get_cpuid(7, 0)[2] & 0x00005f42) == 0x00005f42;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether AVX512VNNI is usable on top of has_avx512().
// Always false on non-x64 builds.
bool utils::has_avx512_vnni()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once: baseline AVX-512 first, then mask 0x00000800 of leaf 7 result word 2
    static const bool s_present = []() -> bool
    {
        if (!has_avx512())
        {
            return false;
        }

        return (get_cpuid(7, 0)[2] & 0x00000800) != 0;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether XOP is usable (requires has_avx() as well).
// Always false on non-x64 builds.
bool utils::has_xop()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once: AVX first, then mask 0x800 of leaf 0x80000001 result word 2
    static const bool s_present = []() -> bool
    {
        if (!has_avx())
        {
            return false;
        }

        return (get_cpuid(0x80000001, 0)[2] & 0x800) != 0;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether the CPU reports CLWB. Always false on non-x64 builds.
bool utils::has_clwb()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once: leaf 7 must exist, then mask 0x1000000 of result word 1 is tested
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x7)
        {
            return false;
        }

        return (get_cpuid(7, 0)[1] & 0x1000000) != 0;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether the timestamp counter can be treated as invariant.
//   x64:   mask 0x100 of result word 3 of extended CPUID leaf 0x80000007, computed once.
//   ARM64: reported as always true.
//   other: false, so the function returns a value on every build target.
bool utils::has_invariant_tsc()
{
#if defined(ARCH_X64)
    // 0x80000007 is an extended leaf, so its presence is checked against the maximum
    // extended leaf reported by 0x80000000 (the same guard get_cpu_brand() uses for
    // 0x80000004) instead of the maximum basic leaf.
    static const bool g_value = get_cpuid(0x80000000, 0)[0] >= 0x80000007 && (get_cpuid(0x80000007, 0)[3] & 0x100) == 0x100;
    return g_value;
#elif defined(ARCH_ARM64)
    return true;
#else
    return false;
#endif
}
|
||||
|
||||
// Returns whether fused multiply-add is available.
//   x64:   mask 0x1000 of result word 2 of CPUID leaf 1, computed once.
//   ARM64: reported as always true.
//   other: false, so the function returns a value on every build target.
bool utils::has_fma3()
{
#if defined(ARCH_X64)
    static const bool g_value = get_cpuid(0, 0)[0] >= 0x1 && get_cpuid(1, 0)[2] & 0x1000;
    return g_value;
#elif defined(ARCH_ARM64)
    return true;
#else
    return false;
#endif
}
|
||||
|
||||
// Returns whether the CPU reports FMA4. Always false on non-x64 builds.
bool utils::has_fma4()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once: basic max leaf must be at least 7, then mask 0x10000 of
    // leaf 0x80000001 result word 2 is tested
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x7)
        {
            return false;
        }

        return (get_cpuid(0x80000001, 0)[2] & 0x10000) != 0;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether the CPU reports ERMS. Always false on non-x64 builds.
bool utils::has_erms()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once: leaf 7 must exist, then mask 0x200 of result word 1 is tested
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x7)
        {
            return false;
        }

        return (get_cpuid(7, 0)[1] & 0x200) != 0;
    }();

    return s_present;
#endif
}
|
||||
|
||||
// Returns whether the CPU reports FSRM. Always false on non-x64 builds.
bool utils::has_fsrm()
{
#if !defined(ARCH_X64)
    return false;
#else
    // Evaluated once: leaf 7 must exist, then mask 0x10 of result word 3 is tested
    static const bool s_present = []() -> bool
    {
        if (get_cpuid(0, 0)[0] < 0x7)
        {
            return false;
        }

        return (get_cpuid(7, 0)[3] & 0x10) != 0;
    }();

    return s_present;
#endif
}
|
||||
|
||||
u32 utils::get_rep_movsb_threshold()
|
||||
{
|
||||
static const u32 g_value = []()
|
||||
{
|
||||
u32 thresh_value = 0xFFFFFFFF;
|
||||
u32 thresh_value = umax;
|
||||
if (has_fsrm())
|
||||
{
|
||||
thresh_value = 2047;
|
||||
@ -187,6 +266,7 @@ std::string utils::get_cpu_brand()
|
||||
{
|
||||
std::string brand;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
if (get_cpuid(0x80000000, 0)[0] >= 0x80000004)
|
||||
{
|
||||
for (u32 i = 0; i < 3; i++)
|
||||
@ -198,6 +278,9 @@ std::string utils::get_cpu_brand()
|
||||
{
|
||||
brand = "Unknown CPU";
|
||||
}
|
||||
#else
|
||||
brand = "Unidentified CPU";
|
||||
#endif
|
||||
|
||||
brand.erase(brand.find_last_not_of('\0') + 1);
|
||||
brand.erase(brand.find_last_not_of(' ') + 1);
|
||||
@ -396,19 +479,6 @@ static constexpr ullong round_tsc(ullong val)
|
||||
return utils::rounded_div(val, 1'000'000) * 1'000'000;
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
extern "C" void _mm_lfence();
|
||||
#endif
|
||||
|
||||
static inline void lfence()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
_mm_lfence();
|
||||
#else
|
||||
__builtin_ia32_lfence();
|
||||
#endif
|
||||
}
|
||||
|
||||
ullong utils::get_tsc_freq()
|
||||
{
|
||||
static const ullong cal_tsc = []() -> ullong
|
||||
@ -449,17 +519,17 @@ ullong utils::get_tsc_freq()
|
||||
{
|
||||
#ifdef _WIN32
|
||||
Sleep(1);
|
||||
error_data[i] = (lfence(), utils::get_tsc());
|
||||
error_data[i] = (utils::lfence(), utils::get_tsc());
|
||||
LARGE_INTEGER ctr;
|
||||
QueryPerformanceCounter(&ctr);
|
||||
rdtsc_data[i] = (lfence(), utils::get_tsc());
|
||||
rdtsc_data[i] = (utils::lfence(), utils::get_tsc());
|
||||
timer_data[i] = ctr.QuadPart;
|
||||
#else
|
||||
usleep(200);
|
||||
error_data[i] = (lfence(), utils::get_tsc());
|
||||
error_data[i] = (utils::lfence(), utils::get_tsc());
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
rdtsc_data[i] = (lfence(), utils::get_tsc());
|
||||
rdtsc_data[i] = (utils::lfence(), utils::get_tsc());
|
||||
timer_data[i] = ts.tv_nsec + (ts.tv_sec - sec_base) * 1'000'000'000;
|
||||
#endif
|
||||
}
|
||||
@ -511,6 +581,7 @@ u32 utils::get_thread_count()
|
||||
|
||||
u32 utils::get_cpu_family()
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
static const u32 g_value = []()
|
||||
{
|
||||
const u32 reg_value = get_cpuid(0x00000001, 0)[0]; // Processor feature info
|
||||
@ -528,10 +599,14 @@ u32 utils::get_cpu_family()
|
||||
}();
|
||||
|
||||
return g_value;
|
||||
#elif defined(ARCH_ARM64)
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
u32 utils::get_cpu_model()
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
static const u32 g_value = []()
|
||||
{
|
||||
const u32 reg_value = get_cpuid(0x00000001, 0)[0]; // Processor feature info
|
||||
@ -550,16 +625,19 @@ u32 utils::get_cpu_model()
|
||||
}();
|
||||
|
||||
return g_value;
|
||||
#elif defined(ARCH_ARM64)
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace utils
|
||||
{
|
||||
extern const u64 main_tid = []() -> u64
|
||||
{
|
||||
#ifdef _WIN32
|
||||
#ifdef _WIN32
|
||||
return GetCurrentThreadId();
|
||||
#else
|
||||
#else
|
||||
return reinterpret_cast<u64>(pthread_self());
|
||||
#endif
|
||||
#endif
|
||||
}();
|
||||
}
|
||||
|
@ -5,10 +5,6 @@
|
||||
|
||||
namespace utils
|
||||
{
|
||||
std::array<u32, 4> get_cpuid(u32 func, u32 subfunc);
|
||||
|
||||
u64 get_xgetbv(u32 xcr);
|
||||
|
||||
bool has_ssse3();
|
||||
|
||||
bool has_sse41();
|
||||
@ -29,6 +25,8 @@ namespace utils
|
||||
|
||||
bool has_avx512_icl();
|
||||
|
||||
bool has_avx512_vnni();
|
||||
|
||||
bool has_xop();
|
||||
|
||||
bool has_clwb();
|
||||
|
25
rpcs3/util/tsc.hpp
Normal file
25
rpcs3/util/tsc.hpp
Normal file
@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include "util/types.hpp"
|
||||
|
||||
#ifdef _M_X64
|
||||
extern "C" u64 __rdtsc();
|
||||
#endif
|
||||
|
||||
namespace utils
|
||||
{
|
||||
inline u64 get_tsc()
|
||||
{
|
||||
#if defined(ARCH_ARM64)
|
||||
u64 r = 0;
|
||||
__asm__ volatile("mrs %0, cntvct_el0" : "=r" (r));
|
||||
return r;
|
||||
#elif defined(_M_X64)
|
||||
return __rdtsc();
|
||||
#elif defined(ARCH_X64)
|
||||
return __builtin_ia32_rdtsc();
|
||||
#else
|
||||
#error "Missing utils::get_tsc() implementation"
|
||||
#endif
|
||||
}
|
||||
}
|
@ -12,6 +12,12 @@
|
||||
#include <memory>
|
||||
#include <bit>
|
||||
|
||||
#if defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(__amd64__)
|
||||
#define ARCH_X64 1
|
||||
#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64)
|
||||
#define ARCH_ARM64 1
|
||||
#endif
|
||||
|
||||
using std::chrono::steady_clock;
|
||||
|
||||
using namespace std::literals;
|
||||
@ -180,15 +186,15 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
#ifndef _MSC_VER
|
||||
|
||||
using u128 = __uint128_t;
|
||||
using s128 = __int128_t;
|
||||
|
||||
#if defined(ARCH_X64) && !defined(_MSC_VER)
|
||||
using __m128i = long long __attribute__((vector_size(16)));
|
||||
using __m128d = double __attribute__((vector_size(16)));
|
||||
using __m128 = float __attribute__((vector_size(16)));
|
||||
#endif
|
||||
|
||||
#ifndef _MSC_VER
|
||||
using u128 = __uint128_t;
|
||||
using s128 = __int128_t;
|
||||
#else
|
||||
|
||||
extern "C"
|
||||
|
@ -2,6 +2,9 @@
|
||||
|
||||
#include "util/types.hpp"
|
||||
|
||||
template <typename T>
|
||||
concept Vector128 = (sizeof(T) == 16) && (std::is_trivial_v<T>);
|
||||
|
||||
// 128-bit vector type
|
||||
union alignas(16) v128
|
||||
{
|
||||
@ -58,39 +61,23 @@ union alignas(16) v128
|
||||
u128 _u;
|
||||
s128 _s;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
template <typename T>
|
||||
struct opaque_wrapper
|
||||
v128() = default;
|
||||
|
||||
constexpr v128(const v128&) noexcept = default;
|
||||
|
||||
template <Vector128 T>
|
||||
constexpr v128(const T& rhs) noexcept
|
||||
: v128(std::bit_cast<v128>(rhs))
|
||||
{
|
||||
u128 m_data;
|
||||
}
|
||||
|
||||
opaque_wrapper() = default;
|
||||
constexpr v128& operator=(const v128&) noexcept = default;
|
||||
|
||||
opaque_wrapper(const T& value)
|
||||
: m_data(std::bit_cast<u128>(value))
|
||||
{
|
||||
}
|
||||
|
||||
opaque_wrapper& operator=(const T& value)
|
||||
{
|
||||
m_data = std::bit_cast<u128>(value);
|
||||
return *this;
|
||||
}
|
||||
|
||||
operator T() const
|
||||
{
|
||||
return std::bit_cast<T>(m_data);
|
||||
}
|
||||
};
|
||||
|
||||
opaque_wrapper<__m128> vf;
|
||||
opaque_wrapper<__m128i> vi;
|
||||
opaque_wrapper<__m128d> vd;
|
||||
#else
|
||||
__m128 vf;
|
||||
__m128i vi;
|
||||
__m128d vd;
|
||||
#endif
|
||||
template <Vector128 T>
|
||||
constexpr operator T() const noexcept
|
||||
{
|
||||
return std::bit_cast<T>(*this);
|
||||
}
|
||||
|
||||
using enable_bitcopy = std::true_type;
|
||||
|
||||
@ -107,6 +94,14 @@ union alignas(16) v128
|
||||
return from64(_0, _1);
|
||||
}
|
||||
|
||||
// Build a vector with `value` stored in both _u64[0] and _u64[1].
static v128 from64p(u64 value)
{
	v128 splat;

	for (u32 lane = 0; lane < 2; lane++)
	{
		splat._u64[lane] = value;
	}

	return splat;
}
|
||||
|
||||
static v128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0)
|
||||
{
|
||||
v128 ret;
|
||||
@ -132,6 +127,16 @@ union alignas(16) v128
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Build a vector with `value` stored in each of _f[0].._f[3].
static v128 fromf32p(f32 value)
{
	v128 splat;

	for (u32 lane = 0; lane < 4; lane++)
	{
		splat._f[lane] = value;
	}

	return splat;
}
|
||||
|
||||
static v128 from16p(u16 value)
|
||||
{
|
||||
v128 ret;
|
||||
@ -153,11 +158,18 @@ union alignas(16) v128
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline v128 fromV(const __m128i& value);
|
||||
|
||||
static inline v128 fromF(const __m128& value);
|
||||
|
||||
static inline v128 fromD(const __m128d& value);
|
||||
// Return a v128 that is declared but never assigned.
// On GCC/Clang the -Wuninitialized diagnostic is silenced around the declaration.
static v128 undef()
{
#if defined(__clang__) || defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wuninitialized"
#endif
	v128 unset;
	return unset;
#if defined(__clang__) || defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
}
|
||||
|
||||
// Unaligned load with optional index offset
|
||||
static v128 loadu(const void* ptr, usz index = 0)
|
||||
@ -173,45 +185,13 @@ union alignas(16) v128
|
||||
std::memcpy(static_cast<u8*>(ptr) + index * sizeof(v128), &value, sizeof(v128));
|
||||
}
|
||||
|
||||
static inline v128 add8(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 add16(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 add32(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 addfs(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 addfd(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 sub8(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 sub16(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 sub32(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 subfs(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 subfd(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 maxu8(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 minu8(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 eq8(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 eq16(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 eq32(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 eq32f(const v128& left, const v128& right);
|
||||
|
||||
static inline v128 fma32f(v128 a, const v128& b, const v128& c);
|
||||
v128 operator|(const v128&) const;
|
||||
v128 operator&(const v128&) const;
|
||||
v128 operator^(const v128&) const;
|
||||
v128 operator~() const;
|
||||
|
||||
bool operator==(const v128& right) const;
|
||||
|
||||
// result = (~left) & (right)
|
||||
static inline v128 andnot(const v128& left, const v128& right);
|
||||
|
||||
void clear()
|
||||
{
|
||||
*this = {};
|
||||
@ -227,3 +207,12 @@ struct offset32_array<v128::masked_array_t<T, N, M>>
|
||||
return u32{sizeof(T)} * (static_cast<u32>(arg) ^ static_cast<u32>(M));
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct std::hash<v128>
|
||||
{
|
||||
usz operator()(const v128& key) const
|
||||
{
|
||||
return key._u64[0] + key._u64[1];
|
||||
}
|
||||
};
|
||||
|
@ -1,178 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "util/types.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include <cmath>
|
||||
|
||||
inline bool v128_use_fma = utils::has_fma3();
|
||||
|
||||
// Wrap an __m128i value in a v128 by storing it through the `vi` member.
inline v128 v128::fromV(const __m128i& value)
{
	v128 wrapped;
	wrapped.vi = value;
	return wrapped;
}
|
||||
|
||||
// Wrap an __m128 value in a v128 by storing it through the `vf` member.
inline v128 v128::fromF(const __m128& value)
{
	v128 wrapped;
	wrapped.vf = value;
	return wrapped;
}
|
||||
|
||||
// Wrap an __m128d value in a v128 by storing it through the `vd` member.
inline v128 v128::fromD(const __m128d& value)
{
	v128 wrapped;
	wrapped.vd = value;
	return wrapped;
}
|
||||
|
||||
// Per-lane 8-bit integer addition (_mm_add_epi8) of the two operands.
inline v128 v128::add8(const v128& left, const v128& right)
{
	const auto sum = _mm_add_epi8(left.vi, right.vi);
	return fromV(sum);
}
|
||||
|
||||
// Per-lane 16-bit integer addition (_mm_add_epi16) of the two operands.
inline v128 v128::add16(const v128& left, const v128& right)
{
	const auto sum = _mm_add_epi16(left.vi, right.vi);
	return fromV(sum);
}
|
||||
|
||||
// Per-lane 32-bit integer addition (_mm_add_epi32) of the two operands.
inline v128 v128::add32(const v128& left, const v128& right)
{
	const auto sum = _mm_add_epi32(left.vi, right.vi);
	return fromV(sum);
}
|
||||
|
||||
// Per-lane single-precision float addition (_mm_add_ps) of the two operands.
inline v128 v128::addfs(const v128& left, const v128& right)
{
	const auto sum = _mm_add_ps(left.vf, right.vf);
	return fromF(sum);
}
|
||||
|
||||
// Per-lane double-precision float addition (_mm_add_pd) of the two operands.
inline v128 v128::addfd(const v128& left, const v128& right)
{
	const auto sum = _mm_add_pd(left.vd, right.vd);
	return fromD(sum);
}
|
||||
|
||||
// Per-lane 8-bit integer subtraction (_mm_sub_epi8): left minus right.
inline v128 v128::sub8(const v128& left, const v128& right)
{
	const auto difference = _mm_sub_epi8(left.vi, right.vi);
	return fromV(difference);
}
|
||||
|
||||
// Per-lane 16-bit integer subtraction (_mm_sub_epi16): left minus right.
inline v128 v128::sub16(const v128& left, const v128& right)
{
	const auto difference = _mm_sub_epi16(left.vi, right.vi);
	return fromV(difference);
}
|
||||
|
||||
// Per-lane 32-bit integer subtraction (_mm_sub_epi32): left minus right.
inline v128 v128::sub32(const v128& left, const v128& right)
{
	const auto difference = _mm_sub_epi32(left.vi, right.vi);
	return fromV(difference);
}
|
||||
|
||||
// Per-lane single-precision float subtraction (_mm_sub_ps): left minus right.
inline v128 v128::subfs(const v128& left, const v128& right)
{
	const auto difference = _mm_sub_ps(left.vf, right.vf);
	return fromF(difference);
}
|
||||
|
||||
// Per-lane double-precision float subtraction (_mm_sub_pd): left minus right.
inline v128 v128::subfd(const v128& left, const v128& right)
{
	const auto difference = _mm_sub_pd(left.vd, right.vd);
	return fromD(difference);
}
|
||||
|
||||
// Per-lane maximum of unsigned 8-bit integers (_mm_max_epu8).
inline v128 v128::maxu8(const v128& left, const v128& right)
{
	const auto larger = _mm_max_epu8(left.vi, right.vi);
	return fromV(larger);
}
|
||||
|
||||
// Per-lane minimum of unsigned 8-bit integers (_mm_min_epu8).
inline v128 v128::minu8(const v128& left, const v128& right)
{
	const auto smaller = _mm_min_epu8(left.vi, right.vi);
	return fromV(smaller);
}
|
||||
|
||||
// Per-lane 8-bit equality comparison (_mm_cmpeq_epi8); the result is the comparison mask.
inline v128 v128::eq8(const v128& left, const v128& right)
{
	const auto mask = _mm_cmpeq_epi8(left.vi, right.vi);
	return fromV(mask);
}
|
||||
|
||||
// Per-lane 16-bit equality comparison (_mm_cmpeq_epi16); the result is the comparison mask.
inline v128 v128::eq16(const v128& left, const v128& right)
{
	const auto mask = _mm_cmpeq_epi16(left.vi, right.vi);
	return fromV(mask);
}
|
||||
|
||||
// Per-lane 32-bit equality comparison (_mm_cmpeq_epi32); the result is the comparison mask.
inline v128 v128::eq32(const v128& left, const v128& right)
{
	const auto mask = _mm_cmpeq_epi32(left.vi, right.vi);
	return fromV(mask);
}
|
||||
|
||||
// Per-lane single-precision float equality comparison (_mm_cmpeq_ps); the result is the comparison mask.
inline v128 v128::eq32f(const v128& left, const v128& right)
{
	const auto mask = _mm_cmpeq_ps(left.vf, right.vf);
	return fromF(mask);
}
|
||||
|
||||
// Fused multiply-add on four floats: computes a * b + c per lane and returns it in `a`.
// When __FMA__ is defined at build time the intrinsic is used unconditionally.
// Otherwise v128_use_fma selects at run time between the hardware instruction
// (intrinsic on MSVC, inline asm elsewhere) and a std::fmaf loop over the lanes.
inline v128 v128::fma32f(v128 a, const v128& b, const v128& c)
{
#ifdef __FMA__
	a.vf = _mm_fmadd_ps(a.vf, b.vf, c.vf);
	return a;
#else
	if (!v128_use_fma) [[unlikely]]
	{
		// Fallback path: one std::fmaf call per lane
		for (int lane = 0; lane < 4; lane++)
		{
			a._f[lane] = std::fmaf(a._f[lane], b._f[lane], c._f[lane]);
		}

		return a;
	}

#ifdef _MSC_VER
	a.vf = _mm_fmadd_ps(a.vf, b.vf, c.vf);
#else
	// `a` is both an input and the destination operand of the instruction
	__asm__("vfmadd213ps %[c], %[b], %[a]"
		: [a] "+x" (a.vf)
		: [b] "x" (b.vf)
		, [c] "x" (c.vf));
#endif
	return a;
#endif
}
|
||||
|
||||
inline bool v128::operator==(const v128& right) const
|
||||
{
|
||||
return _mm_movemask_epi8(v128::eq32(*this, right).vi) == 0xffff;
|
||||
}
|
||||
|
||||
// result = (~left) & (right)
|
||||
inline v128 v128::andnot(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_andnot_si128(left.vi, right.vi));
|
||||
}
|
||||
|
||||
// Bitwise OR of two vectors (_mm_or_si128).
inline v128 operator|(const v128& left, const v128& right)
{
	const auto bits = _mm_or_si128(left.vi, right.vi);
	return v128::fromV(bits);
}
|
||||
|
||||
// Bitwise AND of two vectors (_mm_and_si128).
inline v128 operator&(const v128& left, const v128& right)
{
	const auto bits = _mm_and_si128(left.vi, right.vi);
	return v128::fromV(bits);
}
|
||||
|
||||
// Bitwise XOR of two vectors (_mm_xor_si128).
inline v128 operator^(const v128& left, const v128& right)
{
	const auto bits = _mm_xor_si128(left.vi, right.vi);
	return v128::fromV(bits);
}
|
||||
|
||||
// Bitwise complement, implemented as XOR with v128::from32p(umax).
inline v128 operator~(const v128& other)
{
	const v128 all_ones = v128::from32p(umax); // XOR with ones
	return other ^ all_ones;
}
|
@ -27,7 +27,7 @@
|
||||
#ifdef __NR_memfd_create
|
||||
#elif __x86_64__
|
||||
#define __NR_memfd_create 319
|
||||
#elif __aarch64__
|
||||
#elif ARCH_ARM64
|
||||
#define __NR_memfd_create 279
|
||||
#endif
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user