diff --git a/Utilities/Atomic.h b/Utilities/Atomic.h index 5edb45f2e0..1ea3e4f597 100644 --- a/Utilities/Atomic.h +++ b/Utilities/Atomic.h @@ -1023,7 +1023,7 @@ public: } // Conditionally decrement - simple_type fetch_dec_sat(simple_type greater_than = std::numeric_limits::min(), simple_type amount = 1) + bool try_dec(simple_type greater_than = std::numeric_limits::min()) { type _new, old = atomic_storage::load(m_data); @@ -1031,17 +1031,39 @@ public: { _new = old; - if (_new <= greater_than) + if (!(_new > greater_than)) { - // Early exit - return old; + return false; } - _new -= amount; + _new -= 1; if (LIKELY(atomic_storage::compare_exchange(m_data, old, _new))) { - return old; + return true; + } + } + } + + // Conditionally increment + bool try_inc(simple_type less_than = std::numeric_limits::max()) + { + type _new, old = atomic_storage::load(m_data); + + while (true) + { + _new = old; + + if (!(_new < less_than)) + { + return false; + } + + _new += 1; + + if (LIKELY(atomic_storage::compare_exchange(m_data, old, _new))) + { + return true; } } } diff --git a/Utilities/GDBDebugServer.h b/Utilities/GDBDebugServer.h index d18861610b..75d0bd684a 100644 --- a/Utilities/GDBDebugServer.h +++ b/Utilities/GDBDebugServer.h @@ -40,7 +40,7 @@ public: const u64 ALL_THREADS = 0xffffffffffffffff; const u64 ANY_THREAD = 0; -class GDBDebugServer : public old_thread +class GDBDebugServer { socket_t server_socket; socket_t client_socket; @@ -112,29 +112,16 @@ class GDBDebugServer : public old_thread bool cmd_set_breakpoint(gdb_cmd& cmd); bool cmd_remove_breakpoint(gdb_cmd& cmd); -protected: - void on_task() override final; - void on_exit() override final; - public: bool from_breakpoint = true; bool stop = false; bool paused = false; u64 pausedBy; - virtual std::string get_name() const; - virtual void on_stop() override final; + void operator()(); void pause_from(cpu_thread* t); }; extern u32 g_gdb_debugger_id; -template <> -struct id_manager::on_stop { - static inline void func(GDBDebugServer* ptr) - { - if (ptr) ptr->on_stop(); - } -}; - #endif diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index 1b8be3b960..f70ce02915 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -1091,33 +1091,40 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) const auto cpu = get_current_cpu_thread(); - if (rsx::g_access_violation_handler) { bool handled = false; + try { handled = rsx::g_access_violation_handler(addr, is_writing); } - catch (std::runtime_error &e) + catch (const std::exception& e) { LOG_FATAL(RSX, "g_access_violation_handler(0x%x, %d): %s", addr, is_writing, e.what()); + if (cpu) { vm::temporary_unlock(*cpu); cpu->state += cpu_flag::dbg_pause; - cpu->test_state(); - return false; + + if (cpu->test_stopped()) + { + std::terminate(); + } } + + return false; } if (handled) { g_tls_fault_rsx++; - if (cpu) + if (cpu && cpu->test_stopped()) { - cpu->test_state(); + std::terminate(); } + return true; } } @@ -1160,7 +1167,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) // check if address is RawSPU MMIO register if (addr - RAW_SPU_BASE_ADDR < (6 * RAW_SPU_OFFSET) && (addr % RAW_SPU_OFFSET) >= RAW_SPU_PROB_OFFSET) { - auto thread = idm::get((addr - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET); + auto thread = idm::get>(spu_thread::find_raw_spu((addr - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET)); if (!thread) { @@ -1255,9 +1262,9 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) if (vm::check_addr(addr, std::max(1, d_size), vm::page_allocated | (is_writing ? vm::page_writable : vm::page_readable))) { - if (cpu) + if (cpu && cpu->test_stopped()) { - cpu->test_state(); + std::terminate(); } return true; @@ -1321,6 +1328,11 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) LOG_FATAL(MEMORY, "Access violation %s location 0x%x", is_writing ? "writing" : "reading", addr); cpu->state += cpu_flag::dbg_pause; cpu->check_state(); + + if (cpu->test_stopped()) + { + std::terminate(); + } } return true; @@ -1571,53 +1583,6 @@ thread_local DECLARE(thread_ctrl::g_tls_this_thread) = nullptr; DECLARE(thread_ctrl::g_native_core_layout) { native_core_arrangement::undefined }; -void thread_base::start(const std::shared_ptr& ctrl, task_stack task) -{ -#ifdef _WIN32 - using thread_result = uint; -#else - using thread_result = void*; -#endif - - // Thread entry point - const native_entry entry = [](void* arg) -> thread_result - { - // Recover shared_ptr from short-circuited thread_base object pointer - std::shared_ptr ctrl = static_cast(arg)->m_self; - - try - { - ctrl->initialize(); - task_stack{std::move(ctrl->m_task)}.invoke(); - } - catch (...) - { - // Capture exception - ctrl->finalize(std::current_exception()); - finalize(); - return 0; - } - - ctrl->finalize(nullptr); - finalize(); - return 0; - }; - - ctrl->m_self = ctrl; - ctrl->m_task = std::move(task); - -#ifdef _WIN32 - std::uintptr_t thread = _beginthreadex(nullptr, 0, entry, ctrl.get(), 0, nullptr); - verify("thread_ctrl::start" HERE), thread != 0; -#else - pthread_t thread; - verify("thread_ctrl::start" HERE), pthread_create(&thread, nullptr, entry, ctrl.get()) == 0; -#endif - - // TODO: this is unsafe and must be duplicated in thread_ctrl::initialize - ctrl->m_thread = (uintptr_t)thread; -} - void thread_base::start(native_entry entry) { #ifdef _WIN32 @@ -1679,7 +1644,7 @@ void thread_base::initialize() #endif } -std::shared_ptr thread_base::finalize(std::exception_ptr eptr) noexcept +bool thread_base::finalize(int) noexcept { // Report pending errors error_code::error_report(0, 0, 0, 0); @@ -1712,17 +1677,13 @@ std::shared_ptr thread_base::finalize(std::exception_ptr eptr) noex g_tls_fault_rsx, g_tls_fault_spu); - // Untangle circular reference, set exception - std::unique_lock lock(m_mutex); - - // Possibly last reference to the thread object - std::shared_ptr self = std::move(m_self); - m_state = thread_state::finished; - m_exception = eptr; + // Return true if need to delete thread object + const bool result = m_state.exchange(thread_state::finished) == thread_state::detached; // Signal waiting threads - lock.unlock(), m_jcv.notify_all(); - return self; + m_mutex.lock_unlock(); + m_jcv.notify_all(); + return result; } void thread_base::finalize() noexcept @@ -1741,8 +1702,6 @@ bool thread_ctrl::_wait_for(u64 usec) // Mutex is unlocked at the start and after the waiting if (u32 sig = _this->m_signal.load()) { - thread_ctrl::test(); - if (sig & 1) { _this->m_signal &= ~1; @@ -1761,11 +1720,6 @@ bool thread_ctrl::_wait_for(u64 usec) // Double-check the value if (u32 sig = _this->m_signal.load()) { - if (sig & 2 && _this->m_exception) - { - _this->_throw(); - } - if (sig & 1) { _this->m_signal &= ~1; @@ -1780,20 +1734,6 @@ bool thread_ctrl::_wait_for(u64 usec) return false; } -[[noreturn]] void thread_base::_throw() -{ - std::exception_ptr ex = std::exchange(m_exception, std::exception_ptr{}); - m_signal &= ~3; - m_mutex.unlock(); - std::rethrow_exception(std::move(ex)); -} - -void thread_base::_notify(cond_variable thread_base::* ptr) -{ - m_mutex.lock_unlock(); - (this->*ptr).notify_one(); -} - thread_base::thread_base(std::string_view name) : m_name(name) { @@ -1811,22 +1751,6 @@ thread_base::~thread_base() } } -void thread_base::set_exception(std::exception_ptr ptr) -{ - std::lock_guard lock(m_mutex); - m_exception = ptr; - - if (m_exception) - { - m_signal |= 2; - m_cond.notify_one(); - } - else - { - m_signal &= ~2; - } -} - void thread_base::join() const { if (m_state == thread_state::finished) @@ -1842,33 +1766,13 @@ void thread_base::join() const } } -void thread_base::detach() -{ - auto self = weak_from_this().lock(); - - if (!self) - { - LOG_FATAL(GENERAL, "Cannot detach thread '%s'", get_name()); - return; - } - - if (self->m_state.compare_and_swap_test(thread_state::created, thread_state::detached)) - { - std::lock_guard lock(m_mutex); - - if (m_state == thread_state::detached) - { - m_self = std::move(self); - } - } -} - void thread_base::notify() { if (!(m_signal & 1)) { m_signal |= 1; - _notify(&thread_base::m_cond); + m_mutex.lock_unlock(); + m_cond.notify_one(); } } @@ -1886,16 +1790,13 @@ u64 thread_base::get_cycles() { cycles = static_cast(thread_time.tv_sec) * 1'000'000'000 + thread_time.tv_nsec; #endif - // Report 0 the first time this function is called - if (m_cycles == 0) + if (const u64 old_cycles = m_cycles.exchange(cycles)) { - m_cycles = cycles; - return 0; + return cycles - old_cycles; } - const auto diff_cycles = cycles - m_cycles; - m_cycles = cycles; - return diff_cycles; + // Report 0 the first time this function is called + return 0; } else { @@ -1903,23 +1804,6 @@ u64 thread_base::get_cycles() } } -void thread_ctrl::test() -{ - const auto _this = g_tls_this_thread; - - if (_this->m_signal & 2) - { - _this->m_mutex.lock(); - - if (_this->m_exception) - { - _this->_throw(); - } - - _this->m_mutex.unlock(); - } -} - void thread_ctrl::detect_cpu_layout() { if (!g_native_core_layout.compare_and_swap_test(native_core_arrangement::undefined, native_core_arrangement::generic)) @@ -2067,45 +1951,3 @@ void thread_ctrl::set_thread_affinity_mask(u16 mask) pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cs); #endif } - -old_thread::old_thread() -{ -} - -old_thread::~old_thread() -{ -} - -std::string old_thread::get_name() const -{ - return fmt::format("('%s') Unnamed Thread", typeid(*this).name()); -} - -void old_thread::start_thread(const std::shared_ptr& _this) -{ - // Ensure it's not called from the constructor and the correct object is passed - verify("old_thread::start_thread" HERE), _this.get() == this; - - // Run thread - thread_ctrl::spawn(m_thread, get_name(), [this, _this]() - { - try - { - LOG_TRACE(GENERAL, "Thread started"); - on_spawn(); - on_task(); - LOG_TRACE(GENERAL, "Thread ended"); - } - catch (const std::exception& e) - { - LOG_FATAL(GENERAL, "%s thrown: %s", typeid(e).name(), e.what()); - Emu.Pause(); - } - - on_exit(); - }); -} - -task_stack::task_base::~task_base() -{ -} diff --git a/Utilities/Thread.h b/Utilities/Thread.h index dca858baed..a7c400e8ef 100644 --- a/Utilities/Thread.h +++ b/Utilities/Thread.h @@ -3,7 +3,6 @@ #include "types.h" #include "Atomic.h" -#include #include #include #include @@ -38,8 +37,8 @@ enum class thread_class : u32 enum class thread_state { created, // Initial state - detached, // Set if the thread has been detached successfully (only possible via shared_ptr) - aborting, // Set if the thread has been joined in destructor (mutually exclusive with detached) + detached, // The thread has been detached to destroy its own named_thread object (can be dangerously misused) + aborting, // The thread has been joined in the destructor or explicitly aborted (mutually exclusive with detached) finished // Final state, always set at the end of thread execution }; @@ -89,84 +88,15 @@ struct thread_on_abort : std::bool_constant {}; template struct thread_on_abort&>().on_abort())> : std::bool_constant {}; -// Detect on_cleanup() static function (should return void) +// Detect on_cleanup() static member function (should return void) (in C++20 can use destroying delete instead) template struct thread_on_cleanup : std::bool_constant {}; template struct thread_on_cleanup::on_cleanup(std::declval*>()))> : std::bool_constant {}; -// Simple list of void() functors -class task_stack -{ - struct task_base - { - std::unique_ptr next; - - virtual ~task_base(); - - virtual void invoke() - { - if (next) - { - next->invoke(); - } - } - }; - - template - struct task_type final : task_base - { - std::remove_reference_t func; - - task_type(F&& func) - : func(std::forward(func)) - { - } - - void invoke() final override - { - func(); - task_base::invoke(); - } - }; - - std::unique_ptr m_stack; - -public: - task_stack() = default; - - template - task_stack(F&& func) - : m_stack(new task_type(std::forward(func))) - { - } - - void push(task_stack stack) - { - auto _top = stack.m_stack.release(); - auto _next = m_stack.release(); - m_stack.reset(_top); - while (UNLIKELY(_top->next)) _top = _top->next.get(); - _top->next.reset(_next); - } - - void reset() - { - m_stack.reset(); - } - - void invoke() const - { - if (m_stack) - { - m_stack->invoke(); - } - } -}; - -// Thread base class (TODO: remove shared_ptr, make private base) -class thread_base : public std::enable_shared_from_this +// Thread base class +class thread_base { // Native thread entry point function type #ifdef _WIN32 @@ -175,9 +105,6 @@ class thread_base : public std::enable_shared_from_this using native_entry = void*(*)(void* arg); #endif - // Self pointer for detached thread - std::shared_ptr m_self; - // Thread handle (platform-specific) atomic_t m_thread{0}; @@ -196,71 +123,41 @@ class thread_base : public std::enable_shared_from_this // Thread state atomic_t m_state = thread_state::created; - // Remotely set or caught exception - std::exception_ptr m_exception; - - // Thread initial task - task_stack m_task; - // Thread name lf_value m_name; - // CPU cycles thread has run for - u64 m_cycles{0}; + // + atomic_t m_cycles = 0; // Start thread - static void start(const std::shared_ptr&, task_stack); - void start(native_entry); // Called at the thread start void initialize(); - // Called at the thread end, returns moved m_self (may be null) - std::shared_ptr finalize(std::exception_ptr) noexcept; + // Called at the thread end, returns true if needs destruction + bool finalize(int) noexcept; + // Cleanup after possibly deleting the thread instance static void finalize() noexcept; - // Internal throwing function. Mutex must be locked and will be unlocked. - [[noreturn]] void _throw(); - - // Internal notification function - void _notify(cond_variable thread_base::*); - friend class thread_ctrl; template friend class named_thread; -public: +protected: thread_base(std::string_view name); ~thread_base(); - // Get thread name - const std::string& get_name() const - { - return m_name; - } - - // Set thread name (not recommended) - void set_name(std::string_view name) - { - m_name.assign(name); - } - +public: // Get CPU cycles since last time this function was called. First call returns 0. u64 get_cycles(); - // Set exception - void set_exception(std::exception_ptr ptr); - // Wait for the thread (it does NOT change thread state, and can be called from multiple threads) void join() const; - // Make thread to manage a shared_ptr of itself - void detach(); - // Notify the thread void notify(); }; @@ -306,25 +203,37 @@ public: static_cast(thread).m_name.assign(name); } + template + static u64 get_cycles(named_thread& thread) + { + return static_cast(thread).get_cycles(); + } + + template + static void notify(named_thread& thread) + { + static_cast(thread).notify(); + } + // Read current state static inline thread_state state() { return g_tls_this_thread->m_state; } - // Wait once with timeout. Abortable, may throw. May spuriously return false. + // Wait once with timeout. May spuriously return false. static inline bool wait_for(u64 usec) { return _wait_for(usec); } - // Wait. Abortable, may throw. + // Wait. static inline void wait() { _wait_for(-1); } - // Wait until pred(). Abortable, may throw. + // Wait until pred(). template > static inline RT wait(F&& pred) { @@ -339,42 +248,12 @@ public: } } - // Wait eternally until aborted. - [[noreturn]] static inline void eternalize() - { - while (true) - { - _wait_for(-1); - } - } - - // Test exception (may throw). - static void test(); - // Get current thread (may be nullptr) static thread_base* get_current() { return g_tls_this_thread; } - // Create detached named thread - template - static inline void spawn(N&& name, F&& func) - { - auto out = std::make_shared(std::forward(name)); - - thread_base::start(out, std::forward(func)); - } - - // Named thread factory - template - static inline void spawn(std::shared_ptr& out, N&& name, F&& func) - { - out = std::make_shared(std::forward(name)); - - thread_base::start(out, std::forward(func)); - } - // Detect layout static void detect_cpu_layout(); @@ -387,22 +266,17 @@ public: // Sets the preferred affinity mask for this thread static void set_thread_affinity_mask(u16 mask); + // Spawn a detached named thread template - static inline std::shared_ptr> make_shared(std::string_view name, F&& lambda) + static void spawn(std::string_view name, F&& func) { - return std::make_shared>(name, std::forward(lambda)); - } - - template - static inline std::shared_ptr> make_shared(std::string_view name, Args&&... args) - { - return std::make_shared>(name, std::forward(args)...); + new named_thread(thread_state::detached, name, std::forward(func)); } }; // Derived from the callable object Context, possibly a lambda template -class named_thread final : public Context, result_storage_t, public thread_base +class named_thread final : public Context, result_storage_t, thread_base { using result = result_storage_t; using thread = thread_base; @@ -414,7 +288,22 @@ class named_thread final : public Context, result_storage_t, public thr static inline void* entry_point(void* arg) try #endif { - const auto maybe_last_ptr = static_cast(static_cast(arg))->entry_point(); + const auto _this = static_cast(static_cast(arg)); + + // Perform self-cleanup if necessary + if (_this->entry_point()) + { + // Call on_cleanup() static member function if it's available + if constexpr (thread_on_cleanup()) + { + Context::on_cleanup(_this); + } + else + { + delete _this; + } + } + thread::finalize(); return 0; } @@ -423,7 +312,7 @@ class named_thread final : public Context, result_storage_t, public thr catch_all_exceptions(); } - std::shared_ptr entry_point() + bool entry_point() { thread::initialize(); @@ -438,7 +327,16 @@ class named_thread final : public Context, result_storage_t, public thr new (result::get()) typename result::type(Context::operator()()); } - return thread::finalize(nullptr); + return thread::finalize(0); + } + + // Detached thread constructor + named_thread(thread_state s, std::string_view name, Context&& f) + : Context(std::forward(f)) + , thread(name) + { + thread::m_state.raw() = s; + thread::start(&named_thread::entry_point); } friend class thread_ctrl; @@ -493,21 +391,23 @@ public: return thread::m_state.load(); } - // Try to set thread_state::aborting + // Try to abort/detach named_thread& operator=(thread_state s) { - if (s != thread_state::aborting) + if (s != thread_state::aborting && s != thread_state::detached) { ASSUME(0); } - // Notify thread if not detached or terminated - if (thread::m_state.compare_and_swap_test(thread_state::created, thread_state::aborting)) + if (thread::m_state.compare_and_swap_test(thread_state::created, s)) { - // Call on_abort() method if it's available - if constexpr (thread_on_abort()) + if (s == thread_state::aborting) { - Context::on_abort(); + // Call on_abort() method if it's available + if constexpr (thread_on_abort()) + { + Context::on_abort(); + } } thread::notify(); @@ -528,63 +428,3 @@ public: } } }; - -// Old named_thread -class old_thread -{ - // Pointer to managed resource (shared with actual thread) - std::shared_ptr m_thread; - -public: - old_thread(); - - virtual ~old_thread(); - - old_thread(const old_thread&) = delete; - - old_thread& operator=(const old_thread&) = delete; - - // Get thread name - virtual std::string get_name() const; - -protected: - // Start thread (cannot be called from the constructor: should throw in such case) - void start_thread(const std::shared_ptr& _this); - - // Thread task (called in the thread) - virtual void on_task() = 0; - - // Thread finalization (called after on_task) - virtual void on_exit() {} - - // Called once upon thread spawn within the thread's own context - virtual void on_spawn() {} - -public: - // ID initialization - virtual void on_init(const std::shared_ptr& _this) - { - return start_thread(_this); - } - - // ID finalization - virtual void on_stop() - { - m_thread->join(); - } - - thread_base* get() const - { - return m_thread.get(); - } - - void join() const - { - return m_thread->join(); - } - - void notify() const - { - return m_thread->notify(); - } -}; diff --git a/Utilities/cond.cpp b/Utilities/cond.cpp index 0694c32dab..c68249aff0 100644 --- a/Utilities/cond.cpp +++ b/Utilities/cond.cpp @@ -21,7 +21,7 @@ bool cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept verify(HERE), rc == WAIT_TIMEOUT; // Retire - while (!m_value.fetch_dec_sat()) + while (!m_value.try_dec()) { timeout.QuadPart = 0; diff --git a/Utilities/sema.h b/Utilities/sema.h index 7b7d97c990..755bccf000 100644 --- a/Utilities/sema.h +++ b/Utilities/sema.h @@ -34,7 +34,7 @@ protected: bool try_wait() { - return m_value.fetch_dec_sat(0) > 0; + return m_value.try_dec(0); } void post(s32 _max) diff --git a/Utilities/typemap.h b/Utilities/typemap.h index 4034b19e6e..d12a4074f7 100644 --- a/Utilities/typemap.h +++ b/Utilities/typemap.h @@ -774,10 +774,8 @@ namespace utils // If max_count > 1 only id_new is supported static_assert(std::is_same_v && !std::is_const_v>); - // Try to acquire the semaphore (conditional increment) - const uint old_sema = head->m_sema.load(); - - if (UNLIKELY(old_sema > last || !head->m_sema.compare_and_swap_test(old_sema, old_sema + 1))) + // Try to acquire the semaphore + if (UNLIKELY(!head->m_sema.try_inc(last + 1))) { block = nullptr; } @@ -1225,7 +1223,7 @@ namespace utils template std::shared_lock<::notifier> get_free_notifier() const { - return std::shared_lock{get_head()->m_free_notifier}; + return std::shared_lock(get_head()->m_free_notifier, std::try_to_lock); } }; } // namespace utils diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index 78fcac747d..c6ba7d3de7 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -4,11 +4,8 @@ #include "CPUThread.h" #include "Emu/IdManager.h" #include "Utilities/GDBDebugServer.h" -#include - -#ifdef _WIN32 -#include -#endif +#include "Emu/Cell/PPUThread.h" +#include "Emu/Cell/SPUThread.h" DECLARE(cpu_thread::g_threads_created){0}; DECLARE(cpu_thread::g_threads_deleted){0}; @@ -45,12 +42,22 @@ void fmt_class_string>::format(std::string& out, u64 arg) thread_local cpu_thread* g_tls_current_cpu_thread = nullptr; -void cpu_thread::on_task() +void cpu_thread::operator()() { state -= cpu_flag::exit; g_tls_current_cpu_thread = this; + if (g_cfg.core.thread_scheduler_enabled) + { + thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(id_type() == 1 ? thread_class::ppu : thread_class::spu)); + } + + if (g_cfg.core.lower_spu_priority && id_type() == 2) + { + thread_ctrl::set_native_priority(-1); + } + // Check thread status while (!(state & (cpu_flag::exit + cpu_flag::dbg_global_stop))) { @@ -65,10 +72,12 @@ void cpu_thread::on_task() { state += _s; } - catch (const std::exception&) + catch (const std::exception& e) { + LOG_FATAL(GENERAL, "%s thrown: %s", typeid(e).name(), e.what()); LOG_NOTICE(GENERAL, "\n%s", dump()); - throw; + Emu.Pause(); + break; } state -= cpu_flag::ret; @@ -79,10 +88,9 @@ void cpu_thread::on_task() } } -void cpu_thread::on_stop() +void cpu_thread::on_abort() { state += cpu_flag::exit; - notify(); } cpu_thread::~cpu_thread() @@ -132,7 +140,7 @@ bool cpu_thread::check_state() cpu_sleep_called = false; } - if (!(state & cpu_state_pause)) + if (!is_paused()) { if (cpu_flag_memory) { @@ -167,21 +175,20 @@ bool cpu_thread::check_state() return false; } -void cpu_thread::test_state() +void cpu_thread::notify() { - if (UNLIKELY(state)) + if (id_type() == 1) { - if (check_state()) - { - throw cpu_flag::ret; - } + thread_ctrl::notify(*static_cast*>(this)); + } + else if (id_type() == 2) + { + thread_ctrl::notify(*static_cast*>(this)); + } + else + { + fmt::throw_exception("Invalid cpu_thread type"); } -} - -void cpu_thread::run() -{ - state -= cpu_flag::stop; - notify(); } std::string cpu_thread::dump() const diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 8272be5d45..5d270eef70 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -21,32 +21,53 @@ enum class cpu_flag : u32 __bitset_enum_max }; -// Flag set for pause state -constexpr bs_t cpu_state_pause = cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause; - -class cpu_thread : public old_thread +class cpu_thread { - void on_task() override final; + // PPU cache backward compatibility hack + char dummy[sizeof(std::shared_ptr)]; + +protected: + cpu_thread(u32 id); public: - virtual void on_stop() override; - virtual ~cpu_thread() override; + virtual ~cpu_thread(); + void operator()(); + void on_abort(); + // Self identifier const u32 id; - cpu_thread(u32 id); - // Public thread state atomic_bs_t state{+cpu_flag::stop}; // Process thread state, return true if the checker must return bool check_state(); - // Process thread state - void test_state(); + // Process thread state (pause) + [[nodiscard]] bool test_stopped() + { + if (UNLIKELY(state)) + { + if (check_state()) + { + return true; + } + } - // Run thread - void run(); + return false; + } + + // Test stopped state + bool is_stopped() + { + return !!(state & (cpu_flag::stop + cpu_flag::exit + cpu_flag::dbg_global_stop)); + } + + // Test paused state + bool is_paused() + { + return !!(state & (cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause)); + } // Check thread type u32 id_type() @@ -54,10 +75,16 @@ public: return id >> 24; } + // Upcast and notify + void notify(); + // Thread stats for external observation static atomic_t g_threads_created, g_threads_deleted; - // Print CPU state + // Get thread name + virtual std::string get_name() const = 0; + + // Get CPU state dump virtual std::string dump() const; // Thread entry point function @@ -79,3 +106,6 @@ inline cpu_thread* get_current_cpu_thread() noexcept return g_tls_current_cpu_thread; } + +class ppu_thread; +class spu_thread; diff --git a/rpcs3/Emu/Cell/Modules/cellAdec.cpp b/rpcs3/Emu/Cell/Modules/cellAdec.cpp index b74c31c4ec..8c6ea9054d 100644 --- a/rpcs3/Emu/Cell/Modules/cellAdec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellAdec.cpp @@ -71,7 +71,7 @@ public: bool use_ats_headers; AudioDecoder(s32 type, u32 addr, u32 size, vm::ptr func, u32 arg) - : ppu_thread("HLE Audio Decoder") + : ppu_thread({}, "", 0) , type(type) , memAddr(addr) , memSize(size) @@ -159,7 +159,7 @@ public: } } - virtual void cpu_task() override + void non_task() { while (true) { @@ -564,13 +564,7 @@ s32 cellAdecOpen(vm::ptr type, vm::ptr res, vm:: return CELL_ADEC_ERROR_ARG; } - auto&& adec = idm::make_ptr(type->audioCodecType, res->startAddr, res->totalMemSize, cb->cbFunc, cb->cbArg); - - *handle = adec->id; - - adec->run(); - - return CELL_OK; + fmt::throw_exception("cellAdec disabled, use LLE."); } s32 cellAdecOpenEx(vm::ptr type, vm::ptr res, vm::ptr cb, vm::ptr handle) @@ -582,13 +576,7 @@ s32 cellAdecOpenEx(vm::ptr type, vm::ptr res, return CELL_ADEC_ERROR_ARG; } - auto&& adec = idm::make_ptr(type->audioCodecType, res->startAddr, res->totalMemSize, cb->cbFunc, cb->cbArg); - - *handle = adec->id; - - adec->run(); - - return CELL_OK; + fmt::throw_exception("cellAdec disabled, use LLE."); } s32 cellAdecOpenExt(vm::ptr type, vm::ptr res, vm::ptr cb, vm::ptr handle) diff --git a/rpcs3/Emu/Cell/Modules/cellAudio.cpp b/rpcs3/Emu/Cell/Modules/cellAudio.cpp index c5da498547..e38362a565 100644 --- a/rpcs3/Emu/Cell/Modules/cellAudio.cpp +++ b/rpcs3/Emu/Cell/Modules/cellAudio.cpp @@ -349,7 +349,7 @@ error_code cellAudioInit() return CELL_OK; } -error_code cellAudioQuit() +error_code cellAudioQuit(ppu_thread& ppu) { cellAudio.warning("cellAudioQuit()"); @@ -367,6 +367,11 @@ error_code cellAudioQuit() while (true) { + if (ppu.is_stopped()) + { + return 0; + } + thread_ctrl::wait_for(1000); auto g_audio = g_idm->lock>(0); diff --git a/rpcs3/Emu/Cell/Modules/cellDmux.cpp b/rpcs3/Emu/Cell/Modules/cellDmux.cpp index c0124bd90c..9aaa7cfd52 100644 --- a/rpcs3/Emu/Cell/Modules/cellDmux.cpp +++ b/rpcs3/Emu/Cell/Modules/cellDmux.cpp @@ -194,7 +194,7 @@ public: atomic_t is_working; Demuxer(u32 addr, u32 size, vm::ptr func, u32 arg) - : ppu_thread("HLE Demuxer") + : ppu_thread({}, "", 0) , is_finished(false) , is_closed(false) , is_running(false) @@ -206,7 +206,7 @@ public: { } - virtual void cpu_task() override + void non_task() { DemuxerTask task; DemuxerStream stream = {}; @@ -987,13 +987,7 @@ s32 cellDmuxOpen(vm::cptr type, vm::cptr res, vm } // TODO: check demuxerResource and demuxerCb arguments - auto&& dmux = idm::make_ptr(res->memAddr, res->memSize, cb->cbMsgFunc, cb->cbArg); - - *handle = dmux->id; - - dmux->run(); - - return CELL_OK; + fmt::throw_exception("cellDmux disabled, use LLE."); } s32 cellDmuxOpenEx(vm::cptr type, vm::cptr resEx, vm::cptr cb, vm::ptr handle) @@ -1006,13 +1000,7 @@ s32 cellDmuxOpenEx(vm::cptr type, vm::cptr res } // TODO: check demuxerResourceEx and demuxerCb arguments - auto&& dmux = idm::make_ptr(resEx->memAddr, resEx->memSize, cb->cbMsgFunc, cb->cbArg); - - *handle = dmux->id; - - dmux->run(); - - return CELL_OK; + fmt::throw_exception("cellDmux disabled, use LLE."); } s32 cellDmuxOpenExt(vm::cptr type, vm::cptr resEx, vm::cptr cb, vm::ptr handle) @@ -1032,13 +1020,7 @@ s32 cellDmuxOpen2(vm::cptr type2, vm::cptr res } // TODO: check demuxerType2, demuxerResource2 and demuxerCb arguments - auto&& dmux = idm::make_ptr(res2->memAddr, res2->memSize, cb->cbMsgFunc, cb->cbArg); - - *handle = dmux->id; - - dmux->run(); - - return CELL_OK; + fmt::throw_exception("cellDmux disabled, use LLE."); } s32 cellDmuxClose(u32 handle) diff --git a/rpcs3/Emu/Cell/Modules/cellFs.cpp b/rpcs3/Emu/Cell/Modules/cellFs.cpp index efddcbd76f..1931b3940c 100644 --- a/rpcs3/Emu/Cell/Modules/cellFs.cpp +++ b/rpcs3/Emu/Cell/Modules/cellFs.cpp @@ -869,7 +869,7 @@ struct fs_aio_thread : ppu_thread { using ppu_thread::ppu_thread; - virtual void cpu_task() override + void non_task() { while (cmd64 cmd = cmd_wait()) { @@ -920,11 +920,7 @@ s32 cellFsAioInit(vm::cptr mount_point) // TODO: create AIO thread (if not exists) for specified mount point const auto m = fxm::make(); - if (m) - { - m->thread = idm::make_ptr("FS AIO Thread", 500); - m->thread->run(); - } + fmt::throw_exception("cellFsAio disabled, use LLE."); return CELL_OK; } @@ -961,8 +957,6 @@ s32 cellFsAioRead(vm::ptr aio, vm::ptr id, fs_aio_cb_t func) { aio, func }, }); - m->thread->notify(); - return CELL_OK; } @@ -987,8 +981,6 @@ s32 cellFsAioWrite(vm::ptr aio, vm::ptr id, fs_aio_cb_t func) { aio, func }, }); - m->thread->notify(); - return CELL_OK; } diff --git a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp index 0bf640c4d4..7d30c3a983 100644 --- a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp @@ -6,6 +6,7 @@ #include "Emu/Memory/vm.h" #include "Emu/RSX/GSRender.h" #include "cellGcmSys.h" +#include "sysPrxForUser.h" #include @@ -346,7 +347,7 @@ void _cellGcmFunc15(vm::ptr context) u32 g_defaultCommandBufferBegin, g_defaultCommandBufferFragmentCount; // Called by cellGcmInit -s32 _cellGcmInitBody(vm::pptr context, u32 cmdSize, u32 ioSize, u32 ioAddress) +s32 _cellGcmInitBody(ppu_thread& ppu, vm::pptr context, u32 cmdSize, u32 ioSize, u32 ioAddress) { cellGcmSys.warning("_cellGcmInitBody(context=**0x%x, cmdSize=0x%x, ioSize=0x%x, ioAddress=0x%x)", context, cmdSize, ioSize, ioAddress); @@ -429,8 +430,10 @@ s32 _cellGcmInitBody(vm::pptr context, u32 cmdSize, u32 ioSi ctrl.get = 0; ctrl.ref = 0; // Set later to -1 at RSX initialization - render->intr_thread = idm::make_ptr("_gcm_intr_thread", 1, 0x4000); - render->intr_thread->run(); + vm::var _tid; + vm::var _name = vm::make_str("_gcm_intr_thread"); + ppu_execute<&sys_ppu_thread_create>(ppu, +_tid, 128, 0, 1, 0x4000, 0, +_name); + render->intr_thread = idm::get>(*_tid); render->main_mem_addr = 0; render->isHLE = true; render->label_addr = m_config->gcm_info.label_addr; @@ -1380,7 +1383,11 @@ s32 cellGcmCallback(ppu_thread& ppu, vm::ptr context, u32 co if (isInCommandBufferExcept(getPos, newCommandBuffer.first, newCommandBuffer.second)) break; - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } + busy_wait(); } diff --git a/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp b/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp index 302789c85d..183a508247 100644 --- a/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp +++ b/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp @@ -242,7 +242,7 @@ s32 cellMsgDialogClose(f32 delay) { if (auto dlg = manager->get()) { - thread_ctrl::make_shared("cellMsgDialogClose() Thread", [=] + thread_ctrl::spawn("cellMsgDialogClose() Thread", [=] { while (get_system_time() < wait_until) { @@ -256,7 +256,7 @@ s32 cellMsgDialogClose(f32 delay) } dlg->close(); - })->detach(); + }); return CELL_OK; } @@ -269,7 +269,7 @@ s32 cellMsgDialogClose(f32 delay) return CELL_MSGDIALOG_ERROR_DIALOG_NOT_OPENED; } - thread_ctrl::make_shared("cellMsgDialogClose() Thread", [=]() + thread_ctrl::spawn("cellMsgDialogClose() Thread", [=]() { while (dlg->state == MsgDialogState::Open && get_system_time() < wait_until) { @@ -279,7 +279,7 @@ s32 cellMsgDialogClose(f32 delay) } dlg->on_close(CELL_MSGDIALOG_BUTTON_NONE); - })->detach(); + }); return CELL_OK; } diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp index 98756e8f96..a7245ec909 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp @@ -36,7 +36,7 @@ struct cell_error_t // Function prototypes //---------------------------------------------------------------------------- -bool spursKernelEntry(SPUThread& spu); +bool spursKernelEntry(spu_thread& spu); // SPURS Internals namespace _spurs @@ -599,18 +599,18 @@ s32 _spurs::create_handler(vm::ptr spurs, u32 ppuPriority) { using ppu_thread::ppu_thread; - virtual void cpu_task() override + void non_task() { BIND_FUNC(_spurs::handler_entry)(*this); } }; - auto&& eht = idm::make_ptr(std::string(spurs->prefix, spurs->prefixSize) + "SpursHdlr0", ppuPriority, 0x4000); + // auto eht = idm::make_ptr(std::string(spurs->prefix, spurs->prefixSize) + "SpursHdlr0", ppuPriority, 0x4000); - spurs->ppu0 = eht->id; + // spurs->ppu0 = eht->id; - eht->gpr[3] = spurs.addr(); - eht->run(); + // eht->gpr[3] = spurs.addr(); + // eht->run(); return CELL_OK; } @@ -796,15 +796,15 @@ s32 _spurs::create_event_helper(ppu_thread& ppu, vm::ptr spurs, u32 p { using ppu_thread::ppu_thread; - virtual void cpu_task() override + void non_task() { BIND_FUNC(_spurs::event_helper_entry)(*this); } }; - auto&& eht = idm::make_ptr(std::string(spurs->prefix, spurs->prefixSize) + "SpursHdlr1", ppuPriority, 0x8000); + //auto eht = idm::make_ptr(std::string(spurs->prefix, spurs->prefixSize) + "SpursHdlr1", ppuPriority, 0x8000); - if (!eht) + //if (!eht) { sys_event_port_disconnect(spurs->eventPort); sys_event_port_destroy(spurs->eventPort); @@ -818,10 +818,10 @@ s32 _spurs::create_event_helper(ppu_thread& ppu, vm::ptr spurs, u32 p return CELL_SPURS_CORE_ERROR_STAT; } - eht->gpr[3] = spurs.addr(); - eht->run(); + // eht->gpr[3] = spurs.addr(); + // eht->run(); - spurs->ppu1 = eht->id; + // spurs->ppu1 = eht->id; return CELL_OK; } @@ -1118,7 +1118,7 @@ s32 _spurs::initialize(ppu_thread& ppu, vm::ptr spurs, u32 revision, } // entry point cannot be initialized immediately because SPU LS will be rewritten by sys_spu_thread_group_start() - //idm::get(spurs->spus[num])->custom_task = [entry = spurs->spuImg.entry_point](SPUThread& spu) + //idm::get>(spurs->spus[num])->custom_task = [entry = spurs->spuImg.entry_point](spu_thread& spu) { // Disabled //spu.RegisterHleFunction(entry, spursKernelEntry); diff --git a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp index b4a1a6cf86..133de86a44 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp @@ -26,57 +26,57 @@ extern logs::channel cellSpurs; // SPURS utility functions // static void cellSpursModulePutTrace(CellSpursTracePacket* packet, u32 dmaTagId); -static u32 cellSpursModulePollStatus(SPUThread& spu, u32* status); -static void cellSpursModuleExit(SPUThread& spu); +static u32 cellSpursModulePollStatus(spu_thread& spu, u32* status); +static void cellSpursModuleExit(spu_thread& spu); -static bool spursDma(SPUThread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); -static u32 spursDmaGetCompletionStatus(SPUThread& spu, u32 tagMask); -static u32 spursDmaWaitForCompletion(SPUThread& spu, u32 tagMask, bool waitForAll = true); -static void spursHalt(SPUThread& spu); +static bool spursDma(spu_thread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); +static u32 spursDmaGetCompletionStatus(spu_thread& spu, u32 tagMask); +static u32 spursDmaWaitForCompletion(spu_thread& spu, u32 tagMask, bool waitForAll = true); +static void spursHalt(spu_thread& spu); // // SPURS kernel functions // -static bool spursKernel1SelectWorkload(SPUThread& spu); -static bool spursKernel2SelectWorkload(SPUThread& spu); -static void spursKernelDispatchWorkload(SPUThread& spu, u64 widAndPollStatus); -static bool spursKernelWorkloadExit(SPUThread& spu); -bool spursKernelEntry(SPUThread& spu); +static bool spursKernel1SelectWorkload(spu_thread& spu); +static bool spursKernel2SelectWorkload(spu_thread& spu); +static void spursKernelDispatchWorkload(spu_thread& spu, u64 widAndPollStatus); +static bool spursKernelWorkloadExit(spu_thread& spu); +bool spursKernelEntry(spu_thread& spu); // // SPURS system workload functions // -static bool spursSysServiceEntry(SPUThread& spu); +static bool spursSysServiceEntry(spu_thread& spu); // TODO: Exit -static void spursSysServiceIdleHandler(SPUThread& spu, SpursKernelContext* ctxt); -static void spursSysServiceMain(SPUThread& spu, u32 pollStatus); -static void spursSysServiceProcessRequests(SPUThread& spu, SpursKernelContext* ctxt); -static void spursSysServiceActivateWorkload(SPUThread& spu, SpursKernelContext* ctxt); +static void spursSysServiceIdleHandler(spu_thread& spu, SpursKernelContext* ctxt); +static void spursSysServiceMain(spu_thread& spu, u32 pollStatus); +static void spursSysServiceProcessRequests(spu_thread& spu, SpursKernelContext* ctxt); +static void spursSysServiceActivateWorkload(spu_thread& spu, SpursKernelContext* ctxt); // TODO: Deactivate workload -static void spursSysServiceUpdateShutdownCompletionEvents(SPUThread& spu, SpursKernelContext* ctxt, u32 wklShutdownBitSet); -static void spursSysServiceTraceSaveCount(SPUThread& spu, SpursKernelContext* ctxt); -static void spursSysServiceTraceUpdate(SPUThread& spu, SpursKernelContext* ctxt, u32 arg2, u32 arg3, u32 forceNotify); +static void spursSysServiceUpdateShutdownCompletionEvents(spu_thread& spu, SpursKernelContext* ctxt, u32 wklShutdownBitSet); +static void spursSysServiceTraceSaveCount(spu_thread& spu, SpursKernelContext* ctxt); +static void spursSysServiceTraceUpdate(spu_thread& spu, SpursKernelContext* ctxt, u32 arg2, u32 arg3, u32 forceNotify); // TODO: Deactivate trace // TODO: System workload entry -static void spursSysServiceCleanupAfterSystemWorkload(SPUThread& spu, SpursKernelContext* ctxt); +static void spursSysServiceCleanupAfterSystemWorkload(spu_thread& spu, SpursKernelContext* ctxt); // // SPURS taskset policy module functions // -static bool spursTasksetEntry(SPUThread& spu); -static bool spursTasksetSyscallEntry(SPUThread& spu); -static void spursTasksetResumeTask(SPUThread& spu); -static void spursTasksetStartTask(SPUThread& spu, CellSpursTaskArgument& taskArgs); -static s32 spursTasksetProcessRequest(SPUThread& spu, s32 request, u32* taskId, u32* isWaiting); -static void spursTasksetProcessPollStatus(SPUThread& spu, u32 pollStatus); -static bool spursTasksetPollStatus(SPUThread& spu); -static void spursTasksetExit(SPUThread& spu); -static void spursTasksetOnTaskExit(SPUThread& spu, u64 addr, u32 taskId, s32 exitCode, u64 args); -static s32 spursTasketSaveTaskContext(SPUThread& spu); -static void spursTasksetDispatch(SPUThread& spu); -static s32 spursTasksetProcessSyscall(SPUThread& spu, u32 syscallNum, u32 args); -static void spursTasksetInit(SPUThread& spu, u32 pollStatus); -static s32 spursTasksetLoadElf(SPUThread& spu, u32* entryPoint, u32* lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); +static bool spursTasksetEntry(spu_thread& spu); +static bool spursTasksetSyscallEntry(spu_thread& spu); +static void spursTasksetResumeTask(spu_thread& spu); +static void spursTasksetStartTask(spu_thread& spu, CellSpursTaskArgument& taskArgs); +static s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* isWaiting); +static void spursTasksetProcessPollStatus(spu_thread& spu, u32 pollStatus); +static bool spursTasksetPollStatus(spu_thread& spu); +static void spursTasksetExit(spu_thread& spu); +static void spursTasksetOnTaskExit(spu_thread& spu, u64 addr, u32 taskId, s32 exitCode, u64 args); +static s32 spursTasketSaveTaskContext(spu_thread& spu); +static void spursTasksetDispatch(spu_thread& spu); +static s32 spursTasksetProcessSyscall(spu_thread& spu, u32 syscallNum, u32 args); +static void spursTasksetInit(spu_thread& spu, u32 pollStatus); +static s32 spursTasksetLoadElf(spu_thread& spu, u32* entryPoint, u32* lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); //---------------------------------------------------------------------------- // SPURS utility functions @@ -89,7 +89,7 @@ void cellSpursModulePutTrace(CellSpursTracePacket* packet, u32 dmaTagId) } // Check for execution right requests -u32 cellSpursModulePollStatus(SPUThread& spu, u32* status) +u32 cellSpursModulePollStatus(spu_thread& spu, u32* status) { auto ctxt = vm::_ptr(spu.offset + 0x100); @@ -114,7 +114,7 @@ u32 cellSpursModulePollStatus(SPUThread& spu, u32* status) } // Exit current workload -void cellSpursModuleExit(SPUThread& spu) +void cellSpursModuleExit(spu_thread& spu) { auto ctxt = vm::_ptr(spu.offset + 0x100); spu.pc = ctxt->exitToKernelAddr; @@ -122,7 +122,7 @@ void cellSpursModuleExit(SPUThread& spu) } // Execute a DMA operation -bool spursDma(SPUThread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) +bool spursDma(spu_thread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) { spu.set_ch_value(MFC_LSA, lsa); spu.set_ch_value(MFC_EAH, (u32)(ea >> 32)); @@ -141,7 +141,7 @@ bool spursDma(SPUThread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) } // Get the status of DMA operations -u32 spursDmaGetCompletionStatus(SPUThread& spu, u32 tagMask) +u32 spursDmaGetCompletionStatus(spu_thread& spu, u32 tagMask) { spu.set_ch_value(MFC_WrTagMask, tagMask); spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_IMMEDIATE); @@ -149,7 +149,7 @@ u32 spursDmaGetCompletionStatus(SPUThread& spu, u32 tagMask) } // Wait for DMA operations to complete -u32 spursDmaWaitForCompletion(SPUThread& spu, u32 tagMask, bool waitForAll) +u32 spursDmaWaitForCompletion(spu_thread& spu, u32 tagMask, bool waitForAll) { spu.set_ch_value(MFC_WrTagMask, tagMask); spu.set_ch_value(MFC_WrTagUpdate, waitForAll ? MFC_TAG_UPDATE_ALL : MFC_TAG_UPDATE_ANY); @@ -157,12 +157,12 @@ u32 spursDmaWaitForCompletion(SPUThread& spu, u32 tagMask, bool waitForAll) } // Halt the SPU -void spursHalt(SPUThread& spu) +void spursHalt(spu_thread& spu) { spu.halt(); } -void sys_spu_thread_exit(SPUThread& spu, s32 status) +void sys_spu_thread_exit(spu_thread& spu, s32 status) { // Cancel any pending status update requests spu.set_ch_value(MFC_WrTagUpdate, 0); @@ -178,7 +178,7 @@ void sys_spu_thread_exit(SPUThread& spu, s32 status) spu.stop_and_signal(0x102); } -void sys_spu_thread_group_exit(SPUThread& spu, s32 status) +void sys_spu_thread_group_exit(spu_thread& spu, s32 status) { // Cancel any pending status update requests spu.set_ch_value(MFC_WrTagUpdate, 0); @@ -194,7 +194,7 @@ void sys_spu_thread_group_exit(SPUThread& spu, s32 status) spu.stop_and_signal(0x101); } -s32 sys_spu_thread_send_event(SPUThread& spu, u8 spup, u32 data0, u32 data1) +s32 sys_spu_thread_send_event(spu_thread& spu, u8 spup, u32 data0, u32 data1) { if (spup > 0x3F) { @@ -211,7 +211,7 @@ s32 sys_spu_thread_send_event(SPUThread& spu, u8 spup, u32 data0, u32 data1) return static_cast(spu.get_ch_value(SPU_RdInMbox)); } -s32 sys_spu_thread_switch_system_module(SPUThread& spu, u32 status) +s32 sys_spu_thread_switch_system_module(spu_thread& spu, u32 status) { if (spu.get_ch_count(SPU_RdInMbox)) { @@ -246,7 +246,7 @@ s32 sys_spu_thread_switch_system_module(SPUThread& spu, u32 status) //---------------------------------------------------------------------------- // Select a workload to run -bool spursKernel1SelectWorkload(SPUThread& spu) +bool spursKernel1SelectWorkload(spu_thread& spu) { auto ctxt = vm::_ptr(spu.offset + 0x100); @@ -430,7 +430,7 @@ bool spursKernel1SelectWorkload(SPUThread& spu) } // Select a workload to run -bool spursKernel2SelectWorkload(SPUThread& spu) +bool spursKernel2SelectWorkload(spu_thread& spu) { auto ctxt = vm::_ptr(spu.offset + 0x100); @@ -603,7 +603,7 @@ bool spursKernel2SelectWorkload(SPUThread& spu) } // SPURS kernel dispatch workload -void spursKernelDispatchWorkload(SPUThread& spu, u64 widAndPollStatus) +void spursKernelDispatchWorkload(spu_thread& spu, u64 widAndPollStatus) { auto ctxt = vm::_ptr(spu.offset + 0x100); auto isKernel2 = ctxt->spurs->flags1 & SF1_32_WORKLOADS ? true : false; @@ -655,7 +655,7 @@ void spursKernelDispatchWorkload(SPUThread& spu, u64 widAndPollStatus) } // SPURS kernel workload exit -bool spursKernelWorkloadExit(SPUThread& spu) +bool spursKernelWorkloadExit(spu_thread& spu) { auto ctxt = vm::_ptr(spu.offset + 0x100); auto isKernel2 = ctxt->spurs->flags1 & SF1_32_WORKLOADS ? true : false; @@ -676,10 +676,8 @@ bool spursKernelWorkloadExit(SPUThread& spu) } // SPURS kernel entry point -bool spursKernelEntry(SPUThread& spu) +bool spursKernelEntry(spu_thread& spu) { - thread_ctrl::eternalize(); - auto ctxt = vm::_ptr(spu.offset + 0x100); memset(ctxt, 0, sizeof(SpursKernelContext)); @@ -728,7 +726,7 @@ bool spursKernelEntry(SPUThread& spu) //---------------------------------------------------------------------------- // Entry point of the system service -bool spursSysServiceEntry(SPUThread& spu) +bool spursSysServiceEntry(spu_thread& spu) { auto ctxt = vm::_ptr(spu.offset + spu.gpr[3]._u32[3]); auto arg = spu.gpr[4]._u64[1]; @@ -757,7 +755,7 @@ bool spursSysServiceEntry(SPUThread& spu) } // Wait for an external event or exit the SPURS thread group if no workloads can be scheduled -void spursSysServiceIdleHandler(SPUThread& spu, SpursKernelContext* ctxt) +void spursSysServiceIdleHandler(spu_thread& spu, SpursKernelContext* ctxt) { bool shouldExit; @@ -865,7 +863,7 @@ void spursSysServiceIdleHandler(SPUThread& spu, SpursKernelContext* ctxt) } // Main function for the system service -void spursSysServiceMain(SPUThread& spu, u32 pollStatus) +void spursSysServiceMain(spu_thread& spu, u32 pollStatus) { auto ctxt = vm::_ptr(spu.offset + 0x100); @@ -970,7 +968,7 @@ void spursSysServiceMain(SPUThread& spu, u32 pollStatus) } // Process any requests -void spursSysServiceProcessRequests(SPUThread& spu, SpursKernelContext* ctxt) +void spursSysServiceProcessRequests(spu_thread& spu, SpursKernelContext* ctxt) { bool updateTrace = false; bool updateWorkload = false; @@ -1023,7 +1021,7 @@ void spursSysServiceProcessRequests(SPUThread& spu, SpursKernelContext* ctxt) } // Activate a workload -void spursSysServiceActivateWorkload(SPUThread& spu, SpursKernelContext* ctxt) +void spursSysServiceActivateWorkload(spu_thread& spu, SpursKernelContext* ctxt) { auto spurs = vm::_ptr(spu.offset + 0x100); std::memcpy(vm::base(spu.offset + 0x30000), ctxt->spurs->wklInfo1, 0x200); @@ -1121,7 +1119,7 @@ void spursSysServiceActivateWorkload(SPUThread& spu, SpursKernelContext* ctxt) } // Update shutdown completion events -void spursSysServiceUpdateShutdownCompletionEvents(SPUThread& spu, SpursKernelContext* ctxt, u32 wklShutdownBitSet) +void spursSysServiceUpdateShutdownCompletionEvents(spu_thread& spu, SpursKernelContext* ctxt, u32 wklShutdownBitSet) { // Mark the workloads in wklShutdownBitSet as completed and also generate a bit set of the completed // workloads that have a shutdown completion hook registered @@ -1164,7 +1162,7 @@ void spursSysServiceUpdateShutdownCompletionEvents(SPUThread& spu, SpursKernelCo } // Update the trace count for this SPU -void spursSysServiceTraceSaveCount(SPUThread& spu, SpursKernelContext* ctxt) +void spursSysServiceTraceSaveCount(spu_thread& spu, SpursKernelContext* ctxt) { if (ctxt->traceBuffer) { @@ -1174,7 +1172,7 @@ void spursSysServiceTraceSaveCount(SPUThread& spu, SpursKernelContext* ctxt) } // Update trace control -void spursSysServiceTraceUpdate(SPUThread& spu, SpursKernelContext* ctxt, u32 arg2, u32 arg3, u32 forceNotify) +void spursSysServiceTraceUpdate(spu_thread& spu, SpursKernelContext* ctxt, u32 arg2, u32 arg3, u32 forceNotify) { bool notify; @@ -1238,7 +1236,7 @@ void spursSysServiceTraceUpdate(SPUThread& spu, SpursKernelContext* ctxt, u32 ar } // Restore state after executing the system workload -void spursSysServiceCleanupAfterSystemWorkload(SPUThread& spu, SpursKernelContext* ctxt) +void spursSysServiceCleanupAfterSystemWorkload(spu_thread& spu, SpursKernelContext* ctxt) { u8 wklId; @@ -1314,7 +1312,7 @@ enum SpursTasksetRequest }; // Taskset PM entry point -bool spursTasksetEntry(SPUThread& spu) +bool spursTasksetEntry(spu_thread& spu) { auto ctxt = vm::_ptr(spu.offset + 0x2700); auto kernelCtxt = vm::_ptr(spu.offset + spu.gpr[3]._u32[3]); @@ -1353,7 +1351,7 @@ bool spursTasksetEntry(SPUThread& spu) } // Entry point into the Taskset PM for task syscalls -bool spursTasksetSyscallEntry(SPUThread& spu) +bool spursTasksetSyscallEntry(spu_thread& spu) { auto ctxt = vm::_ptr(spu.offset + 0x2700); @@ -1384,7 +1382,7 @@ bool spursTasksetSyscallEntry(SPUThread& spu) } // Resume a task -void spursTasksetResumeTask(SPUThread& spu) +void spursTasksetResumeTask(spu_thread& spu) { auto ctxt = vm::_ptr(spu.offset + 0x2700); @@ -1400,7 +1398,7 @@ void spursTasksetResumeTask(SPUThread& spu) } // Start a task -void spursTasksetStartTask(SPUThread& spu, CellSpursTaskArgument& taskArgs) +void spursTasksetStartTask(spu_thread& spu, CellSpursTaskArgument& taskArgs) { auto ctxt = vm::_ptr(spu.offset + 0x2700); auto taskset = vm::_ptr(spu.offset + 0x2700); @@ -1418,7 +1416,7 @@ void spursTasksetStartTask(SPUThread& spu, CellSpursTaskArgument& taskArgs) } // Process a request and update the state of the taskset -s32 spursTasksetProcessRequest(SPUThread& spu, s32 request, u32* taskId, u32* isWaiting) +s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* isWaiting) { auto kernelCtxt = vm::_ptr(spu.offset + 0x100); auto ctxt = vm::_ptr(spu.offset + 0x2700); @@ -1611,7 +1609,7 @@ s32 spursTasksetProcessRequest(SPUThread& spu, s32 request, u32* taskId, u32* is } // Process pollStatus received from the SPURS kernel -void spursTasksetProcessPollStatus(SPUThread& spu, u32 pollStatus) +void spursTasksetProcessPollStatus(spu_thread& spu, u32 pollStatus) { if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) { @@ -1620,7 +1618,7 @@ void spursTasksetProcessPollStatus(SPUThread& spu, u32 pollStatus) } // Check execution rights -bool spursTasksetPollStatus(SPUThread& spu) +bool spursTasksetPollStatus(spu_thread& spu) { u32 pollStatus; @@ -1634,7 +1632,7 @@ bool spursTasksetPollStatus(SPUThread& spu) } // Exit the Taskset PM -void spursTasksetExit(SPUThread& spu) +void spursTasksetExit(spu_thread& spu) { auto ctxt = vm::_ptr(spu.offset + 0x2700); @@ -1656,7 +1654,7 @@ void spursTasksetExit(SPUThread& spu) } // Invoked when a task exits -void spursTasksetOnTaskExit(SPUThread& spu, u64 addr, u32 taskId, s32 exitCode, u64 args) +void spursTasksetOnTaskExit(spu_thread& spu, u64 addr, u32 taskId, s32 exitCode, u64 args) { auto ctxt = vm::_ptr(spu.offset + 0x2700); @@ -1670,7 +1668,7 @@ void spursTasksetOnTaskExit(SPUThread& spu, u64 addr, u32 taskId, s32 exitCode, } // Save the context of a task -s32 spursTasketSaveTaskContext(SPUThread& spu) +s32 spursTasketSaveTaskContext(spu_thread& spu) { auto ctxt = vm::_ptr(spu.offset + 0x2700); auto taskInfo = vm::_ptr(spu.offset + 0x2780); @@ -1733,7 +1731,7 @@ s32 spursTasketSaveTaskContext(SPUThread& spu) } // Taskset dispatcher -void spursTasksetDispatch(SPUThread& spu) +void spursTasksetDispatch(spu_thread& spu) { auto ctxt = vm::_ptr(spu.offset + 0x2700); auto taskset = vm::_ptr(spu.offset + 0x2700); @@ -1864,7 +1862,7 @@ void spursTasksetDispatch(SPUThread& spu) } // Process a syscall request -s32 spursTasksetProcessSyscall(SPUThread& spu, u32 syscallNum, u32 args) +s32 spursTasksetProcessSyscall(spu_thread& spu, u32 syscallNum, u32 args) { auto ctxt = vm::_ptr(spu.offset + 0x2700); auto taskset = vm::_ptr(spu.offset + 0x2700); @@ -1974,7 +1972,7 @@ s32 spursTasksetProcessSyscall(SPUThread& spu, u32 syscallNum, u32 args) } // Initialise the Taskset PM -void spursTasksetInit(SPUThread& spu, u32 pollStatus) +void spursTasksetInit(spu_thread& spu, u32 pollStatus) { auto ctxt = vm::_ptr(spu.offset + 0x2700); auto kernelCtxt = vm::_ptr(spu.offset + 0x100); @@ -1995,7 +1993,7 @@ void spursTasksetInit(SPUThread& spu, u32 pollStatus) } // Load an ELF -s32 spursTasksetLoadElf(SPUThread& spu, u32* entryPoint, u32* lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) +s32 spursTasksetLoadElf(spu_thread& spu, u32* entryPoint, u32* lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) { if (elfAddr == 0 || (elfAddr & 0x0F) != 0) { diff --git a/rpcs3/Emu/Cell/Modules/cellSync.cpp b/rpcs3/Emu/Cell/Modules/cellSync.cpp index 50d5d6aca9..82aa1eb4f3 100644 --- a/rpcs3/Emu/Cell/Modules/cellSync.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSync.cpp @@ -79,7 +79,10 @@ error_code cellSyncMutexLock(ppu_thread& ppu, vm::ptr mutex) // Wait until rel value is equal to old acq value while (mutex->ctrl.load().rel != order) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } _mm_mfence(); @@ -169,7 +172,10 @@ error_code cellSyncBarrierNotify(ppu_thread& ppu, vm::ptr barri while (!barrier->ctrl.atomic_op<&CellSyncBarrier::try_notify>()) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } return CELL_OK; @@ -217,7 +223,10 @@ error_code cellSyncBarrierWait(ppu_thread& ppu, vm::ptr barrier while (!barrier->ctrl.atomic_op<&CellSyncBarrier::try_wait>()) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } return CELL_OK; @@ -293,7 +302,10 @@ error_code cellSyncRwmRead(ppu_thread& ppu, vm::ptr rwm, vm::ptrctrl.atomic_op<&CellSyncRwm::try_read_begin>()) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } // copy data to buffer @@ -357,13 +369,19 @@ error_code cellSyncRwmWrite(ppu_thread& ppu, vm::ptr rwm, vm::cptr< // wait until `writers` is zero, set to 1 while (!rwm->ctrl.atomic_op<&CellSyncRwm::try_write_begin>()) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } // wait until `readers` is zero while (rwm->ctrl.load().readers != 0) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } // copy data from buffer @@ -462,7 +480,10 @@ error_code cellSyncQueuePush(ppu_thread& ppu, vm::ptr queue, vm:: return CellSyncQueue::try_push_begin(ctrl, depth, &position); })) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } // copy data from the buffer at the position @@ -530,7 +551,10 @@ error_code cellSyncQueuePop(ppu_thread& ppu, vm::ptr queue, vm::p return CellSyncQueue::try_pop_begin(ctrl, depth, &position); })) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } // copy data at the position to the buffer @@ -598,7 +622,10 @@ error_code cellSyncQueuePeek(ppu_thread& ppu, vm::ptr queue, vm:: return CellSyncQueue::try_peek_begin(ctrl, depth, &position); })) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } // copy data at the position to the buffer @@ -680,12 +707,18 @@ error_code cellSyncQueueClear(ppu_thread& ppu, vm::ptr queue) while (!queue->ctrl.atomic_op<&CellSyncQueue::try_clear_begin_1>()) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } while (!queue->ctrl.atomic_op<&CellSyncQueue::try_clear_begin_2>()) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } queue->ctrl.exchange({ 0, 0 }); @@ -1120,7 +1153,10 @@ error_code _cellSyncLFQueuePushBody(ppu_thread& ppu, vm::ptr qu break; } - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } const s32 depth = queue->m_depth; @@ -1415,7 +1451,10 @@ error_code _cellSyncLFQueuePopBody(ppu_thread& ppu, vm::ptr que break; } - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } const s32 depth = queue->m_depth; diff --git a/rpcs3/Emu/Cell/Modules/cellSysutil.cpp b/rpcs3/Emu/Cell/Modules/cellSysutil.cpp index accb2f131f..84da30ecc8 100644 --- a/rpcs3/Emu/Cell/Modules/cellSysutil.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSysutil.cpp @@ -232,20 +232,23 @@ s32 cellSysutilGetSystemParamString(CellSysutilParamId id, vm::ptr buf, u3 return CELL_OK; } -s32 cellSysutilCheckCallback(ppu_thread& ppu) +error_code cellSysutilCheckCallback(ppu_thread& ppu) { cellSysutil.trace("cellSysutilCheckCallback()"); const auto cbm = fxm::get_always(); - while (auto&& func = cbm->get_cb()) + while (auto func = cbm->get_cb()) { if (s32 res = func(ppu)) { - return res; + return not_an_error(res); } - thread_ctrl::test(); + if (ppu.is_stopped()) + { + return 0; + } } return CELL_OK; diff --git a/rpcs3/Emu/Cell/Modules/cellVdec.cpp b/rpcs3/Emu/Cell/Modules/cellVdec.cpp index cb90cbed31..f56a55143a 100644 --- a/rpcs3/Emu/Cell/Modules/cellVdec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellVdec.cpp @@ -19,6 +19,8 @@ extern "C" #include #include #include +#include "Utilities/lockless.h" +#include std::mutex g_mutex_avcodec_open2; @@ -26,15 +28,13 @@ LOG_CHANNEL(cellVdec); vm::gvar _cell_vdec_prx_ver; // ??? -enum class vdec_cmd : u32 -{ - null, +constexpr struct vdec_start_seq_t{} vdec_start_seq{}; +constexpr struct vdec_close_t{} vdec_close{}; - start_seq, - end_seq, - decode, - set_frc, - close, +struct vdec_cmd +{ + s32 mode; + CellVdecAuInfo au; }; struct vdec_frame @@ -60,14 +60,19 @@ struct vdec_frame } }; -struct vdec_thread : ppu_thread +struct vdec_context final { + static constexpr u32 id_base = 0xf0000000; + static constexpr u32 id_step = 0x00000100; + static constexpr u32 id_count = 1024; + AVCodec* codec{}; AVCodecContext* ctx{}; SwsContext* sws{}; - const s32 type; - const u32 profile; + shared_mutex mutex; // Used for 'out' queue (TODO) + + const u32 type; const u32 mem_addr; const u32 mem_size; const vm::ptr cb_func; @@ -79,16 +84,16 @@ struct vdec_thread : ppu_thread u64 next_dts{}; u64 ppu_tid{}; - std::mutex mutex; std::queue out; - u32 max_frames = 60; + atomic_t out_max = 60; atomic_t au_count{0}; - vdec_thread(s32 type, u32 profile, u32 addr, u32 size, vm::ptr func, u32 arg, u32 prio, u32 stack) - : ppu_thread("HLE Video Decoder", prio, stack) - , type(type) - , profile(profile) + notifier in_cv; + lf_queue> in_cmd; + + vdec_context(s32 type, u32 profile, u32 addr, u32 size, vm::ptr func, u32 arg) + : type(type) , mem_addr(addr) , mem_size(size) , cb_func(func) @@ -144,55 +149,51 @@ struct vdec_thread : ppu_thread } } - virtual ~vdec_thread() override + ~vdec_context() { avcodec_close(ctx); avcodec_free_context(&ctx); sws_freeContext(sws); } - virtual std::string dump() const override + void exec(ppu_thread& ppu, u32 vid) { - // TODO - return ppu_thread::dump(); - } + ppu_tid = ppu.id; - virtual void cpu_task() override - { - while (cmd64 cmd = cmd_wait()) + std::shared_lock no_lock(in_cv, std::try_to_lock); + + for (auto cmds = in_cmd.pop_all(); !Emu.IsStopped(); cmds ? cmds = cmds->pop_all() : cmds = in_cmd.pop_all()) { - switch (vdec_cmd vcmd = cmd.arg1()) + if (!cmds) { - case vdec_cmd::start_seq: + in_cv.wait(1000); + continue; + } + + if (std::get_if(&cmds->get())) { - cmd_pop(); avcodec_flush_buffers(ctx); frc_set = 0; // TODO: ??? next_pts = 0; next_dts = 0; cellVdec.trace("Start sequence..."); - break; } - - case vdec_cmd::decode: - case vdec_cmd::end_seq: + else if (auto* cmd = std::get_if(&cmds->get())) { AVPacket packet{}; packet.pos = -1; u64 au_usrd{}; - if (vcmd == vdec_cmd::decode) + if (cmd->mode != -1) { - const u32 au_mode = cmd.arg2(); // TODO - const u32 au_addr = cmd_get(1).arg1(); - const u32 au_size = cmd_get(1).arg2(); - const u64 au_pts = cmd_get(2).as(); - const u64 au_dts = cmd_get(3).as(); - au_usrd = cmd_get(4).as(); // TODO - const u64 au_spec = cmd_get(5).as(); // Unused - cmd_pop(5); + const u32 au_mode = cmd->mode; + const u32 au_addr = cmd->au.startAddr; + const u32 au_size = cmd->au.size; + const u64 au_pts = u64{cmd->au.pts.upper} << 32 | cmd->au.pts.lower; + const u64 au_dts = u64{cmd->au.dts.upper} << 32 | cmd->au.dts.lower; + au_usrd = cmd->au.userData; packet.data = vm::_ptr(au_addr); packet.size = au_size; @@ -217,16 +218,14 @@ struct vdec_thread : ppu_thread } else { - cmd_pop(); - packet.pts = AV_NOPTS_VALUE; packet.dts = AV_NOPTS_VALUE; cellVdec.trace("End sequence..."); } - while (max_frames) + while (out_max) { - if (vcmd == vdec_cmd::end_seq) + if (cmd->mode == -1) { break; } @@ -356,59 +355,52 @@ struct vdec_thread : ppu_thread std::lock_guard{mutex}, out.push(std::move(frame)); - cb_func(*this, id, CELL_VDEC_MSG_TYPE_PICOUT, CELL_OK, cb_arg); - lv2_obj::sleep(*this); + cb_func(ppu, vid, CELL_VDEC_MSG_TYPE_PICOUT, CELL_OK, cb_arg); + lv2_obj::sleep(ppu); } - if (vcmd == vdec_cmd::decode) + if (cmd->mode != -1) { break; } } - if (max_frames) + if (out_max) { - cb_func(*this, id, vcmd == vdec_cmd::decode ? CELL_VDEC_MSG_TYPE_AUDONE : CELL_VDEC_MSG_TYPE_SEQDONE, CELL_OK, cb_arg); - lv2_obj::sleep(*this); + cb_func(ppu, vid, cmd->mode != -1 ? CELL_VDEC_MSG_TYPE_AUDONE : CELL_VDEC_MSG_TYPE_SEQDONE, CELL_OK, cb_arg); + lv2_obj::sleep(ppu); } - if (vcmd == vdec_cmd::decode) + if (cmd->mode != -1) { au_count--; } - while (std::lock_guard{mutex}, max_frames && out.size() > max_frames) + while (!Emu.IsStopped() && out_max && (std::lock_guard{mutex}, out.size() > out_max)) { - thread_ctrl::wait(); + in_cv.wait(1000); } - + } + else if (auto* frc = std::get_if(&cmds->get())) + { + frc_set = *frc; + } + else + { break; } - - case vdec_cmd::set_frc: - { - cmd_pop(); - frc_set = cmd.arg2(); - break; - } - - case vdec_cmd::close: - { - cmd_pop(); - state += cpu_flag::exit; - return; - } - - default: - { - fmt::throw_exception("Unknown command (0x%x)" HERE, (u32)vcmd); - } - } } } }; -u32 vdecQueryAttr(s32 type, u32 profile, u32 spec_addr /* may be 0 */, vm::ptr attr) +static void vdecEntry(ppu_thread& ppu, u32 vid) +{ + idm::get(vid)->exec(ppu, vid); + + _sys_ppu_thread_exit(ppu, 0); +} + +static u32 vdecQueryAttr(s32 type, u32 profile, u32 spec_addr /* may be 0 */, vm::ptr attr) { switch (type) // TODO: check profile levels { @@ -440,51 +432,51 @@ s32 cellVdecQueryAttrEx(vm::cptr type, vm::ptr att return vdecQueryAttr(type->codecType, type->profileLevel, type->codecSpecificInfo_addr, attr); } +template +static s32 vdecOpen(ppu_thread& ppu, T type, U res, vm::cptr cb, vm::ptr handle) +{ + // Create decoder context + const u32 vid = idm::make(type->codecType, type->profileLevel, res->memAddr, res->memSize, cb->cbFunc, cb->cbArg); + + // Run thread + vm::var _tid; + vm::var _name = vm::make_str("HLE Video Decoder"); + ppu_execute<&sys_ppu_thread_create>(ppu, +_tid, 0, vid, +res->ppuThreadPriority, +res->ppuThreadStackSize, SYS_PPU_THREAD_CREATE_INTERRUPT, +_name); + *handle = vid; + + const auto thrd = idm::get>(*_tid); + + thrd->cmd_list + ({ + { ppu_cmd::set_args, 1 }, u64{vid}, + { ppu_cmd::hle_call, FIND_FUNC(vdecEntry) }, + }); + + thrd->state -= cpu_flag::stop; + thread_ctrl::notify(*thrd); + + return CELL_OK; +} + s32 cellVdecOpen(ppu_thread& ppu, vm::cptr type, vm::cptr res, vm::cptr cb, vm::ptr handle) { cellVdec.warning("cellVdecOpen(type=*0x%x, res=*0x%x, cb=*0x%x, handle=*0x%x)", type, res, cb, handle); - // Create decoder thread - auto&& vdec = idm::make_ptr(type->codecType, type->profileLevel, res->memAddr, res->memSize, cb->cbFunc, cb->cbArg, res->ppuThreadPriority, res->ppuThreadStackSize); - - // Hack: store thread id (normally it should be pointer) - *handle = vdec->id; - - vm::var _tid; - ppu_execute<&sys_ppu_thread_create>(ppu, +_tid, 1148, 0, 900, 0x4000, SYS_PPU_THREAD_CREATE_INTERRUPT, vm::null); - vdec->gpr[13] = idm::get(*_tid)->gpr[13]; - vdec->ppu_tid = *_tid; - - vdec->run(); - - return CELL_OK; + return vdecOpen(ppu, type, res, cb, handle); } s32 cellVdecOpenEx(ppu_thread& ppu, vm::cptr type, vm::cptr res, vm::cptr cb, vm::ptr handle) { cellVdec.warning("cellVdecOpenEx(type=*0x%x, res=*0x%x, cb=*0x%x, handle=*0x%x)", type, res, cb, handle); - // Create decoder thread - auto&& vdec = idm::make_ptr(type->codecType, type->profileLevel, res->memAddr, res->memSize, cb->cbFunc, cb->cbArg, res->ppuThreadPriority, res->ppuThreadStackSize); - - // Hack: store thread id (normally it should be pointer) - *handle = vdec->id; - - vm::var _tid; - ppu_execute<&sys_ppu_thread_create>(ppu, +_tid, 1148, 0, 900, 0x4000, SYS_PPU_THREAD_CREATE_INTERRUPT, vm::null); - vdec->gpr[13] = idm::get(*_tid)->gpr[13]; - vdec->ppu_tid = *_tid; - - vdec->run(); - - return CELL_OK; + return vdecOpen(ppu, type, res, cb, handle); } s32 cellVdecClose(ppu_thread& ppu, u32 handle) { cellVdec.warning("cellVdecClose(handle=0x%x)", handle); - const auto vdec = idm::get(handle); + const auto vdec = idm::get(handle); if (!vdec) { @@ -492,15 +484,9 @@ s32 cellVdecClose(ppu_thread& ppu, u32 handle) } lv2_obj::sleep(ppu); - - { - std::lock_guard lock(vdec->mutex); - vdec->cmd_push({vdec_cmd::close, 0}); - vdec->max_frames = 0; - } - - vdec->notify(); - vdec->join(); + vdec->out_max = 0; + vdec->in_cmd.push(vdec_close); + vdec->in_cv.notify_all(); ppu_execute<&sys_interrupt_thread_disestablish>(ppu, vdec->ppu_tid); return CELL_OK; } @@ -509,15 +495,15 @@ s32 cellVdecStartSeq(u32 handle) { cellVdec.trace("cellVdecStartSeq(handle=0x%x)", handle); - const auto vdec = idm::get(handle); + const auto vdec = idm::get(handle); if (!vdec) { return CELL_VDEC_ERROR_ARG; } - vdec->cmd_push({vdec_cmd::start_seq, 0}); - vdec->notify(); + vdec->in_cmd.push(vdec_start_seq); + vdec->in_cv.notify_all(); return CELL_OK; } @@ -525,15 +511,15 @@ s32 cellVdecEndSeq(u32 handle) { cellVdec.warning("cellVdecEndSeq(handle=0x%x)", handle); - const auto vdec = idm::get(handle); + const auto vdec = idm::get(handle); if (!vdec) { return CELL_VDEC_ERROR_ARG; } - vdec->cmd_push({vdec_cmd::end_seq, 0}); - vdec->notify(); + vdec->in_cmd.push(vdec_cmd{-1}); + vdec->in_cv.notify_all(); return CELL_OK; } @@ -541,30 +527,21 @@ s32 cellVdecDecodeAu(u32 handle, CellVdecDecodeMode mode, vm::cptr(handle); + const auto vdec = idm::get(handle); - if (mode > CELL_VDEC_DEC_MODE_PB_SKIP || !vdec) + if (mode < 0 || mode > CELL_VDEC_DEC_MODE_PB_SKIP || !vdec) { return CELL_VDEC_ERROR_ARG; } - if (vdec->au_count.fetch_op([](u32& c) { if (c < 4) c++; }) >= 4) + if (!vdec->au_count.try_inc(4)) { return CELL_VDEC_ERROR_BUSY; } // TODO: check info - vdec->cmd_list - ({ - { vdec_cmd::decode, mode }, - { auInfo->startAddr, auInfo->size }, - u64{auInfo->pts.upper} << 32 | auInfo->pts.lower, - u64{auInfo->dts.upper} << 32 | auInfo->dts.lower, - auInfo->userData, - auInfo->codecSpecificData, - }); - - vdec->notify(); + vdec->in_cmd.push(vdec_cmd{mode, *auInfo}); + vdec->in_cv.notify_all(); return CELL_OK; } @@ -572,7 +549,7 @@ s32 cellVdecGetPicture(u32 handle, vm::cptr format, vm::ptr(handle); + const auto vdec = idm::get(handle); if (!format || !vdec) { @@ -580,6 +557,7 @@ s32 cellVdecGetPicture(u32 handle, vm::cptr format, vm::ptrmutex); @@ -591,14 +569,12 @@ s32 cellVdecGetPicture(u32 handle, vm::cptr format, vm::ptrout.front()); vdec->out.pop(); - - if (vdec->out.size() <= vdec->max_frames) - { - vdec->notify(); - } + if (vdec->out.size() + 1 == vdec->out_max) + notify = true; } - vdec->notify(); + if (notify) + vdec->in_cv.notify_all(); if (outBuff) { @@ -698,7 +674,7 @@ s32 cellVdecGetPicItem(u32 handle, vm::pptr picItem) { cellVdec.trace("cellVdecGetPicItem(handle=0x%x, picItem=**0x%x)", handle, picItem); - const auto vdec = idm::get(handle); + const auto vdec = idm::get(handle); if (!vdec) { @@ -893,7 +869,7 @@ s32 cellVdecSetFrameRate(u32 handle, CellVdecFrameRate frc) { cellVdec.trace("cellVdecSetFrameRate(handle=0x%x, frc=0x%x)", handle, (s32)frc); - const auto vdec = idm::get(handle); + const auto vdec = idm::get(handle); if (!vdec) { @@ -901,8 +877,8 @@ s32 cellVdecSetFrameRate(u32 handle, CellVdecFrameRate frc) } // TODO: check frc value - vdec->cmd_push({vdec_cmd::set_frc, frc}); - vdec->notify(); + vdec->in_cmd.push(frc); + vdec->in_cv.notify_all(); return CELL_OK; } @@ -966,4 +942,6 @@ DECLARE(ppu_module_manager::cellVdec)("libvdec", []() REG_FUNC(libvdec, cellVdecSetFrameRate); REG_FUNC(libvdec, cellVdecSetFrameRateExt); // 0xcffc42a5 REG_FUNC(libvdec, cellVdecSetPts); // 0x3ce2e4f8 + + REG_FUNC(libvdec, vdecEntry).flag(MFF_HIDDEN); }); diff --git a/rpcs3/Emu/Cell/Modules/libmixer.cpp b/rpcs3/Emu/Cell/Modules/libmixer.cpp index 6352d220b5..f503c59a85 100644 --- a/rpcs3/Emu/Cell/Modules/libmixer.cpp +++ b/rpcs3/Emu/Cell/Modules/libmixer.cpp @@ -326,7 +326,7 @@ struct surmixer_thread : ppu_thread { using ppu_thread::ppu_thread; - virtual void cpu_task() override + void non_task() { const auto g_audio = fxm::get(); @@ -489,9 +489,7 @@ s32 cellSurMixerCreate(vm::cptr config) libmixer.warning("*** surMixer created (ch1=%d, ch2=%d, ch6=%d, ch8=%d)", config->chStrips1, config->chStrips2, config->chStrips6, config->chStrips8); - auto&& thread = idm::make_ptr("Surmixer Thread"); - - thread->run(); + //auto thread = idm::make_ptr("Surmixer Thread"); return CELL_OK; } diff --git a/rpcs3/Emu/Cell/Modules/sys_lwcond_.cpp b/rpcs3/Emu/Cell/Modules/sys_lwcond_.cpp index a43f1891cb..c5a7ace924 100644 --- a/rpcs3/Emu/Cell/Modules/sys_lwcond_.cpp +++ b/rpcs3/Emu/Cell/Modules/sys_lwcond_.cpp @@ -72,7 +72,11 @@ error_code sys_lwcond_signal(ppu_thread& ppu, vm::ptr lwcond) // call the syscall if (error_code res = _sys_lwcond_signal(ppu, lwcond->lwcond_queue, lwmutex->sleep_queue, -1, 1)) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } + lwmutex->all_info--; if (res != CELL_EPERM) @@ -103,7 +107,11 @@ error_code sys_lwcond_signal(ppu_thread& ppu, vm::ptr lwcond) // call the syscall if (error_code res = _sys_lwcond_signal(ppu, lwcond->lwcond_queue, lwmutex->sleep_queue, -1, 3)) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } + lwmutex->all_info--; // unlock the lightweight mutex @@ -145,9 +153,12 @@ error_code sys_lwcond_signal_all(ppu_thread& ppu, vm::ptr lwcond) return res; } - ppu.test_state(); - lwmutex->all_info += +res; + if (ppu.test_stopped()) + { + return 0; + } + lwmutex->all_info += +res; return CELL_OK; } @@ -167,7 +178,10 @@ error_code sys_lwcond_signal_all(ppu_thread& ppu, vm::ptr lwcond) // if locking succeeded, call the syscall error_code res = _sys_lwcond_signal_all(ppu, lwcond->lwcond_queue, lwmutex->sleep_queue, 1); - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } if (res > 0) { @@ -206,7 +220,11 @@ error_code sys_lwcond_signal_to(ppu_thread& ppu, vm::ptr lwcond, u // call the syscall if (error_code res = _sys_lwcond_signal(ppu, lwcond->lwcond_queue, lwmutex->sleep_queue, ppu_thread_id, 1)) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } + lwmutex->all_info--; return res; @@ -234,7 +252,11 @@ error_code sys_lwcond_signal_to(ppu_thread& ppu, vm::ptr lwcond, u // call the syscall if (error_code res = _sys_lwcond_signal(ppu, lwcond->lwcond_queue, lwmutex->sleep_queue, ppu_thread_id, 3)) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } + lwmutex->all_info--; // unlock the lightweight mutex diff --git a/rpcs3/Emu/Cell/Modules/sys_spinlock.cpp b/rpcs3/Emu/Cell/Modules/sys_spinlock.cpp index f206cf9754..2f7276645f 100644 --- a/rpcs3/Emu/Cell/Modules/sys_spinlock.cpp +++ b/rpcs3/Emu/Cell/Modules/sys_spinlock.cpp @@ -16,15 +16,20 @@ void sys_spinlock_initialize(vm::ptr> lock) } } -void sys_spinlock_lock(ppu_thread& ppu, vm::ptr> lock) +error_code sys_spinlock_lock(ppu_thread& ppu, vm::ptr> lock) { sysPrxForUser.trace("sys_spinlock_lock(lock=*0x%x)", lock); // Try to exchange with 0xabadcafe, repeat until exchanged with 0 while (*lock || lock->exchange(0xabadcafe)) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } + + return not_an_error(ppu.gpr[3]); } s32 sys_spinlock_trylock(vm::ptr> lock) diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index 00c42866f7..f26fadc49e 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -983,7 +983,11 @@ std::shared_ptr ppu_load_prx(const ppu_prx_object& elf, const std::stri if (Emu.IsReady() && fxm::import([&] { return prx; })) { // Special loading mode - auto ppu = idm::make_ptr("test_thread", 0, 0x100000); + ppu_thread_params p{}; + p.stack_addr = vm::cast(vm::alloc(0x100000, vm::stack, 4096)); + p.stack_size = 0x100000; + + auto ppu = idm::make_ptr>("PPU[0x1000000] Thread (test_thread)", p, "test_thread", 0); ppu->cmd_push({ppu_cmd::initialize, 0}); } @@ -1463,7 +1467,7 @@ void ppu_load_exec(const ppu_exec_object& elf) } // Fix primary stack size - switch (primary_stacksize) + switch (u32 sz = primary_stacksize) { case 0x10: primary_stacksize = 32 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_32K case 0x20: primary_stacksize = 64 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_64K @@ -1472,10 +1476,19 @@ void ppu_load_exec(const ppu_exec_object& elf) case 0x50: primary_stacksize = 256 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_256K case 0x60: primary_stacksize = 512 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_512K case 0x70: primary_stacksize = 1024 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_1M + default: + { + primary_stacksize = sz >= 4096 ? ::align(std::min(sz, 0x100000), 4096) : 0x4000; + break; + } } // Initialize main thread - auto ppu = idm::make_ptr("main_thread", primary_prio, primary_stacksize); + ppu_thread_params p{}; + p.stack_addr = vm::cast(vm::alloc(primary_stacksize, vm::stack, 4096)); + p.stack_size = primary_stacksize; + + auto ppu = idm::make_ptr>("PPU[0x1000000] Thread (main_thread)", p, "main_thread", primary_prio); // Write initial data (exitspawn) if (Emu.data.size()) diff --git a/rpcs3/Emu/Cell/PPUModule.h b/rpcs3/Emu/Cell/PPUModule.h index d3c434513b..f4d7efb768 100644 --- a/rpcs3/Emu/Cell/PPUModule.h +++ b/rpcs3/Emu/Cell/PPUModule.h @@ -77,9 +77,6 @@ class ppu_static_module final public: const std::string name; - task_stack on_load; - task_stack on_unload; - std::unordered_map> functions; std::unordered_map> variables; diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 277ca698d7..5c300d9ca6 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -334,37 +334,6 @@ extern void ppu_breakpoint(u32 addr, bool isAdding) } } -void ppu_thread::on_spawn() -{ - if (g_cfg.core.thread_scheduler_enabled) - { - // Bind to primary set - thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::ppu)); - } -} - -void ppu_thread::on_init(const std::shared_ptr& _this) -{ - if (!stack_addr) - { - // Allocate stack + gap between stacks - auto new_stack_base = vm::alloc(stack_size + 4096, vm::stack, 4096); - if (!new_stack_base) - { - fmt::throw_exception("Out of stack memory (size=0x%x)" HERE, stack_size); - } - - const_cast(stack_addr) = new_stack_base + 4096; - - // Make the gap inaccessible - vm::page_protect(new_stack_base, 4096, 0, 0, vm::page_readable + vm::page_writable); - - gpr[1] = ::align(stack_addr + stack_size, 0x200) - 0x200; - - cpu_thread::on_init(_this); - } -} - //sets breakpoint, does nothing if there is a breakpoint there already extern void ppu_set_breakpoint(u32 addr) { @@ -427,9 +396,15 @@ extern bool ppu_patch(u32 addr, u32 value) return true; } +void ppu_thread::on_cleanup(named_thread* _this) +{ + // Remove thread id + idm::remove>(_this->id); +} + std::string ppu_thread::get_name() const { - return fmt::format("PPU[0x%x] Thread (%s)", id, m_name); + return fmt::format("PPU[0x%x] Thread (%s)", id, ppu_name.get()); } std::string ppu_thread::dump() const @@ -564,6 +539,12 @@ void ppu_thread::cpu_task() cmd_pop(), ppu_function_manager::get().at(arg)(*this); break; } + case ppu_cmd::ptr_call: + { + const ppu_function_t func = cmd_get(1).as(); + cmd_pop(1), func(*this); + break; + } case ppu_cmd::initialize: { cmd_pop(), ppu_initialize(); @@ -697,20 +678,38 @@ void ppu_thread::exec_task() ppu_thread::~ppu_thread() { - if (stack_addr) - { - vm::dealloc_verbose_nothrow(stack_addr - 4096, vm::stack); - } + // Deallocate Stack Area + vm::dealloc_verbose_nothrow(stack_addr, vm::stack); } -ppu_thread::ppu_thread(const std::string& name, u32 prio, u32 stack) +ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 prio, int detached) : cpu_thread(idm::last_id()) , prio(prio) - , stack_size(stack >= 0x1000 ? ::align(std::min(stack, 0x100000), 0x1000) : 0x4000) - , stack_addr(0) + , stack_size(param.stack_size) + , stack_addr(param.stack_addr) , start_time(get_system_time()) - , m_name(name) + , joiner(-!!detached) + , ppu_name(name) { + gpr[1] = ::align(stack_addr + stack_size, 0x200) - 0x200; + + gpr[13] = param.tls_addr; + + if (detached >= 0 && id != id_base) + { + // Initialize thread entry point + cmd_list + ({ + {ppu_cmd::set_args, 2}, param.arg0, param.arg1, + {ppu_cmd::lle_call, param.entry}, + }); + } + else + { + // Save entry for further use (interrupt handler workaround) + gpr[2] = param.entry; + } + // Trigger the scheduler state += cpu_flag::suspend; @@ -765,7 +764,7 @@ cmd64 ppu_thread::cmd_wait() { if (UNLIKELY(state)) { - if (state & (cpu_flag::stop + cpu_flag::exit)) + if (is_stopped()) { return cmd64{}; } @@ -802,8 +801,7 @@ void ppu_thread::fast_call(u32 addr, u32 rtoc) g_tls_log_prefix = [] { const auto _this = static_cast(get_current_cpu_thread()); - - return fmt::format("%s [0x%08x]", _this->get_name(), _this->cia); + return fmt::format("%s [0x%08x]", thread_ctrl::get_name(), _this->cia); }; auto at_ret = gsl::finally([&]() @@ -930,7 +928,11 @@ extern void sse_cellbe_stvrx_v0(u64 addr, __m128i a); static void ppu_check(ppu_thread& ppu, u64 addr) { ppu.cia = ::narrow(addr); - ppu.test_state(); + + if (ppu.test_stopped()) + { + return; + } } static void ppu_trace(u64 addr) diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index c4394dbbc4..c00ae7f9b7 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -14,6 +14,7 @@ enum class ppu_cmd : u32 set_args, // Set general-purpose args (+arg cmd) lle_call, // Load addr and rtoc at *arg or *gpr[arg] and execute hle_call, // Execute function by index (arg) + ptr_call, // Execute function by pointer initialize, // ppu_initialize() sleep, reset_stack, // resets stack address @@ -24,6 +25,17 @@ enum class ppu_syscall_code : u64 { }; +// ppu_thread constructor argument +struct ppu_thread_params +{ + vm::addr_t stack_addr; + u32 stack_size; + u32 tls_addr; + u32 entry; + u64 arg0; + u64 arg1; +}; + class ppu_thread : public cpu_thread { public: @@ -31,17 +43,17 @@ public: static const u32 id_step = 1; static const u32 id_count = 2048; - virtual void on_spawn() override; - virtual void on_init(const std::shared_ptr&) override; + static void on_cleanup(named_thread*); + virtual std::string get_name() const override; virtual std::string dump() const override; - virtual void cpu_task() override; + virtual void cpu_task() override final; virtual void cpu_sleep() override; virtual void cpu_mem() override; virtual void cpu_unmem() override; virtual ~ppu_thread() override; - ppu_thread(const std::string& name, u32 prio = 0, u32 stack = 0x10000); + ppu_thread(const ppu_thread_params&, std::string_view name, u32 prio, int detached = 0); u64 gpr[32] = {}; // General-Purpose Registers f64 fpr[32] = {}; // Floating Point Registers @@ -153,7 +165,7 @@ public: u64 start_time{0}; // Sleep start timepoint const char* last_function{}; // Last function name for diagnosis, optimized for speed. - const std::string m_name; // Thread name + lf_value ppu_name; // Thread name be_t* get_stack_arg(s32 i, u64 align = alignof(u64)); void exec_task(); diff --git a/rpcs3/Emu/Cell/RawSPUThread.cpp b/rpcs3/Emu/Cell/RawSPUThread.cpp index f74986c6be..aaedc088a0 100644 --- a/rpcs3/Emu/Cell/RawSPUThread.cpp +++ b/rpcs3/Emu/Cell/RawSPUThread.cpp @@ -9,39 +9,7 @@ // Originally, SPU MFC registers are accessed externally in a concurrent manner (don't mix with channels, SPU MFC channels are isolated) thread_local spu_mfc_cmd g_tls_mfc[8] = {}; -void RawSPUThread::cpu_task() -{ - // get next PC and SPU Interrupt status - pc = npc.exchange(0); - - set_interrupt_status((pc & 1) != 0); - - pc &= 0x3fffc; - - SPUThread::cpu_task(); - - // save next PC and current SPU Interrupt status - npc = pc | (interrupts_enabled); -} - -void RawSPUThread::on_init(const std::shared_ptr& _this) -{ - if (!offset) - { - // Install correct SPU index and LS address - const_cast(index) = id; - const_cast(offset) = verify(HERE, vm::falloc(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index, 0x40000)); - - cpu_thread::on_init(_this); - } -} - -RawSPUThread::RawSPUThread(const std::string& name) - : SPUThread(name, 0, nullptr) -{ -} - -bool RawSPUThread::read_reg(const u32 addr, u32& value) +bool spu_thread::read_reg(const u32 addr, u32& value) { const u32 offset = addr - RAW_SPU_BASE_ADDR - index * RAW_SPU_OFFSET - RAW_SPU_PROB_OFFSET; @@ -101,7 +69,7 @@ bool RawSPUThread::read_reg(const u32 addr, u32& value) return false; } -bool RawSPUThread::write_reg(const u32 addr, const u32 value) +bool spu_thread::write_reg(const u32 addr, const u32 value) { auto try_start = [this]() { @@ -116,7 +84,8 @@ bool RawSPUThread::write_reg(const u32 addr, const u32 value) return true; })) { - run(); + state -= cpu_flag::stop; + thread_ctrl::notify(static_cast&>(*this)); } }; @@ -291,7 +260,11 @@ bool RawSPUThread::write_reg(const u32 addr, const u32 value) void spu_load_exec(const spu_exec_object& elf) { - auto spu = idm::make_ptr("TEST_SPU"); + auto ls0 = vm::cast(vm::falloc(RAW_SPU_BASE_ADDR, 0x40000, vm::spu)); + auto spu = idm::make_ptr>("TEST_SPU", ls0, nullptr, 0, ""); + + spu_thread::g_raw_spu_ctr++; + spu_thread::g_raw_spu_id[0] = spu->id; for (const auto& prog : elf.progs) { @@ -301,6 +274,5 @@ void spu_load_exec(const spu_exec_object& elf) } } - spu->cpu_init(); spu->npc = elf.header.e_entry; } diff --git a/rpcs3/Emu/Cell/RawSPUThread.h b/rpcs3/Emu/Cell/RawSPUThread.h index 668db24678..921db8f1f6 100644 --- a/rpcs3/Emu/Cell/RawSPUThread.h +++ b/rpcs3/Emu/Cell/RawSPUThread.h @@ -1,20 +1,3 @@ #pragma once #include "SPUThread.h" - -class RawSPUThread final : public SPUThread -{ - void cpu_task() override; - -public: - static const u32 id_base = 0; - static const u32 id_step = 1; - static const u32 id_count = 5; - - void on_init(const std::shared_ptr&) override; - - RawSPUThread(const std::string& name); - - bool read_reg(const u32 addr, u32& value); - bool write_reg(const u32 addr, const u32 value); -}; diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 03402567c0..2d3a230de5 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -16,11 +16,11 @@ #include "SPUASMJITRecompiler.h" -#define SPU_OFF_128(x, ...) asmjit::x86::oword_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__)) -#define SPU_OFF_64(x, ...) asmjit::x86::qword_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__)) -#define SPU_OFF_32(x, ...) asmjit::x86::dword_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__)) -#define SPU_OFF_16(x, ...) asmjit::x86::word_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__)) -#define SPU_OFF_8(x, ...) asmjit::x86::byte_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__)) +#define SPU_OFF_128(x, ...) asmjit::x86::oword_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__)) +#define SPU_OFF_64(x, ...) asmjit::x86::qword_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__)) +#define SPU_OFF_32(x, ...) asmjit::x86::dword_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__)) +#define SPU_OFF_16(x, ...) asmjit::x86::word_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__)) +#define SPU_OFF_8(x, ...) asmjit::x86::byte_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__)) extern const spu_decoder g_spu_interpreter_fast; // TODO: avoid const spu_decoder s_spu_decoder; @@ -1177,12 +1177,12 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(__m128i data) return XmmConst(v128::fromV(data)); } -static void check_state_ret(SPUThread& _spu, void*, u8*) +static void check_state_ret(spu_thread& _spu, void*, u8*) { // MSVC workaround (TCO) } -static void check_state(SPUThread* _spu, spu_function_t _ret) +static void check_state(spu_thread* _spu, spu_function_t _ret) { if (_spu->state && _spu->check_state()) { @@ -1209,7 +1209,7 @@ void spu_recompiler::branch_fixed(u32 target) return; } - c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2)); + c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&spu_thread::jit_dispatcher) + target * 2)); c->mov(SPU_OFF_32(pc), target); c->cmp(SPU_OFF_32(state), 0); c->jnz(label_stop); @@ -1251,7 +1251,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) } else if (op.e) { - auto _throw = [](SPUThread* _spu) + auto _throw = [](spu_thread* _spu) { fmt::throw_exception("SPU Interrupts not implemented (mask=0x%x)" HERE, +_spu->ch_event_mask); }; @@ -1270,7 +1270,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) c->jmp(no_intr); c->bind(fail); c->mov(SPU_OFF_32(pc), *addr); - c->jmp(imm_ptr(_throw)); + c->jmp(imm_ptr(_throw)); // Save addr in srr0 and disable interrupts c->bind(intr); @@ -1292,7 +1292,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) if (!jt && g_cfg.core.spu_block_size != spu_block_size_type::giga) { // Simply external call (return or indirect call) - c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher))); + c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher))); } else { @@ -1311,7 +1311,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) c->lea(x86::r10, x86::qword_ptr(instr_table)); c->cmp(qw1->r32(), end - start); c->lea(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0)); - c->lea(*qw1, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher))); + c->lea(*qw1, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher))); c->cmovae(x86::r10, *qw1); c->mov(x86::r10, x86::qword_ptr(x86::r10)); } @@ -1321,7 +1321,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) // Get stack pointer, try to use native return address (check SPU return address) c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3)); c->and_(qw1->r32(), 0x3fff0); - c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror))); + c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&spu_thread::stack_mirror))); c->cmp(x86::dword_ptr(*qw1, 8), *addr); c->cmove(x86::r10, x86::qword_ptr(*qw1)); } @@ -1352,7 +1352,7 @@ void spu_recompiler::branch_set_link(u32 target) // Get stack pointer, write native and SPU return addresses into the stack mirror c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3)); c->and_(qw1->r32(), 0x3fff0); - c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror))); + c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&spu_thread::stack_mirror))); c->lea(x86::r10, x86::qword_ptr(ret)); c->mov(x86::qword_ptr(*qw1, 0), x86::r10); c->mov(x86::qword_ptr(*qw1, 8), target); @@ -1365,7 +1365,7 @@ void spu_recompiler::branch_set_link(u32 target) c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3)); c->and_(qw1->r32(), 0x3fff0); c->pcmpeqd(x86::xmm0, x86::xmm0); - c->movdqa(x86::dqword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror)), x86::xmm0); + c->movdqa(x86::dqword_ptr(*cpu, *qw1, 0, ::offset32(&spu_thread::stack_mirror)), x86::xmm0); c->jmp(target); }); } @@ -1374,7 +1374,7 @@ void spu_recompiler::branch_set_link(u32 target) void spu_recompiler::fall(spu_opcode_t op) { - auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func, spu_function_t _ret) + auto gate = [](spu_thread* _spu, u32 opcode, spu_inter_func_t _func, spu_function_t _ret) { if (!_func(*_spu, {opcode})) { @@ -1391,7 +1391,7 @@ void spu_recompiler::fall(spu_opcode_t op) c->mov(*ls, op.opcode); c->mov(*qw0, asmjit::imm_ptr(asmjit::Internal::ptr_cast(g_spu_interpreter_fast.decode(op.opcode)))); c->lea(*qw1, asmjit::x86::qword_ptr(next)); - c->jmp(asmjit::imm_ptr(gate)); + c->jmp(asmjit::imm_ptr(gate)); c->align(asmjit::kAlignCode, 16); c->bind(next); } @@ -1442,13 +1442,13 @@ void spu_recompiler::get_events() if (utils::has_avx()) { - c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 0)); + c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 0)); c->vxorps(x86::ymm1, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 0)); - c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 32)); + c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 32)); c->vxorps(x86::ymm2, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 32)); - c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 64)); + c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 64)); c->vxorps(x86::ymm3, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 64)); - c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 96)); + c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 96)); c->vxorps(x86::ymm4, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 96)); c->vorps(x86::ymm0, x86::ymm1, x86::ymm2); c->vorps(x86::ymm1, x86::ymm3, x86::ymm4); @@ -1460,11 +1460,11 @@ void spu_recompiler::get_events() else { c->movaps(x86::xmm0, x86::dqword_ptr(*qw0, *addr)); - c->xorps(x86::xmm0, x86::dqword_ptr(*cpu, offset32(&SPUThread::rdata) + 0)); + c->xorps(x86::xmm0, x86::dqword_ptr(*cpu, offset32(&spu_thread::rdata) + 0)); for (u32 i = 16; i < 128; i += 16) { c->movaps(x86::xmm1, x86::dqword_ptr(*qw0, *addr, 0, i)); - c->xorps(x86::xmm1, x86::dqword_ptr(*cpu, offset32(&SPUThread::rdata) + i)); + c->xorps(x86::xmm1, x86::dqword_ptr(*cpu, offset32(&spu_thread::rdata) + i)); c->orps(x86::xmm0, x86::xmm1); } @@ -1495,7 +1495,7 @@ void spu_recompiler::get_events() // Check decrementer event (unlikely) after.emplace_back([=] { - auto sub = [](SPUThread* _spu, spu_function_t _ret) + auto sub = [](spu_thread* _spu, spu_function_t _ret) { if ((_spu->ch_dec_value - (get_timebased_time() - _spu->ch_dec_start_timestamp)) >> 31) { @@ -1508,7 +1508,7 @@ void spu_recompiler::get_events() c->bind(tcheck); c->lea(*ls, x86::qword_ptr(label2)); - c->jmp(imm_ptr(sub)); + c->jmp(imm_ptr(sub)); }); // Check whether SPU_EVENT_TM is already set @@ -1527,13 +1527,13 @@ void spu_recompiler::get_events() after.emplace_back([=] { - auto _throw = [](SPUThread* _spu) + auto _throw = [](spu_thread* _spu) { fmt::throw_exception("SPU Events not implemented (mask=0x%x)" HERE, +_spu->ch_event_mask); }; c->bind(fail); - c->jmp(imm_ptr(_throw)); + c->jmp(imm_ptr(_throw)); }); // Load active events into addr @@ -1547,18 +1547,18 @@ void spu_recompiler::get_events() void spu_recompiler::UNK(spu_opcode_t op) { - auto gate = [](SPUThread* _spu, u32 op) + auto gate = [](spu_thread* _spu, u32 op) { fmt::throw_exception("Unknown/Illegal instruction (0x%08x)" HERE, op); }; c->mov(SPU_OFF_32(pc), m_pos); c->mov(*ls, op.opcode); - c->jmp(asmjit::imm_ptr(gate)); + c->jmp(asmjit::imm_ptr(gate)); m_pos = -1; } -void spu_stop(SPUThread* _spu, u32 code, spu_function_t _ret) +void spu_stop(spu_thread* _spu, u32 code, spu_function_t _ret) { if (!_spu->stop_and_signal(code)) { @@ -1619,12 +1619,12 @@ void spu_recompiler::MFSPR(spu_opcode_t op) c->movdqa(SPU_OFF_128(gpr, op.rt), vr); } -static void spu_rdch_ret(SPUThread& spu, void*, u32) +static void spu_rdch_ret(spu_thread& spu, void*, u32) { // MSVC workaround (TCO) } -static void spu_rdch(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32)) +static void spu_rdch(spu_thread* _spu, u32 ch, void(*_ret)(spu_thread&, void*, u32)) { const s64 result = _spu->get_ch_value(ch); @@ -1733,7 +1733,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) { LOG_WARNING(SPU, "[0x%x] RDCH: RdDec", m_pos); - auto sub1 = [](SPUThread* _spu, v128* _res, spu_function_t _ret) + auto sub1 = [](spu_thread* _spu, v128* _res, spu_function_t _ret) { const u32 out = _spu->ch_dec_value - static_cast(get_timebased_time() - _spu->ch_dec_start_timestamp); @@ -1744,7 +1744,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) _ret(*_spu, _spu->_ptr(0), nullptr); }; - auto sub2 = [](SPUThread* _spu, v128* _res, spu_function_t _ret) + auto sub2 = [](spu_thread* _spu, v128* _res, spu_function_t _ret) { const u32 out = _spu->ch_dec_value - static_cast(get_timebased_time() - _spu->ch_dec_start_timestamp); @@ -1752,7 +1752,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) _ret(*_spu, _spu->_ptr(0), nullptr); }; - using ftype = void (*)(SPUThread*, v128*, spu_function_t); + using ftype = void (*)(spu_thread*, v128*, spu_function_t); asmjit::Label next = c->newLabel(); c->mov(SPU_OFF_32(pc), m_pos); @@ -1817,7 +1817,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0); } -static void spu_rchcnt(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32 res)) +static void spu_rchcnt(spu_thread* _spu, u32 ch, void(*_ret)(spu_thread&, void*, u32 res)) { // Put result into the third argument const u32 res = _spu->get_ch_count(ch); @@ -2565,12 +2565,12 @@ void spu_recompiler::MTSPR(spu_opcode_t op) // Check SPUInterpreter for notes. } -static void spu_wrch_ret(SPUThread& _spu, void*, u8*) +static void spu_wrch_ret(spu_thread& _spu, void*, u8*) { // MSVC workaround (TCO) } -static void spu_wrch(SPUThread* _spu, u32 ch, u32 value, spu_function_t _ret) +static void spu_wrch(spu_thread* _spu, u32 ch, u32 value, spu_function_t _ret) { if (!_spu->set_ch_value(ch, value)) { @@ -2580,7 +2580,7 @@ static void spu_wrch(SPUThread* _spu, u32 ch, u32 value, spu_function_t _ret) _ret(*_spu, _spu->_ptr(0), nullptr); } -static void spu_wrch_mfc(SPUThread* _spu, spu_function_t _ret) +static void spu_wrch_mfc(spu_thread* _spu, spu_function_t _ret) { if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd)) { @@ -2744,7 +2744,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) } case MFC_WrListStallAck: { - auto sub = [](SPUThread* _spu, spu_function_t _ret) + auto sub = [](spu_thread* _spu, spu_function_t _ret) { _spu->do_mfc(true); _ret(*_spu, _spu->_ptr(0), nullptr); @@ -2756,14 +2756,14 @@ void spu_recompiler::WRCH(spu_opcode_t op) c->btr(SPU_OFF_32(ch_stall_mask), qw0->r32()); c->jnc(ret); c->lea(*ls, x86::qword_ptr(ret)); - c->jmp(imm_ptr(sub)); + c->jmp(imm_ptr(sub)); c->align(kAlignCode, 16); c->bind(ret); return; } case SPU_WrDec: { - auto sub = [](SPUThread* _spu, spu_function_t _ret) + auto sub = [](spu_thread* _spu, spu_function_t _ret) { _spu->ch_dec_start_timestamp = get_timebased_time(); _ret(*_spu, _spu->_ptr(0), nullptr); @@ -2771,7 +2771,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) Label ret = c->newLabel(); c->lea(*ls, x86::qword_ptr(ret)); - c->jmp(imm_ptr(sub)); + c->jmp(imm_ptr(sub)); c->align(kAlignCode, 16); c->bind(ret); c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); @@ -3113,7 +3113,7 @@ void spu_recompiler::CBX(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); - c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x03); + c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x03); } void spu_recompiler::CHX(spu_opcode_t op) @@ -3126,7 +3126,7 @@ void spu_recompiler::CHX(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); - c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x0203); + c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x0203); } void spu_recompiler::CWX(spu_opcode_t op) @@ -3139,7 +3139,7 @@ void spu_recompiler::CWX(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); - c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x00010203); + c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x00010203); } void spu_recompiler::CDX(spu_opcode_t op) @@ -3153,7 +3153,7 @@ void spu_recompiler::CDX(spu_opcode_t op) c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); c->mov(*qw0, asmjit::imm_u(0x0001020304050607)); - c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), *qw0); + c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0); } void spu_recompiler::ROTQBI(spu_opcode_t op) @@ -3292,7 +3292,7 @@ void spu_recompiler::CBD(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); - c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x03); + c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x03); } void spu_recompiler::CHD(spu_opcode_t op) @@ -3316,7 +3316,7 @@ void spu_recompiler::CHD(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); - c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x0203); + c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x0203); } void spu_recompiler::CWD(spu_opcode_t op) @@ -3340,7 +3340,7 @@ void spu_recompiler::CWD(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); - c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x00010203); + c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x00010203); } void spu_recompiler::CDD(spu_opcode_t op) @@ -3365,7 +3365,7 @@ void spu_recompiler::CDD(spu_opcode_t op) c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); c->mov(*qw0, asmjit::imm_u(0x0001020304050607)); - c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), *qw0); + c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0); } void spu_recompiler::ROTQBII(spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 6f5a6e0355..62d918a111 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -60,7 +60,7 @@ namespace asmjit c.shl(x86::eax, I + 4); } - const auto ptr = x86::oword_ptr(spu, x86::rax, 0, ::offset32(&SPUThread::gpr)); + const auto ptr = x86::oword_ptr(spu, x86::rax, 0, offsetof(spu_thread, gpr)); if (utils::has_avx()) { @@ -85,13 +85,13 @@ namespace asmjit } } -bool spu_interpreter::UNK(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::UNK(spu_thread& spu, spu_opcode_t op) { fmt::throw_exception("Unknown/Illegal instruction (0x%08x)" HERE, op.opcode); } -void spu_interpreter::set_interrupt_status(SPUThread& spu, spu_opcode_t op) +void spu_interpreter::set_interrupt_status(spu_thread& spu, spu_opcode_t op) { if (op.e) { @@ -115,37 +115,37 @@ void spu_interpreter::set_interrupt_status(SPUThread& spu, spu_opcode_t op) } -bool spu_interpreter::STOP(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STOP(spu_thread& spu, spu_opcode_t op) { return spu.stop_and_signal(op.opcode & 0x3fff); } -bool spu_interpreter::LNOP(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::LNOP(spu_thread& spu, spu_opcode_t op) { return true; } // This instruction must be used following a store instruction that modifies the instruction stream. -bool spu_interpreter::SYNC(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SYNC(spu_thread& spu, spu_opcode_t op) { _mm_mfence(); return true; } // This instruction forces all earlier load, store, and channel instructions to complete before proceeding. -bool spu_interpreter::DSYNC(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DSYNC(spu_thread& spu, spu_opcode_t op) { _mm_mfence(); return true; } -bool spu_interpreter::MFSPR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MFSPR(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].clear(); // All SPRs read as zero. TODO: check it. return true; } -bool spu_interpreter::RDCH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::RDCH(spu_thread& spu, spu_opcode_t op) { const s64 result = spu.get_ch_value(op.ra); @@ -158,43 +158,43 @@ bool spu_interpreter::RDCH(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::RCHCNT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::RCHCNT(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::from32r(spu.get_ch_count(op.ra)); return true; } -bool spu_interpreter::SF(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SF(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::sub32(spu.gpr[op.rb], spu.gpr[op.ra]); return true; } -bool spu_interpreter::OR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::OR(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu.gpr[op.ra] | spu.gpr[op.rb]; return true; } -bool spu_interpreter::BG(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BG(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_add_epi32(sse_cmpgt_epu32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi), _mm_set1_epi32(1)); return true; } -bool spu_interpreter::SFH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SFH(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::sub16(spu.gpr[op.rb], spu.gpr[op.ra]); return true; } -bool spu_interpreter::NOR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::NOR(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = ~(spu.gpr[op.ra] | spu.gpr[op.rb]); return true; } -bool spu_interpreter::ABSDB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ABSDB(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -202,7 +202,7 @@ bool spu_interpreter::ABSDB(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROT(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -214,7 +214,7 @@ bool spu_interpreter::ROT(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTM(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -227,7 +227,7 @@ bool spu_interpreter::ROTM(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTMA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTMA(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -240,7 +240,7 @@ bool spu_interpreter::ROTMA(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::SHL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHL(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -253,7 +253,7 @@ bool spu_interpreter::SHL(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTH(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -265,7 +265,7 @@ bool spu_interpreter::ROTH(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTHM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTHM(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -278,7 +278,7 @@ bool spu_interpreter::ROTHM(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTMAH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTMAH(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -291,7 +291,7 @@ bool spu_interpreter::ROTMAH(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::SHLH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLH(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra]; const auto b = spu.gpr[op.rb]; @@ -304,7 +304,7 @@ bool spu_interpreter::SHLH(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = op.i7 & 0x1f; @@ -312,25 +312,25 @@ bool spu_interpreter::ROTI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTMI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTMI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srli_epi32(spu.gpr[op.ra].vi, 0-op.i7 & 0x3f); return true; } -bool spu_interpreter::ROTMAI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTMAI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srai_epi32(spu.gpr[op.ra].vi, 0-op.i7 & 0x3f); return true; } -bool spu_interpreter::SHLI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_slli_epi32(spu.gpr[op.ra].vi, op.i7 & 0x3f); return true; } -bool spu_interpreter::ROTHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTHI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = op.i7 & 0xf; @@ -338,37 +338,37 @@ bool spu_interpreter::ROTHI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTHMI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTHMI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srli_epi16(spu.gpr[op.ra].vi, 0-op.i7 & 0x1f); return true; } -bool spu_interpreter::ROTMAHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTMAHI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srai_epi16(spu.gpr[op.ra].vi, 0-op.i7 & 0x1f); return true; } -bool spu_interpreter::SHLHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLHI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_slli_epi16(spu.gpr[op.ra].vi, op.i7 & 0x1f); return true; } -bool spu_interpreter::A(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::A(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::add32(spu.gpr[op.ra], spu.gpr[op.rb]); return true; } -bool spu_interpreter::AND(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::AND(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu.gpr[op.ra] & spu.gpr[op.rb]; return true; } -bool spu_interpreter::CG(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CG(spu_thread& spu, spu_opcode_t op) { const auto a = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x7fffffff)); const auto b = _mm_xor_si128(spu.gpr[op.rb].vi, _mm_set1_epi32(0x80000000)); @@ -376,36 +376,36 @@ bool spu_interpreter::CG(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::AH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::AH(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::add16(spu.gpr[op.ra], spu.gpr[op.rb]); return true; } -bool spu_interpreter::NAND(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::NAND(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = ~(spu.gpr[op.ra] & spu.gpr[op.rb]); return true; } -bool spu_interpreter::AVGB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::AVGB(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_avg_epu8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); return true; } -bool spu_interpreter::MTSPR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MTSPR(spu_thread& spu, spu_opcode_t op) { // SPR writes are ignored. TODO: check it. return true; } -bool spu_interpreter::WRCH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::WRCH(spu_thread& spu, spu_opcode_t op) { return spu.set_ch_value(op.ra, spu.gpr[op.rt]._u32[3]); } -bool spu_interpreter::BIZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BIZ(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u32[3] == 0) { @@ -416,7 +416,7 @@ bool spu_interpreter::BIZ(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::BINZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BINZ(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u32[3] != 0) { @@ -427,7 +427,7 @@ bool spu_interpreter::BINZ(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::BIHZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BIHZ(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u16[6] == 0) { @@ -438,7 +438,7 @@ bool spu_interpreter::BIHZ(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::BIHNZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BIHNZ(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u16[6] != 0) { @@ -449,25 +449,25 @@ bool spu_interpreter::BIHNZ(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::STOPD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STOPD(spu_thread& spu, spu_opcode_t op) { return spu.stop_and_signal(0x3fff); } -bool spu_interpreter::STQX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STQX(spu_thread& spu, spu_opcode_t op) { spu._ref((spu.gpr[op.ra]._u32[3] + spu.gpr[op.rb]._u32[3]) & 0x3fff0) = spu.gpr[op.rt]; return true; } -bool spu_interpreter::BI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BI(spu_thread& spu, spu_opcode_t op) { spu.pc = spu_branch_target(spu.gpr[op.ra]._u32[3]); set_interrupt_status(spu, op); return false; } -bool spu_interpreter::BISL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BISL(spu_thread& spu, spu_opcode_t op) { const u32 target = spu_branch_target(spu.gpr[op.ra]._u32[3]); spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4)); @@ -476,43 +476,43 @@ bool spu_interpreter::BISL(SPUThread& spu, spu_opcode_t op) return false; } -bool spu_interpreter::IRET(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::IRET(spu_thread& spu, spu_opcode_t op) { spu.pc = spu_branch_target(spu.srr0); set_interrupt_status(spu, op); return false; } -bool spu_interpreter::BISLED(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BISLED(spu_thread& spu, spu_opcode_t op) { fmt::throw_exception("Unimplemented instruction" HERE); return true; } -bool spu_interpreter::HBR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HBR(spu_thread& spu, spu_opcode_t op) { return true; } -bool spu_interpreter::GB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::GB(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::from32r(_mm_movemask_ps(_mm_castsi128_ps(_mm_slli_epi32(spu.gpr[op.ra].vi, 31)))); return true; } -bool spu_interpreter::GBH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::GBH(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_packs_epi16(_mm_slli_epi16(spu.gpr[op.ra].vi, 15), _mm_setzero_si128()))); return true; } -bool spu_interpreter::GBB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::GBB(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(spu.gpr[op.ra].vi, 7))); return true; } -bool spu_interpreter::FSM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::FSM(spu_thread& spu, spu_opcode_t op) { const auto bits = _mm_shuffle_epi32(spu.gpr[op.ra].vi, 0xff); const auto mask = _mm_set_epi32(8, 4, 2, 1); @@ -520,7 +520,7 @@ bool spu_interpreter::FSM(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::FSMH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::FSMH(spu_thread& spu, spu_opcode_t op) { const auto vsrc = spu.gpr[op.ra].vi; const auto bits = _mm_shuffle_epi32(_mm_unpackhi_epi16(vsrc, vsrc), 0xaa); @@ -529,7 +529,7 @@ bool spu_interpreter::FSMH(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::FSMB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::FSMB(spu_thread& spu, spu_opcode_t op) { const auto vsrc = spu.gpr[op.ra].vi; const auto bits = _mm_shuffle_epi32(_mm_shufflehi_epi16(_mm_unpackhi_epi8(vsrc, vsrc), 0x50), 0xfa); @@ -538,26 +538,26 @@ bool spu_interpreter::FSMB(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::FREST(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FREST(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vf = _mm_rcp_ps(spu.gpr[op.ra].vf); return true; } -bool spu_interpreter_fast::FRSQEST(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FRSQEST(spu_thread& spu, spu_opcode_t op) { const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); spu.gpr[op.rt].vf = _mm_rsqrt_ps(_mm_and_ps(spu.gpr[op.ra].vf, mask)); return true; } -bool spu_interpreter::LQX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::LQX(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu._ref((spu.gpr[op.ra]._u32[3] + spu.gpr[op.rb]._u32[3]) & 0x3fff0); return true; } -bool spu_interpreter::ROTQBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQBYBI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(32) const __m128i buf[2]{a, a}; @@ -565,7 +565,7 @@ bool spu_interpreter::ROTQBYBI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTQMBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQMBYBI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()}; @@ -573,7 +573,7 @@ bool spu_interpreter::ROTQMBYBI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::SHLQBYBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLQBYBI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a}; @@ -581,7 +581,7 @@ bool spu_interpreter::SHLQBYBI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CBX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CBX(spu_thread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -594,7 +594,7 @@ bool spu_interpreter::CBX(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CHX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CHX(spu_thread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -607,7 +607,7 @@ bool spu_interpreter::CHX(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CWX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CWX(spu_thread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -620,7 +620,7 @@ bool spu_interpreter::CWX(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CDX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CDX(spu_thread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -633,7 +633,7 @@ bool spu_interpreter::CDX(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTQBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQBI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = spu.gpr[op.rb]._s32[3] & 0x7; @@ -641,7 +641,7 @@ bool spu_interpreter::ROTQBI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTQMBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQMBI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = -spu.gpr[op.rb]._s32[3] & 0x7; @@ -649,7 +649,7 @@ bool spu_interpreter::ROTQMBI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::SHLQBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLQBI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = spu.gpr[op.rb]._u32[3] & 0x7; @@ -657,7 +657,7 @@ bool spu_interpreter::SHLQBI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTQBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQBY(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(32) const __m128i buf[2]{a, a}; @@ -665,7 +665,7 @@ bool spu_interpreter::ROTQBY(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTQMBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQMBY(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()}; @@ -673,7 +673,7 @@ bool spu_interpreter::ROTQMBY(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::SHLQBY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLQBY(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a}; @@ -681,13 +681,13 @@ bool spu_interpreter::SHLQBY(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ORX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ORX(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::from32r(spu.gpr[op.ra]._u32[0] | spu.gpr[op.ra]._u32[1] | spu.gpr[op.ra]._u32[2] | spu.gpr[op.ra]._u32[3]); return true; } -bool spu_interpreter::CBD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CBD(spu_thread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -700,7 +700,7 @@ bool spu_interpreter::CBD(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CHD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CHD(spu_thread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -713,7 +713,7 @@ bool spu_interpreter::CHD(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CWD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CWD(spu_thread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -726,7 +726,7 @@ bool spu_interpreter::CWD(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CDD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CDD(spu_thread& spu, spu_opcode_t op) { if (op.ra == 1 && (spu.gpr[1]._u32[3] & 0xF)) { @@ -739,7 +739,7 @@ bool spu_interpreter::CDD(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTQBII(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQBII(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = op.i7 & 0x7; @@ -747,7 +747,7 @@ bool spu_interpreter::ROTQBII(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTQMBII(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQMBII(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = 0-op.i7 & 0x7; @@ -755,7 +755,7 @@ bool spu_interpreter::ROTQMBII(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::SHLQBII(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLQBII(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const s32 n = op.i7 & 0x7; @@ -763,7 +763,7 @@ bool spu_interpreter::SHLQBII(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTQBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQBYI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(32) const __m128i buf[2]{a, a}; @@ -771,7 +771,7 @@ bool spu_interpreter::ROTQBYI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ROTQMBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ROTQMBYI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{a, _mm_setzero_si128(), _mm_setzero_si128()}; @@ -779,7 +779,7 @@ bool spu_interpreter::ROTQMBYI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::SHLQBYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SHLQBYI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; alignas(64) const __m128i buf[3]{_mm_setzero_si128(), _mm_setzero_si128(), a}; @@ -787,42 +787,42 @@ bool spu_interpreter::SHLQBYI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::NOP(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::NOP(spu_thread& spu, spu_opcode_t op) { return true; } -bool spu_interpreter::CGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGT(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); return true; } -bool spu_interpreter::XOR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XOR(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu.gpr[op.ra] ^ spu.gpr[op.rb]; return true; } -bool spu_interpreter::CGTH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGTH(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); return true; } -bool spu_interpreter::EQV(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::EQV(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = ~(spu.gpr[op.ra] ^ spu.gpr[op.rb]); return true; } -bool spu_interpreter::CGTB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGTB(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); return true; } -bool spu_interpreter::SUMB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SUMB(spu_thread& spu, spu_opcode_t op) { const auto m1 = _mm_set1_epi16(0xff); const auto m2 = _mm_set1_epi32(0xffff); @@ -842,7 +842,7 @@ bool spu_interpreter::SUMB(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::HGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HGT(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._s32[3] > spu.gpr[op.rb]._s32[3]) { @@ -851,7 +851,7 @@ bool spu_interpreter::HGT(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CLZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLZ(spu_thread& spu, spu_opcode_t op) { for (u32 i = 0; i < 4; i++) { @@ -860,20 +860,20 @@ bool spu_interpreter::CLZ(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::XSWD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XSWD(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt]._s64[0] = spu.gpr[op.ra]._s32[0]; spu.gpr[op.rt]._s64[1] = spu.gpr[op.ra]._s32[2]; return true; } -bool spu_interpreter::XSHW(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XSHW(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srai_epi32(_mm_slli_epi32(spu.gpr[op.ra].vi, 16), 16); return true; } -bool spu_interpreter::CNTB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CNTB(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto mask1 = _mm_set1_epi8(0x55); @@ -886,25 +886,25 @@ bool spu_interpreter::CNTB(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::XSBH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XSBH(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srai_epi16(_mm_slli_epi16(spu.gpr[op.ra].vi, 8), 8); return true; } -bool spu_interpreter::CLGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGT(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = sse_cmpgt_epu32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); return true; } -bool spu_interpreter::ANDC(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ANDC(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::andnot(spu.gpr[op.rb], spu.gpr[op.ra]); return true; } -bool spu_interpreter_fast::FCGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FCGT(spu_thread& spu, spu_opcode_t op) { // IMPL NOTES: // if (v is inf) v = (inf - 1) i.e nearest normal value to inf with mantissa bits left intact @@ -943,25 +943,25 @@ bool spu_interpreter_fast::FCGT(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::DFCGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DFCGT(spu_thread& spu, spu_opcode_t op) { fmt::throw_exception("Unexpected instruction" HERE); return true; } -bool spu_interpreter_fast::FA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FA(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::addfs(spu.gpr[op.ra], spu.gpr[op.rb]); return true; } -bool spu_interpreter_fast::FS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FS(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::subfs(spu.gpr[op.ra], spu.gpr[op.rb]); return true; } -bool spu_interpreter_fast::FM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FM(spu_thread& spu, spu_opcode_t op) { const auto zero = _mm_set1_ps(0.f); const auto sign_bits = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); @@ -991,19 +991,19 @@ bool spu_interpreter_fast::FM(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CLGTH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGTH(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = sse_cmpgt_epu16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); return true; } -bool spu_interpreter::ORC(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ORC(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu.gpr[op.ra] | ~spu.gpr[op.rb]; return true; } -bool spu_interpreter_fast::FCMGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FCMGT(spu_thread& spu, spu_opcode_t op) { //IMPL NOTES: See FCGT @@ -1030,7 +1030,7 @@ bool spu_interpreter_fast::FCMGT(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::DFCMGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DFCMGT(spu_thread& spu, spu_opcode_t op) { const auto mask = _mm_castsi128_pd(_mm_set1_epi64x(0x7fffffffffffffff)); const auto ra = _mm_and_pd(spu.gpr[op.ra].vd, mask); @@ -1039,31 +1039,31 @@ bool spu_interpreter::DFCMGT(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::DFA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFA(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::addfd(spu.gpr[op.ra], spu.gpr[op.rb]); return true; } -bool spu_interpreter_fast::DFS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFS(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::subfd(spu.gpr[op.ra], spu.gpr[op.rb]); return true; } -bool spu_interpreter_fast::DFM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFM(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vd = _mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd); return true; } -bool spu_interpreter::CLGTB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGTB(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = sse_cmpgt_epu8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); return true; } -bool spu_interpreter::HLGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HLGT(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._u32[3] > spu.gpr[op.rb]._u32[3]) { @@ -1072,37 +1072,37 @@ bool spu_interpreter::HLGT(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::DFMA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFMA(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vd = _mm_add_pd(_mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd), spu.gpr[op.rt].vd); return true; } -bool spu_interpreter_fast::DFMS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFMS(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vd = _mm_sub_pd(_mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd), spu.gpr[op.rt].vd); return true; } -bool spu_interpreter_fast::DFNMS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFNMS(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vd = _mm_sub_pd(spu.gpr[op.rt].vd, _mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd)); return true; } -bool spu_interpreter_fast::DFNMA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::DFNMA(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vd = _mm_xor_pd(_mm_add_pd(_mm_mul_pd(spu.gpr[op.ra].vd, spu.gpr[op.rb].vd), spu.gpr[op.rt].vd), _mm_set1_pd(-0.0)); return true; } -bool spu_interpreter::CEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQ(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi32(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); return true; } -bool spu_interpreter::MPYHHU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYHHU(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto b = spu.gpr[op.rb].vi; @@ -1110,19 +1110,19 @@ bool spu_interpreter::MPYHHU(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::ADDX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ADDX(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::add32(v128::add32(spu.gpr[op.ra], spu.gpr[op.rb]), spu.gpr[op.rt] & v128::from32p(1)); return true; } -bool spu_interpreter::SFX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SFX(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = v128::sub32(v128::sub32(spu.gpr[op.rb], spu.gpr[op.ra]), v128::andnot(spu.gpr[op.rt], v128::from32p(1))); return true; } -bool spu_interpreter::CGX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGX(spu_thread& spu, spu_opcode_t op) { for (s32 i = 0; i < 4; i++) { @@ -1132,7 +1132,7 @@ bool spu_interpreter::CGX(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::BGX(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BGX(spu_thread& spu, spu_opcode_t op) { for (s32 i = 0; i < 4; i++) { @@ -1142,13 +1142,13 @@ bool spu_interpreter::BGX(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::MPYHHA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYHHA(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_add_epi32(spu.gpr[op.rt].vi, _mm_madd_epi16(_mm_srli_epi32(spu.gpr[op.ra].vi, 16), _mm_srli_epi32(spu.gpr[op.rb].vi, 16))); return true; } -bool spu_interpreter::MPYHHAU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYHHAU(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto b = spu.gpr[op.rb].vi; @@ -1156,94 +1156,94 @@ bool spu_interpreter::MPYHHAU(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::FSCRRD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FSCRRD(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].clear(); return true; } -bool spu_interpreter_fast::FESD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FESD(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vf; spu.gpr[op.rt].vd = _mm_cvtps_pd(_mm_shuffle_ps(a, a, 0x8d)); return true; } -bool spu_interpreter_fast::FRDS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FRDS(spu_thread& spu, spu_opcode_t op) { const auto t = _mm_cvtpd_ps(spu.gpr[op.ra].vd); spu.gpr[op.rt].vf = _mm_shuffle_ps(t, t, 0x72); return true; } -bool spu_interpreter_fast::FSCRWR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FSCRWR(spu_thread& spu, spu_opcode_t op) { return true; } -bool spu_interpreter::DFTSV(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DFTSV(spu_thread& spu, spu_opcode_t op) { fmt::throw_exception("Unexpected instruction" HERE); return true; } -bool spu_interpreter_fast::FCEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FCEQ(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vf = _mm_cmpeq_ps(spu.gpr[op.rb].vf, spu.gpr[op.ra].vf); return true; } -bool spu_interpreter::DFCEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DFCEQ(spu_thread& spu, spu_opcode_t op) { fmt::throw_exception("Unexpected instruction" HERE); return true; } -bool spu_interpreter::MPY(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPY(spu_thread& spu, spu_opcode_t op) { const auto mask = _mm_set1_epi32(0xffff); spu.gpr[op.rt].vi = _mm_madd_epi16(_mm_and_si128(spu.gpr[op.ra].vi, mask), _mm_and_si128(spu.gpr[op.rb].vi, mask)); return true; } -bool spu_interpreter::MPYH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYH(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(spu.gpr[op.ra].vi, 16), spu.gpr[op.rb].vi), 16); return true; } -bool spu_interpreter::MPYHH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYHH(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_madd_epi16(_mm_srli_epi32(spu.gpr[op.ra].vi, 16), _mm_srli_epi32(spu.gpr[op.rb].vi, 16)); return true; } -bool spu_interpreter::MPYS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYS(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_srai_epi32(_mm_slli_epi32(_mm_mulhi_epi16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi), 16), 16); return true; } -bool spu_interpreter::CEQH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQH(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi16(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); return true; } -bool spu_interpreter_fast::FCMEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FCMEQ(spu_thread& spu, spu_opcode_t op) { const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); spu.gpr[op.rt].vf = _mm_cmpeq_ps(_mm_and_ps(spu.gpr[op.rb].vf, mask), _mm_and_ps(spu.gpr[op.ra].vf, mask)); return true; } -bool spu_interpreter::DFCMEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::DFCMEQ(spu_thread& spu, spu_opcode_t op) { fmt::throw_exception("Unexpected instruction" HERE); return true; } -bool spu_interpreter::MPYU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYU(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto b = spu.gpr[op.rb].vi; @@ -1251,13 +1251,13 @@ bool spu_interpreter::MPYU(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CEQB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQB(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi8(spu.gpr[op.ra].vi, spu.gpr[op.rb].vi); return true; } -bool spu_interpreter_fast::FI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FI(spu_thread& spu, spu_opcode_t op) { // TODO const auto mask_se = _mm_castsi128_ps(_mm_set1_epi32(0xff800000)); // sign and exponent mask @@ -1271,7 +1271,7 @@ bool spu_interpreter_fast::FI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::HEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HEQ(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._s32[3] == spu.gpr[op.rb]._s32[3]) { @@ -1281,14 +1281,14 @@ bool spu_interpreter::HEQ(SPUThread& spu, spu_opcode_t op) } -bool spu_interpreter_fast::CFLTS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::CFLTS(spu_thread& spu, spu_opcode_t op) { const auto scaled = _mm_mul_ps(spu.gpr[op.ra].vf, g_spu_imm.scale[173 - op.i8]); spu.gpr[op.rt].vi = _mm_xor_si128(_mm_cvttps_epi32(scaled), _mm_castps_si128(_mm_cmpge_ps(scaled, _mm_set1_ps(0x80000000)))); return true; } -bool spu_interpreter_fast::CFLTU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::CFLTU(spu_thread& spu, spu_opcode_t op) { const auto scaled1 = _mm_max_ps(_mm_mul_ps(spu.gpr[op.ra].vf, g_spu_imm.scale[173 - op.i8]), _mm_set1_ps(0.0f)); const auto scaled2 = _mm_and_ps(_mm_sub_ps(scaled1, _mm_set1_ps(0x80000000)), _mm_cmpge_ps(scaled1, _mm_set1_ps(0x80000000))); @@ -1296,13 +1296,13 @@ bool spu_interpreter_fast::CFLTU(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::CSFLT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::CSFLT(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vf = _mm_mul_ps(_mm_cvtepi32_ps(spu.gpr[op.ra].vi), g_spu_imm.scale[op.i8 - 155]); return true; } -bool spu_interpreter_fast::CUFLT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::CUFLT(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto fix = _mm_and_ps(_mm_castsi128_ps(_mm_srai_epi32(a, 31)), _mm_set1_ps(0x80000000)); @@ -1311,7 +1311,7 @@ bool spu_interpreter_fast::CUFLT(SPUThread& spu, spu_opcode_t op) } -bool spu_interpreter::BRZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRZ(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u32[3] == 0) { @@ -1321,13 +1321,13 @@ bool spu_interpreter::BRZ(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::STQA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STQA(spu_thread& spu, spu_opcode_t op) { spu._ref(spu_ls_target(0, op.i16)) = spu.gpr[op.rt]; return true; } -bool spu_interpreter::BRNZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRNZ(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u32[3] != 0) { @@ -1337,7 +1337,7 @@ bool spu_interpreter::BRNZ(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::BRHZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRHZ(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u16[6] == 0) { @@ -1347,7 +1347,7 @@ bool spu_interpreter::BRHZ(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::BRHNZ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRHNZ(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.rt]._u16[6] != 0) { @@ -1357,25 +1357,25 @@ bool spu_interpreter::BRHNZ(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::STQR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STQR(spu_thread& spu, spu_opcode_t op) { spu._ref(spu_ls_target(spu.pc, op.i16)) = spu.gpr[op.rt]; return true; } -bool spu_interpreter::BRA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRA(spu_thread& spu, spu_opcode_t op) { spu.pc = spu_branch_target(0, op.i16); return false; } -bool spu_interpreter::LQA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::LQA(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu._ref(spu_ls_target(0, op.i16)); return true; } -bool spu_interpreter::BRASL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRASL(spu_thread& spu, spu_opcode_t op) { const u32 target = spu_branch_target(0, op.i16); spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4)); @@ -1383,13 +1383,13 @@ bool spu_interpreter::BRASL(SPUThread& spu, spu_opcode_t op) return false; } -bool spu_interpreter::BR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BR(spu_thread& spu, spu_opcode_t op) { spu.pc = spu_branch_target(spu.pc, op.i16); return false; } -bool spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::FSMBI(spu_thread& spu, spu_opcode_t op) { const auto vsrc = _mm_set_epi32(0, 0, 0, op.i16); const auto bits = _mm_shuffle_epi32(_mm_shufflelo_epi16(_mm_unpacklo_epi8(vsrc, vsrc), 0x50), 0x50); @@ -1398,7 +1398,7 @@ bool spu_interpreter::FSMBI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::BRSL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::BRSL(spu_thread& spu, spu_opcode_t op) { const u32 target = spu_branch_target(spu.pc, op.i16); spu.gpr[op.rt] = v128::from32r(spu_branch_target(spu.pc + 4)); @@ -1406,146 +1406,146 @@ bool spu_interpreter::BRSL(SPUThread& spu, spu_opcode_t op) return false; } -bool spu_interpreter::LQR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::LQR(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu._ref(spu_ls_target(spu.pc, op.i16)); return true; } -bool spu_interpreter::IL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::IL(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_set1_epi32(op.si16); return true; } -bool spu_interpreter::ILHU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ILHU(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_set1_epi32(op.i16 << 16); return true; } -bool spu_interpreter::ILH(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ILH(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_set1_epi16(op.i16); return true; } -bool spu_interpreter::IOHL(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::IOHL(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.rt].vi, _mm_set1_epi32(op.i16)); return true; } -bool spu_interpreter::ORI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ORI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10)); return true; } -bool spu_interpreter::ORHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ORHI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10)); return true; } -bool spu_interpreter::ORBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ORBI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_or_si128(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8)); return true; } -bool spu_interpreter::SFI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SFI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_sub_epi32(_mm_set1_epi32(op.si10), spu.gpr[op.ra].vi); return true; } -bool spu_interpreter::SFHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SFHI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_sub_epi16(_mm_set1_epi16(op.si10), spu.gpr[op.ra].vi); return true; } -bool spu_interpreter::ANDI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ANDI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_and_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10)); return true; } -bool spu_interpreter::ANDHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ANDHI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_and_si128(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10)); return true; } -bool spu_interpreter::ANDBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ANDBI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_and_si128(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8)); return true; } -bool spu_interpreter::AI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::AI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_add_epi32(_mm_set1_epi32(op.si10), spu.gpr[op.ra].vi); return true; } -bool spu_interpreter::AHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::AHI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_add_epi16(_mm_set1_epi16(op.si10), spu.gpr[op.ra].vi); return true; } -bool spu_interpreter::STQD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::STQD(spu_thread& spu, spu_opcode_t op) { spu._ref((spu.gpr[op.ra]._s32[3] + (op.si10 << 4)) & 0x3fff0) = spu.gpr[op.rt]; return true; } -bool spu_interpreter::LQD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::LQD(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt] = spu._ref((spu.gpr[op.ra]._s32[3] + (op.si10 << 4)) & 0x3fff0); return true; } -bool spu_interpreter::XORI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XORI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10)); return true; } -bool spu_interpreter::XORHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XORHI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10)); return true; } -bool spu_interpreter::XORBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::XORBI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8)); return true; } -bool spu_interpreter::CGTI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGTI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi32(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10)); return true; } -bool spu_interpreter::CGTHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGTHI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi16(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10)); return true; } -bool spu_interpreter::CGTBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CGTBI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi8(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8)); return true; } -bool spu_interpreter::HGTI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HGTI(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._s32[3] > op.si10) { @@ -1554,25 +1554,25 @@ bool spu_interpreter::HGTI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CLGTI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGTI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi32(_mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x80000000)), _mm_set1_epi32(op.si10 ^ 0x80000000)); return true; } -bool spu_interpreter::CLGTHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGTHI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi16(_mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x80008000)), _mm_set1_epi16(op.si10 ^ 0x8000)); return true; } -bool spu_interpreter::CLGTBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CLGTBI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpgt_epi8(_mm_xor_si128(spu.gpr[op.ra].vi, _mm_set1_epi32(0x80808080)), _mm_set1_epi8(op.i8 ^ 0x80)); return true; } -bool spu_interpreter::HLGTI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HLGTI(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._u32[3] > static_cast(op.si10)) { @@ -1581,13 +1581,13 @@ bool spu_interpreter::HLGTI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::MPYI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_madd_epi16(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10 & 0xffff)); return true; } -bool spu_interpreter::MPYUI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYUI(spu_thread& spu, spu_opcode_t op) { const auto a = spu.gpr[op.ra].vi; const auto i = _mm_set1_epi32(op.si10 & 0xffff); @@ -1595,25 +1595,25 @@ bool spu_interpreter::MPYUI(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter::CEQI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi32(spu.gpr[op.ra].vi, _mm_set1_epi32(op.si10)); return true; } -bool spu_interpreter::CEQHI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQHI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi16(spu.gpr[op.ra].vi, _mm_set1_epi16(op.si10)); return true; } -bool spu_interpreter::CEQBI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::CEQBI(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_cmpeq_epi8(spu.gpr[op.ra].vi, _mm_set1_epi8(op.i8)); return true; } -bool spu_interpreter::HEQI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HEQI(spu_thread& spu, spu_opcode_t op) { if (spu.gpr[op.ra]._s32[3] == op.si10) { @@ -1623,30 +1623,30 @@ bool spu_interpreter::HEQI(SPUThread& spu, spu_opcode_t op) } -bool spu_interpreter::HBRA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HBRA(spu_thread& spu, spu_opcode_t op) { return true; } -bool spu_interpreter::HBRR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::HBRR(spu_thread& spu, spu_opcode_t op) { return true; } -bool spu_interpreter::ILA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::ILA(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt].vi = _mm_set1_epi32(op.i18); return true; } -bool spu_interpreter::SELB(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::SELB(spu_thread& spu, spu_opcode_t op) { spu.gpr[op.rt4] = (spu.gpr[op.rc] & spu.gpr[op.rb]) | v128::andnot(spu.gpr[op.rc], spu.gpr[op.ra]); return true; } -static bool SHUFB_(SPUThread& spu, spu_opcode_t op) +static bool SHUFB_(spu_thread& spu, spu_opcode_t op) { __m128i ab[2]{spu.gpr[op.rb].vi, spu.gpr[op.ra].vi}; v128 c = spu.gpr[op.rc]; @@ -1741,14 +1741,14 @@ const spu_inter_func_t spu_interpreter::SHUFB = !utils::has_ssse3() ? &SHUFB_ : c.dq(0x0f0f0f0f0f0f0f0f); }); -bool spu_interpreter::MPYA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter::MPYA(spu_thread& spu, spu_opcode_t op) { const auto mask = _mm_set1_epi32(0xffff); spu.gpr[op.rt4].vi = _mm_add_epi32(spu.gpr[op.rc].vi, _mm_madd_epi16(_mm_and_si128(spu.gpr[op.ra].vi, mask), _mm_and_si128(spu.gpr[op.rb].vi, mask))); return true; } -bool spu_interpreter_fast::FNMS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FNMS(spu_thread& spu, spu_opcode_t op) { const u32 test_bits = 0x7f800000; auto mask = _mm_set1_ps((f32&)test_bits); @@ -1765,7 +1765,7 @@ bool spu_interpreter_fast::FNMS(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::FMA(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FMA(spu_thread& spu, spu_opcode_t op) { const u32 test_bits = 0x7f800000; auto mask = _mm_set1_ps((f32&)test_bits); @@ -1782,7 +1782,7 @@ bool spu_interpreter_fast::FMA(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_fast::FMS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_fast::FMS(spu_thread& spu, spu_opcode_t op) { const u32 test_bits = 0x7f800000; auto mask = _mm_set1_ps((f32&)test_bits); @@ -1864,7 +1864,7 @@ inline bool isdenormal(double x) return std::fpclassify(x) == FP_SUBNORMAL; } -bool spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FREST(spu_thread& spu, spu_opcode_t op) { fesetround(FE_TOWARDZERO); for (int i = 0; i < 4; i++) @@ -1885,7 +1885,7 @@ bool spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::FRSQEST(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FRSQEST(spu_thread& spu, spu_opcode_t op) { fesetround(FE_TOWARDZERO); for (int i = 0; i < 4; i++) @@ -1906,7 +1906,7 @@ bool spu_interpreter_precise::FRSQEST(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::FCGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FCGT(spu_thread& spu, spu_opcode_t op) { for (int i = 0; i < 4; i++) { @@ -1930,7 +1930,7 @@ bool spu_interpreter_precise::FCGT(SPUThread& spu, spu_opcode_t op) return true; } -static void FA_FS(SPUThread& spu, spu_opcode_t op, bool sub) +static void FA_FS(spu_thread& spu, spu_opcode_t op, bool sub) { fesetround(FE_TOWARDZERO); for (int w = 0; w < 4; w++) @@ -2015,11 +2015,11 @@ static void FA_FS(SPUThread& spu, spu_opcode_t op, bool sub) } } -bool spu_interpreter_precise::FA(SPUThread& spu, spu_opcode_t op) { FA_FS(spu, op, false); return true; } +bool spu_interpreter_precise::FA(spu_thread& spu, spu_opcode_t op) { FA_FS(spu, op, false); return true; } -bool spu_interpreter_precise::FS(SPUThread& spu, spu_opcode_t op) { FA_FS(spu, op, true); return true; } +bool spu_interpreter_precise::FS(spu_thread& spu, spu_opcode_t op) { FA_FS(spu, op, true); return true; } -bool spu_interpreter_precise::FM(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FM(spu_thread& spu, spu_opcode_t op) { fesetround(FE_TOWARDZERO); for (int w = 0; w < 4; w++) @@ -2093,7 +2093,7 @@ bool spu_interpreter_precise::FM(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::FCMGT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FCMGT(spu_thread& spu, spu_opcode_t op) { for (int i = 0; i < 4; i++) { @@ -2122,7 +2122,7 @@ enum DoubleOp DFASM_M, }; -static void DFASM(SPUThread& spu, spu_opcode_t op, DoubleOp operation) +static void DFASM(spu_thread& spu, spu_opcode_t op, DoubleOp operation) { for (int i = 0; i < 2; i++) { @@ -2176,13 +2176,13 @@ static void DFASM(SPUThread& spu, spu_opcode_t op, DoubleOp operation) } } -bool spu_interpreter_precise::DFA(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_A); return true; } +bool spu_interpreter_precise::DFA(spu_thread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_A); return true; } -bool spu_interpreter_precise::DFS(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_S); return true; } +bool spu_interpreter_precise::DFS(spu_thread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_S); return true; } -bool spu_interpreter_precise::DFM(SPUThread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_M); return true; } +bool spu_interpreter_precise::DFM(spu_thread& spu, spu_opcode_t op) { DFASM(spu, op, DFASM_M); return true; } -static void DFMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub) +static void DFMA(spu_thread& spu, spu_opcode_t op, bool neg, bool sub) { for (int i = 0; i < 2; i++) { @@ -2238,21 +2238,21 @@ static void DFMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub) } } -bool spu_interpreter_precise::DFMA(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, false); return true; } +bool spu_interpreter_precise::DFMA(spu_thread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, false); return true; } -bool spu_interpreter_precise::DFMS(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, true); return true; } +bool spu_interpreter_precise::DFMS(spu_thread& spu, spu_opcode_t op) { ::DFMA(spu, op, false, true); return true; } -bool spu_interpreter_precise::DFNMS(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, true); return true; } +bool spu_interpreter_precise::DFNMS(spu_thread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, true); return true; } -bool spu_interpreter_precise::DFNMA(SPUThread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, false); return true; } +bool spu_interpreter_precise::DFNMA(spu_thread& spu, spu_opcode_t op) { ::DFMA(spu, op, true, false); return true; } -bool spu_interpreter_precise::FSCRRD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FSCRRD(spu_thread& spu, spu_opcode_t op) { spu.fpscr.Read(spu.gpr[op.rt]); return true; } -bool spu_interpreter_precise::FESD(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FESD(spu_thread& spu, spu_opcode_t op) { for (int i = 0; i < 2; i++) { @@ -2277,7 +2277,7 @@ bool spu_interpreter_precise::FESD(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::FRDS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FRDS(spu_thread& spu, spu_opcode_t op) { for (int i = 0; i < 2; i++) { @@ -2307,13 +2307,13 @@ bool spu_interpreter_precise::FRDS(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::FSCRWR(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FSCRWR(spu_thread& spu, spu_opcode_t op) { spu.fpscr.Write(spu.gpr[op.ra]); return true; } -bool spu_interpreter_precise::FCEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FCEQ(spu_thread& spu, spu_opcode_t op) { for (int i = 0; i < 4; i++) { @@ -2329,7 +2329,7 @@ bool spu_interpreter_precise::FCEQ(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::FCMEQ(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FCMEQ(spu_thread& spu, spu_opcode_t op) { for (int i = 0; i < 4; i++) { @@ -2345,14 +2345,14 @@ bool spu_interpreter_precise::FCMEQ(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::FI(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::FI(spu_thread& spu, spu_opcode_t op) { // TODO spu.gpr[op.rt] = spu.gpr[op.rb]; return true; } -bool spu_interpreter_precise::CFLTS(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::CFLTS(spu_thread& spu, spu_opcode_t op) { const int scale = 173 - (op.i8 & 0xff); //unsigned immediate for (int i = 0; i < 4; i++) @@ -2375,7 +2375,7 @@ bool spu_interpreter_precise::CFLTS(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::CFLTU(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::CFLTU(spu_thread& spu, spu_opcode_t op) { const int scale = 173 - (op.i8 & 0xff); //unsigned immediate for (int i = 0; i < 4; i++) @@ -2398,7 +2398,7 @@ bool spu_interpreter_precise::CFLTU(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::CSFLT(spu_thread& spu, spu_opcode_t op) { fesetround(FE_TOWARDZERO); const int scale = 155 - (op.i8 & 0xff); //unsigned immediate @@ -2422,7 +2422,7 @@ bool spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op) return true; } -bool spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op) +bool spu_interpreter_precise::CUFLT(spu_thread& spu, spu_opcode_t op) { fesetround(FE_TOWARDZERO); const int scale = 155 - (op.i8 & 0xff); //unsigned immediate @@ -2446,7 +2446,7 @@ bool spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op) return true; } -static void FMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub) +static void FMA(spu_thread& spu, spu_opcode_t op, bool neg, bool sub) { fesetround(FE_TOWARDZERO); for (int w = 0; w < 4; w++) @@ -2591,11 +2591,11 @@ static void FMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub) } } -bool spu_interpreter_precise::FNMS(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, true, true); return true; } +bool spu_interpreter_precise::FNMS(spu_thread& spu, spu_opcode_t op) { ::FMA(spu, op, true, true); return true; } -bool spu_interpreter_precise::FMA(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, false, false); return true; } +bool spu_interpreter_precise::FMA(spu_thread& spu, spu_opcode_t op) { ::FMA(spu, op, false, false); return true; } -bool spu_interpreter_precise::FMS(SPUThread& spu, spu_opcode_t op) { ::FMA(spu, op, false, true); return true; } +bool spu_interpreter_precise::FMS(spu_thread& spu, spu_opcode_t op) { ::FMA(spu, op, false, true); return true; } extern const spu_decoder g_spu_interpreter_precise{}; diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 19cdb2cd50..86bfb57c92 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -2,248 +2,248 @@ #include "SPUOpcodes.h" -class SPUThread; +class spu_thread; -using spu_inter_func_t = bool(*)(SPUThread& spu, spu_opcode_t op); +using spu_inter_func_t = bool(*)(spu_thread& spu, spu_opcode_t op); struct spu_interpreter { - static bool UNK(SPUThread&, spu_opcode_t); - static void set_interrupt_status(SPUThread&, spu_opcode_t); + static bool UNK(spu_thread&, spu_opcode_t); + static void set_interrupt_status(spu_thread&, spu_opcode_t); - static bool STOP(SPUThread&, spu_opcode_t); - static bool LNOP(SPUThread&, spu_opcode_t); - static bool SYNC(SPUThread&, spu_opcode_t); - static bool DSYNC(SPUThread&, spu_opcode_t); - static bool MFSPR(SPUThread&, spu_opcode_t); - static bool RDCH(SPUThread&, spu_opcode_t); - static bool RCHCNT(SPUThread&, spu_opcode_t); - static bool SF(SPUThread&, spu_opcode_t); - static bool OR(SPUThread&, spu_opcode_t); - static bool BG(SPUThread&, spu_opcode_t); - static bool SFH(SPUThread&, spu_opcode_t); - static bool NOR(SPUThread&, spu_opcode_t); - static bool ABSDB(SPUThread&, spu_opcode_t); - static bool ROT(SPUThread&, spu_opcode_t); - static bool ROTM(SPUThread&, spu_opcode_t); - static bool ROTMA(SPUThread&, spu_opcode_t); - static bool SHL(SPUThread&, spu_opcode_t); - static bool ROTH(SPUThread&, spu_opcode_t); - static bool ROTHM(SPUThread&, spu_opcode_t); - static bool ROTMAH(SPUThread&, spu_opcode_t); - static bool SHLH(SPUThread&, spu_opcode_t); - static bool ROTI(SPUThread&, spu_opcode_t); - static bool ROTMI(SPUThread&, spu_opcode_t); - static bool ROTMAI(SPUThread&, spu_opcode_t); - static bool SHLI(SPUThread&, spu_opcode_t); - static bool ROTHI(SPUThread&, spu_opcode_t); - static bool ROTHMI(SPUThread&, spu_opcode_t); - static bool ROTMAHI(SPUThread&, spu_opcode_t); - static bool SHLHI(SPUThread&, spu_opcode_t); - static bool A(SPUThread&, spu_opcode_t); - static bool AND(SPUThread&, spu_opcode_t); - static bool CG(SPUThread&, spu_opcode_t); - static bool AH(SPUThread&, spu_opcode_t); - static bool NAND(SPUThread&, spu_opcode_t); - static bool AVGB(SPUThread&, spu_opcode_t); - static bool MTSPR(SPUThread&, spu_opcode_t); - static bool WRCH(SPUThread&, spu_opcode_t); - static bool BIZ(SPUThread&, spu_opcode_t); - static bool BINZ(SPUThread&, spu_opcode_t); - static bool BIHZ(SPUThread&, spu_opcode_t); - static bool BIHNZ(SPUThread&, spu_opcode_t); - static bool STOPD(SPUThread&, spu_opcode_t); - static bool STQX(SPUThread&, spu_opcode_t); - static bool BI(SPUThread&, spu_opcode_t); - static bool BISL(SPUThread&, spu_opcode_t); - static bool IRET(SPUThread&, spu_opcode_t); - static bool BISLED(SPUThread&, spu_opcode_t); - static bool HBR(SPUThread&, spu_opcode_t); - static bool GB(SPUThread&, spu_opcode_t); - static bool GBH(SPUThread&, spu_opcode_t); - static bool GBB(SPUThread&, spu_opcode_t); - static bool FSM(SPUThread&, spu_opcode_t); - static bool FSMH(SPUThread&, spu_opcode_t); - static bool FSMB(SPUThread&, spu_opcode_t); - static bool LQX(SPUThread&, spu_opcode_t); - static bool ROTQBYBI(SPUThread&, spu_opcode_t); - static bool ROTQMBYBI(SPUThread&, spu_opcode_t); - static bool SHLQBYBI(SPUThread&, spu_opcode_t); - static bool CBX(SPUThread&, spu_opcode_t); - static bool CHX(SPUThread&, spu_opcode_t); - static bool CWX(SPUThread&, spu_opcode_t); - static bool CDX(SPUThread&, spu_opcode_t); - static bool ROTQBI(SPUThread&, spu_opcode_t); - static bool ROTQMBI(SPUThread&, spu_opcode_t); - static bool SHLQBI(SPUThread&, spu_opcode_t); - static bool ROTQBY(SPUThread&, spu_opcode_t); - static bool ROTQMBY(SPUThread&, spu_opcode_t); - static bool SHLQBY(SPUThread&, spu_opcode_t); - static bool ORX(SPUThread&, spu_opcode_t); - static bool CBD(SPUThread&, spu_opcode_t); - static bool CHD(SPUThread&, spu_opcode_t); - static bool CWD(SPUThread&, spu_opcode_t); - static bool CDD(SPUThread&, spu_opcode_t); - static bool ROTQBII(SPUThread&, spu_opcode_t); - static bool ROTQMBII(SPUThread&, spu_opcode_t); - static bool SHLQBII(SPUThread&, spu_opcode_t); - static bool ROTQBYI(SPUThread&, spu_opcode_t); - static bool ROTQMBYI(SPUThread&, spu_opcode_t); - static bool SHLQBYI(SPUThread&, spu_opcode_t); - static bool NOP(SPUThread&, spu_opcode_t); - static bool CGT(SPUThread&, spu_opcode_t); - static bool XOR(SPUThread&, spu_opcode_t); - static bool CGTH(SPUThread&, spu_opcode_t); - static bool EQV(SPUThread&, spu_opcode_t); - static bool CGTB(SPUThread&, spu_opcode_t); - static bool SUMB(SPUThread&, spu_opcode_t); - static bool HGT(SPUThread&, spu_opcode_t); - static bool CLZ(SPUThread&, spu_opcode_t); - static bool XSWD(SPUThread&, spu_opcode_t); - static bool XSHW(SPUThread&, spu_opcode_t); - static bool CNTB(SPUThread&, spu_opcode_t); - static bool XSBH(SPUThread&, spu_opcode_t); - static bool CLGT(SPUThread&, spu_opcode_t); - static bool ANDC(SPUThread&, spu_opcode_t); - static bool CLGTH(SPUThread&, spu_opcode_t); - static bool ORC(SPUThread&, spu_opcode_t); - static bool CLGTB(SPUThread&, spu_opcode_t); - static bool HLGT(SPUThread&, spu_opcode_t); - static bool CEQ(SPUThread&, spu_opcode_t); - static bool MPYHHU(SPUThread&, spu_opcode_t); - static bool ADDX(SPUThread&, spu_opcode_t); - static bool SFX(SPUThread&, spu_opcode_t); - static bool CGX(SPUThread&, spu_opcode_t); - static bool BGX(SPUThread&, spu_opcode_t); - static bool MPYHHA(SPUThread&, spu_opcode_t); - static bool MPYHHAU(SPUThread&, spu_opcode_t); - static bool MPY(SPUThread&, spu_opcode_t); - static bool MPYH(SPUThread&, spu_opcode_t); - static bool MPYHH(SPUThread&, spu_opcode_t); - static bool MPYS(SPUThread&, spu_opcode_t); - static bool CEQH(SPUThread&, spu_opcode_t); - static bool MPYU(SPUThread&, spu_opcode_t); - static bool CEQB(SPUThread&, spu_opcode_t); - static bool HEQ(SPUThread&, spu_opcode_t); - static bool BRZ(SPUThread&, spu_opcode_t); - static bool STQA(SPUThread&, spu_opcode_t); - static bool BRNZ(SPUThread&, spu_opcode_t); - static bool BRHZ(SPUThread&, spu_opcode_t); - static bool BRHNZ(SPUThread&, spu_opcode_t); - static bool STQR(SPUThread&, spu_opcode_t); - static bool BRA(SPUThread&, spu_opcode_t); - static bool LQA(SPUThread&, spu_opcode_t); - static bool BRASL(SPUThread&, spu_opcode_t); - static bool BR(SPUThread&, spu_opcode_t); - static bool FSMBI(SPUThread&, spu_opcode_t); - static bool BRSL(SPUThread&, spu_opcode_t); - static bool LQR(SPUThread&, spu_opcode_t); - static bool IL(SPUThread&, spu_opcode_t); - static bool ILHU(SPUThread&, spu_opcode_t); - static bool ILH(SPUThread&, spu_opcode_t); - static bool IOHL(SPUThread&, spu_opcode_t); - static bool ORI(SPUThread&, spu_opcode_t); - static bool ORHI(SPUThread&, spu_opcode_t); - static bool ORBI(SPUThread&, spu_opcode_t); - static bool SFI(SPUThread&, spu_opcode_t); - static bool SFHI(SPUThread&, spu_opcode_t); - static bool ANDI(SPUThread&, spu_opcode_t); - static bool ANDHI(SPUThread&, spu_opcode_t); - static bool ANDBI(SPUThread&, spu_opcode_t); - static bool AI(SPUThread&, spu_opcode_t); - static bool AHI(SPUThread&, spu_opcode_t); - static bool STQD(SPUThread&, spu_opcode_t); - static bool LQD(SPUThread&, spu_opcode_t); - static bool XORI(SPUThread&, spu_opcode_t); - static bool XORHI(SPUThread&, spu_opcode_t); - static bool XORBI(SPUThread&, spu_opcode_t); - static bool CGTI(SPUThread&, spu_opcode_t); - static bool CGTHI(SPUThread&, spu_opcode_t); - static bool CGTBI(SPUThread&, spu_opcode_t); - static bool HGTI(SPUThread&, spu_opcode_t); - static bool CLGTI(SPUThread&, spu_opcode_t); - static bool CLGTHI(SPUThread&, spu_opcode_t); - static bool CLGTBI(SPUThread&, spu_opcode_t); - static bool HLGTI(SPUThread&, spu_opcode_t); - static bool MPYI(SPUThread&, spu_opcode_t); - static bool MPYUI(SPUThread&, spu_opcode_t); - static bool CEQI(SPUThread&, spu_opcode_t); - static bool CEQHI(SPUThread&, spu_opcode_t); - static bool CEQBI(SPUThread&, spu_opcode_t); - static bool HEQI(SPUThread&, spu_opcode_t); - static bool HBRA(SPUThread&, spu_opcode_t); - static bool HBRR(SPUThread&, spu_opcode_t); - static bool ILA(SPUThread&, spu_opcode_t); - static bool SELB(SPUThread&, spu_opcode_t); + static bool STOP(spu_thread&, spu_opcode_t); + static bool LNOP(spu_thread&, spu_opcode_t); + static bool SYNC(spu_thread&, spu_opcode_t); + static bool DSYNC(spu_thread&, spu_opcode_t); + static bool MFSPR(spu_thread&, spu_opcode_t); + static bool RDCH(spu_thread&, spu_opcode_t); + static bool RCHCNT(spu_thread&, spu_opcode_t); + static bool SF(spu_thread&, spu_opcode_t); + static bool OR(spu_thread&, spu_opcode_t); + static bool BG(spu_thread&, spu_opcode_t); + static bool SFH(spu_thread&, spu_opcode_t); + static bool NOR(spu_thread&, spu_opcode_t); + static bool ABSDB(spu_thread&, spu_opcode_t); + static bool ROT(spu_thread&, spu_opcode_t); + static bool ROTM(spu_thread&, spu_opcode_t); + static bool ROTMA(spu_thread&, spu_opcode_t); + static bool SHL(spu_thread&, spu_opcode_t); + static bool ROTH(spu_thread&, spu_opcode_t); + static bool ROTHM(spu_thread&, spu_opcode_t); + static bool ROTMAH(spu_thread&, spu_opcode_t); + static bool SHLH(spu_thread&, spu_opcode_t); + static bool ROTI(spu_thread&, spu_opcode_t); + static bool ROTMI(spu_thread&, spu_opcode_t); + static bool ROTMAI(spu_thread&, spu_opcode_t); + static bool SHLI(spu_thread&, spu_opcode_t); + static bool ROTHI(spu_thread&, spu_opcode_t); + static bool ROTHMI(spu_thread&, spu_opcode_t); + static bool ROTMAHI(spu_thread&, spu_opcode_t); + static bool SHLHI(spu_thread&, spu_opcode_t); + static bool A(spu_thread&, spu_opcode_t); + static bool AND(spu_thread&, spu_opcode_t); + static bool CG(spu_thread&, spu_opcode_t); + static bool AH(spu_thread&, spu_opcode_t); + static bool NAND(spu_thread&, spu_opcode_t); + static bool AVGB(spu_thread&, spu_opcode_t); + static bool MTSPR(spu_thread&, spu_opcode_t); + static bool WRCH(spu_thread&, spu_opcode_t); + static bool BIZ(spu_thread&, spu_opcode_t); + static bool BINZ(spu_thread&, spu_opcode_t); + static bool BIHZ(spu_thread&, spu_opcode_t); + static bool BIHNZ(spu_thread&, spu_opcode_t); + static bool STOPD(spu_thread&, spu_opcode_t); + static bool STQX(spu_thread&, spu_opcode_t); + static bool BI(spu_thread&, spu_opcode_t); + static bool BISL(spu_thread&, spu_opcode_t); + static bool IRET(spu_thread&, spu_opcode_t); + static bool BISLED(spu_thread&, spu_opcode_t); + static bool HBR(spu_thread&, spu_opcode_t); + static bool GB(spu_thread&, spu_opcode_t); + static bool GBH(spu_thread&, spu_opcode_t); + static bool GBB(spu_thread&, spu_opcode_t); + static bool FSM(spu_thread&, spu_opcode_t); + static bool FSMH(spu_thread&, spu_opcode_t); + static bool FSMB(spu_thread&, spu_opcode_t); + static bool LQX(spu_thread&, spu_opcode_t); + static bool ROTQBYBI(spu_thread&, spu_opcode_t); + static bool ROTQMBYBI(spu_thread&, spu_opcode_t); + static bool SHLQBYBI(spu_thread&, spu_opcode_t); + static bool CBX(spu_thread&, spu_opcode_t); + static bool CHX(spu_thread&, spu_opcode_t); + static bool CWX(spu_thread&, spu_opcode_t); + static bool CDX(spu_thread&, spu_opcode_t); + static bool ROTQBI(spu_thread&, spu_opcode_t); + static bool ROTQMBI(spu_thread&, spu_opcode_t); + static bool SHLQBI(spu_thread&, spu_opcode_t); + static bool ROTQBY(spu_thread&, spu_opcode_t); + static bool ROTQMBY(spu_thread&, spu_opcode_t); + static bool SHLQBY(spu_thread&, spu_opcode_t); + static bool ORX(spu_thread&, spu_opcode_t); + static bool CBD(spu_thread&, spu_opcode_t); + static bool CHD(spu_thread&, spu_opcode_t); + static bool CWD(spu_thread&, spu_opcode_t); + static bool CDD(spu_thread&, spu_opcode_t); + static bool ROTQBII(spu_thread&, spu_opcode_t); + static bool ROTQMBII(spu_thread&, spu_opcode_t); + static bool SHLQBII(spu_thread&, spu_opcode_t); + static bool ROTQBYI(spu_thread&, spu_opcode_t); + static bool ROTQMBYI(spu_thread&, spu_opcode_t); + static bool SHLQBYI(spu_thread&, spu_opcode_t); + static bool NOP(spu_thread&, spu_opcode_t); + static bool CGT(spu_thread&, spu_opcode_t); + static bool XOR(spu_thread&, spu_opcode_t); + static bool CGTH(spu_thread&, spu_opcode_t); + static bool EQV(spu_thread&, spu_opcode_t); + static bool CGTB(spu_thread&, spu_opcode_t); + static bool SUMB(spu_thread&, spu_opcode_t); + static bool HGT(spu_thread&, spu_opcode_t); + static bool CLZ(spu_thread&, spu_opcode_t); + static bool XSWD(spu_thread&, spu_opcode_t); + static bool XSHW(spu_thread&, spu_opcode_t); + static bool CNTB(spu_thread&, spu_opcode_t); + static bool XSBH(spu_thread&, spu_opcode_t); + static bool CLGT(spu_thread&, spu_opcode_t); + static bool ANDC(spu_thread&, spu_opcode_t); + static bool CLGTH(spu_thread&, spu_opcode_t); + static bool ORC(spu_thread&, spu_opcode_t); + static bool CLGTB(spu_thread&, spu_opcode_t); + static bool HLGT(spu_thread&, spu_opcode_t); + static bool CEQ(spu_thread&, spu_opcode_t); + static bool MPYHHU(spu_thread&, spu_opcode_t); + static bool ADDX(spu_thread&, spu_opcode_t); + static bool SFX(spu_thread&, spu_opcode_t); + static bool CGX(spu_thread&, spu_opcode_t); + static bool BGX(spu_thread&, spu_opcode_t); + static bool MPYHHA(spu_thread&, spu_opcode_t); + static bool MPYHHAU(spu_thread&, spu_opcode_t); + static bool MPY(spu_thread&, spu_opcode_t); + static bool MPYH(spu_thread&, spu_opcode_t); + static bool MPYHH(spu_thread&, spu_opcode_t); + static bool MPYS(spu_thread&, spu_opcode_t); + static bool CEQH(spu_thread&, spu_opcode_t); + static bool MPYU(spu_thread&, spu_opcode_t); + static bool CEQB(spu_thread&, spu_opcode_t); + static bool HEQ(spu_thread&, spu_opcode_t); + static bool BRZ(spu_thread&, spu_opcode_t); + static bool STQA(spu_thread&, spu_opcode_t); + static bool BRNZ(spu_thread&, spu_opcode_t); + static bool BRHZ(spu_thread&, spu_opcode_t); + static bool BRHNZ(spu_thread&, spu_opcode_t); + static bool STQR(spu_thread&, spu_opcode_t); + static bool BRA(spu_thread&, spu_opcode_t); + static bool LQA(spu_thread&, spu_opcode_t); + static bool BRASL(spu_thread&, spu_opcode_t); + static bool BR(spu_thread&, spu_opcode_t); + static bool FSMBI(spu_thread&, spu_opcode_t); + static bool BRSL(spu_thread&, spu_opcode_t); + static bool LQR(spu_thread&, spu_opcode_t); + static bool IL(spu_thread&, spu_opcode_t); + static bool ILHU(spu_thread&, spu_opcode_t); + static bool ILH(spu_thread&, spu_opcode_t); + static bool IOHL(spu_thread&, spu_opcode_t); + static bool ORI(spu_thread&, spu_opcode_t); + static bool ORHI(spu_thread&, spu_opcode_t); + static bool ORBI(spu_thread&, spu_opcode_t); + static bool SFI(spu_thread&, spu_opcode_t); + static bool SFHI(spu_thread&, spu_opcode_t); + static bool ANDI(spu_thread&, spu_opcode_t); + static bool ANDHI(spu_thread&, spu_opcode_t); + static bool ANDBI(spu_thread&, spu_opcode_t); + static bool AI(spu_thread&, spu_opcode_t); + static bool AHI(spu_thread&, spu_opcode_t); + static bool STQD(spu_thread&, spu_opcode_t); + static bool LQD(spu_thread&, spu_opcode_t); + static bool XORI(spu_thread&, spu_opcode_t); + static bool XORHI(spu_thread&, spu_opcode_t); + static bool XORBI(spu_thread&, spu_opcode_t); + static bool CGTI(spu_thread&, spu_opcode_t); + static bool CGTHI(spu_thread&, spu_opcode_t); + static bool CGTBI(spu_thread&, spu_opcode_t); + static bool HGTI(spu_thread&, spu_opcode_t); + static bool CLGTI(spu_thread&, spu_opcode_t); + static bool CLGTHI(spu_thread&, spu_opcode_t); + static bool CLGTBI(spu_thread&, spu_opcode_t); + static bool HLGTI(spu_thread&, spu_opcode_t); + static bool MPYI(spu_thread&, spu_opcode_t); + static bool MPYUI(spu_thread&, spu_opcode_t); + static bool CEQI(spu_thread&, spu_opcode_t); + static bool CEQHI(spu_thread&, spu_opcode_t); + static bool CEQBI(spu_thread&, spu_opcode_t); + static bool HEQI(spu_thread&, spu_opcode_t); + static bool HBRA(spu_thread&, spu_opcode_t); + static bool HBRR(spu_thread&, spu_opcode_t); + static bool ILA(spu_thread&, spu_opcode_t); + static bool SELB(spu_thread&, spu_opcode_t); static const spu_inter_func_t SHUFB; - static bool MPYA(SPUThread&, spu_opcode_t); - static bool DFCGT(SPUThread&, spu_opcode_t); - static bool DFCMGT(SPUThread&, spu_opcode_t); - static bool DFTSV(SPUThread&, spu_opcode_t); - static bool DFCEQ(SPUThread&, spu_opcode_t); - static bool DFCMEQ(SPUThread&, spu_opcode_t); + static bool MPYA(spu_thread&, spu_opcode_t); + static bool DFCGT(spu_thread&, spu_opcode_t); + static bool DFCMGT(spu_thread&, spu_opcode_t); + static bool DFTSV(spu_thread&, spu_opcode_t); + static bool DFCEQ(spu_thread&, spu_opcode_t); + static bool DFCMEQ(spu_thread&, spu_opcode_t); }; struct spu_interpreter_fast final : spu_interpreter { - static bool FREST(SPUThread&, spu_opcode_t); - static bool FRSQEST(SPUThread&, spu_opcode_t); - static bool FCGT(SPUThread&, spu_opcode_t); - static bool FA(SPUThread&, spu_opcode_t); - static bool FS(SPUThread&, spu_opcode_t); - static bool FM(SPUThread&, spu_opcode_t); - static bool FCMGT(SPUThread&, spu_opcode_t); - static bool DFA(SPUThread&, spu_opcode_t); - static bool DFS(SPUThread&, spu_opcode_t); - static bool DFM(SPUThread&, spu_opcode_t); - static bool DFMA(SPUThread&, spu_opcode_t); - static bool DFMS(SPUThread&, spu_opcode_t); - static bool DFNMS(SPUThread&, spu_opcode_t); - static bool DFNMA(SPUThread&, spu_opcode_t); - static bool FSCRRD(SPUThread&, spu_opcode_t); - static bool FESD(SPUThread&, spu_opcode_t); - static bool FRDS(SPUThread&, spu_opcode_t); - static bool FSCRWR(SPUThread&, spu_opcode_t); - static bool FCEQ(SPUThread&, spu_opcode_t); - static bool FCMEQ(SPUThread&, spu_opcode_t); - static bool FI(SPUThread&, spu_opcode_t); - static bool CFLTS(SPUThread&, spu_opcode_t); - static bool CFLTU(SPUThread&, spu_opcode_t); - static bool CSFLT(SPUThread&, spu_opcode_t); - static bool CUFLT(SPUThread&, spu_opcode_t); - static bool FNMS(SPUThread&, spu_opcode_t); - static bool FMA(SPUThread&, spu_opcode_t); - static bool FMS(SPUThread&, spu_opcode_t); + static bool FREST(spu_thread&, spu_opcode_t); + static bool FRSQEST(spu_thread&, spu_opcode_t); + static bool FCGT(spu_thread&, spu_opcode_t); + static bool FA(spu_thread&, spu_opcode_t); + static bool FS(spu_thread&, spu_opcode_t); + static bool FM(spu_thread&, spu_opcode_t); + static bool FCMGT(spu_thread&, spu_opcode_t); + static bool DFA(spu_thread&, spu_opcode_t); + static bool DFS(spu_thread&, spu_opcode_t); + static bool DFM(spu_thread&, spu_opcode_t); + static bool DFMA(spu_thread&, spu_opcode_t); + static bool DFMS(spu_thread&, spu_opcode_t); + static bool DFNMS(spu_thread&, spu_opcode_t); + static bool DFNMA(spu_thread&, spu_opcode_t); + static bool FSCRRD(spu_thread&, spu_opcode_t); + static bool FESD(spu_thread&, spu_opcode_t); + static bool FRDS(spu_thread&, spu_opcode_t); + static bool FSCRWR(spu_thread&, spu_opcode_t); + static bool FCEQ(spu_thread&, spu_opcode_t); + static bool FCMEQ(spu_thread&, spu_opcode_t); + static bool FI(spu_thread&, spu_opcode_t); + static bool CFLTS(spu_thread&, spu_opcode_t); + static bool CFLTU(spu_thread&, spu_opcode_t); + static bool CSFLT(spu_thread&, spu_opcode_t); + static bool CUFLT(spu_thread&, spu_opcode_t); + static bool FNMS(spu_thread&, spu_opcode_t); + static bool FMA(spu_thread&, spu_opcode_t); + static bool FMS(spu_thread&, spu_opcode_t); }; struct spu_interpreter_precise final : spu_interpreter { - static bool FREST(SPUThread&, spu_opcode_t); - static bool FRSQEST(SPUThread&, spu_opcode_t); - static bool FCGT(SPUThread&, spu_opcode_t); - static bool FA(SPUThread&, spu_opcode_t); - static bool FS(SPUThread&, spu_opcode_t); - static bool FM(SPUThread&, spu_opcode_t); - static bool FCMGT(SPUThread&, spu_opcode_t); - static bool DFA(SPUThread&, spu_opcode_t); - static bool DFS(SPUThread&, spu_opcode_t); - static bool DFM(SPUThread&, spu_opcode_t); - static bool DFMA(SPUThread&, spu_opcode_t); - static bool DFMS(SPUThread&, spu_opcode_t); - static bool DFNMS(SPUThread&, spu_opcode_t); - static bool DFNMA(SPUThread&, spu_opcode_t); - static bool FSCRRD(SPUThread&, spu_opcode_t); - static bool FESD(SPUThread&, spu_opcode_t); - static bool FRDS(SPUThread&, spu_opcode_t); - static bool FSCRWR(SPUThread&, spu_opcode_t); - static bool FCEQ(SPUThread&, spu_opcode_t); - static bool FCMEQ(SPUThread&, spu_opcode_t); - static bool FI(SPUThread&, spu_opcode_t); - static bool CFLTS(SPUThread&, spu_opcode_t); - static bool CFLTU(SPUThread&, spu_opcode_t); - static bool CSFLT(SPUThread&, spu_opcode_t); - static bool CUFLT(SPUThread&, spu_opcode_t); - static bool FNMS(SPUThread&, spu_opcode_t); - static bool FMA(SPUThread&, spu_opcode_t); - static bool FMS(SPUThread&, spu_opcode_t); + static bool FREST(spu_thread&, spu_opcode_t); + static bool FRSQEST(spu_thread&, spu_opcode_t); + static bool FCGT(spu_thread&, spu_opcode_t); + static bool FA(spu_thread&, spu_opcode_t); + static bool FS(spu_thread&, spu_opcode_t); + static bool FM(spu_thread&, spu_opcode_t); + static bool FCMGT(spu_thread&, spu_opcode_t); + static bool DFA(spu_thread&, spu_opcode_t); + static bool DFS(spu_thread&, spu_opcode_t); + static bool DFM(spu_thread&, spu_opcode_t); + static bool DFMA(spu_thread&, spu_opcode_t); + static bool DFMS(spu_thread&, spu_opcode_t); + static bool DFNMS(spu_thread&, spu_opcode_t); + static bool DFNMA(spu_thread&, spu_opcode_t); + static bool FSCRRD(spu_thread&, spu_opcode_t); + static bool FESD(spu_thread&, spu_opcode_t); + static bool FRDS(spu_thread&, spu_opcode_t); + static bool FSCRWR(spu_thread&, spu_opcode_t); + static bool FCEQ(spu_thread&, spu_opcode_t); + static bool FCMEQ(spu_thread&, spu_opcode_t); + static bool FI(spu_thread&, spu_opcode_t); + static bool CFLTS(spu_thread&, spu_opcode_t); + static bool CFLTU(spu_thread&, spu_opcode_t); + static bool CSFLT(spu_thread&, spu_opcode_t); + static bool CUFLT(spu_thread&, spu_opcode_t); + static bool FNMS(spu_thread&, spu_opcode_t); + static bool FMA(spu_thread&, spu_opcode_t); + static bool FMS(spu_thread&, spu_opcode_t); }; diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 9bd8b5abe4..0fa85f78c8 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -211,7 +211,7 @@ spu_recompiler_base::~spu_recompiler_base() { } -void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip) +void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip) { // If code verification failed from a patched patchpoint, clear it with a single NOP if (rip) @@ -255,7 +255,7 @@ void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip) } } -void spu_recompiler_base::branch(SPUThread& spu, void*, u8* rip) +void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip) { // Compile (TODO: optimize search of the existing functions) const auto func = verify(HERE, spu.jit->compile(spu.jit->block(spu._ptr(0), spu.pc))); @@ -1692,7 +1692,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto cblock = m_ir->GetInsertBlock(); const auto result = llvm::BasicBlock::Create(m_context, "", m_function); m_ir->SetInsertPoint(result); - m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&SPUThread::pc)); + m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&spu_thread::pc)); tail(add_function(target)); m_ir->SetInsertPoint(cblock); return result; @@ -1708,8 +1708,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto cblock = m_ir->GetInsertBlock(); const auto result = llvm::BasicBlock::Create(m_context, "", m_function); m_ir->SetInsertPoint(result); - m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&SPUThread::pc)); - const auto addr = m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&SPUThread::jit_dispatcher) + target * 2)); + m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&spu_thread::pc)); + const auto addr = m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&spu_thread::jit_dispatcher) + target * 2)); const auto type = llvm::FunctionType::get(get_type(), {get_type(), get_type(), get_type()}, false)->getPointerTo()->getPointerTo(); tail(m_ir->CreateLoad(m_ir->CreateBitCast(addr, type))); m_ir->SetInsertPoint(cblock); @@ -1789,15 +1789,15 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { if (index < 128) { - return ::offset32(&SPUThread::gpr, index); + return ::offset32(&spu_thread::gpr, index); } switch (index) { - case s_reg_mfc_eal: return ::offset32(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::eal); - case s_reg_mfc_lsa: return ::offset32(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::lsa); - case s_reg_mfc_tag: return ::offset32(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::tag); - case s_reg_mfc_size: return ::offset32(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::size); + case s_reg_mfc_eal: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::eal); + case s_reg_mfc_lsa: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::lsa); + case s_reg_mfc_tag: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::tag); + case s_reg_mfc_size: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::size); default: fmt::throw_exception("get_reg_offset(%u): invalid register index" HERE, index); } @@ -2183,19 +2183,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator void update_pc() { - m_ir->CreateStore(m_ir->getInt32(m_pos), spu_ptr(&SPUThread::pc))->setVolatile(true); + m_ir->CreateStore(m_ir->getInt32(m_pos), spu_ptr(&spu_thread::pc))->setVolatile(true); } // Call cpu_thread::check_state if necessary and return or continue (full check) void check_state(u32 addr) { - const auto pstate = spu_ptr(&SPUThread::state); + const auto pstate = spu_ptr(&spu_thread::state); const auto _body = llvm::BasicBlock::Create(m_context, "", m_function); const auto check = llvm::BasicBlock::Create(m_context, "", m_function); const auto stop = llvm::BasicBlock::Create(m_context, "", m_function); m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_ir->CreateLoad(pstate), m_ir->getInt32(0)), _body, check); m_ir->SetInsertPoint(check); - m_ir->CreateStore(m_ir->getInt32(addr), spu_ptr(&SPUThread::pc)); + m_ir->CreateStore(m_ir->getInt32(addr), spu_ptr(&spu_thread::pc)); m_ir->CreateCondBr(call(&exec_check_state, m_thread), stop, _body); m_ir->SetInsertPoint(stop); m_ir->CreateRetVoid(); @@ -2386,7 +2386,7 @@ public: const auto label_stop = BasicBlock::Create(m_context, "", m_function); // Emit state check - const auto pstate = spu_ptr(&SPUThread::state); + const auto pstate = spu_ptr(&spu_thread::state); m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(pstate, true), m_ir->getInt32(0)), label_stop, label_test); // Emit code check @@ -2482,7 +2482,7 @@ public: // Increase block counter with statistics m_ir->SetInsertPoint(label_body); - const auto pbcount = spu_ptr(&SPUThread::block_counter); + const auto pbcount = spu_ptr(&spu_thread::block_counter); m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(pbcount), m_ir->getInt64(check_iterations)), pbcount); // Call the entry function chunk @@ -2497,7 +2497,7 @@ public: if (g_cfg.core.spu_verification) { - const auto pbfail = spu_ptr(&SPUThread::block_failure); + const auto pbfail = spu_ptr(&spu_thread::block_failure); m_ir->CreateStore(m_ir->CreateAdd(m_ir->CreateLoad(pbfail), m_ir->getInt64(1)), pbfail); tail(&spu_recompiler_base::dispatch, m_thread, m_ir->getInt32(0), m_ir->getInt32(0)); } @@ -3001,13 +3001,13 @@ public: return fn; } - static bool exec_check_state(SPUThread* _spu) + static bool exec_check_state(spu_thread* _spu) { return _spu->check_state(); } template - static void exec_fall(SPUThread* _spu, spu_opcode_t op) + static void exec_fall(spu_thread* _spu, spu_opcode_t op) { if (F(*_spu, op)) { @@ -3022,7 +3022,7 @@ public: call(&exec_fall, m_thread, m_ir->getInt32(op.opcode)); } - static void exec_unk(SPUThread* _spu, u32 op) + static void exec_unk(spu_thread* _spu, u32 op) { fmt::throw_exception("Unknown/Illegal instruction (0x%08x)" HERE, op); } @@ -3034,7 +3034,7 @@ public: tail(&exec_unk, m_thread, m_ir->getInt32(op_unk.opcode)); } - static bool exec_stop(SPUThread* _spu, u32 code) + static bool exec_stop(spu_thread* _spu, u32 code) { return _spu->stop_and_signal(code); } @@ -3053,7 +3053,7 @@ public: if (g_cfg.core.spu_block_size == spu_block_size_type::safe) { m_block->block_end = m_ir->GetInsertBlock(); - m_ir->CreateStore(m_ir->getInt32(m_pos + 4), spu_ptr(&SPUThread::pc)); + m_ir->CreateStore(m_ir->getInt32(m_pos + 4), spu_ptr(&spu_thread::pc)); m_ir->CreateRetVoid(); } } @@ -3063,18 +3063,18 @@ public: STOP(spu_opcode_t{0x3fff}); } - static s64 exec_rdch(SPUThread* _spu, u32 ch) + static s64 exec_rdch(spu_thread* _spu, u32 ch) { return _spu->get_ch_value(ch); } - static s64 exec_read_in_mbox(SPUThread* _spu) + static s64 exec_read_in_mbox(spu_thread* _spu) { // TODO return _spu->get_ch_value(SPU_RdInMbox); } - static u32 exec_read_dec(SPUThread* _spu) + static u32 exec_read_dec(spu_thread* _spu) { const u32 res = _spu->ch_dec_value - static_cast(get_timebased_time() - _spu->ch_dec_start_timestamp); @@ -3086,7 +3086,7 @@ public: return res; } - static s64 exec_read_events(SPUThread* _spu) + static s64 exec_read_events(spu_thread* _spu) { if (const u32 events = _spu->get_events()) { @@ -3139,7 +3139,7 @@ public: { case SPU_RdSRR0: { - res.value = m_ir->CreateLoad(spu_ptr(&SPUThread::srr0)); + res.value = m_ir->CreateLoad(spu_ptr(&spu_thread::srr0)); break; } case SPU_RdInMbox: @@ -3157,32 +3157,32 @@ public: } case MFC_RdTagStat: { - res.value = get_rdch(op, ::offset32(&SPUThread::ch_tag_stat), false); + res.value = get_rdch(op, ::offset32(&spu_thread::ch_tag_stat), false); break; } case MFC_RdTagMask: { - res.value = m_ir->CreateLoad(spu_ptr(&SPUThread::ch_tag_mask)); + res.value = m_ir->CreateLoad(spu_ptr(&spu_thread::ch_tag_mask)); break; } case SPU_RdSigNotify1: { - res.value = get_rdch(op, ::offset32(&SPUThread::ch_snr1), true); + res.value = get_rdch(op, ::offset32(&spu_thread::ch_snr1), true); break; } case SPU_RdSigNotify2: { - res.value = get_rdch(op, ::offset32(&SPUThread::ch_snr2), true); + res.value = get_rdch(op, ::offset32(&spu_thread::ch_snr2), true); break; } case MFC_RdAtomicStat: { - res.value = get_rdch(op, ::offset32(&SPUThread::ch_atomic_stat), false); + res.value = get_rdch(op, ::offset32(&spu_thread::ch_atomic_stat), false); break; } case MFC_RdListStallStat: { - res.value = get_rdch(op, ::offset32(&SPUThread::ch_stall_stat), false); + res.value = get_rdch(op, ::offset32(&spu_thread::ch_stall_stat), false); break; } case SPU_RdDec: @@ -3192,7 +3192,7 @@ public: } case SPU_RdEventMask: { - res.value = m_ir->CreateLoad(spu_ptr(&SPUThread::ch_event_mask)); + res.value = m_ir->CreateLoad(spu_ptr(&spu_thread::ch_event_mask)); break; } case SPU_RdEventStat: @@ -3210,7 +3210,7 @@ public: } case SPU_RdMachStat: { - res.value = m_ir->CreateZExt(m_ir->CreateLoad(spu_ptr(&SPUThread::interrupts_enabled)), get_type()); + res.value = m_ir->CreateZExt(m_ir->CreateLoad(spu_ptr(&spu_thread::interrupts_enabled)), get_type()); break; } @@ -3232,12 +3232,12 @@ public: set_vr(op.rt, insert(splat(0), 3, res)); } - static u32 exec_rchcnt(SPUThread* _spu, u32 ch) + static u32 exec_rchcnt(spu_thread* _spu, u32 ch) { return _spu->get_ch_count(ch); } - static u32 exec_get_events(SPUThread* _spu) + static u32 exec_get_events(spu_thread* _spu) { return _spu->get_events(); } @@ -3257,55 +3257,55 @@ public: { case SPU_WrOutMbox: { - res.value = get_rchcnt(::offset32(&SPUThread::ch_out_mbox), true); + res.value = get_rchcnt(::offset32(&spu_thread::ch_out_mbox), true); break; } case SPU_WrOutIntrMbox: { - res.value = get_rchcnt(::offset32(&SPUThread::ch_out_intr_mbox), true); + res.value = get_rchcnt(::offset32(&spu_thread::ch_out_intr_mbox), true); break; } case MFC_RdTagStat: { - res.value = get_rchcnt(::offset32(&SPUThread::ch_tag_stat)); + res.value = get_rchcnt(::offset32(&spu_thread::ch_tag_stat)); break; } case MFC_RdListStallStat: { - res.value = get_rchcnt(::offset32(&SPUThread::ch_stall_stat)); + res.value = get_rchcnt(::offset32(&spu_thread::ch_stall_stat)); break; } case SPU_RdSigNotify1: { - res.value = get_rchcnt(::offset32(&SPUThread::ch_snr1)); + res.value = get_rchcnt(::offset32(&spu_thread::ch_snr1)); break; } case SPU_RdSigNotify2: { - res.value = get_rchcnt(::offset32(&SPUThread::ch_snr2)); + res.value = get_rchcnt(::offset32(&spu_thread::ch_snr2)); break; } case MFC_RdAtomicStat: { - res.value = get_rchcnt(::offset32(&SPUThread::ch_atomic_stat)); + res.value = get_rchcnt(::offset32(&spu_thread::ch_atomic_stat)); break; } case MFC_WrTagUpdate: { - res.value = m_ir->CreateLoad(spu_ptr(&SPUThread::ch_tag_upd), true); + res.value = m_ir->CreateLoad(spu_ptr(&spu_thread::ch_tag_upd), true); res.value = m_ir->CreateICmpEQ(res.value, m_ir->getInt32(0)); res.value = m_ir->CreateZExt(res.value, get_type()); break; } case MFC_Cmd: { - res.value = m_ir->CreateLoad(spu_ptr(&SPUThread::mfc_size), true); + res.value = m_ir->CreateLoad(spu_ptr(&spu_thread::mfc_size), true); res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value); break; } case SPU_RdInMbox: { - res.value = m_ir->CreateLoad(spu_ptr(&SPUThread::ch_in_mbox), true); + res.value = m_ir->CreateLoad(spu_ptr(&spu_thread::ch_in_mbox), true); res.value = m_ir->CreateLShr(res.value, 8); res.value = m_ir->CreateAnd(res.value, 7); break; @@ -3328,17 +3328,17 @@ public: set_vr(op.rt, insert(splat(0), 3, res)); } - static bool exec_wrch(SPUThread* _spu, u32 ch, u32 value) + static bool exec_wrch(spu_thread* _spu, u32 ch, u32 value) { return _spu->set_ch_value(ch, value); } - static void exec_mfc(SPUThread* _spu) + static void exec_mfc(spu_thread* _spu) { return _spu->do_mfc(); } - static bool exec_mfc_cmd(SPUThread* _spu) + static bool exec_mfc_cmd(spu_thread* _spu) { return _spu->process_mfc_cmd(_spu->ch_mfc_cmd); } @@ -3351,7 +3351,7 @@ public: { case SPU_WrSRR0: { - m_ir->CreateStore(val.value, spu_ptr(&SPUThread::srr0)); + m_ir->CreateStore(val.value, spu_ptr(&spu_thread::srr0)); return; } case SPU_WrOutIntrMbox: @@ -3367,7 +3367,7 @@ public: case MFC_WrTagMask: { // TODO - m_ir->CreateStore(val.value, spu_ptr(&SPUThread::ch_tag_mask)); + m_ir->CreateStore(val.value, spu_ptr(&spu_thread::ch_tag_mask)); return; } case MFC_WrTagUpdate: @@ -3376,11 +3376,11 @@ public: { const u64 upd = ci->getZExtValue(); - const auto tag_mask = m_ir->CreateLoad(spu_ptr(&SPUThread::ch_tag_mask)); - const auto mfc_fence = m_ir->CreateLoad(spu_ptr(&SPUThread::mfc_fence)); + const auto tag_mask = m_ir->CreateLoad(spu_ptr(&spu_thread::ch_tag_mask)); + const auto mfc_fence = m_ir->CreateLoad(spu_ptr(&spu_thread::mfc_fence)); const auto completed = m_ir->CreateAnd(tag_mask, m_ir->CreateNot(mfc_fence)); - const auto upd_ptr = spu_ptr(&SPUThread::ch_tag_upd); - const auto stat_ptr = spu_ptr(&SPUThread::ch_tag_stat); + const auto upd_ptr = spu_ptr(&spu_thread::ch_tag_upd); + const auto stat_ptr = spu_ptr(&spu_thread::ch_tag_stat); const auto stat_val = m_ir->CreateOr(m_ir->CreateZExt(completed, get_type()), INT64_MIN); if (upd == 0) @@ -3424,7 +3424,7 @@ public: } LOG_WARNING(SPU, "[0x%x] MFC_EAH: $%u is not a zero constant", m_pos, +op.rt); - //m_ir->CreateStore(val.value, spu_ptr(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::eah)); + //m_ir->CreateStore(val.value, spu_ptr(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::eah)); return; } case MFC_EAL: @@ -3468,8 +3468,8 @@ public: const auto fail = llvm::BasicBlock::Create(m_context, "", m_function); const auto next = llvm::BasicBlock::Create(m_context, "", m_function); - const auto pf = spu_ptr(&SPUThread::mfc_fence); - const auto pb = spu_ptr(&SPUThread::mfc_barrier); + const auto pf = spu_ptr(&spu_thread::mfc_fence); + const auto pb = spu_ptr(&spu_thread::mfc_barrier); switch (u64 cmd = ci->getZExtValue()) { @@ -3494,7 +3494,7 @@ public: m_ir->SetInsertPoint(fail); m_ir->CreateUnreachable(); m_ir->SetInsertPoint(next); - m_ir->CreateStore(ci, spu_ptr(&SPUThread::ch_mfc_cmd, &spu_mfc_cmd::cmd)); + m_ir->CreateStore(ci, spu_ptr(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::cmd)); call(&exec_mfc_cmd, m_thread); return; } @@ -3609,7 +3609,7 @@ public: case MFC_EIEIO_CMD: case MFC_SYNC_CMD: { - const auto cond = m_ir->CreateIsNull(m_ir->CreateLoad(spu_ptr(&SPUThread::mfc_size))); + const auto cond = m_ir->CreateIsNull(m_ir->CreateLoad(spu_ptr(&spu_thread::mfc_size))); m_ir->CreateCondBr(cond, exec, fail); m_ir->SetInsertPoint(exec); m_ir->CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); @@ -3631,8 +3631,8 @@ public: m_ir->SetInsertPoint(fail); // Get MFC slot, redirect to invalid memory address - const auto slot = m_ir->CreateLoad(spu_ptr(&SPUThread::mfc_size)); - const auto off0 = m_ir->CreateAdd(m_ir->CreateMul(slot, m_ir->getInt32(sizeof(spu_mfc_cmd))), m_ir->getInt32(::offset32(&SPUThread::mfc_queue))); + const auto slot = m_ir->CreateLoad(spu_ptr(&spu_thread::mfc_size)); + const auto off0 = m_ir->CreateAdd(m_ir->CreateMul(slot, m_ir->getInt32(sizeof(spu_mfc_cmd))), m_ir->getInt32(::offset32(&spu_thread::mfc_queue))); const auto ptr0 = m_ir->CreateGEP(m_thread, m_ir->CreateZExt(off0, get_type())); const auto ptr1 = m_ir->CreateGEP(m_memptr, m_ir->getInt64(0xffdeadf0)); const auto pmfc = m_ir->CreateSelect(m_ir->CreateICmpULT(slot, m_ir->getInt32(16)), ptr0, ptr1); @@ -3695,7 +3695,7 @@ public: } } - m_ir->CreateStore(m_ir->CreateAdd(slot, m_ir->getInt32(1)), spu_ptr(&SPUThread::mfc_size)); + m_ir->CreateStore(m_ir->CreateAdd(slot, m_ir->getInt32(1)), spu_ptr(&spu_thread::mfc_size)); m_ir->CreateBr(next); m_ir->SetInsertPoint(next); return; @@ -3708,7 +3708,7 @@ public: case MFC_WrListStallAck: { const auto mask = eval(splat(1) << (val & 0x1f)); - const auto _ptr = spu_ptr(&SPUThread::ch_stall_mask); + const auto _ptr = spu_ptr(&spu_thread::ch_stall_mask); const auto _old = m_ir->CreateLoad(_ptr); const auto _new = m_ir->CreateAnd(_old, m_ir->CreateNot(mask.value)); m_ir->CreateStore(_new, _ptr); @@ -3723,18 +3723,18 @@ public: } case SPU_WrDec: { - m_ir->CreateStore(call(&get_timebased_time), spu_ptr(&SPUThread::ch_dec_start_timestamp)); - m_ir->CreateStore(val.value, spu_ptr(&SPUThread::ch_dec_value)); + m_ir->CreateStore(call(&get_timebased_time), spu_ptr(&spu_thread::ch_dec_start_timestamp)); + m_ir->CreateStore(val.value, spu_ptr(&spu_thread::ch_dec_value)); return; } case SPU_WrEventMask: { - m_ir->CreateStore(val.value, spu_ptr(&SPUThread::ch_event_mask))->setVolatile(true); + m_ir->CreateStore(val.value, spu_ptr(&spu_thread::ch_event_mask))->setVolatile(true); return; } case SPU_WrEventAck: { - m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::And, spu_ptr(&SPUThread::ch_event_stat), eval(~val).value, llvm::AtomicOrdering::Release); + m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::And, spu_ptr(&spu_thread::ch_event_stat), eval(~val).value, llvm::AtomicOrdering::Release); return; } case 69: @@ -3769,7 +3769,7 @@ public: if (g_cfg.core.spu_block_size == spu_block_size_type::safe) { m_block->block_end = m_ir->GetInsertBlock(); - m_ir->CreateStore(m_ir->getInt32(m_pos + 4), spu_ptr(&SPUThread::pc)); + m_ir->CreateStore(m_ir->getInt32(m_pos + 4), spu_ptr(&spu_thread::pc)); m_ir->CreateRetVoid(); } } @@ -5330,7 +5330,7 @@ public: const auto halt = llvm::BasicBlock::Create(m_context, "", m_function); m_ir->CreateCondBr(cond.value, halt, next); m_ir->SetInsertPoint(halt); - const auto pstatus = spu_ptr(&SPUThread::status); + const auto pstatus = spu_ptr(&spu_thread::status); const auto chalt = m_ir->getInt32(SPU_STATUS_STOPPED_BY_HALT); m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Or, pstatus, chalt, llvm::AtomicOrdering::Release)->setVolatile(true); const auto ptr = _ptr(m_memptr, 0xffdead00); @@ -5391,7 +5391,7 @@ public: } // TODO - static u32 exec_check_interrupts(SPUThread* _spu, u32 addr) + static u32 exec_check_interrupts(spu_thread* _spu, u32 addr) { _spu->set_interrupt_status(true); @@ -5464,18 +5464,18 @@ public: if (op.d) { - m_ir->CreateStore(m_ir->getFalse(), spu_ptr(&SPUThread::interrupts_enabled))->setVolatile(true); + m_ir->CreateStore(m_ir->getFalse(), spu_ptr(&spu_thread::interrupts_enabled))->setVolatile(true); } - m_ir->CreateStore(addr.value, spu_ptr(&SPUThread::pc)); + m_ir->CreateStore(addr.value, spu_ptr(&spu_thread::pc)); const auto type = llvm::FunctionType::get(get_type(), {get_type(), get_type(), get_type()}, false)->getPointerTo()->getPointerTo(); - const auto disp = m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&SPUThread::jit_dispatcher))), type); + const auto disp = m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, m_ir->getInt64(::offset32(&spu_thread::jit_dispatcher))), type); const auto ad64 = m_ir->CreateZExt(addr.value, get_type()); if (ret && g_cfg.core.spu_block_size != spu_block_size_type::safe) { // Compare address stored in stack mirror with addr - const auto stack0 = eval(zext(sp) + ::offset32(&SPUThread::stack_mirror)); + const auto stack0 = eval(zext(sp) + ::offset32(&spu_thread::stack_mirror)); const auto stack1 = eval(stack0 + 8); const auto _ret = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), type)); const auto link = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack1.value), get_type())); @@ -5595,7 +5595,7 @@ public: // Exit function on unexpected target m_ir->SetInsertPoint(sw->getDefaultDest()); - m_ir->CreateStore(addr.value, spu_ptr(&SPUThread::pc)); + m_ir->CreateStore(addr.value, spu_ptr(&spu_thread::pc)); m_ir->CreateRetVoid(); } else @@ -5617,7 +5617,7 @@ public: { m_block->block_end = m_ir->GetInsertBlock(); value_t srr0; - srr0.value = m_ir->CreateLoad(spu_ptr(&SPUThread::srr0)); + srr0.value = m_ir->CreateLoad(spu_ptr(&spu_thread::srr0)); m_ir->CreateBr(add_block_indirect(op, srr0)); } @@ -5716,7 +5716,7 @@ public: { // Store the return function chunk address at the stack mirror const auto func = add_function(m_pos + 4); - const auto stack0 = eval(zext(extract(get_vr(1), 3) & 0x3fff0) + ::offset32(&SPUThread::stack_mirror)); + const auto stack0 = eval(zext(extract(get_vr(1), 3) & 0x3fff0) + ::offset32(&spu_thread::stack_mirror)); const auto stack1 = eval(stack0 + 8); m_ir->CreateStore(func, m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), func->getType()->getPointerTo())); m_ir->CreateStore(m_ir->getInt64(m_pos + 4), m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack1.value), get_type())); diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index bcce0aebd8..f75ea57faa 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -76,10 +76,10 @@ public: virtual spu_function_t compile(std::vector&&) = 0; // Default dispatch function fallback (second arg is unused) - static void dispatch(SPUThread&, void*, u8* rip); + static void dispatch(spu_thread&, void*, u8* rip); // Target for the unresolved patch point (second arg is unused) - static void branch(SPUThread&, void*, u8* rip); + static void branch(spu_thread&, void*, u8* rip); // Get the block at specified address std::vector block(const be_t* ls, u32 lsa); diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 6c5eb16483..915267ef5a 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -140,7 +140,7 @@ namespace spu u32 pc = 0; bool active = false; - concurrent_execution_watchdog(SPUThread& spu) + concurrent_execution_watchdog(spu_thread& spu) :pc(spu.pc) { if (g_cfg.core.preferred_spu_threads > 0) @@ -391,35 +391,12 @@ spu_imm_table_t::spu_imm_table_t() } } -void SPUThread::on_spawn() +std::string spu_thread::get_name() const { - if (g_cfg.core.thread_scheduler_enabled) - { - thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::spu)); - } - - if (g_cfg.core.lower_spu_priority) - { - thread_ctrl::set_native_priority(-1); - } + return fmt::format("%sSPU[0x%x] Thread (%s)", offset >= RAW_SPU_BASE_ADDR ? "Raw" : "", id, spu_name.get()); } -void SPUThread::on_init(const std::shared_ptr& _this) -{ - if (!offset) - { - const_cast(offset) = verify("SPU LS" HERE, vm::alloc(0x40000, vm::main)); - - cpu_thread::on_init(_this); - } -} - -std::string SPUThread::get_name() const -{ - return fmt::format("%sSPU[0x%x] Thread (%s)", offset >= RAW_SPU_BASE_ADDR ? "Raw" : "", id, m_name); -} - -std::string SPUThread::dump() const +std::string spu_thread::dump() const { std::string ret = cpu_thread::dump(); @@ -451,7 +428,7 @@ std::string SPUThread::dump() const return ret; } -void SPUThread::cpu_init() +void spu_thread::cpu_init() { gpr = {}; fpscr.Reset(); @@ -501,8 +478,15 @@ void SPUThread::cpu_init() extern thread_local std::string(*g_tls_log_prefix)(); -void SPUThread::cpu_task() +void spu_thread::cpu_task() { + // Get next PC and SPU Interrupt status + pc = npc.exchange(0); + + set_interrupt_status((pc & 1) != 0); + + pc &= 0x3fffc; + std::fesetround(FE_TOWARDZERO); if (g_cfg.core.set_daz_and_ftz && g_cfg.core.spu_decoder != spu_decoder_type::precise) @@ -513,9 +497,8 @@ void SPUThread::cpu_task() g_tls_log_prefix = [] { - const auto cpu = static_cast(get_current_cpu_thread()); - - return fmt::format("%s [0x%05x]", cpu->get_name(), cpu->pc); + const auto cpu = static_cast(get_current_cpu_thread()); + return fmt::format("%s [0x%05x]", thread_ctrl::get_name(), cpu->pc); }; if (jit) @@ -525,6 +508,9 @@ void SPUThread::cpu_task() jit_dispatcher[pc / 4](*this, vm::_ptr(offset), nullptr); } + // save next PC and current SPU Interrupt status + npc = pc | (interrupts_enabled); + // Print some stats LOG_NOTICE(SPU, "Stats: Block Weight: %u (Retreats: %u);", block_counter, block_failure); return; @@ -548,7 +534,8 @@ void SPUThread::cpu_task() { if (UNLIKELY(state)) { - if (check_state()) return; + if (check_state()) + break; // Decode single instruction (may be step) const u32 op = *reinterpret_cast*>(base + pc); @@ -606,29 +593,39 @@ void SPUThread::cpu_task() break; } } + + // save next PC and current SPU Interrupt status + npc = pc | (interrupts_enabled); } -void SPUThread::cpu_mem() +void spu_thread::cpu_mem() { //vm::passive_lock(*this); } -void SPUThread::cpu_unmem() +void spu_thread::cpu_unmem() { //state.test_and_set(cpu_flag::memory); } -SPUThread::~SPUThread() +spu_thread::~spu_thread() { // Deallocate Local Storage vm::dealloc_verbose_nothrow(offset); + + // Deallocate RawSPU ID + if (!group && offset >= RAW_SPU_BASE_ADDR) + { + g_raw_spu_id[index] = 0; + g_raw_spu_ctr--; + } } -SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group) +spu_thread::spu_thread(vm::addr_t ls, lv2_spu_group* group, u32 index, std::string_view name) : cpu_thread(idm::last_id()) - , m_name(name) + , spu_name(name) , index(index) - , offset(0) + , offset(ls) , group(group) { if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit) @@ -652,9 +649,14 @@ SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group) std::memset(stack_mirror.data(), 0xff, sizeof(stack_mirror)); } } + + if (!group && offset >= RAW_SPU_BASE_ADDR) + { + cpu_init(); + } } -void SPUThread::push_snr(u32 number, u32 value) +void spu_thread::push_snr(u32 number, u32 value) { // Get channel const auto channel = number & 1 ? &ch_snr2 : &ch_snr1; @@ -670,7 +672,7 @@ void SPUThread::push_snr(u32 number, u32 value) } } -void SPUThread::do_dma_transfer(const spu_mfc_cmd& args) +void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) { const bool is_get = (args.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_START_MASK)) == MFC_GET_CMD; @@ -686,7 +688,7 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args) if (eal < SYS_SPU_THREAD_BASE_LOW) { // RawSPU MMIO - auto thread = idm::get((eal - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET); + auto thread = idm::get>(find_raw_spu((eal - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET)); if (!thread) { @@ -717,7 +719,7 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args) } else if (group && group->threads[index]) { - auto& spu = static_cast(*group->threads[index]); + auto& spu = static_cast(*group->threads[index]); if (offset + args.size - 1 < 0x40000) // LS access { @@ -890,7 +892,7 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args) } } -bool SPUThread::do_dma_check(const spu_mfc_cmd& args) +bool spu_thread::do_dma_check(const spu_mfc_cmd& args) { const u32 mask = 1u << args.tag; @@ -940,7 +942,7 @@ bool SPUThread::do_dma_check(const spu_mfc_cmd& args) return true; } -bool SPUThread::do_list_transfer(spu_mfc_cmd& args) +bool spu_thread::do_list_transfer(spu_mfc_cmd& args) { struct list_element { @@ -994,7 +996,7 @@ bool SPUThread::do_list_transfer(spu_mfc_cmd& args) return true; } -void SPUThread::do_putlluc(const spu_mfc_cmd& args) +void spu_thread::do_putlluc(const spu_mfc_cmd& args) { const u32 addr = args.eal & -128u; @@ -1047,7 +1049,7 @@ void SPUThread::do_putlluc(const spu_mfc_cmd& args) vm::reservation_notifier(addr, 128).notify_all(); } -void SPUThread::do_mfc(bool wait) +void spu_thread::do_mfc(bool wait) { u32 removed = 0; u32 barrier = 0; @@ -1149,17 +1151,17 @@ void SPUThread::do_mfc(bool wait) } } -u32 SPUThread::get_mfc_completed() +u32 spu_thread::get_mfc_completed() { return ch_tag_mask & ~mfc_fence; } -bool SPUThread::process_mfc_cmd(spu_mfc_cmd args) +bool spu_thread::process_mfc_cmd(spu_mfc_cmd args) { // Stall infinitely if MFC queue is full while (UNLIKELY(mfc_size >= 16)) { - if (state & cpu_flag::stop) + if (is_stopped()) { return false; } @@ -1192,7 +1194,7 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args) while (rdata == data && vm::reservation_acquire(raddr, 128) == rtime) { - if (state & cpu_flag::stop) + if (is_stopped()) { break; } @@ -1446,7 +1448,7 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args) args.cmd, args.lsa, args.eal, args.tag, args.size); } -u32 SPUThread::get_events(bool waiting) +u32 spu_thread::get_events(bool waiting) { const u32 mask1 = ch_event_mask; @@ -1485,7 +1487,7 @@ u32 SPUThread::get_events(bool waiting) }); } -void SPUThread::set_events(u32 mask) +void spu_thread::set_events(u32 mask) { if (mask & ~SPU_EVENT_IMPLEMENTED) { @@ -1502,7 +1504,7 @@ void SPUThread::set_events(u32 mask) } } -void SPUThread::set_interrupt_status(bool enable) +void spu_thread::set_interrupt_status(bool enable) { if (enable) { @@ -1520,7 +1522,7 @@ void SPUThread::set_interrupt_status(bool enable) } } -u32 SPUThread::get_ch_count(u32 ch) +u32 spu_thread::get_ch_count(u32 ch) { LOG_TRACE(SPU, "get_ch_count(ch=%d [%s])", ch, ch < 128 ? spu_ch_name[ch] : "???"); @@ -1542,7 +1544,7 @@ u32 SPUThread::get_ch_count(u32 ch) fmt::throw_exception("Unknown/illegal channel (ch=%d [%s])" HERE, ch, ch < 128 ? spu_ch_name[ch] : "???"); } -s64 SPUThread::get_ch_value(u32 ch) +s64 spu_thread::get_ch_value(u32 ch) { LOG_TRACE(SPU, "get_ch_value(ch=%d [%s])", ch, ch < 128 ? spu_ch_name[ch] : "???"); @@ -1557,7 +1559,7 @@ s64 SPUThread::get_ch_value(u32 ch) while (!channel.try_pop(out)) { - if (state & cpu_flag::stop) + if (is_stopped()) { return -1; } @@ -1595,7 +1597,7 @@ s64 SPUThread::get_ch_value(u32 ch) return out; } - if (state & cpu_flag::stop) + if (is_stopped()) { return -1; } @@ -1699,7 +1701,7 @@ s64 SPUThread::get_ch_value(u32 ch) while (res = get_events(), !res) { - if (state & (cpu_flag::stop + cpu_flag::dbg_global_stop)) + if (is_stopped()) { return -1; } @@ -1712,7 +1714,7 @@ s64 SPUThread::get_ch_value(u32 ch) while (res = get_events(true), !res) { - if (state & cpu_flag::stop) + if (is_stopped()) { return -1; } @@ -1734,7 +1736,7 @@ s64 SPUThread::get_ch_value(u32 ch) fmt::throw_exception("Unknown/illegal channel (ch=%d [%s])" HERE, ch, ch < 128 ? spu_ch_name[ch] : "???"); } -bool SPUThread::set_ch_value(u32 ch, u32 value) +bool spu_thread::set_ch_value(u32 ch, u32 value) { LOG_TRACE(SPU, "set_ch_value(ch=%d [%s], value=0x%x)", ch, ch < 128 ? spu_ch_name[ch] : "???", value); @@ -1752,7 +1754,7 @@ bool SPUThread::set_ch_value(u32 ch, u32 value) { while (!ch_out_intr_mbox.try_push(value)) { - if (state & cpu_flag::stop) + if (is_stopped()) { return false; } @@ -1898,7 +1900,7 @@ bool SPUThread::set_ch_value(u32 ch, u32 value) { while (!ch_out_mbox.try_push(value)) { - if (state & cpu_flag::stop) + if (is_stopped()) { return false; } @@ -2043,7 +2045,7 @@ bool SPUThread::set_ch_value(u32 ch, u32 value) fmt::throw_exception("Unknown/illegal channel (ch=%d [%s], value=0x%x)" HERE, ch, ch < 128 ? spu_ch_name[ch] : "???", value); } -bool SPUThread::stop_and_signal(u32 code) +bool spu_thread::stop_and_signal(u32 code) { LOG_TRACE(SPU, "stop_and_signal(code=0x%x)", code); @@ -2087,7 +2089,7 @@ bool SPUThread::stop_and_signal(u32 code) // HACK: wait for executable code while (!_ref(pc)) { - if (state & cpu_flag::stop) + if (is_stopped()) { return false; } @@ -2143,7 +2145,7 @@ bool SPUThread::stop_and_signal(u32 code) // Check group status, wait if necessary while (group->run_state >= SPU_THREAD_GROUP_STATUS_WAITING && group->run_state <= SPU_THREAD_GROUP_STATUS_SUSPENDED) { - if (state & cpu_flag::stop) + if (is_stopped()) { return false; } @@ -2212,7 +2214,7 @@ bool SPUThread::stop_and_signal(u32 code) while (true) { - if (state & cpu_flag::stop) + if (is_stopped()) { return false; } @@ -2246,7 +2248,7 @@ bool SPUThread::stop_and_signal(u32 code) if (thread.get() != this) { - thread->notify(); + thread_ctrl::notify(*thread); } } } @@ -2285,7 +2287,7 @@ bool SPUThread::stop_and_signal(u32 code) if (thread && thread.get() != this) { thread->state += cpu_flag::stop; - thread->notify(); + thread_ctrl::notify(*thread); } } @@ -2329,7 +2331,7 @@ bool SPUThread::stop_and_signal(u32 code) } } -void SPUThread::halt() +void spu_thread::halt() { LOG_TRACE(SPU, "halt()"); @@ -2350,7 +2352,7 @@ void SPUThread::halt() fmt::throw_exception("Halt" HERE); } -void SPUThread::fast_call(u32 ls_addr) +void spu_thread::fast_call(u32 ls_addr) { // LS:0x0: this is originally the entry point of the interrupt handler, but interrupts are not implemented _ref(0) = 0x00000002; // STOP 2 @@ -2378,3 +2380,6 @@ void SPUThread::fast_call(u32 ls_addr) gpr[0]._u32[3] = old_lr; gpr[1]._u32[3] = old_stack; } + +DECLARE(spu_thread::g_raw_spu_ctr){}; +DECLARE(spu_thread::g_raw_spu_id){}; diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index baee513c58..7229888618 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -11,10 +11,8 @@ struct lv2_event_queue; struct lv2_spu_group; struct lv2_int_tag; -class SPUThread; - // JIT Block -using spu_function_t = void(*)(SPUThread&, void*, u8*); +using spu_function_t = void(*)(spu_thread&, void*, u8*); // SPU Channels enum : u32 @@ -500,24 +498,22 @@ public: } }; -class SPUThread : public cpu_thread +class spu_thread : public cpu_thread { public: - virtual void on_spawn() override; - virtual void on_init(const std::shared_ptr&) override; virtual std::string get_name() const override; virtual std::string dump() const override; - virtual void cpu_task() override; + virtual void cpu_task() override final; virtual void cpu_mem() override; virtual void cpu_unmem() override; - virtual ~SPUThread() override; + virtual ~spu_thread() override; void cpu_init(); static const u32 id_base = 0x02000000; // TODO (used to determine thread type) static const u32 id_step = 1; static const u32 id_count = 2048; - SPUThread(const std::string& name, u32 index, lv2_spu_group* group); + spu_thread(vm::addr_t ls, lv2_spu_group* group, u32 index, std::string_view name); u32 pc = 0; @@ -578,7 +574,7 @@ public: const u32 offset; // SPU LS offset lv2_spu_group* const group; // SPU Thread Group - const std::string m_name; // Thread name + lf_value spu_name; // Thread name std::unique_ptr jit; // Recompiler instance @@ -623,4 +619,20 @@ public: { return *_ptr(lsa); } + + bool read_reg(const u32 addr, u32& value); + bool write_reg(const u32 addr, const u32 value); + + static atomic_t g_raw_spu_ctr; + static atomic_t g_raw_spu_id[5]; + + static u32 find_raw_spu(u32 id) + { + if (LIKELY(id < std::size(g_raw_spu_id))) + { + return g_raw_spu_id[id]; + } + + return -1; + } }; diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 4d01b2816b..190594cc01 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1002,13 +1002,13 @@ DECLARE(lv2_obj::g_ppu); DECLARE(lv2_obj::g_pending); DECLARE(lv2_obj::g_waiting); -void lv2_obj::sleep_timeout(old_thread& thread, u64 timeout) +void lv2_obj::sleep_timeout(cpu_thread& thread, u64 timeout) { std::lock_guard lock(g_mutex); const u64 start_time = get_system_time(); - if (auto ppu = dynamic_cast(&thread)) + if (auto ppu = static_cast(thread.id_type() == 1 ? &thread : nullptr)) { LOG_TRACE(PPU, "sleep() - waiting (%zu)", g_pending.size()); @@ -1123,7 +1123,7 @@ void lv2_obj::awake(cpu_thread& cpu, u32 prio) } // Remove pending if necessary - if (!g_pending.empty() && cpu.get() == thread_ctrl::get_current()) + if (!g_pending.empty() && &cpu == get_current_cpu_thread()) { unqueue(g_pending, &cpu); } @@ -1165,7 +1165,7 @@ void lv2_obj::schedule_all() target->state ^= (cpu_flag::signal + cpu_flag::suspend); target->start_time = 0; - if (target->get() != thread_ctrl::get_current()) + if (target != get_current_cpu_thread()) { target->notify(); } diff --git a/rpcs3/Emu/Cell/lv2/sys_cond.cpp b/rpcs3/Emu/Cell/lv2/sys_cond.cpp index a0e4d09694..36a5d6f41a 100644 --- a/rpcs3/Emu/Cell/lv2/sys_cond.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_cond.cpp @@ -241,6 +241,11 @@ error_code sys_cond_wait(ppu_thread& ppu, u32 cond_id, u64 timeout) while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + if (timeout) { const u64 passed = get_system_time() - ppu.start_time; diff --git a/rpcs3/Emu/Cell/lv2/sys_event.cpp b/rpcs3/Emu/Cell/lv2/sys_event.cpp index 5ea0742ac9..52b959afb7 100644 --- a/rpcs3/Emu/Cell/lv2/sys_event.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_event.cpp @@ -57,7 +57,7 @@ bool lv2_event_queue::send(lv2_event event) else { // Store event in In_MBox - auto& spu = static_cast(*sq.front()); + auto& spu = static_cast(*sq.front()); // TODO: use protocol? sq.pop_front(); @@ -180,7 +180,7 @@ error_code sys_event_queue_destroy(ppu_thread& ppu, u32 equeue_id, s32 mode) } else { - static_cast(*cpu).ch_in_mbox.set_values(1, CELL_ECANCELED); + static_cast(*cpu).ch_in_mbox.set_values(1, CELL_ECANCELED); cpu->state += cpu_flag::signal; cpu->notify(); } @@ -271,6 +271,11 @@ error_code sys_event_queue_receive(ppu_thread& ppu, u32 equeue_id, vm::ptr num) } } - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } + if (num) *num = value; return CELL_OK; } diff --git a/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp b/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp index 77c50e3c06..4185df50e9 100644 --- a/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp @@ -8,8 +8,6 @@ #include "Emu/Cell/PPUOpcodes.h" #include "sys_interrupt.h" - - LOG_CHANNEL(sys_interrupt); void lv2_int_serv::exec() @@ -22,7 +20,7 @@ void lv2_int_serv::exec() { ppu_cmd::sleep, 0 } }); - thread->notify(); + thread_ctrl::notify(*thread); } void lv2_int_serv::join() @@ -35,8 +33,8 @@ void lv2_int_serv::join() { ppu_cmd::opcode, ppu_instructions::SC(0) }, }); - thread->notify(); - thread->join(); + thread_ctrl::notify(*thread); + (*thread)(); } error_code sys_interrupt_tag_destroy(u32 intrtag) @@ -86,7 +84,7 @@ error_code _sys_interrupt_thread_establish(vm::ptr ih, u32 intrtag, u32 int } // Get interrupt thread - const auto it = idm::get_unlocked(intrthread); + const auto it = idm::get_unlocked>(intrthread); if (!it) { @@ -110,7 +108,8 @@ error_code _sys_interrupt_thread_establish(vm::ptr ih, u32 intrtag, u32 int result = std::make_shared(it, arg1, arg2); tag->handler = result; - it->run(); + it->state -= cpu_flag::stop; + thread_ctrl::notify(*it); return result; }); @@ -131,7 +130,7 @@ error_code _sys_interrupt_thread_disestablish(ppu_thread& ppu, u32 ih, vm::ptr(ih)) + if (const auto thread = idm::withdraw>(ih)) { *r13 = thread->gpr[13]; return CELL_OK; diff --git a/rpcs3/Emu/Cell/lv2/sys_interrupt.h b/rpcs3/Emu/Cell/lv2/sys_interrupt.h index dca783b03e..6b87f59f56 100644 --- a/rpcs3/Emu/Cell/lv2/sys_interrupt.h +++ b/rpcs3/Emu/Cell/lv2/sys_interrupt.h @@ -15,11 +15,11 @@ struct lv2_int_serv final : lv2_obj { static const u32 id_base = 0x0b000000; - const std::shared_ptr thread; + const std::shared_ptr> thread; const u64 arg1; const u64 arg2; - lv2_int_serv(const std::shared_ptr& thread, u64 arg1, u64 arg2) + lv2_int_serv(const std::shared_ptr>& thread, u64 arg1, u64 arg2) : thread(thread) , arg1(arg1) , arg2(arg2) diff --git a/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp b/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp index 6bfe68a6fa..a4b65d1fc8 100644 --- a/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_lwcond.cpp @@ -274,6 +274,11 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + if (timeout) { const u64 passed = get_system_time() - ppu.start_time; @@ -290,7 +295,7 @@ error_code _sys_lwcond_queue_wait(ppu_thread& ppu, u32 lwcond_id, u32 lwmutex_id cond->waiters--; - if (mutex->signaled.fetch_dec_sat()) + if (mutex->signaled.try_dec()) { ppu.gpr[3] = CELL_EDEADLK; break; diff --git a/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp b/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp index 0708101be3..733a9acf23 100644 --- a/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_lwmutex.cpp @@ -112,6 +112,11 @@ error_code _sys_lwmutex_lock(ppu_thread& ppu, u32 lwmutex_id, u64 timeout) while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + if (timeout) { const u64 passed = get_system_time() - ppu.start_time; diff --git a/rpcs3/Emu/Cell/lv2/sys_mutex.cpp b/rpcs3/Emu/Cell/lv2/sys_mutex.cpp index deaa413789..97e94aeeae 100644 --- a/rpcs3/Emu/Cell/lv2/sys_mutex.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_mutex.cpp @@ -152,6 +152,11 @@ error_code sys_mutex_lock(ppu_thread& ppu, u32 mutex_id, u64 timeout) while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + if (timeout) { const u64 passed = get_system_time() - ppu.start_time; diff --git a/rpcs3/Emu/Cell/lv2/sys_net.cpp b/rpcs3/Emu/Cell/lv2/sys_net.cpp index fbe6662048..10076edd8e 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_net.cpp @@ -112,7 +112,7 @@ static void network_clear_queue(ppu_thread& ppu) extern void network_thread_init() { - thread_ctrl::make_shared("Network Thread", []() + thread_ctrl::spawn("Network Thread", []() { std::vector> socklist; socklist.reserve(lv2_socket::id_count); @@ -241,7 +241,7 @@ extern void network_thread_init() CloseHandle(_eventh); WSACleanup(); #endif - })->detach(); + }); } lv2_socket::lv2_socket(lv2_socket::socket_type s) @@ -338,6 +338,11 @@ s32 sys_net_bnet_accept(ppu_thread& ppu, s32 s, vm::ptr addr, { while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + thread_ctrl::wait(); } @@ -546,6 +551,11 @@ s32 sys_net_bnet_connect(ppu_thread& ppu, s32 s, vm::ptr addr, { while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + thread_ctrl::wait(); } @@ -946,6 +956,11 @@ s32 sys_net_bnet_recvfrom(ppu_thread& ppu, s32 s, vm::ptr buf, u32 len, s3 { while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + thread_ctrl::wait(); } @@ -1099,6 +1114,11 @@ s32 sys_net_bnet_sendto(ppu_thread& ppu, s32 s, vm::cptr buf, u32 len, s32 { while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + thread_ctrl::wait(); } @@ -1546,6 +1566,11 @@ s32 sys_net_bnet_poll(ppu_thread& ppu, vm::ptr fds, s32 nfds, s3 while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + if (timeout) { const u64 passed = get_system_time() - ppu.start_time; @@ -1740,6 +1765,11 @@ s32 sys_net_bnet_select(ppu_thread& ppu, s32 nfds, vm::ptr readf while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + if (timeout) { const u64 passed = get_system_time() - ppu.start_time; diff --git a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp index e99ae658ef..066957f0c8 100644 --- a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp @@ -9,8 +9,6 @@ #include "sys_event.h" #include "sys_mmapper.h" - - LOG_CHANNEL(sys_ppu_thread); void _sys_ppu_thread_exit(ppu_thread& ppu, u64 errorcode) @@ -40,15 +38,15 @@ void _sys_ppu_thread_exit(ppu_thread& ppu, u64 errorcode) if (jid == -1) { - // Delete detached thread and unqueue - idm::remove(ppu.id); + // Detach detached thread, id will be removed on cleanup + static_cast&>(ppu) = thread_state::detached; } else if (jid != 0) { std::lock_guard lock(id_manager::g_mutex); // Schedule joiner and unqueue - lv2_obj::awake(*idm::check_unlocked(jid), -2); + lv2_obj::awake(*idm::check_unlocked>(jid), -2); } // Unqueue @@ -71,7 +69,7 @@ error_code sys_ppu_thread_join(ppu_thread& ppu, u32 thread_id, vm::ptr vptr sys_ppu_thread.trace("sys_ppu_thread_join(thread_id=0x%x, vptr=*0x%x)", thread_id, vptr); - const auto thread = idm::get(thread_id, [&](ppu_thread& thread) -> CellError + const auto thread = idm::get>(thread_id, [&](ppu_thread& thread) -> CellError { CellError result = thread.joiner.atomic_op([&](u32& value) -> CellError { @@ -120,17 +118,21 @@ error_code sys_ppu_thread_join(ppu_thread& ppu, u32 thread_id, vm::ptr vptr } // Wait for cleanup - thread->join(); + (*thread.ptr)(); // Get the exit status from the register if (vptr) { - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } + *vptr = thread->gpr[3]; } // Cleanup - idm::remove(thread->id); + idm::remove>(thread->id); return CELL_OK; } @@ -138,7 +140,7 @@ error_code sys_ppu_thread_detach(u32 thread_id) { sys_ppu_thread.trace("sys_ppu_thread_detach(thread_id=0x%x)", thread_id); - const auto thread = idm::check(thread_id, [&](ppu_thread& thread) -> CellError + const auto thread = idm::check>(thread_id, [&](ppu_thread& thread) -> CellError { return thread.joiner.atomic_op([&](u32& value) -> CellError { @@ -180,7 +182,7 @@ error_code sys_ppu_thread_detach(u32 thread_id) if (thread.ret == CELL_EAGAIN) { - idm::remove(thread_id); + idm::remove>(thread_id); } return CELL_OK; @@ -202,7 +204,7 @@ error_code sys_ppu_thread_set_priority(ppu_thread& ppu, u32 thread_id, s32 prio) return CELL_EINVAL; } - const auto thread = idm::check(thread_id, [&](ppu_thread& thread) + const auto thread = idm::check>(thread_id, [&](ppu_thread& thread) { if (thread.prio != prio && thread.prio.exchange(prio) != prio) { @@ -222,7 +224,7 @@ error_code sys_ppu_thread_get_priority(u32 thread_id, vm::ptr priop) { sys_ppu_thread.trace("sys_ppu_thread_get_priority(thread_id=0x%x, priop=*0x%x)", thread_id, priop); - const auto thread = idm::check(thread_id, [&](ppu_thread& thread) + const auto thread = idm::check>(thread_id, [&](ppu_thread& thread) { *priop = thread.prio; }); @@ -249,7 +251,7 @@ error_code sys_ppu_thread_stop(u32 thread_id) { sys_ppu_thread.todo("sys_ppu_thread_stop(thread_id=0x%x)", thread_id); - const auto thread = idm::get(thread_id); + const auto thread = idm::get>(thread_id); if (!thread) { @@ -263,7 +265,7 @@ error_code sys_ppu_thread_restart(u32 thread_id) { sys_ppu_thread.todo("sys_ppu_thread_restart(thread_id=0x%x)", thread_id); - const auto thread = idm::get(thread_id); + const auto thread = idm::get>(thread_id); if (!thread) { @@ -273,10 +275,10 @@ error_code sys_ppu_thread_restart(u32 thread_id) return CELL_OK; } -error_code _sys_ppu_thread_create(vm::ptr thread_id, vm::ptr param, u64 arg, u64 unk, s32 prio, u32 stacksize, u64 flags, vm::cptr threadname) +error_code _sys_ppu_thread_create(vm::ptr thread_id, vm::ptr param, u64 arg, u64 unk, s32 prio, u32 _stacksz, u64 flags, vm::cptr threadname) { sys_ppu_thread.warning("_sys_ppu_thread_create(thread_id=*0x%x, param=*0x%x, arg=0x%llx, unk=0x%llx, prio=%d, stacksize=0x%x, flags=0x%llx, threadname=%s)", - thread_id, param, arg, unk, prio, stacksize, flags, threadname); + thread_id, param, arg, unk, prio, _stacksz, flags, threadname); if (prio < 0 || prio > 3071) { @@ -288,33 +290,38 @@ error_code _sys_ppu_thread_create(vm::ptr thread_id, vm::ptr([&]() + // Compute actual stack size and allocate + const u32 stack_size = _stacksz >= 4096 ? ::align(std::min(_stacksz, 0x100000), 4096) : 0x4000; + + const vm::addr_t stack_base{vm::alloc(_stacksz, vm::stack, 4096)}; + + if (!stack_base) { - auto ppu = std::make_shared(threadname ? threadname.get_ptr() : "", prio, stacksize); + return CELL_ENOMEM; + } - if ((flags & SYS_PPU_THREAD_CREATE_JOINABLE) != 0) + const u32 tid = idm::import>([&]() + { + const u32 tid = idm::last_id(); + + std::string ppu_name; + std::string full_name = fmt::format("PPU[0x%x] Thread", tid); + + if (threadname) { - ppu->joiner = 0; + ppu_name = threadname.get_ptr(); + fmt::append(full_name, " (%s)", ppu_name); } - ppu->gpr[13] = param->tls.value(); + ppu_thread_params p; + p.stack_addr = stack_base; + p.stack_size = stack_size; + p.tls_addr = param->tls; + p.entry = param->entry; + p.arg0 = arg; + p.arg1 = unk; - if ((flags & SYS_PPU_THREAD_CREATE_INTERRUPT) == 0) - { - // Initialize thread entry point - ppu->cmd_list - ({ - { ppu_cmd::set_args, 2 }, arg, unk, // Actually unknown - { ppu_cmd::lle_call, param->entry.value() }, - }); - } - else - { - // Save entry for further use (workaround) - ppu->gpr[2] = param->entry.value(); - } - - return ppu; + return std::make_shared>(full_name, p, ppu_name, prio, 1 - static_cast(flags & 3)); }); if (!tid) @@ -330,7 +337,7 @@ error_code sys_ppu_thread_start(ppu_thread& ppu, u32 thread_id) { sys_ppu_thread.trace("sys_ppu_thread_start(thread_id=0x%x)", thread_id); - const auto thread = idm::get(thread_id, [&](ppu_thread& thread) + const auto thread = idm::get>(thread_id, [&](ppu_thread& thread) { lv2_obj::awake(thread, -2); }); @@ -347,10 +354,10 @@ error_code sys_ppu_thread_start(ppu_thread& ppu, u32 thread_id) } else { - thread->notify(); + thread_ctrl::notify(*thread); // Dirty hack for sound: confirm the creation of _mxr000 event queue - if (thread->m_name == "_cellsurMixerMain") + if (thread->ppu_name.get() == "_cellsurMixerMain"sv) { lv2_obj::sleep(ppu); @@ -360,10 +367,18 @@ error_code sys_ppu_thread_start(ppu_thread& ppu, u32 thread_id) return (eq.name == "_mxr000\0"_u64) || (eq.key == 0x8000cafe02460300); })) { + if (ppu.is_stopped()) + { + return 0; + } + thread_ctrl::wait_for(50000); } - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } } } @@ -372,22 +387,26 @@ error_code sys_ppu_thread_start(ppu_thread& ppu, u32 thread_id) error_code sys_ppu_thread_rename(u32 thread_id, vm::cptr name) { - sys_ppu_thread.todo("sys_ppu_thread_rename(thread_id=0x%x, name=%s)", thread_id, name); + sys_ppu_thread.warning("sys_ppu_thread_rename(thread_id=0x%x, name=%s)", thread_id, name); - const auto thread = idm::get(thread_id); + const auto thread = idm::get>(thread_id); if (!thread) { return CELL_ESRCH; } + // thread_ctrl name is not changed (TODO) + thread->ppu_name.assign(name.get_ptr()); return CELL_OK; } error_code sys_ppu_thread_recover_page_fault(u32 thread_id) { sys_ppu_thread.warning("sys_ppu_thread_recover_page_fault(thread_id=0x%x)", thread_id); - const auto thread = idm::get(thread_id); + + const auto thread = idm::get>(thread_id); + if (!thread) { return CELL_ESRCH; @@ -421,7 +440,8 @@ error_code sys_ppu_thread_get_page_fault_context(u32 thread_id, vm::ptr(thread_id); + const auto thread = idm::get>(thread_id); + if (!thread) { return CELL_ESRCH; diff --git a/rpcs3/Emu/Cell/lv2/sys_process.cpp b/rpcs3/Emu/Cell/lv2/sys_process.cpp index a706e16f2c..42e6415fa6 100644 --- a/rpcs3/Emu/Cell/lv2/sys_process.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_process.cpp @@ -246,7 +246,7 @@ void _sys_process_exit(ppu_thread& ppu, s32 status, u32 arg2, u32 arg3) Emu.Stop(); }); - thread_ctrl::eternalize(); + ppu.state += cpu_flag::dbg_global_stop; } void _sys_process_exit2(ppu_thread& ppu, s32 status, vm::ptr arg, u32 arg_size, u32 arg4) @@ -314,5 +314,5 @@ void _sys_process_exit2(ppu_thread& ppu, s32 status, vm::ptr ar Emu.BootGame(path, true); }); - thread_ctrl::eternalize(); + ppu.state += cpu_flag::dbg_global_stop; } diff --git a/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp b/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp index b1fa084896..31ffbc98d9 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_rwlock.cpp @@ -129,6 +129,11 @@ error_code sys_rwlock_rlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout) while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + if (timeout) { const u64 passed = get_system_time() - ppu.start_time; @@ -318,6 +323,11 @@ error_code sys_rwlock_wlock(ppu_thread& ppu, u32 rw_lock_id, u64 timeout) while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + if (timeout) { const u64 passed = get_system_time() - ppu.start_time; diff --git a/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp b/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp index e38e5c46c1..a59d9680d5 100644 --- a/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_semaphore.cpp @@ -123,6 +123,11 @@ error_code sys_semaphore_wait(ppu_thread& ppu, u32 sem_id, u64 timeout) while (!ppu.state.test_and_reset(cpu_flag::signal)) { + if (ppu.is_stopped()) + { + return 0; + } + if (timeout) { const u64 passed = get_system_time() - ppu.start_time; diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.cpp b/rpcs3/Emu/Cell/lv2/sys_spu.cpp index 1171c269cd..13202e07c3 100644 --- a/rpcs3/Emu/Cell/lv2/sys_spu.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_spu.cpp @@ -15,8 +15,6 @@ #include "sys_event.h" #include "sys_spu.h" - - LOG_CHANNEL(sys_spu); void sys_spu_image::load(const fs::file& stream) @@ -233,11 +231,25 @@ error_code sys_spu_thread_initialize(vm::ptr thread, u32 group_id, u32 spu_ sys_spu.todo("Unimplemented SPU Thread options (0x%x)", option); } - auto spu = idm::make_ptr(thread_name, spu_num, group.get()); + const vm::addr_t ls_addr{verify("SPU LS" HERE, vm::alloc(0x40000, vm::main))}; - *thread = spu->id; + const u32 tid = idm::import>([&]() + { + const u32 tid = idm::last_id(); + + std::string full_name = fmt::format("SPU[0x%x] Thread", tid); + + if (!thread_name.empty()) + { + fmt::append(full_name, " (%s)", thread_name); + } + + group->threads[spu_num] = std::make_shared>(full_name, ls_addr, group.get(), spu_num, thread_name); + return group->threads[spu_num]; + }); + + *thread = tid; - group->threads[spu_num] = std::move(spu); group->args[spu_num] = {arg->arg1, arg->arg2, arg->arg3, arg->arg4}; group->imgs[spu_num] = std::make_pair(*img, std::vector()); group->imgs[spu_num].second.assign(img->segs.get_ptr(), img->segs.get_ptr() + img->nsegs); @@ -254,9 +266,9 @@ error_code sys_spu_thread_set_argument(u32 id, vm::ptr { sys_spu.warning("sys_spu_thread_set_argument(id=0x%x, arg=*0x%x)", id, arg); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); - if (!thread) + if (UNLIKELY(!thread || !thread->group)) { return CELL_ESRCH; } @@ -274,9 +286,9 @@ error_code sys_spu_thread_get_exit_status(u32 id, vm::ptr status) { sys_spu.warning("sys_spu_thread_get_exit_status(id=0x%x, status=*0x%x)", id, status); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); - if (UNLIKELY(!thread)) + if (UNLIKELY(!thread || !thread->group)) { return CELL_ESRCH; } @@ -342,7 +354,7 @@ error_code sys_spu_thread_group_destroy(u32 id) { if (auto thread = std::move(ptr)) { - idm::remove(thread->id); + idm::remove>(thread->id); } } @@ -384,8 +396,8 @@ error_code sys_spu_thread_group_start(ppu_thread& ppu, u32 id) sys_spu_image::deploy(thread->offset, img.second.data(), img.first.nsegs); - thread->pc = img.first.entry_point; thread->cpu_init(); + thread->npc = img.first.entry_point; thread->gpr[3] = v128::from64(0, args[0]); thread->gpr[4] = v128::from64(0, args[1]); thread->gpr[5] = v128::from64(0, args[2]); @@ -403,7 +415,8 @@ error_code sys_spu_thread_group_start(ppu_thread& ppu, u32 id) { if (thread) { - thread->run(); + thread->state -= cpu_flag::stop; + thread_ctrl::notify(*thread); } } @@ -501,7 +514,7 @@ error_code sys_spu_thread_group_resume(u32 id) if (thread) { thread->state -= cpu_flag::suspend; - thread->notify(); + thread_ctrl::notify(*thread); } } @@ -539,11 +552,11 @@ error_code sys_spu_thread_group_terminate(u32 id, s32 value) sys_spu.warning("sys_spu_thread_group_terminate(id=0x%x, value=0x%x)", id, value); // The id can be either SPU Thread Group or SPU Thread - const auto thread = idm::get(id); + const auto thread = idm::get>(id); const auto _group = idm::get(id); const auto group = thread ? thread->group : _group.get(); - if (!group && !thread) + if (!group && (!thread || !thread->group)) { return CELL_ESRCH; } @@ -581,7 +594,7 @@ error_code sys_spu_thread_group_terminate(u32 id, s32 value) if (thread) { thread->state += cpu_flag::stop; - thread->notify(); + thread_ctrl::notify(*thread); } } @@ -627,6 +640,11 @@ error_code sys_spu_thread_group_join(ppu_thread& ppu, u32 id, vm::ptr cause while ((group->join_state & ~SPU_TGJSF_IS_JOINING) == 0) { + if (ppu.is_stopped()) + { + return 0; + } + bool stopped = true; for (auto& t : group->threads) @@ -648,7 +666,6 @@ error_code sys_spu_thread_group_join(ppu_thread& ppu, u32 id, vm::ptr cause // TODO group->cv.wait(group->mutex, 1000); - thread_ctrl::test(); } join_state = group->join_state; @@ -657,7 +674,10 @@ error_code sys_spu_thread_group_join(ppu_thread& ppu, u32 id, vm::ptr cause group->run_state = SPU_THREAD_GROUP_STATUS_INITIALIZED; // hack } - ppu.test_state(); + if (ppu.test_stopped()) + { + return 0; + } switch (join_state & ~SPU_TGJSF_IS_JOINING) { @@ -743,9 +763,9 @@ error_code sys_spu_thread_write_ls(u32 id, u32 lsa, u64 value, u32 type) { sys_spu.trace("sys_spu_thread_write_ls(id=0x%x, lsa=0x%05x, value=0x%llx, type=%d)", id, lsa, value, type); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); - if (!thread) + if (UNLIKELY(!thread || !thread->group)) { return CELL_ESRCH; } @@ -780,9 +800,9 @@ error_code sys_spu_thread_read_ls(u32 id, u32 lsa, vm::ptr value, u32 type) { sys_spu.trace("sys_spu_thread_read_ls(id=0x%x, lsa=0x%05x, value=*0x%x, type=%d)", id, lsa, value, type); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); - if (!thread) + if (UNLIKELY(!thread || !thread->group)) { return CELL_ESRCH; } @@ -817,9 +837,9 @@ error_code sys_spu_thread_write_spu_mb(u32 id, u32 value) { sys_spu.warning("sys_spu_thread_write_spu_mb(id=0x%x, value=0x%x)", id, value); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); - if (!thread) + if (UNLIKELY(!thread || !thread->group)) { return CELL_ESRCH; } @@ -842,9 +862,9 @@ error_code sys_spu_thread_set_spu_cfg(u32 id, u64 value) { sys_spu.warning("sys_spu_thread_set_spu_cfg(id=0x%x, value=0x%x)", id, value); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); - if (!thread) + if (UNLIKELY(!thread || !thread->group)) { return CELL_ESRCH; } @@ -863,9 +883,9 @@ error_code sys_spu_thread_get_spu_cfg(u32 id, vm::ptr value) { sys_spu.warning("sys_spu_thread_get_spu_cfg(id=0x%x, value=*0x%x)", id, value); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); - if (!thread) + if (UNLIKELY(!thread || !thread->group)) { return CELL_ESRCH; } @@ -879,9 +899,9 @@ error_code sys_spu_thread_write_snr(u32 id, u32 number, u32 value) { sys_spu.trace("sys_spu_thread_write_snr(id=0x%x, number=%d, value=0x%x)", id, number, value); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); - if (!thread) + if (UNLIKELY(!thread || !thread->group)) { return CELL_ESRCH; } @@ -1016,10 +1036,10 @@ error_code sys_spu_thread_connect_event(u32 id, u32 eq, u32 et, u8 spup) { sys_spu.warning("sys_spu_thread_connect_event(id=0x%x, eq=0x%x, et=%d, spup=%d)", id, eq, et, spup); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); const auto queue = idm::get(eq); - if (!thread || !queue) + if (UNLIKELY(!queue || !thread || !thread->group)) { return CELL_ESRCH; } @@ -1048,9 +1068,9 @@ error_code sys_spu_thread_disconnect_event(u32 id, u32 et, u8 spup) { sys_spu.warning("sys_spu_thread_disconnect_event(id=0x%x, et=%d, spup=%d)", id, et, spup); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); - if (!thread) + if (UNLIKELY(!thread || !thread->group)) { return CELL_ESRCH; } @@ -1079,10 +1099,10 @@ error_code sys_spu_thread_bind_queue(u32 id, u32 spuq, u32 spuq_num) { sys_spu.warning("sys_spu_thread_bind_queue(id=0x%x, spuq=0x%x, spuq_num=0x%x)", id, spuq, spuq_num); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); const auto queue = idm::get(spuq); - if (!thread || !queue) + if (UNLIKELY(!queue || !thread || !thread->group)) { return CELL_ESRCH; } @@ -1123,9 +1143,9 @@ error_code sys_spu_thread_unbind_queue(u32 id, u32 spuq_num) { sys_spu.warning("sys_spu_thread_unbind_queue(id=0x%x, spuq_num=0x%x)", id, spuq_num); - const auto thread = idm::get(id); + const auto thread = idm::get>(id); - if (!thread) + if (UNLIKELY(!thread || !thread->group)) { return CELL_ESRCH; } @@ -1251,16 +1271,27 @@ error_code sys_raw_spu_create(vm::ptr id, vm::ptr attr) // TODO: check number set by sys_spu_initialize() - const auto thread = idm::make_ptr(""); - - if (!thread) + if (!spu_thread::g_raw_spu_ctr.try_inc(5)) { return CELL_EAGAIN; } - thread->cpu_init(); + u32 index = 0; - *id = thread->index; + // Find free RawSPU ID + while (!spu_thread::g_raw_spu_id[index].try_inc(1)) + { + if (++index == 5) + index = 0; + } + + const vm::addr_t ls_addr{verify(HERE, vm::falloc(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index, 0x40000, vm::spu))}; + + const u32 tid = idm::make>(fmt::format("RawSPU[0x%x] Thread", index), ls_addr, nullptr, index, ""); + + spu_thread::g_raw_spu_id[index] = verify("RawSPU ID" HERE, tid); + + *id = index; return CELL_OK; } @@ -1269,9 +1300,9 @@ error_code sys_raw_spu_destroy(ppu_thread& ppu, u32 id) { sys_spu.warning("sys_raw_spu_destroy(id=%d)", id); - const auto thread = idm::get(id); + const auto thread = idm::get>(spu_thread::find_raw_spu(id)); - if (!thread) + if (UNLIKELY(!thread || thread->group)) { return CELL_ESRCH; } @@ -1320,8 +1351,7 @@ error_code sys_raw_spu_destroy(ppu_thread& ppu, u32 id) idm::remove(pair.second); } - idm::remove(thread->id); - + idm::remove>(thread->id); return CELL_OK; } @@ -1340,9 +1370,9 @@ error_code sys_raw_spu_create_interrupt_tag(u32 id, u32 class_id, u32 hwthread, { std::shared_ptr result; - auto thread = idm::check_unlocked(id); + auto thread = idm::check_unlocked>(spu_thread::find_raw_spu(id)); - if (!thread) + if (!thread || thread->group) { error = CELL_ESRCH; return result; @@ -1379,9 +1409,9 @@ error_code sys_raw_spu_set_int_mask(u32 id, u32 class_id, u64 mask) return CELL_EINVAL; } - const auto thread = idm::get(id); + const auto thread = idm::get>(spu_thread::find_raw_spu(id)); - if (!thread) + if (UNLIKELY(!thread || thread->group)) { return CELL_ESRCH; } @@ -1400,9 +1430,9 @@ error_code sys_raw_spu_get_int_mask(u32 id, u32 class_id, vm::ptr mask) return CELL_EINVAL; } - const auto thread = idm::get(id); + const auto thread = idm::get>(spu_thread::find_raw_spu(id)); - if (!thread) + if (UNLIKELY(!thread || thread->group)) { return CELL_ESRCH; } @@ -1421,9 +1451,9 @@ error_code sys_raw_spu_set_int_stat(u32 id, u32 class_id, u64 stat) return CELL_EINVAL; } - const auto thread = idm::get(id); + const auto thread = idm::get>(spu_thread::find_raw_spu(id)); - if (!thread) + if (UNLIKELY(!thread || thread->group)) { return CELL_ESRCH; } @@ -1442,9 +1472,9 @@ error_code sys_raw_spu_get_int_stat(u32 id, u32 class_id, vm::ptr stat) return CELL_EINVAL; } - const auto thread = idm::get(id); + const auto thread = idm::get>(spu_thread::find_raw_spu(id)); - if (!thread) + if (UNLIKELY(!thread || thread->group)) { return CELL_ESRCH; } @@ -1458,9 +1488,9 @@ error_code sys_raw_spu_read_puint_mb(u32 id, vm::ptr value) { sys_spu.trace("sys_raw_spu_read_puint_mb(id=%d, value=*0x%x)", id, value); - const auto thread = idm::get(id); + const auto thread = idm::get>(spu_thread::find_raw_spu(id)); - if (!thread) + if (UNLIKELY(!thread || thread->group)) { return CELL_ESRCH; } @@ -1479,9 +1509,9 @@ error_code sys_raw_spu_set_spu_cfg(u32 id, u32 value) fmt::throw_exception("Unexpected value (0x%x)" HERE, value); } - const auto thread = idm::get(id); + const auto thread = idm::get>(spu_thread::find_raw_spu(id)); - if (!thread) + if (UNLIKELY(!thread || thread->group)) { return CELL_ESRCH; } @@ -1495,9 +1525,9 @@ error_code sys_raw_spu_get_spu_cfg(u32 id, vm::ptr value) { sys_spu.trace("sys_raw_spu_get_spu_afg(id=%d, value=*0x%x)", id, value); - const auto thread = idm::get(id); + const auto thread = idm::get>(spu_thread::find_raw_spu(id)); - if (!thread) + if (UNLIKELY(!thread || thread->group)) { return CELL_ESRCH; } diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.h b/rpcs3/Emu/Cell/lv2/sys_spu.h index 7d192c3164..fd96649a48 100644 --- a/rpcs3/Emu/Cell/lv2/sys_spu.h +++ b/rpcs3/Emu/Cell/lv2/sys_spu.h @@ -1,6 +1,7 @@ #pragma once #include "sys_event.h" +#include "Emu/Cell/SPUThread.h" enum : s32 { @@ -214,8 +215,6 @@ enum : u32 SPU_TGJSF_GROUP_EXIT = (1 << 2), // set if SPU Thread Group is terminated by sys_spu_thread_group_exit }; -class SPUThread; - struct lv2_spu_group { static const u32 id_base = 1; // Wrong? @@ -236,7 +235,7 @@ struct lv2_spu_group atomic_t join_state; // flags used to detect exit cause cond_variable cv; // used to signal waiting PPU thread - std::array, 256> threads; // SPU Threads + std::array>, 256> threads; // SPU Threads std::array>, 256> imgs; // SPU Images std::array, 256> args; // SPU Thread Arguments diff --git a/rpcs3/Emu/Cell/lv2/sys_sync.h b/rpcs3/Emu/Cell/lv2/sys_sync.h index ec4e5aeea8..f9f53ec5fb 100644 --- a/rpcs3/Emu/Cell/lv2/sys_sync.h +++ b/rpcs3/Emu/Cell/lv2/sys_sync.h @@ -114,7 +114,7 @@ struct lv2_obj } // Remove the current thread from the scheduling queue, register timeout - static void sleep_timeout(old_thread&, u64 timeout); + static void sleep_timeout(cpu_thread&, u64 timeout); static void sleep(cpu_thread& thread, u64 timeout = 0) { @@ -224,7 +224,7 @@ private: static std::deque g_pending; // Scheduler queue for timeouts (wait until -> thread) - static std::deque> g_waiting; + static std::deque> g_waiting; static void schedule_all(); }; diff --git a/rpcs3/Emu/Cell/lv2/sys_timer.cpp b/rpcs3/Emu/Cell/lv2/sys_timer.cpp index e59f00d4fa..9603e61e30 100644 --- a/rpcs3/Emu/Cell/lv2/sys_timer.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_timer.cpp @@ -11,13 +11,11 @@ #include - - LOG_CHANNEL(sys_timer); extern u64 get_system_time(); -void lv2_timer::on_task() +void lv2_timer_context::operator()() { while (!Emu.IsStopped()) { @@ -50,7 +48,6 @@ void lv2_timer::on_task() } // TODO: use single global dedicated thread for busy waiting, no timer threads - lv2_obj::sleep_timeout(*this, next - _now); thread_ctrl::wait_for(next - _now); } else if (_state == SYS_TIMER_STATE_STOP) @@ -64,19 +61,17 @@ void lv2_timer::on_task() } } -void lv2_timer::on_stop() +void lv2_timer_context::on_abort() { // Signal thread using invalid state state = -1; - notify(); - join(); } error_code sys_timer_create(vm::ptr timer_id) { sys_timer.warning("sys_timer_create(timer_id=*0x%x)", timer_id); - if (const u32 id = idm::make()) + if (const u32 id = idm::make("Timer Thread")) { *timer_id = id; return CELL_OK; @@ -155,7 +150,7 @@ error_code _sys_timer_start(u32 timer_id, u64 base_time, u64 period) const auto timer = idm::check(timer_id, [&](lv2_timer& timer) -> CellError { - std::lock_guard lock(timer.mutex); + std::unique_lock lock(timer.mutex); if (timer.state != SYS_TIMER_STATE_STOP) { @@ -171,7 +166,9 @@ error_code _sys_timer_start(u32 timer_id, u64 base_time, u64 period) timer.expire = base_time ? base_time : start_time + period; timer.period = period; timer.state = SYS_TIMER_STATE_RUN; - timer.notify(); + + lock.unlock(); + thread_ctrl::notify(timer); return {}; }); @@ -311,6 +308,11 @@ error_code sys_timer_usleep(ppu_thread& ppu, u64 sleep_time) while (sleep_time >= passed) { + if (ppu.is_stopped()) + { + return 0; + } + remaining = sleep_time - passed; if (remaining > host_min_quantum) diff --git a/rpcs3/Emu/Cell/lv2/sys_timer.h b/rpcs3/Emu/Cell/lv2/sys_timer.h index 15f058917f..b24a4d16ae 100644 --- a/rpcs3/Emu/Cell/lv2/sys_timer.h +++ b/rpcs3/Emu/Cell/lv2/sys_timer.h @@ -17,12 +17,12 @@ struct sys_timer_information_t be_t pad; }; -struct lv2_timer final : public lv2_obj, public old_thread +struct lv2_timer_context : lv2_obj { static const u32 id_base = 0x11000000; - void on_task() override; - void on_stop() override; + void operator()(); + void on_abort(); semaphore<> mutex; atomic_t state{SYS_TIMER_STATE_STOP}; @@ -36,6 +36,8 @@ struct lv2_timer final : public lv2_obj, public old_thread atomic_t period{0}; // Period (oneshot if 0) }; +using lv2_timer = named_thread; + class ppu_thread; // Syscalls diff --git a/rpcs3/Emu/IdManager.cpp b/rpcs3/Emu/IdManager.cpp index 9f9f355c26..19f42c2224 100644 --- a/rpcs3/Emu/IdManager.cpp +++ b/rpcs3/Emu/IdManager.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "IdManager.h" +#include "Utilities/Thread.h" shared_mutex id_manager::g_mutex; @@ -23,7 +24,7 @@ id_manager::id_map::pointer idm::allocate_id(const id_manager::id_key& info, u32 if (_next >= base && _next < base + step * count) { g_id = _next; - vec.emplace_back(id_manager::id_key(_next, info.type(), info.on_stop()), nullptr); + vec.emplace_back(id_manager::id_key(_next, info.type()), nullptr); return &vec.back(); } } @@ -37,7 +38,7 @@ id_manager::id_map::pointer idm::allocate_id(const id_manager::id_key& info, u32 if (!ptr->second) { g_id = next; - ptr->first = id_manager::id_key(next, info.type(), info.on_stop()); + ptr->first = id_manager::id_key(next, info.type()); return ptr; } } @@ -60,12 +61,8 @@ void idm::clear() { for (auto& pair : map) { - if (auto ptr = pair.second.get()) - { - pair.first.on_stop()(ptr); - pair.second.reset(); - pair.first = {}; - } + pair.second.reset(); + pair.first = {}; } map.clear(); @@ -82,13 +79,8 @@ void fxm::init() void fxm::clear() { // Call recorded finalization functions for all IDs - for (auto& pair : g_vec) + for (auto& val : g_vec) { - if (auto ptr = pair.second.get()) - { - pair.first(ptr); - pair.second.reset(); - pair.first = nullptr; - } + val.reset(); } } diff --git a/rpcs3/Emu/IdManager.h b/rpcs3/Emu/IdManager.h index d7b093133c..7e536b9265 100644 --- a/rpcs3/Emu/IdManager.h +++ b/rpcs3/Emu/IdManager.h @@ -35,46 +35,6 @@ namespace id_manager static_assert(u64{step} * count + base < UINT32_MAX, "ID traits: invalid object range"); }; - // Optional object initialization function (called after ID registration) - template - struct on_init - { - static inline void func(T*, const std::shared_ptr&) - { - // Forbid forward declarations - static constexpr auto size = sizeof(std::conditional_t::value, void*, T>); - } - }; - - template - struct on_init().on_init(std::declval&>()))> - { - static inline void func(T* ptr, const std::shared_ptr& _ptr) - { - if (ptr) ptr->on_init(_ptr); - } - }; - - // Optional object finalization function (called after ID removal) - template - struct on_stop - { - static inline void func(T*) - { - // Forbid forward declarations - static constexpr auto size = sizeof(std::conditional_t::value, void*, T>); - } - }; - - template - struct on_stop().on_stop())> - { - static inline void func(T* ptr) - { - if (ptr) ptr->on_stop(); - } - }; - // Correct usage testing template struct id_verify : std::integral_constant::value> @@ -118,16 +78,6 @@ namespace id_manager { return add_type(0); } - - // Get type finalizer - template - static inline auto get_stop() - { - return [](void* ptr) -> void - { - return id_manager::on_stop::func(static_cast(ptr)); - }; - } }; template @@ -138,15 +88,13 @@ namespace id_manager { u32 m_value; // ID value u32 m_type; // True object type - void (*m_stop)(void*); // Finalizer public: id_key() = default; - id_key(u32 value, u32 type, void (*stop)(void*)) + id_key(u32 value, u32 type) : m_value(value) , m_type(type) - , m_stop(stop) { } @@ -160,11 +108,6 @@ namespace id_manager return m_type; } - auto on_stop() const - { - return m_stop; - } - operator u32() const { return m_value; @@ -301,7 +244,7 @@ class idm static_assert(id_manager::id_verify::value, "Invalid ID type combination"); // ID info - const id_manager::id_key info{get_type(), get_type(), id_manager::typeinfo::get_stop()}; + const id_manager::id_key info{get_type(), get_type()}; // ID traits using traits = id_manager::id_traits; @@ -342,7 +285,6 @@ public: { if (auto pair = create_id([&] { return std::make_shared(std::forward(args)...); })) { - id_manager::on_init::func(static_cast(pair->second.get()), pair->second); return {pair->second, static_cast(pair->second.get())}; } @@ -355,7 +297,6 @@ public: { if (auto pair = create_id([&] { return std::make_shared(std::forward(args)...); })) { - id_manager::on_init::func(static_cast(pair->second.get()), pair->second); return pair->first; } @@ -368,7 +309,6 @@ public: { if (auto pair = create_id([&] { return ptr; })) { - id_manager::on_init::func(static_cast(pair->second.get()), pair->second); return pair->first; } @@ -381,7 +321,6 @@ public: { if (auto pair = create_id(std::forward(provider))) { - id_manager::on_init::func(static_cast(pair->second.get()), pair->second); return pair->first; } @@ -572,7 +511,6 @@ public: } } - id_manager::on_stop::func(static_cast(ptr.get())); return true; } @@ -594,7 +532,6 @@ public: } } - id_manager::on_stop::func(static_cast(ptr.get())); return {ptr, static_cast(ptr.get())}; } @@ -612,8 +549,6 @@ public: { func(*_ptr); std::shared_ptr ptr = std::move(found->second); - lock.unlock(); - id_manager::on_stop::func(static_cast(ptr.get())); return {ptr, static_cast(ptr.get())}; } else @@ -627,8 +562,6 @@ public: } std::shared_ptr ptr = std::move(found->second); - lock.unlock(); - id_manager::on_stop::func(static_cast(ptr.get())); return {{ptr, static_cast(ptr.get())}, std::move(ret)}; } } @@ -641,7 +574,7 @@ public: class fxm { // Type Index -> Object. Use global since only one process is supported atm. - static std::vector>> g_vec; + static std::vector> g_vec; template static inline u32 get_type() @@ -664,14 +597,12 @@ public: { std::lock_guard lock(id_manager::g_mutex); - auto& pair = g_vec[get_type()]; + auto& cur = g_vec[get_type()]; - if (!pair.second) + if (!cur) { ptr = std::make_shared(std::forward(args)...); - - pair.first = id_manager::typeinfo::get_stop(); - pair.second = ptr; + cur = ptr; } else { @@ -679,7 +610,6 @@ public: } } - id_manager::on_init::func(ptr.get(), ptr); return ptr; } @@ -692,21 +622,13 @@ public: { std::lock_guard lock(id_manager::g_mutex); - auto& pair = g_vec[get_type()]; + auto& cur = g_vec[get_type()]; ptr = std::make_shared(std::forward(args)...); - old = std::move(pair.second); - - pair.first = id_manager::typeinfo::get_stop(); - pair.second = ptr; + old = std::move(cur); + cur = ptr; } - if (old) - { - id_manager::on_stop::func(static_cast(old.get())); - } - - id_manager::on_init::func(ptr.get(), ptr); return ptr; } @@ -718,16 +640,15 @@ public: { std::lock_guard lock(id_manager::g_mutex); - auto& pair = g_vec[get_type()]; + auto& cur = g_vec[get_type()]; - if (!pair.second) + if (!cur) { ptr = provider(); if (ptr) { - pair.first = id_manager::typeinfo::get_stop(); - pair.second = ptr; + cur = ptr; } } @@ -737,7 +658,6 @@ public: } } - id_manager::on_init::func(ptr.get(), ptr); return ptr; } @@ -750,16 +670,14 @@ public: { std::lock_guard lock(id_manager::g_mutex); - auto& pair = g_vec[get_type()]; + auto& cur = g_vec[get_type()]; ptr = provider(); if (ptr) { - old = std::move(pair.second); - - pair.first = id_manager::typeinfo::get_stop(); - pair.second = ptr; + old = std::move(cur); + cur = ptr; } else { @@ -767,12 +685,6 @@ public: } } - if (old) - { - id_manager::on_stop::func(static_cast(old.get())); - } - - id_manager::on_init::func(ptr.get(), ptr); return ptr; } @@ -784,22 +696,19 @@ public: { std::lock_guard lock(id_manager::g_mutex); - auto& pair = g_vec[get_type()]; + auto& old = g_vec[get_type()]; - if (auto& old = pair.second) + if (old) { return {old, static_cast(old.get())}; } else { ptr = std::make_shared(std::forward(args)...); - - pair.first = id_manager::typeinfo::get_stop(); - pair.second = ptr; + old = ptr; } } - id_manager::on_init::func(ptr.get(), ptr); return ptr; } @@ -807,7 +716,7 @@ public: template static inline T* check_unlocked() { - return static_cast(g_vec[get_type()].second.get()); + return static_cast(g_vec[get_type()].get()); } // Check whether the object exists @@ -825,7 +734,7 @@ public: { reader_lock lock(id_manager::g_mutex); - auto& ptr = g_vec[get_type()].second; + auto& ptr = g_vec[get_type()]; return {ptr, static_cast(ptr.get())}; } @@ -837,12 +746,7 @@ public: std::shared_ptr ptr; { std::lock_guard lock(id_manager::g_mutex); - ptr = std::move(g_vec[get_type()].second); - } - - if (ptr) - { - id_manager::on_stop::func(static_cast(ptr.get())); + ptr = std::move(g_vec[get_type()]); } return ptr.operator bool(); @@ -855,12 +759,7 @@ public: std::shared_ptr ptr; { std::lock_guard lock(id_manager::g_mutex); - ptr = std::move(g_vec[get_type()].second); - } - - if (ptr) - { - id_manager::on_stop::func(static_cast(ptr.get())); + ptr = std::move(g_vec[get_type()]); } return {ptr, static_cast(ptr.get())}; diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index 6c5cff0bf6..67fde64b31 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -127,11 +127,6 @@ namespace vm void cleanup_unlock(cpu_thread& cpu) noexcept { - if (g_tls_locked && cpu.get() == thread_ctrl::get_current()) - { - g_tls_locked = nullptr; - } - for (u32 i = 0; i < g_locks.size(); i++) { if (g_locks[i] == &cpu) @@ -225,7 +220,7 @@ namespace vm { while (cpu_thread* ptr = lock) { - if (ptr->state & (cpu_flag::dbg_global_stop + cpu_flag::exit)) + if (ptr->is_stopped()) { break; } @@ -533,11 +528,21 @@ namespace vm } } + const u32 page_addr = addr + (this->flags & 0x10 ? 0x1000 : 0); + const u32 page_size = size - (this->flags & 0x10 ? 0x2000 : 0); + + if (this->flags & 0x10) + { + // Mark overflow/underflow guard pages as allocated + verify(HERE), !g_pages[addr / 4096].flags.exchange(page_allocated); + verify(HERE), !g_pages[addr / 4096 + size / 4096 - 1].flags.exchange(page_allocated); + } + // Map "real" memory pages - _page_map(addr, flags, size, shm.get()); + _page_map(page_addr, flags, page_size, shm.get()); // Add entry - m_map[addr] = std::move(shm); + m_map[addr] = std::make_pair(size, std::move(shm)); return true; } @@ -589,11 +594,11 @@ namespace vm vm::writer_lock lock(0); // Deallocate all memory - for (auto it = m_map.begin(), end = m_map.end(); it != end;) + for (auto it = m_map.begin(), end = m_map.end(); !m_common && it != end;) { const auto next = std::next(it); - const auto size = (next == end ? this->addr + this->size : next->first) - it->first; - _page_unmap(it->first, size, it->second.get()); + const auto size = it->second.first; + _page_unmap(it->first, size, it->second.second.get()); it = next; } @@ -614,7 +619,7 @@ namespace vm const u32 min_page_size = flags & 0x100 ? 0x1000 : 0x10000; // Align to minimal page size - const u32 size = ::align(orig_size, min_page_size); + const u32 size = ::align(orig_size, min_page_size) + (flags & 0x10 ? 0x2000 : 0); // Check alignment (it's page allocation, so passing small values there is just silly) if (align < min_page_size || align != (0x80000000u >> utils::cntlz32(align, true))) @@ -623,7 +628,7 @@ namespace vm } // Return if size is invalid - if (!size || size > this->size) + if (!orig_size || !size || size > this->size) { return 0; } @@ -654,7 +659,7 @@ namespace vm { if (try_alloc(addr, pflags, size, std::move(shm))) { - return addr; + return addr + (flags & 0x10 ? 0x1000 : 0); } } @@ -672,7 +677,7 @@ namespace vm const u32 size = ::align(orig_size, min_page_size); // return if addr or size is invalid - if (!size || size > this->size || addr < this->addr || addr + size - 1 > this->addr + this->size - 1) + if (!size || size > this->size || addr < this->addr || addr + size - 1 > this->addr + this->size - 1 || flags & 0x10) { return 0; } @@ -708,37 +713,42 @@ namespace vm u32 block_t::dealloc(u32 addr, const std::shared_ptr* src) { - u32 result = 0; { vm::writer_lock lock(0); - const auto found = m_map.find(addr); + const auto found = m_map.find(addr - (flags & 0x10 ? 0x1000 : 0)); if (found == m_map.end()) { return 0; } - if (src && found->second.get() != src->get()) + if (src && found->second.second.get() != src->get()) { return 0; } - // Approximate allocation size - const auto next = std::next(found); - const auto size = (next == m_map.end() ? this->addr + this->size : next->first) - found->first; + // Get allocation size + const auto size = found->second.first - (flags & 0x10 ? 0x2000 : 0); + + if (flags & 0x10) + { + // Clear guard pages + verify(HERE), g_pages[addr / 4096 - 1].flags.exchange(0) == page_allocated; + verify(HERE), g_pages[addr / 4096 + size / 4096].flags.exchange(0) == page_allocated; + } // Unmap "real" memory pages - result = _page_unmap(addr, size, found->second.get()); + verify(HERE), size == _page_unmap(addr, size, found->second.second.get()); // Remove entry m_map.erase(found); - } - return result; + return size; + } } - std::pair> block_t::get(u32 addr, u32 size) + std::pair> block_t::get(u32 addr, u32 size) { if (addr < this->addr || std::max(size, addr - this->addr + size) >= this->size) { @@ -769,12 +779,12 @@ namespace vm } // Range check - if (std::max(size, addr - found->first + size) > found->second->size()) + if (std::max(size, addr - found->first + size) > found->second.second->size()) { return {addr, nullptr}; } - return *found; + return {found->first, found->second.second}; } u32 block_t::imp_used(const vm::writer_lock&) @@ -783,7 +793,7 @@ namespace vm for (auto& entry : m_map) { - result += entry.second->size(); + result += entry.second.first - (flags & 0x10 ? 0x2000 : 0); } return result; @@ -967,7 +977,7 @@ namespace vm std::make_shared(0x20000000, 0x10000000, 0x201), // user 64k pages nullptr, // user 1m pages std::make_shared(0xC0000000, 0x10000000), // video - std::make_shared(0xD0000000, 0x10000000, 0x101), // stack + std::make_shared(0xD0000000, 0x10000000, 0x111), // stack std::make_shared(0xE0000000, 0x20000000), // SPU reserved }; } diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h index d27ee13bc0..170de102da 100644 --- a/rpcs3/Emu/Memory/vm.h +++ b/rpcs3/Emu/Memory/vm.h @@ -146,7 +146,7 @@ namespace vm class block_t final { // Mapped regions: addr -> shm handle - std::map> m_map; + std::map>> m_map; // Common mapped region for special cases std::shared_ptr m_common; @@ -173,7 +173,7 @@ namespace vm u32 dealloc(u32 addr, const std::shared_ptr* = nullptr); // Get memory at specified address (if size = 0, addr assumed exact) - std::pair> get(u32 addr, u32 size = 0); + std::pair> get(u32 addr, u32 size = 0); // Internal u32 imp_used(const vm::writer_lock&); diff --git a/rpcs3/Emu/RSX/Capture/rsx_replay.cpp b/rpcs3/Emu/RSX/Capture/rsx_replay.cpp index 8b3a23c173..c2e11ab610 100644 --- a/rpcs3/Emu/RSX/Capture/rsx_replay.cpp +++ b/rpcs3/Emu/RSX/Capture/rsx_replay.cpp @@ -7,6 +7,7 @@ #include "Emu/RSX/GSRender.h" #include +#include namespace rsx { @@ -42,7 +43,7 @@ namespace rsx { u32 fifo_size = 4; - // run through replay commands to figure out how big command buffer needs to be + // run through replay commands to figure out how big command buffer needs to be for (const auto& rc : frame->replay_commands) { const u32 count = (rc.rsx_command.first >> 18) & 0x7ff; @@ -203,7 +204,7 @@ namespace rsx } } - void rsx_replay_thread::cpu_task() + void rsx_replay_thread::on_task() { be_t context_id = allocate_context(); @@ -284,7 +285,18 @@ namespace rsx // random pause to not destroy gpu std::this_thread::sleep_for(10ms); } + } - state += cpu_flag::exit; + void rsx_replay_thread::operator()() + { + try + { + on_task(); + } + catch (const std::exception& e) + { + LOG_FATAL(RSX, "%s thrown: %s", typeid(e).name(), e.what()); + Emu.Pause(); + } } } diff --git a/rpcs3/Emu/RSX/Capture/rsx_replay.h b/rpcs3/Emu/RSX/Capture/rsx_replay.h index c2bfdded30..1bccfb5924 100644 --- a/rpcs3/Emu/RSX/Capture/rsx_replay.h +++ b/rpcs3/Emu/RSX/Capture/rsx_replay.h @@ -210,7 +210,7 @@ namespace rsx }; - class rsx_replay_thread : public ppu_thread + class rsx_replay_thread { struct rsx_context { @@ -236,9 +236,12 @@ namespace rsx public: rsx_replay_thread(std::unique_ptr&& frame_data) - : ppu_thread("Rsx Capture Replay Thread"), frame(std::move(frame_data)) {}; + :frame(std::move(frame_data)) + { + } - virtual void cpu_task() override; + void on_task(); + void operator()(); private: be_t allocate_context(); std::vector alloc_write_fifo(be_t context_id); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index ad843903bf..9263f77d78 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -134,6 +134,11 @@ namespace } } +u64 D3D12GSRender::get_cycles() +{ + return thread_ctrl::get_cycles(static_cast&>(*this)); +} + D3D12GSRender::D3D12GSRender() : GSRender() , m_d3d12_lib() diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 5478f7763e..5d2f3a08cb 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -121,6 +121,7 @@ private: ComPtr m_current_sampler_descriptors; public: + u64 get_cycles() override final; D3D12GSRender(); virtual ~D3D12GSRender(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 7f5b0e868b..71a39e7c5b 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -22,6 +22,11 @@ namespace } } +u64 GLGSRender::get_cycles() +{ + return thread_ctrl::get_cycles(static_cast&>(*this)); +} + GLGSRender::GLGSRender() : GSRender() { m_shaders_cache.reset(new gl::shader_cache(m_prog_buffer, "opengl", "v1.6")); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 29b359e655..8d5eb06f58 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -346,6 +346,7 @@ private: std::vector m_scratch_buffer; public: + u64 get_cycles() override final; GLGSRender(); private: diff --git a/rpcs3/Emu/RSX/Null/NullGSRender.cpp b/rpcs3/Emu/RSX/Null/NullGSRender.cpp index 48f2f51f3d..464ca7581a 100644 --- a/rpcs3/Emu/RSX/Null/NullGSRender.cpp +++ b/rpcs3/Emu/RSX/Null/NullGSRender.cpp @@ -2,6 +2,11 @@ #include "NullGSRender.h" #include "Emu/System.h" +u64 NullGSRender::get_cycles() +{ + return thread_ctrl::get_cycles(static_cast&>(*this)); +} + NullGSRender::NullGSRender() : GSRender() { } diff --git a/rpcs3/Emu/RSX/Null/NullGSRender.h b/rpcs3/Emu/RSX/Null/NullGSRender.h index 34adb02ada..2bfdd2e247 100644 --- a/rpcs3/Emu/RSX/Null/NullGSRender.h +++ b/rpcs3/Emu/RSX/Null/NullGSRender.h @@ -1,11 +1,12 @@ #pragma once #include "Emu/RSX/GSRender.h" -class NullGSRender final : public GSRender +class NullGSRender : public GSRender { public: + u64 get_cycles() override final; NullGSRender(); private: - bool do_method(u32 cmd, u32 value) override; + bool do_method(u32 cmd, u32 value) override final; }; diff --git a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp index 12e9e7f5e3..88245bc780 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp @@ -230,7 +230,6 @@ namespace rsx u32 ppus{0}; u32 spus{0}; - u32 rawspus{0}; f32 cpu_usage{-1.f}; u32 total_threads{0}; @@ -260,16 +259,20 @@ namespace rsx } case detail_level::medium: { - ppus = idm::select([&ppu_cycles](u32, ppu_thread& ppu) { ppu_cycles += ppu.get()->get_cycles(); }); + ppus = idm::select>([&ppu_cycles](u32, named_thread& ppu) + { + ppu_cycles += thread_ctrl::get_cycles(ppu); + }); - spus = idm::select([&spu_cycles](u32, SPUThread& spu) { spu_cycles += spu.get()->get_cycles(); }); - - rawspus = idm::select([&spu_cycles](u32, RawSPUThread& rawspu) { spu_cycles += rawspu.get()->get_cycles(); }); + spus = idm::select>([&spu_cycles](u32, named_thread& spu) + { + spu_cycles += thread_ctrl::get_cycles(spu); + }); if (!rsx_thread) rsx_thread = fxm::get(); - rsx_cycles += rsx_thread->get()->get_cycles(); + rsx_cycles += rsx_thread->get_cycles(); total_cycles = ppu_cycles + spu_cycles + rsx_cycles; cpu_usage = m_cpu_stats.get_usage(); @@ -329,7 +332,7 @@ namespace rsx " Total : %04.1f %% (%2u)\n\n" "%s\n" " RSX : %02u %%", - fps, frametime, std::string(title1_high.size(), ' '), ppu_usage, ppus, spu_usage, spus + rawspus, rsx_usage, cpu_usage, total_threads, std::string(title2.size(), ' '), rsx_load); + fps, frametime, std::string(title1_high.size(), ' '), ppu_usage, ppus, spu_usage, spus, rsx_usage, cpu_usage, total_threads, std::string(title2.size(), ' '), rsx_load); break; } } diff --git a/rpcs3/Emu/RSX/Overlays/overlays.h b/rpcs3/Emu/RSX/Overlays/overlays.h index e70d118b4e..f6353187c1 100644 --- a/rpcs3/Emu/RSX/Overlays/overlays.h +++ b/rpcs3/Emu/RSX/Overlays/overlays.h @@ -977,13 +977,13 @@ namespace rsx this->on_close = on_close; if (interactive) { - thread_ctrl::make_shared("dialog input thread", [&] + thread_ctrl::spawn("dialog input thread", [&] { if (auto error = run_input_loop()) { LOG_ERROR(RSX, "Dialog input loop exited with error code=%d", error); } - })->detach(); + }); } return CELL_OK; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index c371b2853a..2d1a7d7bc1 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include class GSRender; @@ -367,13 +368,36 @@ namespace rsx } } - void thread::on_spawn() + void thread::operator()() { - m_rsx_thread = std::this_thread::get_id(); + try + { + // Wait for startup (TODO) + while (m_rsx_thread_exiting) + { + thread_ctrl::wait_for(1000); + + if (Emu.IsStopped()) + { + return; + } + } + + on_task(); + } + catch (const std::exception& e) + { + LOG_FATAL(RSX, "%s thrown: %s", typeid(e).name(), e.what()); + Emu.Pause(); + } + + on_exit(); } void thread::on_task() { + m_rsx_thread = std::this_thread::get_id(); + if (supports_native_ui) { m_overlay_manager = fxm::make_always(); @@ -406,7 +430,7 @@ namespace rsx last_flip_time = get_system_time() - 1000000; - thread_ctrl::spawn(m_vblank_thread, "VBlank Thread", [this]() + named_thread vblank_thread("VBlank Thread", [this]() { const u64 start_time = get_system_time(); @@ -428,7 +452,7 @@ namespace rsx { ppu_cmd::sleep, 0 } }); - intr_thread->notify(); + thread_ctrl::notify(*intr_thread); } continue; @@ -441,7 +465,7 @@ namespace rsx } }); - thread_ctrl::spawn(m_decompiler_thread, "RSX Decompiler Thread", [this] + named_thread decompiler_thread ("RSX Decompiler Thread", [this] { if (g_cfg.video.disable_asynchronous_shader_compiler) { @@ -1000,22 +1024,6 @@ namespace rsx void thread::on_exit() { m_rsx_thread_exiting = true; - if (m_vblank_thread) - { - m_vblank_thread->join(); - m_vblank_thread.reset(); - } - - if (m_decompiler_thread) - { - m_decompiler_thread->join(); - m_decompiler_thread.reset(); - } - } - - std::string thread::get_name() const - { - return "rsx::thread"; } void thread::fill_scale_offset_data(void *buffer, bool flip_y) const @@ -2179,10 +2187,8 @@ namespace rsx memset(display_buffers, 0, sizeof(display_buffers)); - m_rsx_thread_exiting = false; - on_init_rsx(); - start_thread(fxm::get()); + m_rsx_thread_exiting = false; } GcmTileInfo *thread::find_tile(u32 offset, u32 location) @@ -2908,7 +2914,7 @@ namespace rsx { ppu_cmd::sleep, 0 } }); - intr_thread->notify(); + thread_ctrl::notify(*intr_thread); } sys_rsx_context_attribute(0x55555555, 0xFEC, buffer, 0, 0, 0); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index a3a69a087b..af6753cceb 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -361,11 +361,8 @@ namespace rsx struct sampled_image_descriptor_base; - class thread : public old_thread + class thread { - std::shared_ptr m_vblank_thread; - std::shared_ptr m_decompiler_thread; - u64 timestamp_ctrl = 0; u64 timestamp_subvalue = 0; @@ -434,7 +431,7 @@ namespace rsx void capture_frame(const std::string &name); public: - std::shared_ptr intr_thread; + std::shared_ptr> intr_thread; // I hate this flag, but until hle is closer to lle, its needed bool isHLE{ false }; @@ -516,13 +513,14 @@ namespace rsx bool zcull_rendering_enabled = false; bool zcull_pixel_cnt_enabled = false; + void operator()(); + virtual u64 get_cycles() = 0; + protected: thread(); virtual ~thread(); - - virtual void on_spawn() override; - virtual void on_task() override; - virtual void on_exit() override; + virtual void on_task(); + virtual void on_exit(); /** * Execute a backend local task queue @@ -534,11 +532,6 @@ namespace rsx virtual bool on_decompiler_task() { return false; } public: - virtual std::string get_name() const override; - - virtual void on_init(const std::shared_ptr&) override {} // disable start() (TODO) - virtual void on_stop() override {} // disable join() - virtual void begin(); virtual void end(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index ca8a2824aa..8757e2437a 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -502,6 +502,11 @@ namespace } } +u64 VKGSRender::get_cycles() +{ + return thread_ctrl::get_cycles(static_cast&>(*this)); +} + VKGSRender::VKGSRender() : GSRender() { u32 instance_handle = m_thread_context.createInstance("RPCS3"); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 9b5ebdfe2b..ac30ccc049 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -360,7 +360,7 @@ private: s64 m_flip_time = 0; std::vector m_draw_buffers; - + shared_mutex m_flush_queue_mutex; flush_request_task m_flush_requests; @@ -377,6 +377,7 @@ private: #endif public: + u64 get_cycles() override final; VKGSRender(); ~VKGSRender(); diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index fd46fcb4ad..db9fa82280 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -1133,7 +1133,7 @@ namespace rsx { ppu_cmd::sleep, 0 } }); - rsx->intr_thread->notify(); + thread_ctrl::notify(*rsx->intr_thread); } } diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index afdc7d2159..5874a963c8 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -486,9 +486,7 @@ bool Emulator::BootRsxCapture(const std::string& path) GetCallbacks().on_run(); m_state = system_state::running; - auto&& rsxcapture = idm::make_ptr(std::move(frame)); - rsxcapture->run(); - + fxm::make(std::move(frame)); return true; } @@ -736,7 +734,7 @@ void Emulator::Load(bool add_only) // Workaround for analyser glitches vm::falloc(0x10000, 0xf0000, vm::main); - return thread_ctrl::make_shared("SPRX Loader", [this] + return thread_ctrl::spawn("SPRX Loader", [this] { std::vector dir_queue; dir_queue.emplace_back(m_path + '/'); @@ -744,7 +742,7 @@ void Emulator::Load(bool add_only) std::vector> file_queue; file_queue.reserve(2000); - std::queue> thread_queue; + std::queue>> thread_queue; const uint max_threads = std::thread::hardware_concurrency(); // Initialize progress dialog @@ -820,12 +818,12 @@ void Emulator::Load(bool add_only) std::this_thread::sleep_for(10ms); } - thread_queue.emplace(thread_ctrl::make_shared("Worker " + std::to_string(thread_queue.size()), [_prx = std::move(prx)] + thread_queue.emplace("Worker " + std::to_string(thread_queue.size()), [_prx = std::move(prx)] { ppu_initialize(*_prx); ppu_unload_prx(*_prx); g_progr_fdone++; - })); + }); continue; } @@ -846,7 +844,7 @@ void Emulator::Load(bool add_only) { Emu.Stop(); }); - })->detach(); + }); } // Detect boot location @@ -1237,12 +1235,12 @@ void Emulator::Run() auto on_select = [](u32, cpu_thread& cpu) { - cpu.run(); + cpu.state -= cpu_flag::stop; + cpu.notify(); }; - idm::select(on_select); - idm::select(on_select); - idm::select(on_select); + idm::select>(on_select); + idm::select>(on_select); #ifdef WITH_GDB_DEBUGGER // Initialize debug server at the end of emu run sequence @@ -1273,9 +1271,8 @@ bool Emulator::Pause() cpu.state += cpu_flag::dbg_global_pause; }; - idm::select(on_select); - idm::select(on_select); - idm::select(on_select); + idm::select>(on_select); + idm::select>(on_select); return true; } @@ -1338,9 +1335,8 @@ void Emulator::Resume() cpu.notify(); }; - idm::select(on_select); - idm::select(on_select); - idm::select(on_select); + idm::select>(on_select); + idm::select>(on_select); GetCallbacks().on_resume(); } @@ -1369,23 +1365,14 @@ void Emulator::Stop(bool restart) fxm::remove(); #endif - auto e_stop = std::make_exception_ptr(cpu_flag::dbg_global_stop); - auto on_select = [&](u32, cpu_thread& cpu) { cpu.state += cpu_flag::dbg_global_stop; - - // Can't normally be null. - // Hack for a possible vm deadlock on thread creation. - if (auto thread = cpu.get()) - { - thread->set_exception(e_stop); - } + cpu.notify(); }; - idm::select(on_select); - idm::select(on_select); - idm::select(on_select); + idm::select>(on_select); + idm::select>(on_select); LOG_NOTICE(GENERAL, "All threads signaled..."); diff --git a/rpcs3/rpcs3_app.cpp b/rpcs3/rpcs3_app.cpp index 559acdb38d..00a43c6961 100644 --- a/rpcs3/rpcs3_app.cpp +++ b/rpcs3/rpcs3_app.cpp @@ -234,13 +234,13 @@ void rpcs3_app::InitializeCallbacks() { switch (video_renderer type = g_cfg.video.renderer) { - case video_renderer::null: return std::make_shared(); - case video_renderer::opengl: return std::make_shared(); + case video_renderer::null: return std::make_shared>("rsx::thread"); + case video_renderer::opengl: return std::make_shared>("rsx::thread"); #if defined(_WIN32) || defined(HAVE_VULKAN) - case video_renderer::vulkan: return std::make_shared(); + case video_renderer::vulkan: return std::make_shared>("rsx::thread"); #endif #ifdef _MSC_VER - case video_renderer::dx12: return std::make_shared(); + case video_renderer::dx12: return std::make_shared>("rsx::thread"); #endif default: fmt::throw_exception("Invalid video renderer: %s" HERE, type); } diff --git a/rpcs3/rpcs3qt/breakpoint_list.cpp b/rpcs3/rpcs3qt/breakpoint_list.cpp index e76059f39f..08aba88a86 100644 --- a/rpcs3/rpcs3qt/breakpoint_list.cpp +++ b/rpcs3/rpcs3qt/breakpoint_list.cpp @@ -17,7 +17,7 @@ breakpoint_list::breakpoint_list(QWidget* parent, breakpoint_handler* handler) : connect(this, &QListWidget::customContextMenuRequested, this, &breakpoint_list::OnBreakpointListRightClicked); } -/** +/** * It's unfortunate I need a method like this to sync these. Should ponder a cleaner way to do this. */ void breakpoint_list::UpdateCPUData(std::weak_ptr cpu, std::shared_ptr disasm) @@ -60,7 +60,7 @@ void breakpoint_list::AddBreakpoint(u32 pc) m_breakpoint_handler->AddBreakpoint(pc); const auto cpu = this->cpu.lock(); - const u32 cpu_offset = cpu->id_type() != 1 ? static_cast(*cpu).offset : 0; + const u32 cpu_offset = cpu->id_type() != 1 ? static_cast(*cpu).offset : 0; m_disasm->offset = (u8*)vm::base(cpu_offset); m_disasm->disasm(m_disasm->dump_pc = pc); diff --git a/rpcs3/rpcs3qt/debugger_frame.cpp b/rpcs3/rpcs3qt/debugger_frame.cpp index 3ddc90a7ed..68c56869af 100644 --- a/rpcs3/rpcs3qt/debugger_frame.cpp +++ b/rpcs3/rpcs3qt/debugger_frame.cpp @@ -258,7 +258,7 @@ u32 debugger_frame::GetPc() const return 0; } - return cpu->id_type() == 1 ? static_cast(cpu.get())->cia : static_cast(cpu.get())->pc; + return cpu->id_type() == 1 ? static_cast(cpu.get())->cia : static_cast(cpu.get())->pc; } void debugger_frame::UpdateUI() @@ -340,9 +340,8 @@ void debugger_frame::UpdateUnitList() { const QSignalBlocker blocker(m_choice_units); - idm::select(on_select); - idm::select(on_select); - idm::select(on_select); + idm::select>(on_select); + idm::select>(on_select); } OnSelectUnit(); @@ -369,21 +368,16 @@ void debugger_frame::OnSelectUnit() return data == &cpu; }; - if (auto ppu = idm::select(on_select)) + if (auto ppu = idm::select>(on_select)) { m_disasm = std::make_unique(CPUDisAsm_InterpreterMode); cpu = ppu.ptr; } - else if (auto spu1 = idm::select(on_select)) + else if (auto spu1 = idm::select>(on_select)) { m_disasm = std::make_unique(CPUDisAsm_InterpreterMode); cpu = spu1.ptr; } - else if (auto rspu = idm::select(on_select)) - { - m_disasm = std::make_unique(CPUDisAsm_InterpreterMode); - cpu = rspu.ptr; - } } m_debugger_list->UpdateCPUData(this->cpu, m_disasm); @@ -540,7 +534,7 @@ u64 debugger_frame::EvaluateExpression(const QString& expression) } else { - auto spu = static_cast(thread.get()); + auto spu = static_cast(thread.get()); for (int i = 0; i < 128; ++i) { diff --git a/rpcs3/rpcs3qt/debugger_list.cpp b/rpcs3/rpcs3qt/debugger_list.cpp index 5aa6ca590b..d099d0be7b 100644 --- a/rpcs3/rpcs3qt/debugger_list.cpp +++ b/rpcs3/rpcs3qt/debugger_list.cpp @@ -37,7 +37,7 @@ u32 debugger_list::GetPc() const return 0; } - return cpu->id_type() == 1 ? static_cast(cpu.get())->cia : static_cast(cpu.get())->pc; + return cpu->id_type() == 1 ? static_cast(cpu.get())->cia : static_cast(cpu.get())->pc; } u32 debugger_list::GetCenteredAddress(u32 address) const @@ -73,7 +73,7 @@ void debugger_list::ShowAddress(u32 addr) else { const bool is_spu = cpu->id_type() != 1; - const u32 cpu_offset = is_spu ? static_cast(*cpu).offset : 0; + const u32 cpu_offset = is_spu ? static_cast(*cpu).offset : 0; const u32 address_limits = is_spu ? 0x3ffff : ~0; m_pc &= address_limits; m_disasm->offset = (u8*)vm::base(cpu_offset); @@ -90,7 +90,7 @@ void debugger_list::ShowAddress(u32 addr) item(i)->setText((IsBreakpoint(m_pc) ? ">>> " : " ") + qstr(m_disasm->last_opcode)); - if (cpu->state & cpu_state_pause && m_pc == GetPc()) + if (cpu->is_paused() && m_pc == GetPc()) { item(i)->setTextColor(m_text_color_pc); item(i)->setBackgroundColor(m_color_pc); diff --git a/rpcs3/rpcs3qt/instruction_editor_dialog.cpp b/rpcs3/rpcs3qt/instruction_editor_dialog.cpp index fca254a64c..0076400ef3 100644 --- a/rpcs3/rpcs3qt/instruction_editor_dialog.cpp +++ b/rpcs3/rpcs3qt/instruction_editor_dialog.cpp @@ -17,7 +17,7 @@ instruction_editor_dialog::instruction_editor_dialog(QWidget *parent, u32 _pc, c setMinimumSize(300, sizeHint().height()); const auto cpu = _cpu.get(); - m_cpu_offset = cpu->id_type() != 1 ? static_cast(*cpu).offset : 0; + m_cpu_offset = cpu->id_type() != 1 ? static_cast(*cpu).offset : 0; QString instruction = qstr(fmt::format("%08x", vm::read32(m_cpu_offset + m_pc).value())); QVBoxLayout* vbox_panel(new QVBoxLayout()); diff --git a/rpcs3/rpcs3qt/kernel_explorer.cpp b/rpcs3/rpcs3qt/kernel_explorer.cpp index 69f2ce20ae..28d83812af 100644 --- a/rpcs3/rpcs3qt/kernel_explorer.cpp +++ b/rpcs3/rpcs3qt/kernel_explorer.cpp @@ -261,18 +261,18 @@ void kernel_explorer::Update() lv2_types.emplace_back(l_addTreeChild(root, "PPU Threads")); - idm::select([&](u32 id, ppu_thread& ppu) + idm::select>([&](u32 id, ppu_thread& ppu) { lv2_types.back().count++; - l_addTreeChild(lv2_types.back().node, qstr(fmt::format("PPU Thread: ID = 0x%08x '%s'", id, ppu.get_name()))); + l_addTreeChild(lv2_types.back().node, qstr(fmt::format("PPU Thread: ID = 0x%08x '%s'", id, ppu.ppu_name.get()))); }); lv2_types.emplace_back(l_addTreeChild(root, "SPU Threads")); - idm::select([&](u32 id, SPUThread& spu) + idm::select>([&](u32 id, spu_thread& spu) { lv2_types.back().count++; - l_addTreeChild(lv2_types.back().node, qstr(fmt::format("SPU Thread: ID = 0x%08x '%s'", id, spu.get_name()))); + l_addTreeChild(lv2_types.back().node, qstr(fmt::format("SPU Thread: ID = 0x%08x '%s'", id, spu.spu_name.get()))); }); lv2_types.emplace_back(l_addTreeChild(root, "SPU Thread Groups")); diff --git a/rpcs3/rpcs3qt/register_editor_dialog.cpp b/rpcs3/rpcs3qt/register_editor_dialog.cpp index a957ae8bf5..0242a160db 100644 --- a/rpcs3/rpcs3qt/register_editor_dialog.cpp +++ b/rpcs3/rpcs3qt/register_editor_dialog.cpp @@ -111,7 +111,7 @@ void register_editor_dialog::updateRegister(const QString& text) } else { - auto& spu = *static_cast(cpu.get()); + auto& spu = *static_cast(cpu.get()); std::string::size_type first_brk = reg.find('['); if (first_brk != std::string::npos) @@ -179,7 +179,7 @@ void register_editor_dialog::OnOkay(const std::shared_ptr& _cpu) } else { - auto& spu = *static_cast(cpu); + auto& spu = *static_cast(cpu); while (value.length() < 32) value = "0" + value; const auto first_brk = reg.find('[');