diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 29848bc4ae..abd557c89e 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -1,20 +1,91 @@ #pragma once -#include - union u128 { u64 _u64[2]; s64 _s64[2]; + + class u64_reversed_array_2 + { + u64 data[2]; + + public: + u64& operator [] (s32 index) + { + return data[1 - index]; + } + + const u64& operator [] (s32 index) const + { + return data[1 - index]; + } + + } u64r; + u32 _u32[4]; s32 _s32[4]; + + class u32_reversed_array_4 + { + u32 data[4]; + + public: + u32& operator [] (s32 index) + { + return data[3 - index]; + } + + const u32& operator [] (s32 index) const + { + return data[3 - index]; + } + + } u32r; + u16 _u16[8]; s16 _s16[8]; + + class u16_reversed_array_8 + { + u16 data[8]; + + public: + u16& operator [] (s32 index) + { + return data[7 - index]; + } + + const u16& operator [] (s32 index) const + { + return data[7 - index]; + } + + } u16r; + u8 _u8[16]; s8 _s8[16]; + + class u8_reversed_array_16 + { + u8 data[16]; + + public: + u8& operator [] (s32 index) + { + return data[15 - index]; + } + + const u8& operator [] (s32 index) const + { + return data[15 - index]; + } + + } u8r; + float _f[4]; double _d[2]; - __m128 xmm; + __m128 vf; + __m128i vi; class bit_array_128 { @@ -94,6 +165,11 @@ union u128 return ret; } + static u128 from64r(u64 _1, u64 _0 = 0) + { + return from64(_0, _1); + } + static u128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0) { u128 ret; @@ -105,12 +181,21 @@ union u128 } static u128 from32r(u32 _3, u32 _2 = 0, u32 _1 = 0, u32 _0 = 0) + { + return from32(_0, _1, _2, _3); + } + + static u128 from32p(u32 value) { u128 ret; - ret._u32[0] = _0; - ret._u32[1] = _1; - ret._u32[2] = _2; - ret._u32[3] = _3; + ret.vi = _mm_set1_epi32((int)value); + return ret; + } + + static u128 from8p(u8 value) + { + u128 ret; + ret.vi = _mm_set1_epi8((char)value); return ret; } @@ -121,9 +206,41 @@ union u128 return ret; } - void setBit(u32 bit) + static u128 fromV(__m128i value) { - 
_bit[bit] = true; + u128 ret; + ret.vi = value; + return ret; + } + + static __forceinline u128 add8(const u128& left, const u128& right) + { + return fromV(_mm_add_epi8(left.vi, right.vi)); + } + + static __forceinline u128 sub8(const u128& left, const u128& right) + { + return fromV(_mm_sub_epi8(left.vi, right.vi)); + } + + static __forceinline u128 minu8(const u128& left, const u128& right) + { + return fromV(_mm_min_epu8(left.vi, right.vi)); + } + + static __forceinline u128 eq8(const u128& left, const u128& right) + { + return fromV(_mm_cmpeq_epi8(left.vi, right.vi)); + } + + static __forceinline u128 gtu8(const u128& left, const u128& right) + { + return fromV(_mm_cmpgt_epu8(left.vi, right.vi)); + } + + static __forceinline u128 leu8(const u128& left, const u128& right) + { + return fromV(_mm_cmple_epu8(left.vi, right.vi)); } bool operator == (const u128& right) const @@ -136,19 +253,19 @@ union u128 return (_u64[0] != right._u64[0]) || (_u64[1] != right._u64[1]); } - u128 operator | (const u128& right) const + __forceinline u128 operator | (const u128& right) const { - return from64(_u64[0] | right._u64[0], _u64[1] | right._u64[1]); + return fromV(_mm_or_si128(vi, right.vi)); } - u128 operator & (const u128& right) const + __forceinline u128 operator & (const u128& right) const { - return from64(_u64[0] & right._u64[0], _u64[1] & right._u64[1]); + return fromV(_mm_and_si128(vi, right.vi)); } - u128 operator ^ (const u128& right) const + __forceinline u128 operator ^ (const u128& right) const { - return from64(_u64[0] ^ right._u64[0], _u64[1] ^ right._u64[1]); + return fromV(_mm_xor_si128(vi, right.vi)); } u128 operator ~ () const @@ -156,6 +273,12 @@ union u128 return from64(~_u64[0], ~_u64[1]); } + // result = (~left) & (right) + static __forceinline u128 andnot(const u128& left, const u128& right) + { + return fromV(_mm_andnot_si128(left.vi, right.vi)); + } + void clear() { _u64[1] = _u64[0] = 0; @@ -180,6 +303,72 @@ union u128 } }; +#ifndef 
InterlockedCompareExchange +static __forceinline u128 InterlockedCompareExchange(volatile u128* dest, u128 exch, u128 comp) +{ +#if defined(__GNUG__) + auto res = __sync_val_compare_and_swap((volatile __int128_t*)dest, (__int128_t&)comp, (__int128_t&)exch); + return (u128&)res; +#else + _InterlockedCompareExchange128((volatile long long*)dest, exch._u64[1], exch._u64[0], (long long*)&comp); + return comp; +#endif +} +#endif + +static __forceinline bool InterlockedCompareExchangeTest(volatile u128* dest, u128 exch, u128 comp) +{ +#if defined(__GNUG__) + return __sync_bool_compare_and_swap((volatile __int128_t*)dest, (__int128_t&)comp, (__int128_t&)exch); +#else + return _InterlockedCompareExchange128((volatile long long*)dest, exch._u64[1], exch._u64[0], (long long*)&comp) != 0; +#endif +} + +#ifndef InterlockedExchange +static __forceinline u128 InterlockedExchange(volatile u128* dest, u128 value) +{ + while (true) + { + const u128 old = *(u128*)dest; + if (InterlockedCompareExchangeTest(dest, value, old)) return old; + } +} +#endif + +#ifndef InterlockedOr +static __forceinline u128 InterlockedOr(volatile u128* dest, u128 value) +{ + while (true) + { + const u128 old = *(u128*)dest; + if (InterlockedCompareExchangeTest(dest, old | value, old)) return old; + } +} +#endif + +#ifndef InterlockedAnd +static __forceinline u128 InterlockedAnd(volatile u128* dest, u128 value) +{ + while (true) + { + const u128 old = *(u128*)dest; + if (InterlockedCompareExchangeTest(dest, old & value, old)) return old; + } +} +#endif + +#ifndef InterlockedXor +static __forceinline u128 InterlockedXor(volatile u128* dest, u128 value) +{ + while (true) + { + const u128 old = *(u128*)dest; + if (InterlockedCompareExchangeTest(dest, old ^ value, old)) return old; + } +} +#endif + #define re16(val) _byteswap_ushort(val) #define re32(val) _byteswap_ulong(val) #define re64(val) _byteswap_uint64(val) diff --git a/Utilities/GNU.h b/Utilities/GNU.h index b1bcbfb982..91fc34ae50 100644 --- 
a/Utilities/GNU.h +++ b/Utilities/GNU.h @@ -1,5 +1,7 @@ #pragma once +#include + #ifdef _WIN32 #define thread_local __declspec(thread) #elif __APPLE__ @@ -44,23 +46,6 @@ void strcpy_trunc(char(&dst)[size], const char(&src)[rsize]) #define _byteswap_uint64(x) __builtin_bswap64(x) #define INFINITE 0xFFFFFFFF #define _CRT_ALIGN(x) __attribute__((aligned(x))) -#define InterlockedCompareExchange(ptr,new_val,old_val) __sync_val_compare_and_swap(ptr,old_val,new_val) -#define InterlockedExchange(ptr, value) __sync_lock_test_and_set(ptr, value) -#define InterlockedOr(ptr, value) __sync_fetch_and_or(ptr, value) -#define InterlockedAnd(ptr, value) __sync_fetch_and_and(ptr, value) -#define InterlockedXor(ptr, value) __sync_fetch_and_xor(ptr, value) - -//inline int64_t InterlockedOr64(volatile int64_t *dest, int64_t val) -//{ -// int64_t olderval; -// int64_t oldval = *dest; -// do -// { -// olderval = oldval; -// oldval = __sync_val_compare_and_swap(dest, olderval | val, olderval); -// } while (olderval != oldval); -// return oldval; -//} inline uint64_t __umulh(uint64_t a, uint64_t b) { @@ -97,95 +82,208 @@ int clock_gettime(int foo, struct timespec *ts); #ifndef InterlockedCompareExchange static __forceinline uint8_t InterlockedCompareExchange(volatile uint8_t* dest, uint8_t exch, uint8_t comp) { +#if defined(__GNUG__) + return __sync_val_compare_and_swap(dest, comp, exch); +#else return _InterlockedCompareExchange8((volatile char*)dest, exch, comp); +#endif } static __forceinline uint16_t InterlockedCompareExchange(volatile uint16_t* dest, uint16_t exch, uint16_t comp) { +#if defined(__GNUG__) + return __sync_val_compare_and_swap(dest, comp, exch); +#else return _InterlockedCompareExchange16((volatile short*)dest, exch, comp); +#endif } static __forceinline uint32_t InterlockedCompareExchange(volatile uint32_t* dest, uint32_t exch, uint32_t comp) { +#if defined(__GNUG__) + return __sync_val_compare_and_swap(dest, comp, exch); +#else return 
_InterlockedCompareExchange((volatile long*)dest, exch, comp); +#endif } static __forceinline uint64_t InterlockedCompareExchange(volatile uint64_t* dest, uint64_t exch, uint64_t comp) { +#if defined(__GNUG__) + return __sync_val_compare_and_swap(dest, comp, exch); +#else return _InterlockedCompareExchange64((volatile long long*)dest, exch, comp); +#endif } #endif +static __forceinline bool InterlockedCompareExchangeTest(volatile uint8_t* dest, uint8_t exch, uint8_t comp) +{ +#if defined(__GNUG__) + return __sync_bool_compare_and_swap(dest, comp, exch); +#else + return (uint8_t)_InterlockedCompareExchange8((volatile char*)dest, exch, comp) == comp; +#endif +} +static __forceinline bool InterlockedCompareExchangeTest(volatile uint16_t* dest, uint16_t exch, uint16_t comp) +{ +#if defined(__GNUG__) + return __sync_bool_compare_and_swap(dest, comp, exch); +#else + return (uint16_t)_InterlockedCompareExchange16((volatile short*)dest, exch, comp) == comp; +#endif +} +static __forceinline bool InterlockedCompareExchangeTest(volatile uint32_t* dest, uint32_t exch, uint32_t comp) +{ +#if defined(__GNUG__) + return __sync_bool_compare_and_swap(dest, comp, exch); +#else + return (uint32_t)_InterlockedCompareExchange((volatile long*)dest, exch, comp) == comp; +#endif +} +static __forceinline bool InterlockedCompareExchangeTest(volatile uint64_t* dest, uint64_t exch, uint64_t comp) +{ +#if defined(__GNUG__) + return __sync_bool_compare_and_swap(dest, comp, exch); +#else + return (uint64_t)_InterlockedCompareExchange64((volatile long long*)dest, exch, comp) == comp; +#endif +} + #ifndef InterlockedExchange static __forceinline uint8_t InterlockedExchange(volatile uint8_t* dest, uint8_t value) { +#if defined(__GNUG__) + return __sync_lock_test_and_set(dest, value); +#else return _InterlockedExchange8((volatile char*)dest, value); +#endif } static __forceinline uint16_t InterlockedExchange(volatile uint16_t* dest, uint16_t value) { +#if defined(__GNUG__) + return 
__sync_lock_test_and_set(dest, value); +#else return _InterlockedExchange16((volatile short*)dest, value); +#endif } static __forceinline uint32_t InterlockedExchange(volatile uint32_t* dest, uint32_t value) { +#if defined(__GNUG__) + return __sync_lock_test_and_set(dest, value); +#else return _InterlockedExchange((volatile long*)dest, value); +#endif } static __forceinline uint64_t InterlockedExchange(volatile uint64_t* dest, uint64_t value) { +#if defined(__GNUG__) + return __sync_lock_test_and_set(dest, value); +#else return _InterlockedExchange64((volatile long long*)dest, value); +#endif } #endif #ifndef InterlockedOr static __forceinline uint8_t InterlockedOr(volatile uint8_t* dest, uint8_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_or(dest, value); +#else return _InterlockedOr8((volatile char*)dest, value); +#endif } static __forceinline uint16_t InterlockedOr(volatile uint16_t* dest, uint16_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_or(dest, value); +#else return _InterlockedOr16((volatile short*)dest, value); +#endif } static __forceinline uint32_t InterlockedOr(volatile uint32_t* dest, uint32_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_or(dest, value); +#else return _InterlockedOr((volatile long*)dest, value); +#endif } static __forceinline uint64_t InterlockedOr(volatile uint64_t* dest, uint64_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_or(dest, value); +#else return _InterlockedOr64((volatile long long*)dest, value); +#endif } #endif #ifndef InterlockedAnd static __forceinline uint8_t InterlockedAnd(volatile uint8_t* dest, uint8_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_and(dest, value); +#else return _InterlockedAnd8((volatile char*)dest, value); +#endif } static __forceinline uint16_t InterlockedAnd(volatile uint16_t* dest, uint16_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_and(dest, value); +#else return _InterlockedAnd16((volatile short*)dest, value); 
+#endif } static __forceinline uint32_t InterlockedAnd(volatile uint32_t* dest, uint32_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_and(dest, value); +#else return _InterlockedAnd((volatile long*)dest, value); +#endif } static __forceinline uint64_t InterlockedAnd(volatile uint64_t* dest, uint64_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_and(dest, value); +#else return _InterlockedAnd64((volatile long long*)dest, value); +#endif } #endif #ifndef InterlockedXor static __forceinline uint8_t InterlockedXor(volatile uint8_t* dest, uint8_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_xor(dest, value); +#else return _InterlockedXor8((volatile char*)dest, value); +#endif } static __forceinline uint16_t InterlockedXor(volatile uint16_t* dest, uint16_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_xor(dest, value); +#else return _InterlockedXor16((volatile short*)dest, value); +#endif } static __forceinline uint32_t InterlockedXor(volatile uint32_t* dest, uint32_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_xor(dest, value); +#else return _InterlockedXor((volatile long*)dest, value); +#endif } static __forceinline uint64_t InterlockedXor(volatile uint64_t* dest, uint64_t value) { +#if defined(__GNUG__) + return __sync_fetch_and_xor(dest, value); +#else return _InterlockedXor64((volatile long long*)dest, value); +#endif } #endif @@ -222,3 +320,17 @@ static __forceinline uint64_t cntlz64(uint64_t arg) } #endif } + +// compare 16 packed unsigned bytes (greater than) +static __forceinline __m128i _mm_cmpgt_epu8(__m128i A, __m128i B) +{ + // (A xor 0x80) > (B xor 0x80) + return _mm_cmpgt_epi8(_mm_xor_si128(A, _mm_set1_epi8(-128)), _mm_xor_si128(B, _mm_set1_epi8(-128))); +} + +// compare 16 packed unsigned bytes (less or equal) +static __forceinline __m128i _mm_cmple_epu8(__m128i A, __m128i B) +{ + // ((B xor 0x80) > (A xor 0x80)) || A == B + return _mm_or_si128(_mm_cmpgt_epu8(B, A), _mm_cmpeq_epi8(A, B)); +} 
diff --git a/Utilities/SMutex.cpp b/Utilities/SMutex.cpp index 684429b60e..e10296bd68 100644 --- a/Utilities/SMutex.cpp +++ b/Utilities/SMutex.cpp @@ -8,15 +8,3 @@ bool SM_IsAborted() { return Emu.IsStopped(); } - -void SM_Sleep() -{ - if (NamedThreadBase* t = GetCurrentNamedThread()) - { - t->WaitForAnySignal(); - } - else - { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - } -} diff --git a/Utilities/SMutex.h b/Utilities/SMutex.h index 88d22d7a68..5d50fe18e1 100644 --- a/Utilities/SMutex.h +++ b/Utilities/SMutex.h @@ -2,7 +2,6 @@ #include "Emu/Memory/atomic_type.h" bool SM_IsAborted(); -void SM_Sleep(); enum SMutexResult { @@ -20,8 +19,7 @@ template < typename T, const u64 free_value = 0, - const u64 dead_value = 0xffffffffffffffffull, - void (*wait)() = SM_Sleep + const u64 dead_value = 0xffffffffffffffffull > class SMutexBase { @@ -118,7 +116,7 @@ public: default: return res; } - if (wait != nullptr) wait(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); if (timeout && counter++ > timeout) { diff --git a/Utilities/SQueue.h b/Utilities/SQueue.h index 83d7adc3dd..7d140e3cdd 100644 --- a/Utilities/SQueue.h +++ b/Utilities/SQueue.h @@ -33,7 +33,7 @@ public: return false; } - SM_Sleep(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } @@ -60,7 +60,7 @@ public: return false; } - SM_Sleep(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } @@ -112,7 +112,7 @@ public: break; } - SM_Sleep(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index b24ad1036f..63f0e83078 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -1,4 +1,5 @@ #include "stdafx.h" +#include "Emu/System.h" #include "Log.h" #include "Thread.h" @@ -207,3 +208,58 @@ bool thread::joinable() const { return m_thr.joinable(); } + +bool waiter_map_t::is_stopped(u64 signal_id) +{ + if (Emu.IsStopped()) + { + LOG_WARNING(Log::HLE, 
"%s.waiter_op() aborted (signal_id=0x%llx)", m_name.c_str(), signal_id); + return true; + } + return false; +} + +waiter_map_t::waiter_reg_t::waiter_reg_t(waiter_map_t& map, u64 signal_id) + : signal_id(signal_id) + , thread(GetCurrentNamedThread()) + , map(map) +{ + std::lock_guard<std::mutex> lock(map.m_mutex); + + // add waiter (appended at the back of the list, under the map mutex) + map.m_waiters.push_back({ signal_id, thread }); +} + +waiter_map_t::waiter_reg_t::~waiter_reg_t() +{ + std::lock_guard<std::mutex> lock(map.m_mutex); + + // remove waiter, searching from the back; 'i-- > 0' is required because i is unsigned: 'i >= 0' is always true and would index past the front of the vector when nothing matches (or the vector is empty) + for (size_t i = map.m_waiters.size(); i-- > 0; ) + { + if (map.m_waiters[i].signal_id == signal_id && map.m_waiters[i].thread == thread) + { + map.m_waiters.erase(map.m_waiters.begin() + i); + return; + } + } + + LOG_ERROR(HLE, "%s(): waiter not found (signal_id=0x%llx, map='%s')", __FUNCTION__, signal_id, map.m_name.c_str()); + Emu.Pause(); +} + +void waiter_map_t::notify(u64 signal_id) +{ + if (!m_waiters.size()) return; + + std::lock_guard<std::mutex> lock(m_mutex); + + // find waiter and signal + for (auto& v : m_waiters) + { + if (v.signal_id == signal_id) + { + v.thread->Notify(); + } + } +} diff --git a/Utilities/Thread.h b/Utilities/Thread.h index 2566cdf43b..3e793c523d 100644 --- a/Utilities/Thread.h +++ b/Utilities/Thread.h @@ -69,4 +69,56 @@ public: void detach(); void join(); bool joinable() const; -}; \ No newline at end of file +}; + +class waiter_map_t +{ + // TODO: optimize (use custom lightweight readers-writer lock) + std::mutex m_mutex; + + struct waiter_t + { + u64 signal_id; + NamedThreadBase* thread; + }; + + std::vector<waiter_t> m_waiters; + + std::string m_name; + + struct waiter_reg_t + { + const u64 signal_id; + NamedThreadBase* const thread; + waiter_map_t& map; + + waiter_reg_t(waiter_map_t& map, u64 signal_id); + ~waiter_reg_t(); + }; + + bool is_stopped(u64 signal_id); + +public: + waiter_map_t(const char* name) : m_name(name) {} + + // wait until waiter_func() returns true, signal_id is an arbitrary number + template<typename WT> __forceinline void wait_op(u64 signal_id, const WT 
waiter_func) + { + // check condition + if (waiter_func()) return; + + // register waiter + waiter_reg_t waiter(*this, signal_id); + + while (true) + { + // wait for 1 ms or until signal arrived + waiter.thread->WaitForAnySignal(1); + if (is_stopped(signal_id)) break; + if (waiter_func()) break; + } + } + + // signal all threads waiting on wait_op() with the same signal_id (signaling only hints those threads that corresponding conditions are *probably* met) + void notify(u64 signal_id); +}; diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index fac66da910..fea5e15400 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -28,6 +28,7 @@ CPUThread::CPUThread(CPUThreadType type) , m_is_branch(false) , m_status(Stopped) , m_last_syscall(0) + , m_trace_enabled(false) { } @@ -298,7 +299,7 @@ void _se_translator(unsigned int u, EXCEPTION_POINTERS* pExp) void CPUThread::Task() { - if (Ini.HLELogging.GetValue()) LOG_NOTICE(PPU, "%s enter", CPUThread::GetFName().c_str()); + if (Ini.HLELogging.GetValue()) LOG_NOTICE(GENERAL, "%s enter", CPUThread::GetFName().c_str()); const std::vector& bp = Emu.GetBreakPoints(); @@ -337,7 +338,7 @@ void CPUThread::Task() } Step(); - //if (PC - 0x13ED4 < 0x288) trace.push_back(PC); + //if (m_trace_enabled) trace.push_back(PC); NextPc(m_dec->DecodeMemory(PC + m_offset)); if (status == CPUThread_Step) @@ -373,7 +374,25 @@ void CPUThread::Task() // TODO: linux version #endif - for (auto& v : trace) LOG_NOTICE(PPU, "PC = 0x%x", v); + if (trace.size()) + { + LOG_NOTICE(GENERAL, "Trace begin (%d elements)", (s32)trace.size()); - if (Ini.HLELogging.GetValue()) LOG_NOTICE(PPU, "%s leave", CPUThread::GetFName().c_str()); + u32 start = trace[0], prev = trace[0] - 4; + + for (auto& v : trace) //LOG_NOTICE(GENERAL, "PC = 0x%x", v); + { + if (v - prev != 4) + { + LOG_NOTICE(GENERAL, "Trace: 0x%08x .. 0x%08x", start, prev); + start = v; + } + prev = v; + } + + LOG_NOTICE(GENERAL, "Trace end: 0x%08x .. 
0x%08x", start, prev); + } + + + if (Ini.HLELogging.GetValue()) LOG_NOTICE(GENERAL, "%s leave", CPUThread::GetFName().c_str()); } diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index e70007773c..3acb2c12b3 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -119,6 +119,7 @@ public: u32 nPC; u64 cycle; bool m_is_branch; + bool m_trace_enabled; bool m_is_interrupt; bool m_has_interrupt; diff --git a/rpcs3/Emu/CPU/CPUThreadManager.cpp b/rpcs3/Emu/CPU/CPUThreadManager.cpp index af9368f338..b745da0177 100644 --- a/rpcs3/Emu/CPU/CPUThreadManager.cpp +++ b/rpcs3/Emu/CPU/CPUThreadManager.cpp @@ -135,22 +135,6 @@ RawSPUThread* CPUThreadManager::GetRawSPUThread(u32 num) } } -void CPUThreadManager::NotifyThread(const u32 id) -{ - if (!id) return; - - std::lock_guard lock(m_mtx_thread); - - for (u32 i = 0; i < m_threads.size(); i++) - { - if (m_threads[i]->GetId() == id) - { - m_threads[i]->Notify(); - return; - } - } -} - void CPUThreadManager::Exec() { std::lock_guard lock(m_mtx_thread); diff --git a/rpcs3/Emu/CPU/CPUThreadManager.h b/rpcs3/Emu/CPU/CPUThreadManager.h index 480ef940f0..d43a7506c3 100644 --- a/rpcs3/Emu/CPU/CPUThreadManager.h +++ b/rpcs3/Emu/CPU/CPUThreadManager.h @@ -17,7 +17,6 @@ public: CPUThread& AddThread(CPUThreadType type); void RemoveThread(const u32 id); - void NotifyThread(const u32 id); std::vector& GetThreads() { return m_threads; } s32 GetThreadNumById(CPUThreadType type, u32 id); diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index d903353991..7f88e6e3ec 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -251,10 +251,12 @@ private: } void BIZ(u32 intr, u32 rt, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: UNIMPLEMENTED(); return; } u32 target = branchTarget(CPU.GPR[ra]._u32[3], 0); @@ -270,10 +272,12 @@ 
private: } void BINZ(u32 intr, u32 rt, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: UNIMPLEMENTED(); return; } u32 target = branchTarget(CPU.GPR[ra]._u32[3], 0); @@ -289,10 +293,12 @@ private: } void BIHZ(u32 intr, u32 rt, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: UNIMPLEMENTED(); return; } u32 target = branchTarget(CPU.GPR[ra]._u32[3], 0); @@ -308,10 +314,12 @@ private: } void BIHNZ(u32 intr, u32 rt, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: UNIMPLEMENTED(); return; } u32 target = branchTarget(CPU.GPR[ra]._u32[3], 0); @@ -337,10 +345,12 @@ private: } void BI(u32 intr, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: UNIMPLEMENTED(); return; } u32 target = branchTarget(CPU.GPR[ra]._u32[3], 0); @@ -349,10 +359,12 @@ private: } void BISL(u32 intr, u32 rt, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: UNIMPLEMENTED(); return; } u32 target = branchTarget(CPU.GPR[ra]._u32[3], 0); diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index dafae1842b..82c2c287f6 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -9,64 +9,6 @@ using namespace asmjit::host; #define UNIMPLEMENTED() UNK(__FUNCTION__) -#define mmToU64Ptr(x) ((u64*)(&x)) -#define mmToU32Ptr(x) ((u32*)(&x)) -#define mmToU16Ptr(x) ((u16*)(&x)) -#define mmToU8Ptr(x) ((u8*)(&x)) - -struct 
g_imm_table_struct -{ - //u16 cntb_table[65536]; - - __m128i fsmb_table[65536]; - __m128i fsmh_table[256]; - __m128i fsm_table[16]; - - __m128i sldq_pshufb[32]; - __m128i srdq_pshufb[32]; - __m128i rldq_pshufb[16]; - - g_imm_table_struct() - { - /*static_assert(offsetof(g_imm_table_struct, cntb_table) == 0, "offsetof(cntb_table) != 0"); - for (u32 i = 0; i < sizeof(cntb_table) / sizeof(cntb_table[0]); i++) - { - u32 cnt_low = 0, cnt_high = 0; - for (u32 j = 0; j < 8; j++) - { - cnt_low += (i >> j) & 1; - cnt_high += (i >> (j + 8)) & 1; - } - cntb_table[i] = (cnt_high << 8) | cnt_low; - }*/ - for (u32 i = 0; i < sizeof(fsm_table) / sizeof(fsm_table[0]); i++) - { - - for (u32 j = 0; j < 4; j++) mmToU32Ptr(fsm_table[i])[j] = (i & (1 << j)) ? ~0 : 0; - } - for (u32 i = 0; i < sizeof(fsmh_table) / sizeof(fsmh_table[0]); i++) - { - for (u32 j = 0; j < 8; j++) mmToU16Ptr(fsmh_table[i])[j] = (i & (1 << j)) ? ~0 : 0; - } - for (u32 i = 0; i < sizeof(fsmb_table) / sizeof(fsmb_table[0]); i++) - { - for (u32 j = 0; j < 16; j++) mmToU8Ptr(fsmb_table[i])[j] = (i & (1 << j)) ? ~0 : 0; - } - for (u32 i = 0; i < sizeof(sldq_pshufb) / sizeof(sldq_pshufb[0]); i++) - { - for (u32 j = 0; j < 16; j++) mmToU8Ptr(sldq_pshufb[i])[j] = (u8)(j - i); - } - for (u32 i = 0; i < sizeof(srdq_pshufb) / sizeof(srdq_pshufb[0]); i++) - { - for (u32 j = 0; j < 16; j++) mmToU8Ptr(srdq_pshufb[i])[j] = (j + i > 15) ? 
0xff : (u8)(j + i); - } - for (u32 i = 0; i < sizeof(rldq_pshufb) / sizeof(rldq_pshufb[0]); i++) - { - for (u32 j = 0; j < 16; j++) mmToU8Ptr(rldq_pshufb[i])[j] = (u8)(j - i) & 0xf; - } - } -}; - class SPURecompiler; class SPURecompilerCore : public CPUDecoder @@ -1132,10 +1074,12 @@ private: } void BIZ(u32 intr, u32 rt, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: UNIMPLEMENTED(); return; } c.mov(cpu_dword(PC), CPU.PC); @@ -1151,10 +1095,12 @@ private: } void BINZ(u32 intr, u32 rt, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: UNIMPLEMENTED(); return; } c.mov(cpu_dword(PC), CPU.PC); @@ -1170,10 +1116,12 @@ private: } void BIHZ(u32 intr, u32 rt, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: UNIMPLEMENTED(); return; } c.mov(cpu_dword(PC), CPU.PC); @@ -1189,10 +1137,12 @@ private: } void BIHNZ(u32 intr, u32 rt, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: UNIMPLEMENTED(); return; } c.mov(cpu_dword(PC), CPU.PC); @@ -1239,10 +1189,12 @@ private: } void BI(u32 intr, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: UNIMPLEMENTED(); return; } c.mov(cpu_dword(PC), CPU.PC); @@ -1255,10 +1207,12 @@ private: } void BISL(u32 intr, u32 rt, u32 ra) { - if (intr) + switch (intr) { - UNIMPLEMENTED(); - return; + case 0: break; + case 0x10: break; // enable interrupts + case 0x20: break; // disable interrupts + default: 
UNIMPLEMENTED(); return; } XmmInvalidate(rt); diff --git a/rpcs3/Emu/Cell/SPURecompilerCore.cpp b/rpcs3/Emu/Cell/SPURecompilerCore.cpp index 66c1181a34..c5edf35a7b 100644 --- a/rpcs3/Emu/Cell/SPURecompilerCore.cpp +++ b/rpcs3/Emu/Cell/SPURecompilerCore.cpp @@ -13,7 +13,7 @@ #include "SPUInterpreter.h" #include "SPURecompiler.h" -static const g_imm_table_struct g_imm_table; +const g_imm_table_struct g_imm_table; SPURecompilerCore::SPURecompilerCore(SPUThread& cpu) : m_enc(new SPURecompiler(cpu, *this)) diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 04bb14b88f..5d069af957 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1018,7 +1018,7 @@ void SPUThread::StopAndSignal(u32 code) case 0x003: { - GPR[3]._u32[3] = m_code3_func(*this); + GPR[3]._u64[1] = m_code3_func(*this); break; } @@ -1161,7 +1161,7 @@ void SPUThread::StopAndSignal(u32 code) { LOG_ERROR(Log::SPU, "Unknown STOP code: 0x%x (message=0x%x)", code, SPU.Out_MBox.GetValue()); } - Stop(); + Emu.Pause(); break; } } diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 8eef83d71f..5553d2fc28 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -105,6 +105,66 @@ enum SPU_RdSigNotify2_offs = 0x1C00C, }; +#define mmToU64Ptr(x) ((u64*)(&x)) +#define mmToU32Ptr(x) ((u32*)(&x)) +#define mmToU16Ptr(x) ((u16*)(&x)) +#define mmToU8Ptr(x) ((u8*)(&x)) + +struct g_imm_table_struct +{ + //u16 cntb_table[65536]; + + __m128i fsmb_table[65536]; + __m128i fsmh_table[256]; + __m128i fsm_table[16]; + + __m128i sldq_pshufb[32]; + __m128i srdq_pshufb[32]; + __m128i rldq_pshufb[16]; + + g_imm_table_struct() + { + /*static_assert(offsetof(g_imm_table_struct, cntb_table) == 0, "offsetof(cntb_table) != 0"); + for (u32 i = 0; i < sizeof(cntb_table) / sizeof(cntb_table[0]); i++) + { + u32 cnt_low = 0, cnt_high = 0; + for (u32 j = 0; j < 8; j++) + { + cnt_low += (i >> j) & 1; + cnt_high += (i >> (j + 8)) & 1; + } + cntb_table[i] = 
(cnt_high << 8) | cnt_low; + }*/ + for (u32 i = 0; i < sizeof(fsm_table) / sizeof(fsm_table[0]); i++) + { + + for (u32 j = 0; j < 4; j++) mmToU32Ptr(fsm_table[i])[j] = (i & (1 << j)) ? ~0 : 0; + } + for (u32 i = 0; i < sizeof(fsmh_table) / sizeof(fsmh_table[0]); i++) + { + for (u32 j = 0; j < 8; j++) mmToU16Ptr(fsmh_table[i])[j] = (i & (1 << j)) ? ~0 : 0; + } + for (u32 i = 0; i < sizeof(fsmb_table) / sizeof(fsmb_table[0]); i++) + { + for (u32 j = 0; j < 16; j++) mmToU8Ptr(fsmb_table[i])[j] = (i & (1 << j)) ? ~0 : 0; + } + for (u32 i = 0; i < sizeof(sldq_pshufb) / sizeof(sldq_pshufb[0]); i++) + { + for (u32 j = 0; j < 16; j++) mmToU8Ptr(sldq_pshufb[i])[j] = (u8)(j - i); + } + for (u32 i = 0; i < sizeof(srdq_pshufb) / sizeof(srdq_pshufb[0]); i++) + { + for (u32 j = 0; j < 16; j++) mmToU8Ptr(srdq_pshufb[i])[j] = (j + i > 15) ? 0xff : (u8)(j + i); + } + for (u32 i = 0; i < sizeof(rldq_pshufb) / sizeof(rldq_pshufb[0]); i++) + { + for (u32 j = 0; j < 16; j++) mmToU8Ptr(rldq_pshufb[i])[j] = (u8)(j - i) & 0xf; + } + } +}; + +extern const g_imm_table_struct g_imm_table; + //Floating point status and control register. 
Unsure if this is one of the GPRs or SPRs //Is 128 bits, but bits 0-19, 24-28, 32-49, 56-60, 64-81, 88-92, 96-115, 120-124 are unused class FPSCR @@ -451,7 +511,7 @@ public: void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); } std::function m_custom_task; - std::function m_code3_func; + std::function m_code3_func; public: SPUThread(CPUThreadType type = CPU_THREAD_SPU); diff --git a/rpcs3/Emu/Memory/atomic_type.h b/rpcs3/Emu/Memory/atomic_type.h index 35ebf639bd..7309f9cd2b 100644 --- a/rpcs3/Emu/Memory/atomic_type.h +++ b/rpcs3/Emu/Memory/atomic_type.h @@ -49,7 +49,7 @@ public: // atomically compare data with cmp, replace with exch if equal, return true if data was replaced __forceinline bool compare_and_swap_test(const T& cmp, const T& exch) volatile { - return InterlockedCompareExchange(&data, (atomic_type&)(exch), (atomic_type&)(cmp)) == (atomic_type&)(cmp); + return InterlockedCompareExchangeTest(&data, (atomic_type&)(exch), (atomic_type&)(cmp)); } // read data with memory barrier diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 509ff8b303..52c2d01890 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -3,6 +3,7 @@ #include "Utilities/Log.h" #include "Emu/Memory/Memory.h" #include "Emu/System.h" +#include "Emu/RSX/GSManager.h" #include "RSXThread.h" #include "Emu/SysCalls/Callback.h" @@ -45,15 +46,40 @@ void RSXThread::nativeRescale(float width, float height) u32 GetAddress(u32 offset, u32 location) { + u32 res = 0; + switch(location) { - case CELL_GCM_LOCATION_LOCAL: return (u32)Memory.RSXFBMem.GetStartAddr() + offset; - case CELL_GCM_LOCATION_MAIN: return (u32)Memory.RSXIOMem.RealAddr(offset); // TODO: Error Check? + case CELL_GCM_LOCATION_LOCAL: + { + res = (u32)Memory.RSXFBMem.GetStartAddr() + offset; + break; + } + case CELL_GCM_LOCATION_MAIN: + { + res = (u32)Memory.RSXIOMem.RealAddr(offset); // TODO: Error Check? 
+ if (res == 0) + { + LOG_ERROR(RSX, "GetAddress(offset=0x%x): RSXIO memory not mapped", offset); + Emu.Pause(); + break; + } + + if (Emu.GetGSManager().GetRender().m_strict_ordering[offset >> 20]) + { + _mm_mfence(); // probably doesn't have any effect on current implementation + } + break; + } + default: + { + LOG_ERROR(RSX, "GetAddress(offset=0x%x, location=0x%x): invalid location", offset, location); + Emu.Pause(); + break; + } } - LOG_ERROR(RSX, "GetAddress(offset=0x%x, location=0x%x)", location); - assert(0); - return 0; + return res; } RSXVertexData::RSXVertexData() diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index f6763e2e80..2ad4dfe254 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -134,6 +134,7 @@ public: u32 m_report_main_addr; u32 m_local_mem_addr, m_main_mem_addr; + bool m_strict_ordering[0x1000]; public: uint m_draw_mode; diff --git a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp index 1200f9408b..cafe9b7442 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp @@ -61,11 +61,11 @@ CellGcmOffsetTable offsetTable; void InitOffsetTable() { - offsetTable.ioAddress = (u32)Memory.Alloc(3072 * sizeof(u16), 1); - offsetTable.eaAddress = (u32)Memory.Alloc(512 * sizeof(u16), 1); + offsetTable.ioAddress.set(be_t::make((u32)Memory.Alloc(3072 * sizeof(u16), 1))); + offsetTable.eaAddress.set(be_t::make((u32)Memory.Alloc(512 * sizeof(u16), 1))); - memset(vm::get_ptr(offsetTable.ioAddress), 0xFF, 3072 * sizeof(u16)); - memset(vm::get_ptr(offsetTable.eaAddress), 0xFF, 512 * sizeof(u16)); + memset(offsetTable.ioAddress.get_ptr(), 0xFF, 3072 * sizeof(u16)); + memset(offsetTable.eaAddress.get_ptr(), 0xFF, 512 * sizeof(u16)); } //---------------------------------------------------------------------------- @@ -129,7 +129,7 @@ u32 cellGcmGetNotifyDataAddress(u32 index) cellGcmGetOffsetTable(table); // If entry not in use, return NULL - 
u16 entry = vm::read16(table->eaAddress + 241 * sizeof(u16)); + u16 entry = table->eaAddress[241]; if (entry == 0xFFFF) { return 0; } @@ -814,7 +814,7 @@ s32 cellGcmAddressToOffset(u64 address, vm::ptr> offset) // Address in main memory else check else { - u16 upper12Bits = vm::read16(offsetTable.ioAddress + sizeof(u16)*(address >> 20)); + u16 upper12Bits = offsetTable.ioAddress[address >> 20]; // If the address is mapped in IO if (upper12Bits != 0xFFFF) { @@ -858,10 +858,8 @@ s32 cellGcmIoOffsetToAddress(u32 ioOffset, u64 address) return CELL_OK; } -s32 cellGcmMapEaIoAddress(u32 ea, u32 io, u32 size) +s32 gcmMapEaIoAddress(u32 ea, u32 io, u32 size, bool is_strict) { - cellGcmSys->Warning("cellGcmMapEaIoAddress(ea=0x%x, io=0x%x, size=0x%x)", ea, io, size); - if ((ea & 0xFFFFF) || (io & 0xFFFFF) || (size & 0xFFFFF)) return CELL_GCM_ERROR_FAILURE; // Check if the mapping was successfull @@ -870,8 +868,9 @@ s32 cellGcmMapEaIoAddress(u32 ea, u32 io, u32 size) // Fill the offset table for (u32 i = 0; i<(size >> 20); i++) { - vm::write16(offsetTable.ioAddress + ((ea >> 20) + i)*sizeof(u16), (io >> 20) + i); - vm::write16(offsetTable.eaAddress + ((io >> 20) + i)*sizeof(u16), (ea >> 20) + i); + offsetTable.ioAddress[(ea >> 20) + i] = (io >> 20) + i; + offsetTable.eaAddress[(io >> 20) + i] = (ea >> 20) + i; + Emu.GetGSManager().GetRender().m_strict_ordering[(io >> 20) + i] = is_strict; } } else @@ -883,10 +882,20 @@ s32 cellGcmMapEaIoAddress(u32 ea, u32 io, u32 size) return CELL_OK; } +s32 cellGcmMapEaIoAddress(u32 ea, u32 io, u32 size) +{ + cellGcmSys->Warning("cellGcmMapEaIoAddress(ea=0x%x, io=0x%x, size=0x%x)", ea, io, size); + + return gcmMapEaIoAddress(ea, io, size, false); +} + s32 cellGcmMapEaIoAddressWithFlags(u32 ea, u32 io, u32 size, u32 flags) { cellGcmSys->Warning("cellGcmMapEaIoAddressWithFlags(ea=0x%x, io=0x%x, size=0x%x, flags=0x%x)", ea, io, size, flags); - return cellGcmMapEaIoAddress(ea, io, size); // TODO: strict ordering + + assert(flags == 2 
/*CELL_GCM_IOMAP_FLAG_STRICT_ORDERING*/); + + return gcmMapEaIoAddress(ea, io, size, true); } s32 cellGcmMapLocalMemory(u64 address, u64 size) @@ -919,13 +928,14 @@ s32 cellGcmMapMainMemory(u32 ea, u32 size, vm::ptr> offset) u32 io = Memory.RSXIOMem.Map(ea, size); //check if the mapping was successfull - if (Memory.RSXIOMem.Write32(io, 0)) + if (Memory.RSXIOMem.RealAddr(io) == ea) { //fill the offset table for (u32 i = 0; i<(size >> 20); i++) { - vm::write16(offsetTable.ioAddress + ((ea >> 20) + i) * sizeof(u16), (u16)(io >> 20) + i); - vm::write16(offsetTable.eaAddress + ((io >> 20) + i) * sizeof(u16), (u16)(ea >> 20) + i); + offsetTable.ioAddress[(ea >> 20) + i] = (u16)((io >> 20) + i); + offsetTable.eaAddress[(io >> 20) + i] = (u16)((ea >> 20) + i); + Emu.GetGSManager().GetRender().m_strict_ordering[(io >> 20) + i] = false; } *offset = io; @@ -970,12 +980,12 @@ s32 cellGcmUnmapEaIoAddress(u64 ea) { u64 io; ea = ea >> 20; - io = vm::read16(offsetTable.ioAddress + (ea*sizeof(u16))); + io = offsetTable.ioAddress[ea]; for (u32 i = 0; i> 20; - ea = vm::read16(offsetTable.eaAddress + (io*sizeof(u16))); + ea = offsetTable.eaAddress[io]; for (u32 i = 0; i ioAddress; // u16* - be_t eaAddress; // u16* + vm::bptr ioAddress; + vm::bptr eaAddress; }; // Auxiliary functions diff --git a/rpcs3/Emu/SysCalls/Modules/cellMsgDialog.cpp b/rpcs3/Emu/SysCalls/Modules/cellMsgDialog.cpp index 53cf172f63..9ba08b4753 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellMsgDialog.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellMsgDialog.cpp @@ -91,8 +91,8 @@ int cellMsgDialogOpen2(u32 type, vm::ptr msgString, vm::ptr msgString, vm::ptr s32 + s32 status = (s32)g_msg_dialog_status; + Emu.GetCallbackManager().Register([callback, userData, status]() -> s32 { - callback((s32)g_msg_dialog_status, userData); + callback(status, userData); return CELL_OK; }); } diff --git a/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp b/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp index 609096d59c..55320f1e45 100644 --- 
a/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp @@ -804,7 +804,7 @@ void cellPngDec_init(Module *pxThis) #ifdef PRX_DEBUG CallAfter([]() { - libpngdec = (u32)Memory.PRXMem.AllocAlign(sizeof(libpngdec_data), 4096); + libpngdec = (u32)Memory.MainMem.AllocAlign(sizeof(libpngdec_data), 0x100000); memcpy(vm::get_ptr(libpngdec), libpngdec_data, sizeof(libpngdec_data)); libpngdec_rtoc = libpngdec + 0x49710; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index ac46056b38..0ff43cc712 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -60,6 +60,7 @@ s64 spursInit( return cb_call, u32, u32, s32, s32, s32, u32, u32, u32, u32, u32, u32, u32>(GetCurrentPPUThread(), libsre + 0x74E4, libsre_rtoc, spurs, revision, sdkVersion, nSpus, spuPriority, ppuPriority, flags, Memory.RealToVirtualAddr(prefix), prefixSize, container, Memory.RealToVirtualAddr(swlPriority), swlMaxSpu, swlIsPreem); #endif + // SPURS initialization (asserts should actually rollback and return the error instead) if (!spurs) @@ -170,13 +171,267 @@ s64 spursInit( SPU.GPR[4]._u64[1] = spurs.addr(); return SPU.FastCall(SPU.PC); #endif - //SPU.WriteLS32(0x808, 2); // hack for cellSpursModuleExit - //SPU.WriteLS32(0x260, 3); // hack for cellSpursModulePollStatus - //SPU.WriteLS32(0x264, 0x35000000); // bi $0 + + // code replacement: + { + const u32 addr = /*SPU.ReadLS32(0x1e0) +*/ 8; //SPU.ReadLS32(0x1e4); + SPU.WriteLS32(addr + 0, 3); // hack for cellSpursModulePollStatus + SPU.WriteLS32(addr + 4, 0x35000000); // bi $0 + SPU.WriteLS32(0x1e4, addr); + + SPU.WriteLS32(SPU.ReadLS32(0x1e0), 2); // hack for cellSpursModuleExit + } + + if (!isSecond) SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // first kernel + { + LV2_LOCK(0); // TODO: lock-free implementation if possible + + const u32 arg1 = SPU.GPR[3]._u32[3]; + u32 var0 = SPU.ReadLS32(0x1d8); + u32 var1 = 
SPU.ReadLS32(0x1dc); + u128 wklA = vm::read128(spurs.addr() + 0x20); + u128 wklB = vm::read128(spurs.addr() + 0x30); + u128 savedA = SPU.ReadLS128(0x180); + u128 savedB = SPU.ReadLS128(0x190); + u128 vAA = u128::sub8(wklA, savedA); + u128 vBB = u128::sub8(wklB, savedB); + u128 vM1 = {}; if (var1 <= 15) vM1.u8r[var1] = 0xff; + u128 vAABB = (arg1 == 0) ? vAA : u128::add8(vAA, u128::andnot(vM1, vBB)); + + u32 vNUM = 0x20; + u64 vRES = 0x20ull << 32; + u128 vSET = {}; + + if (spurs->m.x72.read_relaxed() & (1 << num)) + { + SPU.WriteLS8(0x1eb, 0); // var4 + if (arg1 == 0 || var1 == 0x20) + { + spurs->m.x72._and_not(1 << num); + } + } + else + { + u128 wklReadyCount0 = vm::read128(spurs.addr() + 0x0); + u128 wklReadyCount1 = vm::read128(spurs.addr() + 0x10); + u128 savedC = SPU.ReadLS128(0x1A0); + u128 savedD = SPU.ReadLS128(0x1B0); + u128 vRC = u128::add8(u128::minu8(wklReadyCount0, u128::from8p(8)), u128::minu8(wklReadyCount1, u128::from8p(8))); + u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed(); + u32 flagRecv = spurs->m.flagRecv.read_relaxed(); + u128 vFM = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]); + u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]); + u128 vFMS1 = vFM | wklSet1; + u128 vFMV1 = u128::fromV(g_imm_table.fsmb_table[(var1 < 16) ? 
0x8000 >> var1 : 0]); + u32 var5 = SPU.ReadLS32(0x1ec); + u128 wklMinCnt = vm::read128(spurs.addr() + 0x40); + u128 wklMaxCnt = vm::read128(spurs.addr() + 0x50); + u128 vCC = u128::andnot(vFMS1, u128::eq8(wklReadyCount0, {}) | u128::leu8(vRC, vAABB)) | + u128::leu8(wklMaxCnt, vAABB) | + u128::eq8(savedC, {}) | + u128::fromV(g_imm_table.fsmb_table[(~var5) >> 16]); + u128 vCCH1 = u128::andnot(vCC, + u128::from8p(0x80) & (vFMS1 | u128::gtu8(wklReadyCount0, vAABB)) | + u128::from8p(0x7f) & savedC); + u128 vCCL1 = u128::andnot(vCC, + u128::from8p(0x80) & vFMV1 | + u128::from8p(0x40) & u128::gtu8(vAABB, {}) & u128::gtu8(wklMinCnt, vAABB) | + u128::from8p(0x3c) & u128::fromV(_mm_slli_epi32(u128::sub8(u128::from8p(8), vAABB).vi, 2)) | + u128::from8p(0x02) & u128::eq8(savedD, u128::from8p((u8)var0)) | + u128::from8p(0x01)); + u128 vSTAT = + u128::from8p(0x01) & u128::gtu8(wklReadyCount0, vAABB) | + u128::from8p(0x02) & wklSet1 | + u128::from8p(0x04) & vFM; + + for (s32 i = 0, max = -1; i < 0x10; i++) + { + const s32 value = ((s32)vCCH1.u8r[i] << 8) | ((s32)vCCL1.u8r[i]); + if (value > max && (vCC.u8r[i] & 1) == 0) + { + vNUM = i; + max = value; + } + } + + if (vNUM < 0x10) + { + vRES = ((u64)vNUM << 32) | vSTAT.u8r[vNUM]; + vSET.u8r[vNUM] = 0x01; + } + + SPU.WriteLS8(0x1eb, vNUM == 0x20); + + if (!arg1 || var1 == vNUM) + { + spurs->m.wklSet1._and_not(be_t::make((u16)(vNUM < 16 ? 
0x8000 >> vNUM : 0))); + if (vNUM == flagRecv && wklFlag == 0) + { + spurs->m.wklFlag.flag.write_relaxed(be_t::make(-1)); + } + } + } + + if (arg1 == 0) + { + vm::write128(spurs.addr() + 0x20, u128::add8(vAA, vSET)); // update wklA + + SPU.WriteLS128(0x180, vSET); // update savedA + SPU.WriteLS32(0x1dc, vNUM); // update var1 + } + + if (arg1 == 1 && vNUM != var1) + { + vm::write128(spurs.addr() + 0x30, u128::add8(vBB, vSET)); // update wklB + + SPU.WriteLS128(0x190, vSET); // update savedB + } + else + { + vm::write128(spurs.addr() + 0x30, vBB); // update wklB + + SPU.WriteLS128(0x190, {}); // update savedB + } + + return vRES; + }; + else SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // second kernel + { + LV2_LOCK(0); // TODO: lock-free implementation if possible + + const u32 arg1 = SPU.GPR[3]._u32[3]; + u32 var0 = SPU.ReadLS32(0x1d8); + u32 var1 = SPU.ReadLS32(0x1dc); + u128 wklA = vm::read128(spurs.addr() + 0x20); + u128 wklB = vm::read128(spurs.addr() + 0x30); + u128 savedA = SPU.ReadLS128(0x180); + u128 savedB = SPU.ReadLS128(0x190); + u128 vAA = u128::sub8(wklA, savedA); + u128 vBB = u128::sub8(wklB, savedB); + u128 vM1 = {}; if (var1 <= 31) vM1.u8r[var1 & 0xf] = (var1 <= 15) ? 0xf : 0xf0; + u128 vAABB = (arg1 == 0) ? 
vAA : u128::add8(vAA, u128::andnot(vM1, vBB)); + + u32 vNUM = 0x20; + u64 vRES = 0x20ull << 32; + u128 vSET = {}; + + if (spurs->m.x72.read_relaxed() & (1 << num)) + { + SPU.WriteLS8(0x1eb, 0); // var4 + if (arg1 == 0 || var1 == 0x20) + { + spurs->m.x72._and_not(1 << num); + } + } + else + { + u128 wklReadyCount0 = vm::read128(spurs.addr() + 0x0); + u128 wklReadyCount1 = vm::read128(spurs.addr() + 0x10); + u128 savedC = SPU.ReadLS128(0x1A0); + u128 wklMaxCnt = vm::read128(spurs.addr() + 0x50); + u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed(); + u32 flagRecv = spurs->m.flagRecv.read_relaxed(); + u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]); + u128 wklSet2 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet2.read_relaxed()]); + u128 vABL = vAABB & u128::from8p(0x0f); + u128 vABH = u128::fromV(_mm_srli_epi32((vAABB & u128::from8p(0xf0)).vi, 4)); + u32 var5 = SPU.ReadLS32(0x1ec); + u128 v5L = u128::fromV(g_imm_table.fsmb_table[var5 >> 16]); + u128 v5H = u128::fromV(g_imm_table.fsmb_table[(u16)var5]); + u128 vFML = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]); + u128 vFMH = u128::fromV(g_imm_table.fsmb_table[(u16)((wklFlag == 0) && (flagRecv < 32) ? 
0x80000000 >> flagRecv : 0)]); + u128 vCL = u128::fromV(_mm_slli_epi32((savedC & u128::from8p(0x0f)).vi, 4)); + u128 vCH = savedC & u128::from8p(0xf0); + u128 vABRL = u128::gtu8(wklReadyCount0, vABL); + u128 vABRH = u128::gtu8(wklReadyCount1, vABH); + u128 vCCL = v5L & u128::gtu8(vCL, {}) & u128::gtu8(wklMaxCnt & u128::from8p(0x0f), vABL) & (wklSet1 | vFML | vABRL); + u128 vCCH = v5H & u128::gtu8(vCH, {}) & u128::gtu8(u128::fromV(_mm_srli_epi32((wklMaxCnt & u128::from8p(0xf0)).vi, 4)), vABH) & (wklSet2 | vFMH | vABRH); + u128 v1H = {}; if (var1 <= 31 && var1 > 15) v1H.u8r[var1 & 0xf] = 4; + u128 v1L = {}; if (var1 <= 15) v1L.u8r[var1] = 4; + u128 vCH1 = (v1H | vCH & u128::from8p(0xFB)) & vCCH; + u128 vCL1 = (v1L | vCL & u128::from8p(0xFB)) & vCCL; + u128 vSTATL = vABRL & u128::from8p(1) | wklSet1 & u128::from8p(2) | vFML & u128::from8p(4); + u128 vSTATH = vABRH & u128::from8p(1) | wklSet2 & u128::from8p(2) | vFMH & u128::from8p(4); + + s32 max = -1; + for (u32 i = 0; i < 0x10; i++) + { + const s32 value = vCL1.u8r[i]; + if (value > max && (vCCL.u8r[i] & 1)) + { + vNUM = i; + max = value; + } + } + for (u32 i = 16; i < 0x20; i++) + { + const s32 value = vCH1.u8r[i & 0xf]; // u8r has only 16 lanes: mask the index like vSTATH/v1H do + if (value > max && (vCCH.u8r[i & 0xf] & 1)) + { + vNUM = i; + max = value; + } + } + + if (vNUM < 0x10) + { + vRES = ((u64)vNUM << 32) | vSTATL.u8r[vNUM]; + vSET.u8r[vNUM] = 0x01; + } + else if (vNUM < 0x20) + { + vRES = ((u64)vNUM << 32) | vSTATH.u8r[vNUM & 0xf]; + vSET.u8r[vNUM & 0xf] = 0x10; // vNUM is 16..31 here; unmasked it would index outside the 16-byte array + } + + SPU.WriteLS8(0x1eb, vNUM == 0x20); + + if (!arg1 || var1 == vNUM) + { + spurs->m.wklSet1._and_not(be_t::make((u16)(vNUM < 16 ?
0x8000 >> vNUM : 0))); + spurs->m.wklSet2._and_not(be_t::make((u16)(vNUM < 32 ? 0x80000000 >> vNUM : 0))); // guarded: vNUM can be 0x20 here and a 32-bit shift by 32 is undefined + if (vNUM == flagRecv && wklFlag == 0) + { + spurs->m.wklFlag.flag.write_relaxed(be_t::make(-1)); + } + } + } + + if (arg1 == 0) + { + vm::write128(spurs.addr() + 0x20, u128::add8(vAA, vSET)); // update wklA + + SPU.WriteLS128(0x180, vSET); // update savedA + SPU.WriteLS32(0x1dc, vNUM); // update var1 + } + + if (arg1 == 1 && vNUM != var1) + { + vm::write128(spurs.addr() + 0x30, u128::add8(vBB, vSET)); // update wklB + + SPU.WriteLS128(0x190, vSET); // update savedB + } + else + { + vm::write128(spurs.addr() + 0x30, vBB); // update wklB + + SPU.WriteLS128(0x190, {}); // update savedB + } + + return vRES; + }; + //SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // test + //{ + // LV2_LOCK(0); + // SPU.FastCall(0x290); + // u64 vRES = SPU.GPR[3]._u64[1]; + // return vRES; + //}; SPU.WriteLS128(0x1c0, u128::from32r(0, spurs.addr(), num, 0x1f)); u32 wid = 0x20; + u32 stat = 0; while (true) { if (Emu.IsStopped()) @@ -202,7 +457,7 @@ s64 spursInit( SPU.GPR[1]._u32[3] = 0x3FFB0; SPU.GPR[3]._u32[3] = 0x100; SPU.GPR[4]._u64[1] = wkl.data; - SPU.GPR[5]._u32[3] = 0; + SPU.GPR[5]._u32[3] = stat; SPU.FastCall(0xa00); // check status: @@ -217,8 +472,11 @@ s64 spursInit( } // get workload id: - //SPU.GPR[3].clear(); - //wid = SPU.m_code3_func(SPU); + SPU.GPR[3].clear(); + assert(SPU.m_code3_func); + u64 res = SPU.m_code3_func(SPU); + stat = (u32)(res); + wid = (u32)(res >> 32); } })->GetId(); @@ -262,6 +520,7 @@ s64 spursInit( #ifdef PRX_DEBUG_XXX return cb_call>(CPU, libsre + 0x9214, libsre_rtoc, spurs); #endif + if (spurs->m.flags & SAF_UNKNOWN_FLAG_30) { return; @@ -408,10 +667,10 @@ s64 cellSpursInitialize(vm::ptr spurs, s32 nSpus, s32 spuPriority, s3 { cellSpurs->Warning("cellSpursInitialize(spurs_addr=0x%x, nSpus=%d, spuPriority=%d, ppuPriority=%d, exitIfNoWork=%d)", spurs.addr(), nSpus, spuPriority, ppuPriority, exitIfNoWork ?
1 : 0); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x8480, libsre_rtoc); #endif + return spursInit( spurs, 0, @@ -431,10 +690,10 @@ s64 cellSpursInitialize(vm::ptr spurs, s32 nSpus, s32 spuPriority, s3 s64 cellSpursInitializeWithAttribute(vm::ptr spurs, vm::ptr attr) { cellSpurs->Warning("cellSpursInitializeWithAttribute(spurs_addr=0x%x, attr_addr=0x%x)", spurs.addr(), attr.addr()); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x839C, libsre_rtoc); #endif + if (!attr) { return CELL_SPURS_CORE_ERROR_NULL_POINTER; @@ -467,10 +726,10 @@ s64 cellSpursInitializeWithAttribute(vm::ptr spurs, vm::ptr spurs, vm::ptr attr) { cellSpurs->Warning("cellSpursInitializeWithAttribute2(spurs_addr=0x%x, attr_addr=0x%x)", spurs.addr(), attr.addr()); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x82B4, libsre_rtoc); #endif + if (!attr) { return CELL_SPURS_CORE_ERROR_NULL_POINTER; @@ -504,10 +763,10 @@ s64 _cellSpursAttributeInitialize(vm::ptr attr, u32 revision { cellSpurs->Warning("_cellSpursAttributeInitialize(attr_addr=0x%x, revision=%d, sdkVersion=0x%x, nSpus=%d, spuPriority=%d, ppuPriority=%d, exitIfNoWork=%d)", attr.addr(), revision, sdkVersion, nSpus, spuPriority, ppuPriority, exitIfNoWork ? 
1 : 0); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x72CC, libsre_rtoc); #endif + if (!attr) { return CELL_SPURS_CORE_ERROR_NULL_POINTER; @@ -530,10 +789,10 @@ s64 _cellSpursAttributeInitialize(vm::ptr attr, u32 revision s64 cellSpursAttributeSetMemoryContainerForSpuThread(vm::ptr attr, u32 container) { cellSpurs->Warning("cellSpursAttributeSetMemoryContainerForSpuThread(attr_addr=0x%x, container=%d)", attr.addr(), container); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x6FF8, libsre_rtoc); #endif + if (!attr) { return CELL_SPURS_CORE_ERROR_NULL_POINTER; @@ -556,10 +815,10 @@ s64 cellSpursAttributeSetMemoryContainerForSpuThread(vm::ptr s64 cellSpursAttributeSetNamePrefix(vm::ptr attr, vm::ptr prefix, u32 size) { cellSpurs->Warning("cellSpursAttributeSetNamePrefix(attr_addr=0x%x, prefix_addr=0x%x, size=%d)", attr.addr(), prefix.addr(), size); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x7234, libsre_rtoc); #endif + if (!attr || !prefix) { return CELL_SPURS_CORE_ERROR_NULL_POINTER; @@ -582,10 +841,10 @@ s64 cellSpursAttributeSetNamePrefix(vm::ptr attr, vm::ptr attr) { cellSpurs->Warning("cellSpursAttributeEnableSpuPrintfIfAvailable(attr_addr=0x%x)", attr.addr()); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x7150, libsre_rtoc); #endif + if (!attr) { return CELL_SPURS_CORE_ERROR_NULL_POINTER; @@ -602,10 +861,10 @@ s64 cellSpursAttributeEnableSpuPrintfIfAvailable(vm::ptr att s64 cellSpursAttributeSetSpuThreadGroupType(vm::ptr attr, s32 type) { cellSpurs->Warning("cellSpursAttributeSetSpuThreadGroupType(attr_addr=0x%x, type=%d)", attr.addr(), type); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x70C8, libsre_rtoc); #endif + if (!attr) { return CELL_SPURS_CORE_ERROR_NULL_POINTER; @@ -638,10 +897,10 @@ s64 cellSpursAttributeEnableSystemWorkload(vm::ptr attr, vm: { cellSpurs->Warning("cellSpursAttributeEnableSystemWorkload(attr_addr=0x%x, 
priority_addr=0x%x, maxSpu=%d, isPreemptible_addr=0x%x)", attr.addr(), priority.addr(), maxSpu, isPreemptible.addr()); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0xF410, libsre_rtoc); #endif + if (!attr) { return CELL_SPURS_CORE_ERROR_NULL_POINTER; @@ -694,11 +953,11 @@ s64 cellSpursAttributeEnableSystemWorkload(vm::ptr attr, vm: s64 cellSpursFinalize(vm::ptr spurs) { - cellSpurs->Warning("cellSpursFinalize(spurs_addr=0x%x)", spurs.addr()); - + cellSpurs->Todo("cellSpursFinalize(spurs_addr=0x%x)", spurs.addr()); #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x8568, libsre_rtoc); #endif + return CELL_OK; } @@ -708,6 +967,7 @@ s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr po return cb_call, u32, vm::ptr, s32, bool>(GetCurrentPPUThread(), libsre + 0xAE34, libsre_rtoc, spurs, queue, port, isDynamic, wasCreated); #endif + if (!spurs || !port) { return CELL_SPURS_CORE_ERROR_NULL_POINTER; @@ -767,10 +1027,10 @@ s64 cellSpursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptrWarning("cellSpursAttachLv2EventQueue(spurs_addr=0x%x, queue=%d, port_addr=0x%x, isDynamic=%d)", spurs.addr(), queue, port.addr(), isDynamic); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0xAFE0, libsre_rtoc); #endif + return spursAttachLv2EventQueue(spurs, queue, port, isDynamic, false); } @@ -796,37 +1056,69 @@ s64 cellSpursGetSpuGuid() #endif } -s64 cellSpursGetSpuThreadGroupId(vm::ptr spurs, vm::ptr> group) +s64 cellSpursGetSpuThreadGroupId(vm::ptr spurs, vm::ptr group) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursGetSpuThreadGroupId(spurs_addr=0x%x, group_addr=0x%x)", spurs.addr(), group.addr()); +#ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x8B30, libsre_rtoc); -#else - UNIMPLEMENTED_FUNC(cellSpurs); - return CELL_OK; #endif + + if (!spurs || !group) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + if (spurs.addr() % 128) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + 
*group = spurs->m.spuTG; + return CELL_OK; } -s64 cellSpursGetNumSpuThread(vm::ptr spurs, vm::ptr> nThreads) +s64 cellSpursGetNumSpuThread(vm::ptr spurs, vm::ptr nThreads) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursGetNumSpuThread(spurs_addr=0x%x, nThreads_addr=0x%x)", spurs.addr(), nThreads.addr()); +#ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x8B78, libsre_rtoc); -#else - UNIMPLEMENTED_FUNC(cellSpurs); - return CELL_OK; #endif + + if (!spurs || !nThreads) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + if (spurs.addr() % 128) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + *nThreads = (u32)spurs->m.nSpus; + return CELL_OK; } -s64 cellSpursGetSpuThreadId(vm::ptr spurs, vm::ptr> thread, vm::ptr> nThreads) +s64 cellSpursGetSpuThreadId(vm::ptr spurs, vm::ptr thread, vm::ptr nThreads) { -#ifdef PRX_DEBUG cellSpurs->Warning("cellSpursGetSpuThreadId(spurs_addr=0x%x, thread_addr=0x%x, nThreads_addr=0x%x)", spurs.addr(), thread.addr(), nThreads.addr()); +#ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x8A98, libsre_rtoc); -#else - UNIMPLEMENTED_FUNC(cellSpurs); - return CELL_OK; #endif + + if (!spurs || !thread || !nThreads) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + if (spurs.addr() % 128) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + const u32 count = std::min(*nThreads, spurs->m.nSpus); + for (u32 i = 0; i < count; i++) + { + thread[i] = spurs->m.spus[i]; + } + *nThreads = count; + return CELL_OK; } s64 cellSpursSetMaxContention(vm::ptr spurs, u32 workloadId, u32 maxContention) @@ -912,6 +1204,7 @@ s64 spursWakeUp(vm::ptr spurs) #ifdef PRX_DEBUG_XXX return cb_call>(GetCurrentPPUThread(), libsre + 0x84D8, libsre_rtoc, spurs); #endif + if (!spurs) { return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; @@ -961,6 +1254,7 @@ s32 spursAddWorkload( spurs, wid, pm, size, data, Memory.RealToVirtualAddr(priorityTable), minContention, maxContention, nameClass.addr(), nameInstance.addr(), hook.addr(), 
hookArg.addr()); #endif + if (!spurs || !wid || !pm) { return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; @@ -1123,7 +1417,6 @@ s64 cellSpursAddWorkload( { cellSpurs->Warning("%s(spurs_addr=0x%x, wid_addr=0x%x, pm_addr=0x%x, size=0x%x, data=0x%llx, priorityTable_addr=0x%x, minContention=0x%x, maxContention=0x%x)", __FUNCTION__, spurs.addr(), wid.addr(), pm.addr(), size, data, priorityTable.addr(), minContention, maxContention); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x9ED0, libsre_rtoc); #endif @@ -1156,10 +1449,10 @@ s64 _cellSpursWorkloadAttributeInitialize( { cellSpurs->Warning("%s(attr_addr=0x%x, revision=%d, sdkVersion=0x%x, pm_addr=0x%x, size=0x%x, data=0x%llx, priorityTable_addr=0x%x, minContention=0x%x, maxContention=0x%x)", __FUNCTION__, attr.addr(), revision, sdkVersion, pm.addr(), size, data, priorityTable.addr(), minContention, maxContention); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x9F08, libsre_rtoc); #endif + if (!attr) { return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; @@ -1196,10 +1489,10 @@ s64 _cellSpursWorkloadAttributeInitialize( s64 cellSpursWorkloadAttributeSetName(vm::ptr attr, vm::ptr nameClass, vm::ptr nameInstance) { cellSpurs->Warning("%s(attr_addr=0x%x, nameClass_addr=0x%x, nameInstance_addr=0x%x)", __FUNCTION__, attr.addr(), nameClass.addr(), nameInstance.addr()); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x9664, libsre_rtoc); #endif + if (!attr) { return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; @@ -1217,10 +1510,10 @@ s64 cellSpursWorkloadAttributeSetName(vm::ptr attr, s64 cellSpursWorkloadAttributeSetShutdownCompletionEventHook(vm::ptr attr, vm::ptr hook, vm::ptr arg) { cellSpurs->Warning("%s(attr_addr=0x%x, hook_addr=0x%x, arg=0x%x)", __FUNCTION__, attr.addr(), hook.addr(), arg.addr()); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x96A4, libsre_rtoc); #endif + if (!attr || !hook) { return 
CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; @@ -1238,10 +1531,10 @@ s64 cellSpursWorkloadAttributeSetShutdownCompletionEventHook(vm::ptr spurs, vm::ptr wid, vm::ptr attr) { cellSpurs->Warning("%s(spurs_addr=0x%x, wid_addr=0x%x, attr_addr=0x%x)", __FUNCTION__, spurs.addr(), wid.addr(), attr.addr()); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0x9E14, libsre_rtoc); #endif + if (!attr) { return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; @@ -1306,10 +1599,10 @@ s64 cellSpursShutdownWorkload() s64 _cellSpursWorkloadFlagReceiver(vm::ptr spurs, u32 wid, u32 is_set) { cellSpurs->Warning("%s(spurs_addr=0x%x, wid=%d, is_set=%d)", __FUNCTION__, spurs.addr(), wid, is_set); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0xF158, libsre_rtoc); #endif + if (!spurs) { return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; @@ -1376,10 +1669,10 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr spurs, u32 wid, u32 is_set s64 cellSpursGetWorkloadFlag(vm::ptr spurs, vm::ptr> flag) { cellSpurs->Warning("%s(spurs_addr=0x%x, flag_addr=0x%x)", __FUNCTION__, spurs.addr(), flag.addr()); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0xEC00, libsre_rtoc); #endif + if (!spurs || !flag) { return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; @@ -1418,10 +1711,10 @@ s64 cellSpursGetWorkloadData() s64 cellSpursReadyCountStore(vm::ptr spurs, u32 wid, u32 value) { cellSpurs->Warning("%s(spurs_addr=0x%x, wid=%d, value=0x%x)", __FUNCTION__, spurs.addr(), wid, value); - #ifdef PRX_DEBUG_XXX return GetCurrentPPUThread().FastCall2(libsre + 0xAB2C, libsre_rtoc); #endif + if (!spurs) { return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursJq.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursJq.cpp index 8c8e50f384..ed4c73b9be 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursJq.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursJq.cpp @@ -766,7 +766,7 @@ void cellSpursJq_init(Module *pxThis) #ifdef 
PRX_DEBUG CallAfter([]() { - libspurs_jq = (u32)Memory.PRXMem.AllocAlign(sizeof(libspurs_jq_data), 4096); + libspurs_jq = (u32)Memory.MainMem.AllocAlign(sizeof(libspurs_jq_data), 0x100000); memcpy(vm::get_ptr(libspurs_jq), libspurs_jq_data, sizeof(libspurs_jq_data)); libspurs_jq_rtoc = libspurs_jq + 0x17E80; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp index 4a0e1a3ccd..af28371370 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp @@ -16,6 +16,13 @@ u32 libsre; u32 libsre_rtoc; #endif +waiter_map_t g_sync_mutex_wm("sync_mutex_wm"); +waiter_map_t g_sync_barrier_wait_wm("sync_barrier_wait_wm"); +waiter_map_t g_sync_barrier_notify_wm("sync_barrier_notify_wm"); +waiter_map_t g_sync_rwm_read_wm("sync_rwm_read_wm"); +waiter_map_t g_sync_rwm_write_wm("sync_rwm_write_wm"); +waiter_map_t g_sync_queue_wm("sync_queue_wm"); + s32 syncMutexInitialize(vm::ptr mutex) { if (!mutex) @@ -60,15 +67,10 @@ s32 cellSyncMutexLock(vm::ptr mutex) }); // prx: wait until this old value is equal to m_rel - while (order != mutex->data.read_relaxed().m_rel) + g_sync_mutex_wm.wait_op(mutex.addr(), [mutex, order]() { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack - if (Emu.IsStopped()) - { - cellSync->Warning("cellSyncMutexLock(mutex_addr=0x%x) aborted", mutex.addr()); - break; - } - } + return order == mutex->data.read_relaxed().m_rel; + }); // prx: sync mutex->data.read_sync(); @@ -116,6 +118,8 @@ s32 cellSyncMutexUnlock(vm::ptr mutex) { mutex.m_rel++; }); + + g_sync_mutex_wm.notify(mutex.addr()); return CELL_OK; } @@ -177,15 +181,12 @@ s32 cellSyncBarrierNotify(vm::ptr barrier) return CELL_SYNC_ERROR_ALIGN; } - while (barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp)) + g_sync_barrier_notify_wm.wait_op(barrier.addr(), [barrier]() { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack - if (Emu.IsStopped()) - { - 
cellSync->Warning("cellSyncBarrierNotify(barrier_addr=0x%x) aborted", barrier.addr()); - return CELL_OK; - } - } + return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp) == CELL_OK; + }); + + g_sync_barrier_wait_wm.notify(barrier.addr()); return CELL_OK; } @@ -202,7 +203,13 @@ s32 cellSyncBarrierTryNotify(vm::ptr barrier) return CELL_SYNC_ERROR_ALIGN; } - return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp); + if (s32 res = barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryNotifyOp)) + { + return res; + } + + g_sync_barrier_wait_wm.notify(barrier.addr()); + return CELL_OK; } s32 syncBarrierTryWaitOp(CellSyncBarrier::data_t& barrier) @@ -236,15 +243,12 @@ s32 cellSyncBarrierWait(vm::ptr barrier) return CELL_SYNC_ERROR_ALIGN; } - while (barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp)) + g_sync_barrier_wait_wm.wait_op(barrier.addr(), [barrier]() { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack - if (Emu.IsStopped()) - { - cellSync->Warning("cellSyncBarrierWait(barrier_addr=0x%x) aborted", barrier.addr()); - return CELL_OK; - } - } + return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp) == CELL_OK; + }); + + g_sync_barrier_notify_wm.notify(barrier.addr()); return CELL_OK; } @@ -261,7 +265,13 @@ s32 cellSyncBarrierTryWait(vm::ptr barrier) return CELL_SYNC_ERROR_ALIGN; } - return barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp); + if (s32 res = barrier->data.atomic_op_sync(CELL_OK, syncBarrierTryWaitOp)) + { + return res; + } + + g_sync_barrier_notify_wm.notify(barrier.addr()); + return CELL_OK; } s32 syncRwmInitialize(vm::ptr rwm, vm::ptr buffer, u32 buffer_size) @@ -299,6 +309,7 @@ s32 syncRwmTryReadBeginOp(CellSyncRwm::data_t& rwm) { return CELL_SYNC_ERROR_BUSY; } + rwm.m_readers++; return CELL_OK; } @@ -310,6 +321,7 @@ s32 syncRwmReadEndOp(CellSyncRwm::data_t& rwm) cellSync->Error("syncRwmReadEndOp(rwm_addr=0x%x): m_readers == 0 (m_writers=%d)", Memory.RealToVirtualAddr(&rwm), 
(u16)rwm.m_writers); return CELL_SYNC_ERROR_ABORT; } + rwm.m_readers--; return CELL_OK; } @@ -328,21 +340,22 @@ s32 cellSyncRwmRead(vm::ptr rwm, vm::ptr buffer) } // prx: increase m_readers, wait until m_writers is zero - while (rwm->data.atomic_op(CELL_OK, syncRwmTryReadBeginOp)) + g_sync_rwm_read_wm.wait_op(rwm.addr(), [rwm]() { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack - if (Emu.IsStopped()) - { - cellSync->Warning("cellSyncRwmRead(rwm_addr=0x%x) aborted", rwm.addr()); - return CELL_OK; - } - } + return rwm->data.atomic_op(CELL_OK, syncRwmTryReadBeginOp) == CELL_OK; + }); // copy data to buffer_addr memcpy(buffer.get_ptr(), rwm->m_buffer.get_ptr(), (u32)rwm->m_size); // prx: decrease m_readers (return 0x8041010C if already zero) - return rwm->data.atomic_op(CELL_OK, syncRwmReadEndOp); + if (s32 res = rwm->data.atomic_op(CELL_OK, syncRwmReadEndOp)) + { + return res; + } + + g_sync_rwm_write_wm.notify(rwm.addr()); + return CELL_OK; } s32 cellSyncRwmTryRead(vm::ptr rwm, vm::ptr buffer) @@ -365,7 +378,13 @@ s32 cellSyncRwmTryRead(vm::ptr rwm, vm::ptr buffer) memcpy(buffer.get_ptr(), rwm->m_buffer.get_ptr(), (u32)rwm->m_size); - return rwm->data.atomic_op(CELL_OK, syncRwmReadEndOp); + if (s32 res = rwm->data.atomic_op(CELL_OK, syncRwmReadEndOp)) + { + return res; + } + + g_sync_rwm_write_wm.notify(rwm.addr()); + return CELL_OK; } s32 syncRwmTryWriteBeginOp(CellSyncRwm::data_t& rwm) @@ -374,6 +393,7 @@ s32 syncRwmTryWriteBeginOp(CellSyncRwm::data_t& rwm) { return CELL_SYNC_ERROR_BUSY; } + rwm.m_writers = 1; return CELL_OK; } @@ -391,32 +411,23 @@ s32 cellSyncRwmWrite(vm::ptr rwm, vm::ptr buffer) return CELL_SYNC_ERROR_ALIGN; } - while (rwm->data.atomic_op(CELL_OK, syncRwmTryWriteBeginOp)) + g_sync_rwm_read_wm.wait_op(rwm.addr(), [rwm]() { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack - if (Emu.IsStopped()) - { - cellSync->Warning("cellSyncRwmWrite(rwm_addr=0x%x) aborted (I)", rwm.addr()); - return CELL_OK; - } - } + 
return rwm->data.atomic_op(CELL_OK, syncRwmTryWriteBeginOp) == CELL_OK; + }); // prx: wait until m_readers == 0 - while (rwm->data.read_relaxed().m_readers.ToBE()) + g_sync_rwm_write_wm.wait_op(rwm.addr(), [rwm]() { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack - if (Emu.IsStopped()) - { - cellSync->Warning("cellSyncRwmWrite(rwm_addr=0x%x) aborted (II)", rwm.addr()); - return CELL_OK; - } - } + return rwm->data.read_relaxed().m_readers.ToBE() == 0; + }); // prx: copy data from buffer_addr memcpy(rwm->m_buffer.get_ptr(), buffer.get_ptr(), (u32)rwm->m_size); // prx: sync and zeroize m_readers and m_writers rwm->data.exchange({}); + g_sync_rwm_read_wm.notify(rwm.addr()); return CELL_OK; } @@ -434,7 +445,7 @@ s32 cellSyncRwmTryWrite(vm::ptr rwm, vm::ptr buffer) } // prx: compare m_readers | m_writers with 0, return if not zero, set m_writers to 1 - if (!rwm->data.compare_and_swap_test({}, {be_t::make(0), be_t::make(1)})) + if (!rwm->data.compare_and_swap_test({}, { be_t::make(0), be_t::make(1) })) { return CELL_SYNC_ERROR_BUSY; } @@ -444,6 +455,7 @@ s32 cellSyncRwmTryWrite(vm::ptr rwm, vm::ptr buffer) // prx: sync and zeroize m_readers and m_writers rwm->data.exchange({}); + g_sync_rwm_read_wm.notify(rwm.addr()); return CELL_OK; } @@ -520,24 +532,20 @@ s32 cellSyncQueuePush(vm::ptr queue, vm::ptr buffer) assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth); u32 position; - while (queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 + g_sync_queue_wm.wait_op(queue.addr(), [queue, depth, &position]() { - return syncQueueTryPushOp(queue, depth, position); - })) - { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack - if (Emu.IsStopped()) + return CELL_OK == queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 { - cellSync->Warning("cellSyncQueuePush(queue_addr=0x%x) aborted", queue.addr()); - return CELL_OK; - } - } + return 
syncQueueTryPushOp(queue, depth, position); + }); + }); // prx: memcpy(position * m_size + m_addr, buffer_addr, m_size), sync memcpy(&queue->m_buffer[position * size], buffer.get_ptr(), size); // prx: atomically insert 0 in 5th u8 queue->data &= { be_t::make(~0), be_t::make(0xffffff) }; + g_sync_queue_wm.notify(queue.addr()); return CELL_OK; } @@ -570,6 +578,7 @@ s32 cellSyncQueueTryPush(vm::ptr queue, vm::ptr buffe memcpy(&queue->m_buffer[position * size], buffer.get_ptr(), size); queue->data &= { be_t::make(~0), be_t::make(0xffffff) }; + g_sync_queue_wm.notify(queue.addr()); return CELL_OK; } @@ -612,24 +621,20 @@ s32 cellSyncQueuePop(vm::ptr queue, vm::ptr buffer) assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth); u32 position; - while (queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 + g_sync_queue_wm.wait_op(queue.addr(), [queue, depth, &position]() { - return syncQueueTryPopOp(queue, depth, position); - })) - { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack - if (Emu.IsStopped()) + return CELL_OK == queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 { - cellSync->Warning("cellSyncQueuePop(queue_addr=0x%x) aborted", queue.addr()); - return CELL_OK; - } - } + return syncQueueTryPopOp(queue, depth, position); + }); + }); // prx: (sync), memcpy(buffer_addr, position * m_size + m_addr, m_size) memcpy(buffer.get_ptr(), &queue->m_buffer[position * size], size); // prx: atomically insert 0 in first u8 queue->data &= { be_t::make(0xffffff), be_t::make(~0) }; + g_sync_queue_wm.notify(queue.addr()); return CELL_OK; } @@ -662,6 +667,7 @@ s32 cellSyncQueueTryPop(vm::ptr queue, vm::ptr buffer) memcpy(buffer.get_ptr(), &queue->m_buffer[position * size], size); queue->data &= { be_t::make(0xffffff), be_t::make(~0) }; + g_sync_queue_wm.notify(queue.addr()); return CELL_OK; } @@ -698,21 +704,17 @@ s32 cellSyncQueuePeek(vm::ptr queue, vm::ptr 
buffer) assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth); u32 position; - while (queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 + g_sync_queue_wm.wait_op(queue.addr(), [queue, depth, &position]() { - return syncQueueTryPeekOp(queue, depth, position); - })) - { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack - if (Emu.IsStopped()) + return CELL_OK == queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32 { - cellSync->Warning("cellSyncQueuePeek(queue_addr=0x%x) aborted", queue.addr()); - return CELL_OK; - } - } + return syncQueueTryPeekOp(queue, depth, position); + }); + }); memcpy(buffer.get_ptr(), &queue->m_buffer[position * size], size); queue->data &= { be_t::make(0xffffff), be_t::make(~0) }; + g_sync_queue_wm.notify(queue.addr()); return CELL_OK; } @@ -745,6 +747,7 @@ s32 cellSyncQueueTryPeek(vm::ptr queue, vm::ptr buffer) memcpy(buffer.get_ptr(), &queue->m_buffer[position * size], size); queue->data &= { be_t::make(0xffffff), be_t::make(~0) }; + g_sync_queue_wm.notify(queue.addr()); return CELL_OK; } @@ -787,47 +790,40 @@ s32 cellSyncQueueClear(vm::ptr queue) assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth); // TODO: optimize if possible - while (queue->data.atomic_op(CELL_OK, [depth](CellSyncQueue::data_t& queue) -> s32 + g_sync_queue_wm.wait_op(queue.addr(), [queue, depth]() { - const u32 v1 = (u32)queue.m_v1; - // prx: extract first u8, repeat if not zero, insert 1 - if (v1 >> 24) + return CELL_OK == queue->data.atomic_op(CELL_OK, [depth](CellSyncQueue::data_t& queue) -> s32 { - return CELL_SYNC_ERROR_BUSY; - } - queue.m_v1 = v1 | 0x1000000; - return CELL_OK; - })) - { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack - if (Emu.IsStopped()) - { - cellSync->Warning("cellSyncQueueClear(queue_addr=0x%x) aborted (I)", queue.addr()); - return CELL_OK; - } - } + const u32 v1 = 
(u32)queue.m_v1; + // prx: extract first u8, repeat if not zero, insert 1 + if (v1 >> 24) + { + return CELL_SYNC_ERROR_BUSY; + } - while (queue->data.atomic_op(CELL_OK, [depth](CellSyncQueue::data_t& queue) -> s32 - { - const u32 v2 = (u32)queue.m_v2; - // prx: extract 5th u8, repeat if not zero, insert 1 - if (v2 >> 24) - { - return CELL_SYNC_ERROR_BUSY; - } - queue.m_v2 = v2 | 0x1000000; - return CELL_OK; - })) - { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); // hack - if (Emu.IsStopped()) - { - cellSync->Warning("cellSyncQueueClear(queue_addr=0x%x) aborted (II)", queue.addr()); + queue.m_v1 = v1 | 0x1000000; return CELL_OK; - } - } + }); + }); + + g_sync_queue_wm.wait_op(queue.addr(), [queue, depth]() + { + return CELL_OK == queue->data.atomic_op(CELL_OK, [depth](CellSyncQueue::data_t& queue) -> s32 + { + const u32 v2 = (u32)queue.m_v2; + // prx: extract 5th u8, repeat if not zero, insert 1 + if (v2 >> 24) + { + return CELL_SYNC_ERROR_BUSY; + } + + queue.m_v2 = v2 | 0x1000000; + return CELL_OK; + }); + }); queue->data.exchange({}); + g_sync_queue_wm.notify(queue.addr()); return CELL_OK; } @@ -1990,7 +1986,7 @@ void cellSync_init(Module *pxThis) #ifdef PRX_DEBUG CallAfter([]() { - libsre = (u32)Memory.PRXMem.AllocAlign(sizeof(libsre_data), 4096); + libsre = (u32)Memory.MainMem.AllocAlign(sizeof(libsre_data), 0x100000); memcpy(vm::get_ptr(libsre), libsre_data, sizeof(libsre_data)); libsre_rtoc = libsre + 0x399B0; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSync2.cpp b/rpcs3/Emu/SysCalls/Modules/cellSync2.cpp index de4ca912e0..7359814b89 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSync2.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSync2.cpp @@ -458,7 +458,7 @@ void cellSync2_init(Module *pxThis) #ifdef PRX_DEBUG CallAfter([]() { - libsync2 = (u32)Memory.PRXMem.AllocAlign(sizeof(libsync2_data), 4096); + libsync2 = (u32)Memory.MainMem.AllocAlign(sizeof(libsync2_data), 0x100000); memcpy(vm::get_ptr(libsync2), libsync2_data, sizeof(libsync2_data)); 
libsync2_rtoc = libsync2 + 0xF280; diff --git a/rpcs3/Emu/SysCalls/lv2/sys_cond.cpp b/rpcs3/Emu/SysCalls/lv2/sys_cond.cpp index a51b9f977a..3d7f11570a 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_cond.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_cond.cpp @@ -80,9 +80,7 @@ s32 sys_cond_signal(u32 cond_id) if (u32 target = (mutex->protocol == SYS_SYNC_PRIORITY ? cond->m_queue.pop_prio() : cond->m_queue.pop())) { - //cond->signal_stamp = get_system_time(); cond->signal.lock(target); - Emu.GetCPU().NotifyThread(target); if (Emu.IsStopped()) { @@ -108,9 +106,7 @@ s32 sys_cond_signal_all(u32 cond_id) while (u32 target = (mutex->protocol == SYS_SYNC_PRIORITY ? cond->m_queue.pop_prio() : cond->m_queue.pop())) { cond->signaler = GetCurrentPPUThread().GetId(); - //cond->signal_stamp = get_system_time(); cond->signal.lock(target); - Emu.GetCPU().NotifyThread(target); if (Emu.IsStopped()) { @@ -147,9 +143,7 @@ s32 sys_cond_signal_to(u32 cond_id, u32 thread_id) u32 target = thread_id; { - //cond->signal_stamp = get_system_time(); cond->signal.lock(target); - Emu.GetCPU().NotifyThread(target); } if (Emu.IsStopped()) @@ -195,7 +189,6 @@ s32 sys_cond_wait(u32 cond_id, u64 timeout) { if (cond->signal.unlock(tid, tid) == SMR_OK) { - //const u64 stamp2 = get_system_time(); if (SMutexResult res = mutex->m_mutex.trylock(tid)) { if (res != SMR_FAILED) @@ -215,14 +208,11 @@ s32 sys_cond_wait(u32 cond_id, u64 timeout) } } mutex->recursive = 1; - const volatile u64 stamp = cond->signal_stamp; cond->signal.unlock(tid); - Emu.GetCPU().NotifyThread(cond->signaler); - //ConLog.Write("sys_cond_wait(): signal latency %lld (minimum %lld)", get_system_time() - stamp, stamp2 - stamp); return CELL_OK; } - SM_Sleep(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); if (counter++ > max_counter) { diff --git a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp index f9e931ad60..bb7f9fc910 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp +++ 
b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp @@ -34,7 +34,7 @@ void sys_ppu_thread_exit(PPUThread& CPU, u64 errorcode) void sys_internal_ppu_thread_exit(PPUThread& CPU, u64 errorcode) { - sys_ppu_thread.Log("sys_internal_ppu_thread_exit(0x%llx)", errorcode); + sys_ppu_thread.Warning("sys_internal_ppu_thread_exit(0x%llx)", errorcode); ppu_thread_exit(CPU, errorcode); } diff --git a/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp b/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp index 7d26df4c05..722d19e4ad 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_semaphore.cpp @@ -119,11 +119,10 @@ s32 sys_semaphore_wait(u32 sem_id, u64 timeout) continue; } sem->signal = 0; - // TODO: notify signaler return CELL_OK; } - SM_Sleep(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); } } @@ -182,7 +181,7 @@ s32 sys_semaphore_post(u32 sem_id, s32 count) if (sem->signal && sem->m_queue.count()) { - SM_Sleep(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } @@ -190,7 +189,6 @@ s32 sys_semaphore_post(u32 sem_id, s32 count) { count--; sem->signal = target; - Emu.GetCPU().NotifyThread(target); } else {