1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-25 12:12:50 +01:00

Implement class cond_x16

Use as reservation notifier
Limited to 16 threads but allows more precise control of contention
This commit is contained in:
Nekotekina 2018-11-25 19:43:02 +03:00
parent 7f1cbb1136
commit febe4d4a10
4 changed files with 243 additions and 8 deletions

View File

@ -293,3 +293,106 @@ void cond_one::imp_notify() noexcept
futex(&m_value, FUTEX_WAKE_PRIVATE, 1);
#endif
}
// Slow path of cond_x16::wait(): blocks the calling thread on its slot until the
// slot is signaled or the timeout expires.
//
// _new     - value of m_cvx16 observed by the caller; on the futex path it is the
//            expected value for the first wait.
// slot     - index of the caller's slot (the per-slot bits are c_wait/c_lock/c_sig
//            shifted left by this index).
// _timeout - timeout in microseconds (it is split into seconds/nanoseconds below);
//            values above cond_variable::max_timeout mean "wait without a timeout".
//
// Returns the result of retire(slot) when the wait ended by wake-up or timeout on
// the futex path (retire() reports whether the slot's lock bit was already set),
// false on a Windows timeout that was not signaled, and true otherwise.
bool cond_x16::imp_wait(u32 _new, u32 slot, u64 _timeout) noexcept
{
// NOTE(review): lock_bit is never referenced in this function, and wait_bit is only
// used on the non-Windows path - candidates for cleanup.
const u32 wait_bit = c_wait << slot;
const u32 lock_bit = c_lock << slot;
const bool is_inf = _timeout > cond_variable::max_timeout;
#ifdef _WIN32
LARGE_INTEGER timeout;
// Microseconds scaled by -10; presumably a negative value denotes a relative
// interval in 100 ns units for the NT API - TODO confirm.
timeout.QuadPart = _timeout * -10;
// A zero timeout skips the syscall entirely and is handled as an immediate timeout.
if (HRESULT rc = _timeout ? NtWaitForKeyedEvent(nullptr, &m_cvx16, false, is_inf ? nullptr : &timeout) : WAIT_TIMEOUT)
{
// Any non-zero result other than a timeout is treated as a fatal error
verify(HERE), rc == WAIT_TIMEOUT;
// Leave the waiting state and find out whether the slot got signaled meanwhile
const bool signaled = this->retire(slot);
// If it was signaled, a keyed-event release is expected to be issued for this waiter
// (imp_notify releases once per waiting slot); poll with a zero timeout until that
// release has been consumed, yielding the CPU between attempts.
while (signaled)
{
timeout.QuadPart = 0;
if (HRESULT rc2 = NtWaitForKeyedEvent(nullptr, &m_cvx16, false, &timeout))
{
verify(HERE), rc2 == WAIT_TIMEOUT;
SwitchToThread();
continue;
}
return true;
}
// Timed out without being signaled
return false;
}
// The keyed-event wait succeeded; retire() tells whether this slot was really signaled
if (!this->retire(slot))
{
// Stolen notification: the wake-up was consumed although this slot was not signaled,
// so issue a release again to restore the balance. Note that true is still returned.
NtReleaseKeyedEvent(nullptr, &m_cvx16, false, nullptr);
}
#else
// Split the microsecond timeout into seconds and nanoseconds
timespec timeout;
timeout.tv_sec = _timeout / 1000000;
timeout.tv_nsec = (_timeout % 1000000) * 1000;
// Keep waiting while the slot is not in the c_sig state (both bits set); the value
// is reloaded from m_cvx16 after every failed attempt.
// NOTE(review): the same timeout is passed again on each retry, so the total wait
// may exceed _timeout if m_cvx16 changes often - confirm this is acceptable.
for (u32 value = _new; ((value >> slot) & c_sig) != c_sig; value = m_cvx16)
{
// Sleep only if m_cvx16 still holds the expected value; err is 0 on success, errno otherwise
const int err = futex(&m_cvx16, FUTEX_WAIT_PRIVATE, value, is_inf ? nullptr : &timeout) == 0
? 0
: errno;
// Normal or timeout wakeup: leave the waiting state and report whether the slot was signaled
if (!err || (!is_inf && err == ETIMEDOUT))
{
return this->retire(slot);
}
// Not a wakeup: the only accepted error is EAGAIN, after which the value is reloaded.
// NOTE(review): any other errno (for example EINTR, if the futex implementation
// can report it) fails this check - verify against the futex wrapper in use.
verify(HERE), err == EAGAIN;
}
// The slot was found already signaled without sleeping:
// convert c_sig to c_lock by clearing the wait bit
m_cvx16 &= ~wait_bit;
#endif
return true;
}
void cond_x16::imp_notify() noexcept
{
auto [old, ok] = m_cvx16.fetch_op([](u32& v)
{
const u32 lock_mask = v >> 16;
const u32 wait_mask = v & 0xffff;
if (const u32 sig_mask = lock_mask ^ wait_mask)
{
v |= sig_mask | sig_mask << 16;
return true;
}
return false;
});
// Determine if some waiters need a syscall notification
const u32 wait_mask = old & (~old >> 16);
if (UNLIKELY(!ok || !wait_mask))
{
return;
}
#ifdef _WIN32
for (u32 i = 0; i < 16; i++)
{
if ((wait_mask >> i) & 1)
NtReleaseKeyedEvent(nullptr, &m_cvx16, false, nullptr);
}
#else
futex(&m_cvx16, FUTEX_WAKE_PRIVATE, INT_MAX);
#endif
}

View File

@ -3,6 +3,7 @@
#include "types.h"
#include "Atomic.h"
#include <shared_mutex>
#include "asm.h"
// Lightweight condition variable
class cond_variable
@ -179,3 +180,137 @@ public:
imp_notify();
}
};
// Packed version of cond_one, supports up to 16 readers.
class cond_x16
{
// For information, shouldn't modify
enum : u32
{
c_wait = 1,
c_lock = 1 << 16,
c_sig = 1 << 16 | 1,
};
// Split in 16-bit parts for convenient bit combining
atomic_t<u32> m_cvx16{0};
// Effectively unused, only counts readers
atomic_t<u32> m_total{0};
class lock_x16
{
cond_x16* m_this;
u32 m_slot;
friend class cond_x16;
public:
lock_x16(cond_x16* _this) noexcept
: m_this(_this)
{
// Spin if the number of readers exceeds 16
while (UNLIKELY(m_this->m_total++ >= 16))
m_this->m_total--;
// Lock and remember obtained slot index
m_slot = m_this->m_cvx16.atomic_op([](u32& cvx16)
{
// Combine used bits and invert to find least significant bit unused
const u32 slot = utils::cnttz32(~((cvx16 & 0xffff) | (cvx16 >> 16)), true);
// Set lock bit
cvx16 |= c_lock << slot;
AUDIT(slot < 16);
return slot;
});
}
lock_x16(const lock_x16&) = delete;
lock_x16& operator=(const lock_x16&) = delete;
~lock_x16()
{
// Clear the slot
m_this->m_cvx16 &= ~((c_wait | c_lock) << m_slot);
m_this->m_total -= 1;
}
bool wait(u64 usec_timeout = -1) const noexcept
{
return m_this->wait(*this, usec_timeout);
}
};
bool imp_wait(u32 _new, u32 slot, u64 _timeout) noexcept;
void imp_notify() noexcept;
bool retire(u32 slot) noexcept
{
const u32 wait_bit = c_wait << slot;
const u32 lock_bit = c_lock << slot;
return m_cvx16.atomic_op([=](u32& cvx16)
{
if (cvx16 & lock_bit)
{
cvx16 &= ~wait_bit;
return true;
}
cvx16 |= lock_bit;
cvx16 &= ~wait_bit;
return false;
});
}
public:
constexpr cond_x16() = default;
lock_x16 lock_one() noexcept
{
return lock_x16(this);
}
bool wait(lock_x16 const& lock, u64 usec_timeout = -1) noexcept
{
AUDIT(lock.m_this == this);
const u32 wait_bit = c_wait << lock.m_slot;
const u32 lock_bit = c_lock << lock.m_slot;
// Change state from c_lock to c_wait
const u32 new_ = m_cvx16.atomic_op([=](u32& cvx16)
{
if (cvx16 & wait_bit)
{
cvx16 &= ~wait_bit;
}
else
{
cvx16 |= wait_bit;
cvx16 &= ~lock_bit;
}
return cvx16;
});
if (new_ & lock_bit)
{
// Already signaled, return without waiting
return true;
}
return imp_wait(new_, lock.m_slot, usec_timeout);
}
void notify_all() noexcept
{
if (LIKELY(!m_cvx16))
return;
imp_notify();
}
};

View File

@ -22,7 +22,6 @@
#include <cfenv>
#include <atomic>
#include <thread>
#include <shared_mutex>
const bool s_use_ssse3 =
#ifdef _MSC_VER
@ -1710,9 +1709,7 @@ s64 spu_thread::get_ch_value(u32 ch)
fmt::throw_exception("Not supported: event mask 0x%x" HERE, mask1);
}
std::shared_lock pseudo_lock(vm::reservation_notifier(raddr, 128), std::try_to_lock);
verify(HERE), pseudo_lock;
const auto pseudo_lock = vm::reservation_notifier(raddr, 128).lock_one();
while (res = get_events(), !res)
{
@ -1721,7 +1718,7 @@ s64 spu_thread::get_ch_value(u32 ch)
return -1;
}
pseudo_lock.mutex()->wait(100);
pseudo_lock.wait(100);
}
return res;

View File

@ -7,7 +7,7 @@
class shared_mutex;
class cpu_thread;
class notifier;
class cond_x16;
namespace vm
{
@ -105,9 +105,9 @@ namespace vm
}
// Get reservation sync variable
inline notifier& reservation_notifier(u32 addr, u32 size)
inline cond_x16& reservation_notifier(u32 addr, u32 size)
{
return *reinterpret_cast<notifier*>(g_reservations2 + addr / 128 * 8);
return *reinterpret_cast<cond_x16*>(g_reservations2 + addr / 128 * 8);
}
void reservation_lock_internal(atomic_t<u64>&);