mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 02:32:36 +01:00
Build transactions at runtime
Drop _xbegin family intrinsics due to bad codegen. Implemented `notifier` class, replacing vm::notify. Minor optimization: detach transactions from the global mutex on the TSX path. Minor optimization: don't acquire vm::passive_lock on PPU on the TSX path.
This commit is contained in:
parent
fd525ae1cf
commit
367f039523
@ -7,14 +7,14 @@ asmjit::JitRuntime& asmjit::get_global_runtime()
|
||||
return g_rt;
|
||||
}
|
||||
|
||||
void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label abort)
|
||||
void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback)
|
||||
{
|
||||
Label fall = c.newLabel();
|
||||
Label begin = c.newLabel();
|
||||
c.jmp(begin);
|
||||
c.bind(fall);
|
||||
c.test(x86::eax, _XABORT_RETRY);
|
||||
c.jz(abort);
|
||||
c.jz(fallback);
|
||||
c.align(kAlignCode, 16);
|
||||
c.bind(begin);
|
||||
c.xbegin(fall);
|
||||
@ -25,8 +25,6 @@ void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code
|
||||
c.db(0xc6);
|
||||
c.db(0xf8);
|
||||
c.db(code);
|
||||
c.xor_(x86::eax, x86::eax);
|
||||
c.ret();
|
||||
}
|
||||
|
||||
#ifdef LLVM_AVAILABLE
|
||||
|
@ -12,9 +12,9 @@ namespace asmjit
|
||||
JitRuntime& get_global_runtime();
|
||||
|
||||
// Emit xbegin and adjacent loop
|
||||
void build_transaction_enter(X86Assembler& c, Label abort);
|
||||
void build_transaction_enter(X86Assembler& c, Label fallback);
|
||||
|
||||
// Emit xabort and return zero
|
||||
// Emit xabort
|
||||
void build_transaction_abort(X86Assembler& c, unsigned char code);
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,7 @@ bool cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
|
||||
LARGE_INTEGER timeout;
|
||||
timeout.QuadPart = _timeout * -10;
|
||||
|
||||
if (HRESULT rc = NtWaitForKeyedEvent(nullptr, &m_value, false, is_inf ? nullptr : &timeout))
|
||||
if (HRESULT rc = _timeout ? NtWaitForKeyedEvent(nullptr, &m_value, false, is_inf ? nullptr : &timeout) : WAIT_TIMEOUT)
|
||||
{
|
||||
verify(HERE), rc == WAIT_TIMEOUT;
|
||||
|
||||
@ -32,6 +32,12 @@ bool cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
|
||||
|
||||
return true;
|
||||
#else
|
||||
if (!_timeout)
|
||||
{
|
||||
verify(HERE), m_value--;
|
||||
return false;
|
||||
}
|
||||
|
||||
timespec timeout;
|
||||
timeout.tv_sec = _timeout / 1000000;
|
||||
timeout.tv_nsec = (_timeout % 1000000) * 1000;
|
||||
|
@ -9,6 +9,8 @@ class cond_variable
|
||||
// Internal waiter counter
|
||||
atomic_t<u32> m_value{0};
|
||||
|
||||
friend class notifier;
|
||||
|
||||
protected:
|
||||
// Internal waiting function
|
||||
bool imp_wait(u32 _old, u64 _timeout) noexcept;
|
||||
@ -50,3 +52,94 @@ public:
|
||||
|
||||
static constexpr u64 max_timeout = u64{UINT32_MAX} / 1000 * 1000000;
|
||||
};
|
||||
|
||||
// Pair of a fake shared mutex (only limited shared locking) and a condition variable
|
||||
class notifier
|
||||
{
|
||||
atomic_t<u32> m_counter{0};
|
||||
cond_variable m_cond;
|
||||
|
||||
public:
|
||||
constexpr notifier() = default;
|
||||
|
||||
void lock_shared()
|
||||
{
|
||||
m_counter++;
|
||||
}
|
||||
|
||||
void unlock_shared()
|
||||
{
|
||||
const u32 counter = --m_counter;
|
||||
|
||||
if (counter & 0x7f)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (counter >= 0x80)
|
||||
{
|
||||
const u32 _old = m_counter.atomic_op([](u32& value) -> u32
|
||||
{
|
||||
if (value & 0x7f)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
return std::exchange(value, 0) >> 7;
|
||||
});
|
||||
|
||||
if (_old && m_cond.m_value)
|
||||
{
|
||||
m_cond.imp_wake(_old);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
explicit_bool_t wait(u64 usec_timeout = -1)
|
||||
{
|
||||
const u32 _old = m_cond.m_value.fetch_add(1);
|
||||
|
||||
if (0x80 <= m_counter.fetch_op([](u32& value)
|
||||
{
|
||||
value--;
|
||||
|
||||
if (value >= 0x80)
|
||||
{
|
||||
value -= 0x80;
|
||||
}
|
||||
}))
|
||||
{
|
||||
// Return without waiting
|
||||
m_cond.imp_wait(_old, 0);
|
||||
m_counter++;
|
||||
return true;
|
||||
}
|
||||
|
||||
const bool res = m_cond.imp_wait(_old, usec_timeout);
|
||||
m_counter++;
|
||||
return res;
|
||||
}
|
||||
|
||||
void notify_all()
|
||||
{
|
||||
if (m_counter)
|
||||
{
|
||||
m_counter.atomic_op([](u32& value)
|
||||
{
|
||||
if (const u32 add = value & 0x7f)
|
||||
{
|
||||
// Mutex is locked in shared mode
|
||||
value += add << 7;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Mutex is unlocked
|
||||
value = 0;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Notify after imaginary "exclusive" lock+unlock
|
||||
m_cond.notify_all();
|
||||
}
|
||||
};
|
||||
|
@ -41,28 +41,5 @@ namespace utils
|
||||
|
||||
bool has_xop();
|
||||
|
||||
// Try to start a hardware (RTM) transaction.
// Keeps retrying while the abort status carries _XABORT_RETRY.
// Returns true once the transaction has started; otherwise returns false
// and, if `out` is provided, stores the abort status there.
FORCE_INLINE bool transaction_enter(uint* out = nullptr)
{
	for (;;)
	{
		const uint code = _xbegin();

		if (code == _XBEGIN_STARTED)
		{
			return true;
		}

		if (code & _XABORT_RETRY)
		{
			// The abort status suggests that a retry may succeed
			continue;
		}

		if (out != nullptr)
		{
			*out = code;
		}

		return false;
	}
}
|
||||
|
||||
std::string get_system_info();
|
||||
}
|
||||
|
@ -122,7 +122,7 @@ if(NOT MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--allow-multiple-definition")
|
||||
endif()
|
||||
|
||||
add_compile_options(-msse -msse2 -mcx16 -mrtm)
|
||||
add_compile_options(-msse -msse2 -mcx16)
|
||||
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||
# This fixes 'some' of the st11range issues. See issue #2516
|
||||
|
@ -6,8 +6,6 @@
|
||||
#include "Emu/System.h"
|
||||
#include "MFC.h"
|
||||
|
||||
const bool s_use_rtm = utils::has_rtm();
|
||||
|
||||
template <>
|
||||
void fmt_class_string<MFC>::format(std::string& out, u64 arg)
|
||||
{
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include "stdafx.h"
|
||||
#include "Utilities/VirtualMemory.h"
|
||||
#include "Utilities/sysinfo.h"
|
||||
#include "Utilities/JIT.h"
|
||||
#include "Crypto/sha1.h"
|
||||
#include "Emu/Memory/Memory.h"
|
||||
#include "Emu/System.h"
|
||||
@ -46,7 +47,6 @@
|
||||
#endif
|
||||
#include "define_new_memleakdetect.h"
|
||||
|
||||
#include "Utilities/JIT.h"
|
||||
#include "PPUTranslator.h"
|
||||
#include "Modules/cellMsgDialog.h"
|
||||
#endif
|
||||
@ -55,8 +55,6 @@
|
||||
#include <cfenv>
|
||||
#include "Utilities/GSL.h"
|
||||
|
||||
const bool s_use_rtm = utils::has_rtm();
|
||||
|
||||
const bool s_use_ssse3 =
|
||||
#ifdef _MSC_VER
|
||||
utils::has_ssse3();
|
||||
@ -713,7 +711,12 @@ ppu_thread::ppu_thread(const std::string& name, u32 prio, u32 stack)
|
||||
, m_name(name)
|
||||
{
|
||||
// Trigger the scheduler
|
||||
state += cpu_flag::suspend + cpu_flag::memory;
|
||||
state += cpu_flag::suspend;
|
||||
|
||||
if (!g_use_rtm)
|
||||
{
|
||||
state += cpu_flag::memory;
|
||||
}
|
||||
}
|
||||
|
||||
void ppu_thread::cmd_push(cmd64 cmd)
|
||||
@ -942,7 +945,7 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
|
||||
ppu.raddr = addr;
|
||||
|
||||
// Do several attempts
|
||||
for (uint i = 0; i < 5; i++)
|
||||
for (uint i = 0; g_use_rtm || i < 5; i++)
|
||||
{
|
||||
ppu.rtime = vm::reservation_acquire(addr, sizeof(T));
|
||||
_mm_lfence();
|
||||
@ -978,6 +981,57 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr)
|
||||
return ppu_load_acquire_reservation<u64>(ppu, addr);
|
||||
}
|
||||
|
||||
// Runtime-assembled helper: conditional 32-bit store performed inside a hardware transaction.
// Result (as consumed by the caller): 1 - value stored and reservation stamp updated;
// -1 - the reservation stamp or the data no longer matches;
// otherwise the top byte of the transaction abort status, sign-extended (see the fallback path).
const auto ppu_stwcx_tx = build_function_asm<int(*)(u32 raddr, u64 rtime, u64 rdata, u32 value)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

	Label fallback = c.newLabel();
	Label mismatch = c.newLabel();

	// Readable names for the argument registers and the two scratch registers
	auto&& raddr = args[0];
	auto&& rtime = args[1];
	auto&& rdata = args[2];
	auto&& value = args[3];
	const auto& resv = x86::r10; // pointer to the reservation entry
	const auto& data = x86::r11; // pointer to the target memory

	// Prepare registers: data = *g_base_addr + raddr, resv = *g_reservations + ((raddr >> 7) << 3)
	c.mov(x86::rax, imm_ptr(&vm::g_reservations));
	c.mov(resv, x86::qword_ptr(x86::rax));
	c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
	c.mov(data, x86::qword_ptr(x86::rax));
	c.lea(data, x86::qword_ptr(data, raddr));
	c.shr(raddr, 7);
	c.lea(resv, x86::qword_ptr(resv, raddr, 3));

	// Byte-swap the expected and the new 32-bit values
	c.bswap(rdata.r32());
	c.bswap(value.r32());

	// Touch memory (heavyweight): locked no-op add on the data,
	// locked xadd of zero to fetch the current reservation stamp
	c.lock().add(x86::dword_ptr(data), 0);
	c.xor_(x86::eax, x86::eax);
	c.lock().xadd(x86::qword_ptr(resv), x86::rax);
	c.cmp(x86::rax, rtime);
	c.jne(mismatch);

	// Begin transaction
	build_transaction_enter(c, fallback);

	// Verify the stamp and the data again, now transactionally
	c.cmp(x86::qword_ptr(resv), rtime);
	c.jne(mismatch);
	c.cmp(x86::dword_ptr(data), rdata.r32());
	c.jne(mismatch);

	// Store the new value, then publish a new stamp built from the timestamp counter
	c.mov(x86::dword_ptr(data), value.r32());
	c.rdtsc(); // destroys args[1] or args[2]
	c.shl(x86::rdx, 33);
	c.shl(x86::rax, 1);
	c.or_(x86::rax, x86::rdx);
	c.mov(x86::qword_ptr(resv), x86::rax);
	c.xend();
	c.mov(x86::eax, 1);
	c.ret();

	// Transaction was not (re)started: return the top byte of the status in eax
	c.bind(fallback);
	c.sar(x86::eax, 24);
	c.ret();

	// Stamp or data mismatch: emit xabort (code 0xff), then return -1
	c.bind(mismatch);
	build_transaction_abort(c, 0xff);
	c.or_(x86::eax, -1);
	c.ret();
});
|
||||
|
||||
extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
|
||||
{
|
||||
atomic_be_t<u32>& data = vm::_ref<atomic_be_t<u32>>(addr);
|
||||
@ -988,24 +1042,31 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (s_use_rtm && utils::transaction_enter())
|
||||
if (g_use_rtm)
|
||||
{
|
||||
if (!vm::g_mutex.is_lockable() || vm::g_mutex.is_reading())
|
||||
// Do several attempts (TODO)
|
||||
for (u32 i = 0; i < 5; i++)
|
||||
{
|
||||
_xabort(0);
|
||||
const int r = ppu_stwcx_tx(addr, ppu.rtime, ppu.rdata, reg_value);
|
||||
|
||||
if (r > 0)
|
||||
{
|
||||
vm::reservation_notifier(addr, sizeof(u32)).notify_all();
|
||||
ppu.raddr = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (r < 0)
|
||||
{
|
||||
// Reservation lost
|
||||
ppu.raddr = 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(static_cast<u32>(ppu.rdata), reg_value);
|
||||
|
||||
if (result)
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u32));
|
||||
vm::notify(addr, sizeof(u32));
|
||||
}
|
||||
|
||||
_xend();
|
||||
// Give up
|
||||
ppu.raddr = 0;
|
||||
return result;
|
||||
return false;
|
||||
}
|
||||
|
||||
vm::writer_lock lock(0);
|
||||
@ -1015,13 +1076,64 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
|
||||
if (result)
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u32));
|
||||
vm::notify(addr, sizeof(u32));
|
||||
vm::reservation_notifier(addr, sizeof(u32)).notify_all();
|
||||
}
|
||||
|
||||
ppu.raddr = 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Runtime-assembled helper: conditional 64-bit store performed inside a hardware transaction.
// Result (as consumed by the caller): 1 - value stored and reservation stamp updated;
// -1 - the reservation stamp or the data no longer matches;
// otherwise the top byte of the transaction abort status, sign-extended (see the fallback path).
const auto ppu_stdcx_tx = build_function_asm<int(*)(u32 raddr, u64 rtime, u64 rdata, u64 value)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

	Label fallback = c.newLabel();
	Label mismatch = c.newLabel();

	// Readable names for the argument registers and the two scratch registers
	auto&& raddr = args[0];
	auto&& rtime = args[1];
	auto&& rdata = args[2];
	auto&& value = args[3];
	const auto& resv = x86::r10; // pointer to the reservation entry
	const auto& data = x86::r11; // pointer to the target memory

	// Prepare registers: data = *g_base_addr + raddr, resv = *g_reservations + ((raddr >> 7) << 3)
	c.mov(x86::rax, imm_ptr(&vm::g_reservations));
	c.mov(resv, x86::qword_ptr(x86::rax));
	c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
	c.mov(data, x86::qword_ptr(x86::rax));
	c.lea(data, x86::qword_ptr(data, raddr));
	c.shr(raddr, 7);
	c.lea(resv, x86::qword_ptr(resv, raddr, 3));

	// Byte-swap the expected and the new 64-bit values
	c.bswap(rdata);
	c.bswap(value);

	// Touch memory (heavyweight): locked no-op add on the data,
	// locked xadd of zero to fetch the current reservation stamp
	c.lock().add(x86::qword_ptr(data), 0);
	c.xor_(x86::eax, x86::eax);
	c.lock().xadd(x86::qword_ptr(resv), x86::rax);
	c.cmp(x86::rax, rtime);
	c.jne(mismatch);

	// Begin transaction
	build_transaction_enter(c, fallback);

	// Verify the stamp and the data again, now transactionally
	c.cmp(x86::qword_ptr(resv), rtime);
	c.jne(mismatch);
	c.cmp(x86::qword_ptr(data), rdata);
	c.jne(mismatch);

	// Store the new value, then publish a new stamp built from the timestamp counter
	c.mov(x86::qword_ptr(data), value);
	c.rdtsc(); // destroys args[1] or args[2]
	c.shl(x86::rdx, 33);
	c.shl(x86::rax, 1);
	c.or_(x86::rax, x86::rdx);
	c.mov(x86::qword_ptr(resv), x86::rax);
	c.xend();
	c.mov(x86::eax, 1);
	c.ret();

	// Transaction was not (re)started: return the top byte of the status in eax
	c.bind(fallback);
	c.sar(x86::eax, 24);
	c.ret();

	// Stamp or data mismatch: emit xabort (code 0xff), then return -1
	c.bind(mismatch);
	build_transaction_abort(c, 0xff);
	c.or_(x86::eax, -1);
	c.ret();
});
|
||||
|
||||
extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
|
||||
{
|
||||
atomic_be_t<u64>& data = vm::_ref<atomic_be_t<u64>>(addr);
|
||||
@ -1032,24 +1144,31 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (s_use_rtm && utils::transaction_enter())
|
||||
if (g_use_rtm)
|
||||
{
|
||||
if (!vm::g_mutex.is_lockable() || vm::g_mutex.is_reading())
|
||||
// Do several attempts (TODO)
|
||||
for (u32 i = 0; i < 5; i++)
|
||||
{
|
||||
_xabort(0);
|
||||
const int r = ppu_stdcx_tx(addr, ppu.rtime, ppu.rdata, reg_value);
|
||||
|
||||
if (r > 0)
|
||||
{
|
||||
vm::reservation_notifier(addr, sizeof(u64)).notify_all();
|
||||
ppu.raddr = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (r < 0)
|
||||
{
|
||||
// Reservation lost
|
||||
ppu.raddr = 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u64)) && data.compare_and_swap_test(ppu.rdata, reg_value);
|
||||
|
||||
if (result)
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u64));
|
||||
vm::notify(addr, sizeof(u64));
|
||||
}
|
||||
|
||||
_xend();
|
||||
// Give up
|
||||
ppu.raddr = 0;
|
||||
return result;
|
||||
return false;
|
||||
}
|
||||
|
||||
vm::writer_lock lock(0);
|
||||
@ -1059,7 +1178,7 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
|
||||
if (result)
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u64));
|
||||
vm::notify(addr, sizeof(u64));
|
||||
vm::reservation_notifier(addr, sizeof(u64)).notify_all();
|
||||
}
|
||||
|
||||
ppu.raddr = 0;
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include "stdafx.h"
|
||||
#include "Utilities/JIT.h"
|
||||
#include "Utilities/lockless.h"
|
||||
#include "Utilities/sysinfo.h"
|
||||
#include "Emu/Memory/Memory.h"
|
||||
@ -22,8 +23,7 @@
|
||||
#include <cfenv>
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
|
||||
const bool s_use_rtm = utils::has_rtm();
|
||||
#include <shared_mutex>
|
||||
|
||||
const bool s_use_ssse3 =
|
||||
#ifdef _MSC_VER
|
||||
@ -213,6 +213,175 @@ namespace spu
|
||||
}
|
||||
}
|
||||
|
||||
// Runtime-assembled helper: conditional 128-byte store performed inside a hardware transaction.
// `_old` is the data expected in memory, `_new` is the data to write.
// Result (as consumed by the caller): 1 - data replaced and reservation stamp updated;
// -1 - the reservation stamp or the memory contents no longer match;
// otherwise the top byte of the transaction abort status, sign-extended (see the fallback path).
const auto spu_putllc_tx = build_function_asm<int(*)(u32 raddr, u64 rtime, const void* _old, const void* _new)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

	Label fallback = c.newLabel();
	Label mismatch = c.newLabel();

	// Readable names for the argument registers and the two scratch registers
	auto&& raddr = args[0];
	auto&& rtime = args[1];
	auto&& old_data = args[2];
	auto&& new_data = args[3];
	const auto& resv = x86::r10; // pointer to the reservation entry
	const auto& data = x86::r11; // pointer to the target memory

	// Prepare registers: data = *g_base_addr + raddr, resv = *g_reservations + (raddr >> 4)
	c.mov(x86::rax, imm_ptr(&vm::g_reservations));
	c.mov(resv, x86::qword_ptr(x86::rax));
	c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
	c.mov(data, x86::qword_ptr(x86::rax));
	c.lea(data, x86::qword_ptr(data, raddr));
	c.shr(raddr, 4);
	c.lea(resv, x86::qword_ptr(resv, raddr));

	// Touch memory (heavyweight): read both source buffers, locked no-op add on the
	// target, locked xadd of zero to fetch the current reservation stamp
	c.mov(x86::eax, x86::dword_ptr(old_data));
	c.mov(x86::eax, x86::dword_ptr(new_data));
	c.lock().add(x86::qword_ptr(data), 0);
	c.xor_(x86::eax, x86::eax);
	c.lock().xadd(x86::qword_ptr(resv), x86::rax);
	c.cmp(x86::rax, rtime);
	c.jne(mismatch);

	// Load the expected data before entering the transaction
	c.vmovups(x86::ymm0, x86::yword_ptr(old_data, 0));
	c.vmovups(x86::ymm1, x86::yword_ptr(old_data, 32));
	c.vmovups(x86::ymm2, x86::yword_ptr(old_data, 64));
	c.vmovups(x86::ymm3, x86::yword_ptr(old_data, 96));
#ifndef _WIN32
	// Outside Windows the new data is preloaded as well (ymm6..ymm9)
	c.vmovups(x86::ymm6, x86::yword_ptr(new_data, 0));
	c.vmovups(x86::ymm7, x86::yword_ptr(new_data, 32));
	c.vmovups(x86::ymm8, x86::yword_ptr(new_data, 64));
	c.vmovups(x86::ymm9, x86::yword_ptr(new_data, 96));
#endif

	// Begin transaction
	build_transaction_enter(c, fallback);

	// Verify the stamp again, now transactionally
	c.cmp(x86::qword_ptr(resv), rtime);
	c.jne(mismatch);

	// Compare memory with the expected data: XOR each 32-byte part, OR the results, test for zero
	c.vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(data, 0));
	c.vxorps(x86::ymm1, x86::ymm1, x86::yword_ptr(data, 32));
	c.vxorps(x86::ymm2, x86::ymm2, x86::yword_ptr(data, 64));
	c.vxorps(x86::ymm3, x86::ymm3, x86::yword_ptr(data, 96));
	c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
	c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
	c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
	c.vptest(x86::ymm0, x86::ymm0);
	c.jnz(mismatch);

	// Write the new data
#ifdef _WIN32
	// On Windows the new data is loaded here, reusing ymm0..ymm3
	c.vmovups(x86::ymm0, x86::yword_ptr(new_data, 0));
	c.vmovaps(x86::yword_ptr(data, 0), x86::ymm0);
	c.vmovups(x86::ymm1, x86::yword_ptr(new_data, 32));
	c.vmovaps(x86::yword_ptr(data, 32), x86::ymm1);
	c.vmovups(x86::ymm2, x86::yword_ptr(new_data, 64));
	c.vmovaps(x86::yword_ptr(data, 64), x86::ymm2);
	c.vmovups(x86::ymm3, x86::yword_ptr(new_data, 96));
	c.vmovaps(x86::yword_ptr(data, 96), x86::ymm3);
#else
	c.vmovaps(x86::yword_ptr(data, 0), x86::ymm6);
	c.vmovaps(x86::yword_ptr(data, 32), x86::ymm7);
	c.vmovaps(x86::yword_ptr(data, 64), x86::ymm8);
	c.vmovaps(x86::yword_ptr(data, 96), x86::ymm9);
#endif

	// Publish a new stamp built from the timestamp counter
	c.rdtsc(); // destroys args[1] or args[2]
	c.shl(x86::rdx, 33);
	c.shl(x86::rax, 1);
	c.or_(x86::rax, x86::rdx);
	c.mov(x86::qword_ptr(resv), x86::rax);
	c.xend();
	c.vzeroupper();
	c.mov(x86::eax, 1);
	c.ret();

	// Transaction was not (re)started: return the top byte of the status in eax
	c.bind(fallback);
	c.sar(x86::eax, 24);
	c.ret();

	// Stamp or data mismatch: emit xabort (code 0xff), then return -1
	c.bind(mismatch);
	build_transaction_abort(c, 0xff);
	c.or_(x86::eax, -1);
	c.ret();
});
|
||||
|
||||
const auto spu_getll_tx = build_function_asm<u64(*)(u32 raddr, void* rdata)>([](asmjit::X86Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
Label fall = c.newLabel();
|
||||
|
||||
// Prepare registers
|
||||
c.mov(x86::rax, imm_ptr(&vm::g_reservations));
|
||||
c.mov(x86::r10, x86::qword_ptr(x86::rax));
|
||||
c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
|
||||
c.mov(x86::r11, x86::qword_ptr(x86::rax));
|
||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
||||
c.shr(args[0], 4);
|
||||
c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0]));
|
||||
|
||||
// Touch memory
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::r11));
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::r10));
|
||||
|
||||
// Begin transaction
|
||||
build_transaction_enter(c, fall);
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::r10));
|
||||
c.vmovaps(x86::ymm0, x86::yword_ptr(x86::r11, 0));
|
||||
c.vmovaps(x86::ymm1, x86::yword_ptr(x86::r11, 32));
|
||||
c.vmovaps(x86::ymm2, x86::yword_ptr(x86::r11, 64));
|
||||
c.vmovaps(x86::ymm3, x86::yword_ptr(x86::r11, 96));
|
||||
c.xend();
|
||||
c.vmovups(x86::yword_ptr(args[1], 0), x86::ymm0);
|
||||
c.vmovups(x86::yword_ptr(args[1], 32), x86::ymm1);
|
||||
c.vmovups(x86::yword_ptr(args[1], 64), x86::ymm2);
|
||||
c.vmovups(x86::yword_ptr(args[1], 96), x86::ymm3);
|
||||
c.vzeroupper();
|
||||
c.ret();
|
||||
|
||||
c.bind(fall);
|
||||
c.mov(x86::eax, 1);
|
||||
c.ret();
|
||||
});
|
||||
|
||||
// Runtime-assembled helper: unconditional 128-byte store performed inside a hardware
// transaction, followed by a reservation stamp update.
// Returns 1 (true) on success, 0 (false) if the transaction could not be completed.
const auto spu_putlluc_tx = build_function_asm<bool(*)(u32 raddr, const void* rdata)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

	Label fallback = c.newLabel();

	// Readable names for the argument registers and the two scratch registers
	auto&& raddr = args[0];
	auto&& rdata = args[1];
	const auto& resv = x86::r10; // pointer to the reservation entry
	const auto& data = x86::r11; // pointer to the target memory

	// Prepare registers: data = *g_base_addr + raddr, resv = *g_reservations + (raddr >> 4)
	c.mov(x86::rax, imm_ptr(&vm::g_reservations));
	c.mov(resv, x86::qword_ptr(x86::rax));
	c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
	c.mov(data, x86::qword_ptr(x86::rax));
	c.lea(data, x86::qword_ptr(data, raddr));
	c.shr(raddr, 4);
	c.lea(resv, x86::qword_ptr(resv, raddr));

	// Touch memory (heavyweight): locked no-op adds on the data and on the reservation entry
	c.lock().add(x86::qword_ptr(data), 0);
	c.lock().add(x86::qword_ptr(resv), 0);

	// Prepare data: load the source buffer before entering the transaction
	c.vmovups(x86::ymm0, x86::yword_ptr(rdata, 0));
	c.vmovups(x86::ymm1, x86::yword_ptr(rdata, 32));
	c.vmovups(x86::ymm2, x86::yword_ptr(rdata, 64));
	c.vmovups(x86::ymm3, x86::yword_ptr(rdata, 96));

	// Begin transaction
	build_transaction_enter(c, fallback);

	c.vmovaps(x86::yword_ptr(data, 0), x86::ymm0);
	c.vmovaps(x86::yword_ptr(data, 32), x86::ymm1);
	c.vmovaps(x86::yword_ptr(data, 64), x86::ymm2);
	c.vmovaps(x86::yword_ptr(data, 96), x86::ymm3);

	// Publish a new stamp built from the timestamp counter
	c.rdtsc(); // destroys args[1] or args[2]
	c.shl(x86::rdx, 33);
	c.shl(x86::rax, 1);
	c.or_(x86::rax, x86::rdx);
	c.mov(x86::qword_ptr(resv), x86::rax);
	c.xend();
	c.vzeroupper();
	c.mov(x86::eax, 1);
	c.ret();

	// Transaction was not (re)started: report failure
	c.bind(fallback);
	c.xor_(x86::eax, x86::eax);
	c.ret();
});
|
||||
|
||||
void spu_int_ctrl_t::set(u64 ints)
|
||||
{
|
||||
// leave only enabled interrupts
|
||||
@ -516,10 +685,12 @@ void SPUThread::cpu_task()
|
||||
|
||||
// Currently a no-op: the passive lock acquisition below is commented out
void SPUThread::cpu_mem()
{
	//vm::passive_lock(*this);
}
|
||||
|
||||
// Currently a no-op: setting cpu_flag::memory below is commented out
void SPUThread::cpu_unmem()
{
	//state.test_and_set(cpu_flag::memory);
}
|
||||
|
||||
SPUThread::~SPUThread()
|
||||
@ -881,42 +1052,17 @@ void SPUThread::do_putlluc(const spu_mfc_cmd& args)
|
||||
vm::reservation_acquire(addr, 128);
|
||||
|
||||
// Store unconditionally
|
||||
if (s_use_rtm && utils::transaction_enter())
|
||||
while (g_use_rtm)
|
||||
{
|
||||
// First transaction attempt
|
||||
if (!vm::g_mutex.is_lockable() || vm::g_mutex.is_reading())
|
||||
if (spu_putlluc_tx(addr, to_write.data()))
|
||||
{
|
||||
_xabort(0);
|
||||
vm::reservation_notifier(addr, 128).notify_all();
|
||||
tx_success++;
|
||||
return;
|
||||
}
|
||||
|
||||
data = to_write;
|
||||
vm::reservation_update(addr, 128);
|
||||
vm::notify(addr, 128);
|
||||
_xend();
|
||||
return;
|
||||
}
|
||||
else if (s_use_rtm)
|
||||
{
|
||||
vm::writer_lock lock(0);
|
||||
|
||||
if (utils::transaction_enter())
|
||||
{
|
||||
// Second transaction attempt
|
||||
data = to_write;
|
||||
vm::reservation_update(addr, 128);
|
||||
_xend();
|
||||
}
|
||||
else
|
||||
{
|
||||
vm::reservation_update(addr, 128, true);
|
||||
_mm_sfence();
|
||||
data = to_write;
|
||||
_mm_sfence();
|
||||
vm::reservation_update(addr, 128);
|
||||
}
|
||||
|
||||
vm::notify(addr, 128);
|
||||
return;
|
||||
busy_wait(300);
|
||||
tx_failure++;
|
||||
}
|
||||
|
||||
vm::writer_lock lock(0);
|
||||
@ -925,7 +1071,7 @@ void SPUThread::do_putlluc(const spu_mfc_cmd& args)
|
||||
data = to_write;
|
||||
_mm_sfence();
|
||||
vm::reservation_update(addr, 128);
|
||||
vm::notify(addr, 128);
|
||||
vm::reservation_notifier(addr, 128).notify_all();
|
||||
}
|
||||
|
||||
void SPUThread::do_mfc(bool wait)
|
||||
@ -970,7 +1116,7 @@ void SPUThread::do_mfc(bool wait)
|
||||
{
|
||||
if (!test(ch_stall_mask, mask))
|
||||
{
|
||||
if (s_use_rtm)
|
||||
if (g_use_rtm)
|
||||
{
|
||||
if (do_list_transfer(args))
|
||||
{
|
||||
@ -1002,7 +1148,7 @@ void SPUThread::do_mfc(bool wait)
|
||||
|
||||
if (args.size)
|
||||
{
|
||||
if (s_use_rtm)
|
||||
if (g_use_rtm)
|
||||
{
|
||||
do_dma_transfer(args);
|
||||
}
|
||||
@ -1067,13 +1213,6 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
|
||||
// Stall infinitely if MFC queue is full
|
||||
while (UNLIKELY(mfc_size >= 16))
|
||||
{
|
||||
do_mfc();
|
||||
|
||||
if (mfc_size < 16)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (test(state, cpu_flag::stop))
|
||||
{
|
||||
return false;
|
||||
@ -1102,18 +1241,11 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
|
||||
|
||||
if (is_polling)
|
||||
{
|
||||
vm::waiter waiter;
|
||||
waiter.owner = this;
|
||||
waiter.addr = raddr;
|
||||
waiter.size = 128;
|
||||
waiter.stamp = rtime;
|
||||
waiter.data = rdata.data();
|
||||
waiter.init();
|
||||
rtime = vm::reservation_acquire(raddr, 128);
|
||||
_mm_lfence();
|
||||
|
||||
while (vm::reservation_acquire(raddr, 128) == waiter.stamp && rdata == data)
|
||||
while (vm::reservation_acquire(raddr, 128) == rtime && rdata == data)
|
||||
{
|
||||
vm::temporary_unlock(*this);
|
||||
|
||||
if (test(state, cpu_flag::stop))
|
||||
{
|
||||
break;
|
||||
@ -1123,8 +1255,23 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
|
||||
}
|
||||
}
|
||||
|
||||
while (g_use_rtm)
|
||||
{
|
||||
rtime = spu_getll_tx(raddr, rdata.data());
|
||||
|
||||
if (rtime & 1)
|
||||
{
|
||||
tx_failure++;
|
||||
busy_wait(300);
|
||||
continue;
|
||||
}
|
||||
|
||||
tx_success++;
|
||||
break;
|
||||
}
|
||||
|
||||
// Do several attempts
|
||||
for (uint i = 0; i < 5; i++)
|
||||
for (uint i = 0; !g_use_rtm && i < 5; i++)
|
||||
{
|
||||
rtime = vm::reservation_acquire(raddr, 128);
|
||||
_mm_lfence();
|
||||
@ -1147,19 +1294,7 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
|
||||
busy_wait(300);
|
||||
}
|
||||
|
||||
if (s_use_rtm && utils::transaction_enter())
|
||||
{
|
||||
rtime = vm::reservation_acquire(raddr, 128);
|
||||
|
||||
if (rtime & 1)
|
||||
{
|
||||
_xabort(0);
|
||||
}
|
||||
|
||||
rdata = data;
|
||||
_xend();
|
||||
}
|
||||
else
|
||||
if (!g_use_rtm)
|
||||
{
|
||||
vm::reader_lock lock;
|
||||
rtime = vm::reservation_acquire(raddr, 128);
|
||||
@ -1182,63 +1317,25 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
|
||||
|
||||
if (raddr == args.eal && rtime == vm::reservation_acquire(raddr, 128))
|
||||
{
|
||||
// TODO: vm::check_addr
|
||||
if (s_use_rtm && utils::transaction_enter())
|
||||
if (g_use_rtm)
|
||||
{
|
||||
// First transaction attempt
|
||||
if (!vm::g_mutex.is_lockable() || vm::g_mutex.is_reading())
|
||||
// Do several attempts (TODO)
|
||||
for (u32 i = 0;; i++)
|
||||
{
|
||||
_xabort(0);
|
||||
}
|
||||
const int r = spu_putllc_tx(raddr, rtime, rdata.data(), to_write.data());
|
||||
|
||||
if (rtime == vm::reservation_acquire(raddr, 128) && rdata == data)
|
||||
{
|
||||
data = to_write;
|
||||
result = true;
|
||||
|
||||
vm::reservation_update(raddr, 128);
|
||||
vm::notify(raddr, 128);
|
||||
}
|
||||
|
||||
_xend();
|
||||
tx_success++;
|
||||
}
|
||||
else if (s_use_rtm)
|
||||
{
|
||||
// Second transaction attempt
|
||||
vm::writer_lock lock(0);
|
||||
|
||||
// Touch memory without modifying the value
|
||||
vm::_ref<atomic_t<u32>>(args.eal) += 0;
|
||||
|
||||
// Touch reservation memory area as well
|
||||
vm::reservation_acquire(raddr, 128) += 0;
|
||||
|
||||
if (utils::transaction_enter(&tx_status))
|
||||
{
|
||||
if (rtime == vm::reservation_acquire(raddr, 128) && rdata == data)
|
||||
if (r > 0)
|
||||
{
|
||||
data = to_write;
|
||||
vm::reservation_notifier(raddr, 128).notify_all();
|
||||
result = true;
|
||||
|
||||
vm::reservation_update(raddr, 128);
|
||||
tx_success++;
|
||||
break;
|
||||
}
|
||||
|
||||
_xend();
|
||||
tx_success++;
|
||||
|
||||
if (result)
|
||||
if (r < 0)
|
||||
{
|
||||
// First transaction attempt usually fails on vm::notify
|
||||
vm::notify(raddr, 128);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Workaround MSVC
|
||||
if (rtime == vm::reservation_acquire(raddr, 128) && rdata == data)
|
||||
{
|
||||
vm::reservation_update(raddr, 128);
|
||||
// Reservation lost
|
||||
break;
|
||||
}
|
||||
|
||||
// Don't fallback to heavyweight lock, just give up
|
||||
@ -1248,6 +1345,7 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
|
||||
else if (rdata == data)
|
||||
{
|
||||
// Full lock (heavyweight)
|
||||
// TODO: vm::check_addr
|
||||
vm::writer_lock lock(1);
|
||||
|
||||
if (rtime == vm::reservation_acquire(raddr, 128) && rdata == data)
|
||||
@ -1259,12 +1357,7 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
|
||||
result = true;
|
||||
|
||||
vm::reservation_update(raddr, 128);
|
||||
vm::notify(raddr, 128);
|
||||
tx_success++;
|
||||
}
|
||||
else
|
||||
{
|
||||
tx_failure++;
|
||||
vm::reservation_notifier(raddr, 128).notify_all();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1332,7 +1425,7 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
|
||||
{
|
||||
if (LIKELY(args.size))
|
||||
{
|
||||
if (s_use_rtm)
|
||||
if (g_use_rtm)
|
||||
{
|
||||
do_dma_transfer(args);
|
||||
return true;
|
||||
@ -1377,7 +1470,7 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args)
|
||||
{
|
||||
if (LIKELY(do_dma_check(args) && !test(ch_stall_mask, 1u << args.tag)))
|
||||
{
|
||||
if (s_use_rtm)
|
||||
if (g_use_rtm)
|
||||
{
|
||||
if (LIKELY(do_list_transfer(args)))
|
||||
{
|
||||
@ -1531,14 +1624,7 @@ s64 SPUThread::get_ch_value(u32 ch)
|
||||
{
|
||||
for (int i = 0; i < 10 && channel.get_count() == 0; i++)
|
||||
{
|
||||
// if (!s_use_rtm && mfc_size && !i)
|
||||
// {
|
||||
// do_mfc();
|
||||
// }
|
||||
// else
|
||||
{
|
||||
busy_wait();
|
||||
}
|
||||
busy_wait();
|
||||
}
|
||||
|
||||
u32 out;
|
||||
@ -1568,14 +1654,7 @@ s64 SPUThread::get_ch_value(u32 ch)
|
||||
{
|
||||
for (int i = 0; i < 10 && ch_in_mbox.get_count() == 0; i++)
|
||||
{
|
||||
// if (!s_use_rtm && mfc_size && !i)
|
||||
// {
|
||||
// do_mfc();
|
||||
// }
|
||||
// else
|
||||
{
|
||||
busy_wait();
|
||||
}
|
||||
busy_wait();
|
||||
}
|
||||
|
||||
u32 out;
|
||||
@ -1601,11 +1680,6 @@ s64 SPUThread::get_ch_value(u32 ch)
|
||||
|
||||
case MFC_RdTagStat:
|
||||
{
|
||||
// if (!s_use_rtm && mfc_size)
|
||||
// {
|
||||
// do_mfc();
|
||||
// }
|
||||
|
||||
if (ch_tag_stat.get_count())
|
||||
{
|
||||
u32 out = ch_tag_stat.get_value();
|
||||
@ -1676,11 +1750,6 @@ s64 SPUThread::get_ch_value(u32 ch)
|
||||
|
||||
case SPU_RdEventStat:
|
||||
{
|
||||
// if (!s_use_rtm && mfc_size)
|
||||
// {
|
||||
// do_mfc();
|
||||
// }
|
||||
|
||||
u32 res = get_events();
|
||||
|
||||
if (res)
|
||||
@ -1688,19 +1757,31 @@ s64 SPUThread::get_ch_value(u32 ch)
|
||||
return res;
|
||||
}
|
||||
|
||||
vm::waiter waiter;
|
||||
const u32 mask1 = ch_event_mask;
|
||||
|
||||
if (ch_event_mask & SPU_EVENT_LR)
|
||||
if (mask1 & SPU_EVENT_LR && raddr)
|
||||
{
|
||||
waiter.owner = this;
|
||||
waiter.addr = raddr;
|
||||
waiter.size = 128;
|
||||
waiter.stamp = rtime;
|
||||
waiter.data = rdata.data();
|
||||
waiter.init();
|
||||
if (mask1 != SPU_EVENT_LR)
|
||||
{
|
||||
fmt::throw_exception("Not supported: event mask 0x%x" HERE, mask1);
|
||||
}
|
||||
|
||||
std::shared_lock<notifier> pseudo_lock(vm::reservation_notifier(raddr, 128));
|
||||
|
||||
while (res = get_events(), !res)
|
||||
{
|
||||
if (test(state, cpu_flag::stop + cpu_flag::dbg_global_stop))
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
pseudo_lock.mutex()->wait(100);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
while (!(res = get_events(true)))
|
||||
while (res = get_events(true), !res)
|
||||
{
|
||||
if (test(state & cpu_flag::stop))
|
||||
{
|
||||
@ -1738,11 +1819,6 @@ bool SPUThread::set_ch_value(u32 ch, u32 value)
|
||||
|
||||
case SPU_WrOutIntrMbox:
|
||||
{
|
||||
// if (!s_use_rtm && mfc_size)
|
||||
// {
|
||||
// do_mfc(false);
|
||||
// }
|
||||
|
||||
if (offset >= RAW_SPU_BASE_ADDR)
|
||||
{
|
||||
while (!ch_out_intr_mbox.try_push(value))
|
||||
@ -1891,11 +1967,6 @@ bool SPUThread::set_ch_value(u32 ch, u32 value)
|
||||
|
||||
case SPU_WrOutMbox:
|
||||
{
|
||||
// if (!s_use_rtm && mfc_size)
|
||||
// {
|
||||
// do_mfc(false);
|
||||
// }
|
||||
|
||||
while (!ch_out_mbox.try_push(value))
|
||||
{
|
||||
if (test(state & cpu_flag::stop))
|
||||
@ -1939,11 +2010,6 @@ bool SPUThread::set_ch_value(u32 ch, u32 value)
|
||||
break;
|
||||
}
|
||||
|
||||
// if (!s_use_rtm && mfc_size)
|
||||
// {
|
||||
// do_mfc(false);
|
||||
// }
|
||||
|
||||
const u32 completed = get_mfc_completed();
|
||||
|
||||
if (!value)
|
||||
@ -2066,11 +2132,6 @@ bool SPUThread::stop_and_signal(u32 code)
|
||||
{
|
||||
LOG_TRACE(SPU, "stop_and_signal(code=0x%x)", code);
|
||||
|
||||
// if (!s_use_rtm && mfc_size)
|
||||
// {
|
||||
// do_mfc();
|
||||
// }
|
||||
|
||||
if (offset >= RAW_SPU_BASE_ADDR)
|
||||
{
|
||||
status.atomic_op([code](u32& status)
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "Memory.h"
|
||||
#include "Emu/System.h"
|
||||
#include "Utilities/mutex.h"
|
||||
#include "Utilities/cond.h"
|
||||
#include "Utilities/Thread.h"
|
||||
#include "Utilities/VirtualMemory.h"
|
||||
#include "Emu/CPU/CPUThread.h"
|
||||
@ -10,6 +11,8 @@
|
||||
#include <atomic>
|
||||
#include <deque>
|
||||
|
||||
static_assert(sizeof(notifier) == 8, "Unexpected size of notifier");
|
||||
|
||||
namespace vm
|
||||
{
|
||||
static u8* memory_reserve_4GiB(std::uintptr_t _addr = 0)
|
||||
@ -38,12 +41,12 @@ namespace vm
|
||||
// Reservation stats (compressed x16)
|
||||
u8* const g_reservations = memory_reserve_4GiB((std::uintptr_t)g_stat_addr);
|
||||
|
||||
// Reservation sync variables
|
||||
u8* const g_reservations2 = g_reservations + 0x10000000;
|
||||
|
||||
// Memory locations
|
||||
std::vector<std::shared_ptr<block_t>> g_locations;
|
||||
|
||||
// Registered waiters
|
||||
std::deque<vm::waiter*> g_waiters;
|
||||
|
||||
// Memory mutex core
|
||||
shared_mutex g_mutex;
|
||||
|
||||
@ -239,65 +242,6 @@ namespace vm
|
||||
// Memory pages
|
||||
std::array<memory_page, 0x100000000 / 4096> g_pages{};
|
||||
|
||||
void waiter::init()
|
||||
{
|
||||
// Register waiter
|
||||
vm::writer_lock lock(0);
|
||||
|
||||
g_waiters.emplace_back(this);
|
||||
}
|
||||
|
||||
void waiter::test() const
|
||||
{
|
||||
if (std::memcmp(data, vm::base(addr), size) == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (stamp >= reservation_acquire(addr, size))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (owner)
|
||||
{
|
||||
owner->notify();
|
||||
}
|
||||
}
|
||||
|
||||
waiter::~waiter()
|
||||
{
|
||||
// Unregister waiter
|
||||
vm::writer_lock lock(0);
|
||||
|
||||
// Find waiter
|
||||
const auto found = std::find(g_waiters.cbegin(), g_waiters.cend(), this);
|
||||
|
||||
if (found != g_waiters.cend())
|
||||
{
|
||||
g_waiters.erase(found);
|
||||
}
|
||||
}
|
||||
|
||||
void notify(u32 addr, u32 size)
|
||||
{
|
||||
for (const waiter* ptr : g_waiters)
|
||||
{
|
||||
if (ptr->addr / 128 == addr / 128)
|
||||
{
|
||||
ptr->test();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void notify_all()
|
||||
{
|
||||
for (const waiter* ptr : g_waiters)
|
||||
{
|
||||
ptr->test();
|
||||
}
|
||||
}
|
||||
|
||||
static void _page_map(u32 addr, u8 flags, utils::shm& shm)
|
||||
{
|
||||
const u32 size = shm.size();
|
||||
@ -539,6 +483,7 @@ namespace vm
|
||||
if (addr != 0xc0000000 && addr != 0xe0000000)
|
||||
{
|
||||
utils::memory_commit(g_reservations + addr / 16, size / 16);
|
||||
utils::memory_commit(g_reservations2 + addr / 16, size / 16);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
class shared_mutex;
|
||||
class named_thread;
|
||||
class cpu_thread;
|
||||
class notifier;
|
||||
|
||||
namespace vm
|
||||
{
|
||||
@ -15,6 +16,7 @@ namespace vm
|
||||
extern u8* const g_exec_addr;
|
||||
extern u8* const g_stat_addr;
|
||||
extern u8* const g_reservations;
|
||||
extern u8* const g_reservations2;
|
||||
|
||||
enum memory_location_t : uint
|
||||
{
|
||||
@ -41,24 +43,6 @@ namespace vm
|
||||
page_allocated = (1 << 7),
|
||||
};
|
||||
|
||||
struct waiter
|
||||
{
|
||||
named_thread* owner;
|
||||
u32 addr;
|
||||
u32 size;
|
||||
u64 stamp;
|
||||
const void* data;
|
||||
|
||||
waiter() = default;
|
||||
|
||||
waiter(const waiter&) = delete;
|
||||
|
||||
void init();
|
||||
void test() const;
|
||||
|
||||
~waiter();
|
||||
};
|
||||
|
||||
// Address type
|
||||
enum addr_t : u32 {};
|
||||
|
||||
@ -112,14 +96,14 @@ namespace vm
|
||||
inline void reservation_update(u32 addr, u32 size, bool lsb = false)
|
||||
{
|
||||
// Update reservation info with new timestamp
|
||||
reservation_acquire(addr, size) = (__rdtsc() & -2) | u64{lsb};
|
||||
reservation_acquire(addr, size) = (__rdtsc() << 1) | u64{lsb};
|
||||
}
|
||||
|
||||
// Check and notify memory changes at address
|
||||
void notify(u32 addr, u32 size);
|
||||
|
||||
// Check and notify memory changes
|
||||
void notify_all();
|
||||
// Get reservation sync variable
|
||||
inline notifier& reservation_notifier(u32 addr, u32 size)
|
||||
{
|
||||
return *reinterpret_cast<notifier*>(g_reservations2 + addr / 16);
|
||||
}
|
||||
|
||||
// Change memory protection of specified memory region
|
||||
bool page_protect(u32 addr, u32 size, u8 flags_test = 0, u8 flags_set = 0, u8 flags_clear = 0);
|
||||
|
@ -118,16 +118,20 @@ namespace rsx
|
||||
rsx->sync_point_request = true;
|
||||
const u32 addr = get_address(method_registers.semaphore_offset_406e(), method_registers.semaphore_context_dma_406e());
|
||||
|
||||
if (addr >> 28 == 0x4)
|
||||
if (g_use_rtm || addr >> 28 == 0x4)
|
||||
{
|
||||
// TODO: check no reservation area instead
|
||||
vm::write32(addr, arg);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
vm::reader_lock lock;
|
||||
vm::write32(addr, arg);
|
||||
}
|
||||
|
||||
vm::reader_lock lock;
|
||||
vm::write32(addr, arg);
|
||||
vm::notify(addr, 4);
|
||||
if (addr >> 28 != 0x4)
|
||||
{
|
||||
vm::reservation_notifier(addr, 4).notify_all();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "Loader/ELF.h"
|
||||
|
||||
#include "Utilities/StrUtil.h"
|
||||
#include "Utilities/sysinfo.h"
|
||||
|
||||
#include "../Crypto/unself.h"
|
||||
#include "../Crypto/unpkg.h"
|
||||
@ -40,6 +41,8 @@
|
||||
|
||||
cfg_root g_cfg;
|
||||
|
||||
bool g_use_rtm = utils::has_rtm();
|
||||
|
||||
std::string g_cfg_defaults;
|
||||
|
||||
extern atomic_t<u32> g_thread_count;
|
||||
|
@ -456,3 +456,5 @@ struct cfg_root : cfg::node
|
||||
};
|
||||
|
||||
extern cfg_root g_cfg;
|
||||
|
||||
extern bool g_use_rtm;
|
||||
|
Loading…
Reference in New Issue
Block a user