mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-24 19:52:37 +01:00
Rewrite cpu_thread::suspend_all
It is now a higher-order function (it takes the operation to execute as an argument). Only one thread does the hard work of pausing threads.
This commit is contained in:
parent
6d83c9cc0e
commit
050c3e1d6b
@ -201,7 +201,7 @@ asmjit::JitRuntime& asmjit::get_global_runtime()
|
||||
return g_rt;
|
||||
}
|
||||
|
||||
void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than)
|
||||
asmjit::Label asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than)
|
||||
{
|
||||
Label fall = c.newLabel();
|
||||
Label begin = c.newLabel();
|
||||
@ -234,7 +234,10 @@ void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fall
|
||||
c.jae(fallback);
|
||||
c.align(kAlignCode, 16);
|
||||
c.bind(begin);
|
||||
c.xbegin(fall);
|
||||
return fall;
|
||||
|
||||
// xbegin should be issued manually by the caller; this allows adding more checks before entering the transaction
|
||||
//c.xbegin(fall);
|
||||
}
|
||||
|
||||
void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code)
|
||||
|
@ -56,7 +56,7 @@ namespace asmjit
|
||||
asmjit::JitRuntime& get_global_runtime();
|
||||
|
||||
// Emit xbegin and adjacent loop, return label at xbegin
|
||||
void build_transaction_enter(X86Assembler& c, Label fallback, const X86Gp& ctr, uint less_than);
|
||||
[[nodiscard]] asmjit::Label build_transaction_enter(X86Assembler& c, Label fallback, const X86Gp& ctr, uint less_than);
|
||||
|
||||
// Emit xabort
|
||||
void build_transaction_abort(X86Assembler& c, unsigned char code);
|
||||
|
@ -39,84 +39,6 @@ void shared_mutex::imp_unlock_shared(u32 old)
|
||||
}
|
||||
}
|
||||
|
||||
void shared_mutex::imp_lock_low(u32 val)
|
||||
{
|
||||
verify("shared_mutex underflow" HERE), val < c_err;
|
||||
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
busy_wait();
|
||||
|
||||
if (try_lock_low())
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Acquire writer lock and downgrade
|
||||
const u32 old = m_value.fetch_add(c_one);
|
||||
|
||||
if (old == 0)
|
||||
{
|
||||
lock_downgrade();
|
||||
return;
|
||||
}
|
||||
|
||||
verify("shared_mutex overflow" HERE), (old % c_sig) + c_one < c_sig;
|
||||
imp_wait();
|
||||
lock_downgrade();
|
||||
}
|
||||
|
||||
void shared_mutex::imp_unlock_low(u32 old)
|
||||
{
|
||||
verify("shared_mutex underflow" HERE), old - 1 < c_err;
|
||||
|
||||
// Check reader count, notify the writer if necessary
|
||||
if ((old - 1) % c_vip == 0)
|
||||
{
|
||||
imp_signal();
|
||||
}
|
||||
}
|
||||
|
||||
void shared_mutex::imp_lock_vip(u32 val)
|
||||
{
|
||||
verify("shared_mutex underflow" HERE), val < c_err;
|
||||
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
busy_wait();
|
||||
|
||||
if (try_lock_vip())
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Acquire writer lock and downgrade
|
||||
const u32 old = m_value.fetch_add(c_one);
|
||||
|
||||
if (old == 0)
|
||||
{
|
||||
lock_downgrade_to_vip();
|
||||
return;
|
||||
}
|
||||
|
||||
verify("shared_mutex overflow" HERE), (old % c_sig) + c_one < c_sig;
|
||||
imp_wait();
|
||||
lock_downgrade_to_vip();
|
||||
}
|
||||
|
||||
void shared_mutex::imp_unlock_vip(u32 old)
|
||||
{
|
||||
verify("shared_mutex underflow" HERE), old - 1 < c_err;
|
||||
|
||||
// Check reader count, notify the writer if necessary
|
||||
if ((old - 1) % c_one / c_vip == 0)
|
||||
{
|
||||
imp_signal();
|
||||
}
|
||||
}
|
||||
|
||||
void shared_mutex::imp_wait()
|
||||
{
|
||||
while (true)
|
||||
@ -241,18 +163,3 @@ void shared_mutex::imp_lock_unlock()
|
||||
imp_wait();
|
||||
unlock();
|
||||
}
|
||||
|
||||
bool shared_mutex::downgrade_unique_vip_lock_to_low_or_unlock()
|
||||
{
|
||||
return m_value.atomic_op([](u32& value)
|
||||
{
|
||||
if (value % c_one / c_vip == 1)
|
||||
{
|
||||
value -= c_vip - 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
value -= c_vip;
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
@ -12,17 +12,12 @@ class shared_mutex final
|
||||
c_one = 1u << 14, // Fixed-point 1.0 value (one writer, max_readers = c_one - 1)
|
||||
c_sig = 1u << 30,
|
||||
c_err = 1u << 31,
|
||||
c_vip = 1u << 7,
|
||||
};
|
||||
|
||||
atomic_t<u32> m_value{};
|
||||
|
||||
void imp_lock_shared(u32 val);
|
||||
void imp_unlock_shared(u32 old);
|
||||
void imp_lock_low(u32 val);
|
||||
void imp_unlock_low(u32 old);
|
||||
void imp_lock_vip(u32 val);
|
||||
void imp_unlock_vip(u32 old);
|
||||
void imp_wait();
|
||||
void imp_signal();
|
||||
void imp_lock(u32 val);
|
||||
@ -88,64 +83,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
bool try_lock_low()
|
||||
{
|
||||
const u32 value = m_value.load();
|
||||
|
||||
// Conditional increment
|
||||
return value < c_vip - 1 && m_value.compare_and_swap_test(value, value + 1);
|
||||
}
|
||||
|
||||
void lock_low()
|
||||
{
|
||||
const u32 value = m_value.load();
|
||||
|
||||
if (value >= c_vip - 1 || !m_value.compare_and_swap_test(value, value + 1)) [[unlikely]]
|
||||
{
|
||||
imp_lock_low(value);
|
||||
}
|
||||
}
|
||||
|
||||
void unlock_low()
|
||||
{
|
||||
// Unconditional decrement (can result in broken state)
|
||||
const u32 value = m_value.fetch_sub(1);
|
||||
|
||||
if (value >= c_one) [[unlikely]]
|
||||
{
|
||||
imp_unlock_low(value);
|
||||
}
|
||||
}
|
||||
|
||||
bool try_lock_vip()
|
||||
{
|
||||
const u32 value = m_value.load();
|
||||
|
||||
// Conditional increment
|
||||
return (value < c_one - 1 || value & (c_one - c_vip)) && (value % c_vip) == 0 && m_value.compare_and_swap_test(value, value + c_vip);
|
||||
}
|
||||
|
||||
void lock_vip()
|
||||
{
|
||||
const u32 value = m_value.load();
|
||||
|
||||
if ((value >= c_one - 1 && !(value & (c_one - c_vip))) || (value % c_vip) || !m_value.compare_and_swap_test(value, value + c_vip)) [[unlikely]]
|
||||
{
|
||||
imp_lock_vip(value);
|
||||
}
|
||||
}
|
||||
|
||||
void unlock_vip()
|
||||
{
|
||||
// Unconditional decrement (can result in broken state)
|
||||
const u32 value = m_value.fetch_sub(c_vip);
|
||||
|
||||
if (value >= c_one) [[unlikely]]
|
||||
{
|
||||
imp_unlock_vip(value);
|
||||
}
|
||||
}
|
||||
|
||||
bool try_lock()
|
||||
{
|
||||
return m_value.compare_and_swap_test(0, c_one);
|
||||
@ -214,12 +151,6 @@ public:
|
||||
m_value -= c_one - 1;
|
||||
}
|
||||
|
||||
void lock_downgrade_to_vip()
|
||||
{
|
||||
// Convert to vip lock (can result in broken state)
|
||||
m_value -= c_one - c_vip;
|
||||
}
|
||||
|
||||
// Optimized wait for lockability without locking, relaxed
|
||||
void lock_unlock()
|
||||
{
|
||||
@ -240,12 +171,9 @@ public:
|
||||
{
|
||||
return m_value.load() < c_one - 1;
|
||||
}
|
||||
|
||||
// Special purpose logic
|
||||
bool downgrade_unique_vip_lock_to_low_or_unlock();
|
||||
};
|
||||
|
||||
// Simplified shared (reader) lock implementation. Mutually incompatible with low_lock and vip_lock.
|
||||
// Simplified shared (reader) lock implementation.
|
||||
class reader_lock final
|
||||
{
|
||||
shared_mutex& m_mutex;
|
||||
@ -283,47 +211,3 @@ public:
|
||||
m_upgraded ? m_mutex.unlock() : m_mutex.unlock_shared();
|
||||
}
|
||||
};
|
||||
|
||||
// Special shared (reader) lock, mutually exclusive with vip locks. Mutually incompatible with normal shared (reader) lock.
|
||||
class low_lock final
|
||||
{
|
||||
shared_mutex& m_mutex;
|
||||
|
||||
public:
|
||||
low_lock(const low_lock&) = delete;
|
||||
|
||||
low_lock& operator=(const low_lock&) = delete;
|
||||
|
||||
explicit low_lock(shared_mutex& mutex)
|
||||
: m_mutex(mutex)
|
||||
{
|
||||
m_mutex.lock_low();
|
||||
}
|
||||
|
||||
~low_lock()
|
||||
{
|
||||
m_mutex.unlock_low();
|
||||
}
|
||||
};
|
||||
|
||||
// Special shared (reader) lock, mutually exclusive with low locks. Mutually incompatible with normal shared (reader) lock.
|
||||
class vip_lock final
|
||||
{
|
||||
shared_mutex& m_mutex;
|
||||
|
||||
public:
|
||||
vip_lock(const vip_lock&) = delete;
|
||||
|
||||
vip_lock& operator=(const vip_lock&) = delete;
|
||||
|
||||
explicit vip_lock(shared_mutex& mutex)
|
||||
: m_mutex(mutex)
|
||||
{
|
||||
m_mutex.lock_vip();
|
||||
}
|
||||
|
||||
~vip_lock()
|
||||
{
|
||||
m_mutex.unlock_vip();
|
||||
}
|
||||
};
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
DECLARE(cpu_thread::g_threads_created){0};
|
||||
DECLARE(cpu_thread::g_threads_deleted){0};
|
||||
DECLARE(cpu_thread::g_suspend_counter){0};
|
||||
|
||||
LOG_CHANNEL(profiler);
|
||||
LOG_CHANNEL(sys_log, "SYS");
|
||||
@ -245,6 +246,9 @@ struct cpu_counter
|
||||
// For synchronizing suspend_all operation
|
||||
alignas(64) shared_mutex cpu_suspend_lock;
|
||||
|
||||
// Workload linked list
|
||||
alignas(64) atomic_t<cpu_thread::suspend_work*> cpu_suspend_work{};
|
||||
|
||||
// Semaphore for global thread array (global counter)
|
||||
alignas(64) atomic_t<u32> cpu_array_sema{0};
|
||||
|
||||
@ -306,7 +310,7 @@ struct cpu_counter
|
||||
};
|
||||
|
||||
template <typename F>
|
||||
void for_all_cpu(F&& func) noexcept
|
||||
void for_all_cpu(F func) noexcept
|
||||
{
|
||||
auto ctr = g_fxo->get<cpu_counter>();
|
||||
|
||||
@ -475,6 +479,7 @@ bool cpu_thread::check_state() noexcept
|
||||
|
||||
bool cpu_sleep_called = false;
|
||||
bool escape, retval;
|
||||
u64 susp_ctr = -1;
|
||||
|
||||
while (true)
|
||||
{
|
||||
@ -483,6 +488,16 @@ bool cpu_thread::check_state() noexcept
|
||||
{
|
||||
bool store = false;
|
||||
|
||||
// Easy way to obtain the suspend counter
|
||||
if (flags & cpu_flag::pause && !(flags & cpu_flag::wait))
|
||||
{
|
||||
susp_ctr = g_suspend_counter;
|
||||
}
|
||||
else
|
||||
{
|
||||
susp_ctr = -1;
|
||||
}
|
||||
|
||||
if (flags & cpu_flag::signal)
|
||||
{
|
||||
flags -= cpu_flag::signal;
|
||||
@ -559,8 +574,22 @@ bool cpu_thread::check_state() noexcept
|
||||
continue;
|
||||
}
|
||||
|
||||
// If only cpu_flag::pause was set, notification won't arrive
|
||||
g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
|
||||
// If only cpu_flag::pause was set, wait on suspend counter instead
|
||||
if (state0 & cpu_flag::pause)
|
||||
{
|
||||
// Hard way
|
||||
if (susp_ctr == umax)
|
||||
{
|
||||
g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Wait for current suspend_all operation
|
||||
while (busy_wait(), g_suspend_counter == susp_ctr)
|
||||
{
|
||||
g_suspend_counter.wait(susp_ctr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -641,69 +670,114 @@ std::string cpu_thread::dump_misc() const
|
||||
return fmt::format("Type: %s\n" "State: %s\n", typeid(*this).name(), state.load());
|
||||
}
|
||||
|
||||
cpu_thread::suspend_all::suspend_all(cpu_thread* _this) noexcept
|
||||
: m_this(_this)
|
||||
void cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
|
||||
{
|
||||
if (m_this)
|
||||
{
|
||||
m_this->state += cpu_flag::wait;
|
||||
}
|
||||
// Can't allow pre-set wait bit (it'd be a problem)
|
||||
verify(HERE), !_this || !(_this->state & cpu_flag::wait);
|
||||
|
||||
g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_vip();
|
||||
// Value must be reliable because cpu_flag::wait hasn't been observed only (but not if pause is set)
|
||||
const u64 susp_ctr = g_suspend_counter;
|
||||
|
||||
for_all_cpu([](cpu_thread* cpu)
|
||||
// Try to push workload
|
||||
auto& queue = g_fxo->get<cpu_counter>()->cpu_suspend_work;
|
||||
|
||||
do
|
||||
{
|
||||
// Should be atomic
|
||||
if (!(cpu->state & cpu_flag::pause))
|
||||
// Load current head
|
||||
next = queue.load();
|
||||
|
||||
if (!_this && next)
|
||||
{
|
||||
cpu->state += cpu_flag::pause;
|
||||
// If _this == nullptr, it only works if this is the first workload pushed
|
||||
g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
|
||||
continue;
|
||||
}
|
||||
});
|
||||
}
|
||||
while (!queue.compare_and_swap_test(next, this));
|
||||
|
||||
busy_wait(500);
|
||||
|
||||
while (true)
|
||||
if (!next)
|
||||
{
|
||||
bool ok = true;
|
||||
// First thread to push the work to the workload list pauses all threads and processes it
|
||||
std::lock_guard lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);
|
||||
|
||||
for_all_cpu([&](cpu_thread* cpu)
|
||||
{
|
||||
if (!(cpu->state & cpu_flag::wait))
|
||||
if (!(cpu->state & cpu_flag::pause) && cpu != _this)
|
||||
{
|
||||
ok = false;
|
||||
cpu->state += cpu_flag::pause;
|
||||
}
|
||||
});
|
||||
|
||||
if (ok) [[likely]]
|
||||
busy_wait(500);
|
||||
|
||||
while (true)
|
||||
{
|
||||
break;
|
||||
bool ok = true;
|
||||
|
||||
for_all_cpu([&](cpu_thread* cpu)
|
||||
{
|
||||
if (!(cpu->state & cpu_flag::wait) && cpu != _this)
|
||||
{
|
||||
ok = false;
|
||||
}
|
||||
});
|
||||
|
||||
if (ok) [[likely]]
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
busy_wait(500);
|
||||
}
|
||||
}
|
||||
// Extract queue and reverse element order (FILO to FIFO) (TODO: maybe leave order as is?)
|
||||
auto* head = queue.exchange(nullptr);
|
||||
|
||||
if (auto* prev = head->next)
|
||||
{
|
||||
head->next = nullptr;
|
||||
|
||||
do
|
||||
{
|
||||
auto* pre2 = prev->next;
|
||||
prev->next = head;
|
||||
|
||||
head = std::exchange(prev, pre2);
|
||||
}
|
||||
while (prev);
|
||||
}
|
||||
|
||||
// Execute all stored workload
|
||||
for (; head; head = head->next)
|
||||
{
|
||||
head->exec(head->func_ptr, head->res_buf);
|
||||
}
|
||||
|
||||
// Finalization
|
||||
g_suspend_counter++;
|
||||
|
||||
cpu_thread::suspend_all::~suspend_all()
|
||||
{
|
||||
// Make sure the latest thread does the cleanup and notifies others
|
||||
if (g_fxo->get<cpu_counter>()->cpu_suspend_lock.downgrade_unique_vip_lock_to_low_or_unlock())
|
||||
{
|
||||
for_all_cpu([&](cpu_thread* cpu)
|
||||
{
|
||||
cpu->state -= cpu_flag::pause;
|
||||
if (cpu != _this)
|
||||
{
|
||||
cpu->state -= cpu_flag::pause;
|
||||
}
|
||||
});
|
||||
|
||||
g_fxo->get<cpu_counter>()->cpu_suspend_lock.unlock_low();
|
||||
}
|
||||
else
|
||||
{
|
||||
g_fxo->get<cpu_counter>()->cpu_suspend_lock.lock_unlock();
|
||||
// Seems safe to set pause on self because wait flag hasn't been observed yet
|
||||
_this->state += cpu_flag::pause + cpu_flag::wait;
|
||||
|
||||
// Subscribe for notification broadcast
|
||||
while (busy_wait(), g_suspend_counter == susp_ctr)
|
||||
{
|
||||
g_suspend_counter.wait(susp_ctr);
|
||||
}
|
||||
|
||||
_this->check_state();
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_this)
|
||||
{
|
||||
m_this->check_state();
|
||||
}
|
||||
g_suspend_counter.notify_all();
|
||||
}
|
||||
|
||||
void cpu_thread::stop_all() noexcept
|
||||
@ -716,7 +790,7 @@ void cpu_thread::stop_all() noexcept
|
||||
}
|
||||
else
|
||||
{
|
||||
::vip_lock lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);
|
||||
std::lock_guard lock(g_fxo->get<cpu_counter>()->cpu_suspend_lock);
|
||||
|
||||
for_all_cpu([](cpu_thread* cpu)
|
||||
{
|
||||
|
@ -88,7 +88,7 @@ private:
|
||||
|
||||
public:
|
||||
// Thread stats for external observation
|
||||
static atomic_t<u64> g_threads_created, g_threads_deleted;
|
||||
static atomic_t<u64> g_threads_created, g_threads_deleted, g_suspend_counter;
|
||||
|
||||
// Get thread name (as assigned to named_thread)
|
||||
std::string get_name() const;
|
||||
@ -123,18 +123,50 @@ public:
|
||||
// Callback for cpu_flag::ret
|
||||
virtual void cpu_return() {}
|
||||
|
||||
// Thread locker
|
||||
class suspend_all
|
||||
// For internal use
|
||||
struct suspend_work
|
||||
{
|
||||
cpu_thread* m_this;
|
||||
void* func_ptr;
|
||||
void* res_buf;
|
||||
|
||||
public:
|
||||
suspend_all(cpu_thread* _this) noexcept;
|
||||
suspend_all(const suspend_all&) = delete;
|
||||
suspend_all& operator=(const suspend_all&) = delete;
|
||||
~suspend_all();
|
||||
// Type-erased op executor
|
||||
void (*exec)(void* func, void* res);
|
||||
|
||||
// Next object in the linked list
|
||||
suspend_work* next;
|
||||
|
||||
// Internal method
|
||||
void push(cpu_thread* _this) noexcept;
|
||||
};
|
||||
|
||||
// Suspend all threads and execute op (op may be executed by a thread other than the caller!)
|
||||
// Wraps `op` in a local suspend_work item and hands it to suspend_work::push().
// F must be callable with no arguments. The executor lambda is captureless, so
// the callable and the result slot reach it through its two void* parameters.
// For a non-void F, the result object is default-constructed first and then
// assigned by the executor, so the result type must support both operations.
// Returns whatever `op` returns (nothing when `op` returns void).
template <typename F>
|
||||
static auto suspend_all(cpu_thread* _this, F op)
|
||||
{
|
||||
// Void-returning op: there is no result to store, so the result pointer is null
if constexpr (std::is_void_v<std::invoke_result_t<F>>)
|
||||
{
|
||||
suspend_work work{&op, nullptr, [](void* func, void*)
|
||||
{
|
||||
// Recover the concrete callable type from the erased pointer and invoke it
(*static_cast<F*>(func))();
|
||||
}};
|
||||
|
|
||||
// NOTE(review): `work` and `op` are locals; presumably push() returns only after op has run - confirm against push()
work.push(_this);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Local result slot; the executor writes into it through the erased res_buf pointer
std::invoke_result_t<F> result;
|
||||
|
|
||||
suspend_work work{&op, &result, [](void* func, void* res_buf)
|
||||
{
|
||||
// Invoke the callable and assign its return value into the caller's result slot
*static_cast<std::invoke_result_t<F>*>(res_buf) = (*static_cast<F*>(func))();
|
||||
}};
|
||||
|
|
||||
work.push(_this);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// Stop all threads with cpu_flag::dbg_global_stop
|
||||
static void stop_all() noexcept;
|
||||
|
||||
|
@ -1275,7 +1275,8 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
|
||||
}
|
||||
|
||||
// Begin transaction
|
||||
build_transaction_enter(c, fall, x86::r12, 4);
|
||||
Label tx0 = build_transaction_enter(c, fall, x86::r12, 4);
|
||||
c.xbegin(tx0);
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::rbx));
|
||||
c.test(x86::eax, vm::rsrv_unique_lock);
|
||||
c.jnz(skip);
|
||||
@ -1336,7 +1337,6 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
|
||||
c.bind(fall);
|
||||
c.sar(x86::eax, 24);
|
||||
c.js(fail);
|
||||
c.lock().bts(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::wait));
|
||||
|
||||
// Touch memory if transaction failed without RETRY flag on the first attempt
|
||||
c.cmp(x86::r12, 1);
|
||||
@ -1361,7 +1361,14 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
|
||||
c.cmp(x86::rax, x86::r13);
|
||||
c.jne(fail2);
|
||||
|
||||
build_transaction_enter(c, fall2, x86::r12, 666);
|
||||
Label tx1 = build_transaction_enter(c, fall2, x86::r12, 666);
|
||||
c.bt(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
|
||||
c.jc(fail3);
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::rbx));
|
||||
c.and_(x86::rax, -128);
|
||||
c.cmp(x86::rax, x86::r13);
|
||||
c.jne(fail2);
|
||||
c.xbegin(tx1);
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
@ -1535,30 +1542,18 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
|
||||
default: break;
|
||||
}
|
||||
|
||||
cpu_thread::suspend_all cpu_lock(&ppu);
|
||||
|
||||
// Obtain unique lock
|
||||
while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
|
||||
return cpu_thread::suspend_all(&ppu, [&]
|
||||
{
|
||||
busy_wait(100);
|
||||
|
||||
// Give up if reservation has been updated
|
||||
if ((res & -128) != rtime)
|
||||
if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref<spu_rdata_t>(addr & -128)))
|
||||
{
|
||||
res -= 1;
|
||||
return false;
|
||||
data.release(reg_value);
|
||||
res += 127;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref<spu_rdata_t>(addr & -128)))
|
||||
{
|
||||
data.release(reg_value);
|
||||
res += 63;
|
||||
return true;
|
||||
}
|
||||
|
||||
res -= (vm::rsrv_unique_lock + 1);
|
||||
return false;
|
||||
res -= 1;
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
|
||||
|
@ -376,7 +376,8 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const
|
||||
}
|
||||
|
||||
// Begin transaction
|
||||
build_transaction_enter(c, fall, x86::r12, 4);
|
||||
Label tx0 = build_transaction_enter(c, fall, x86::r12, 4);
|
||||
c.xbegin(tx0);
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::rbx));
|
||||
c.test(x86::eax, vm::rsrv_unique_lock);
|
||||
c.jnz(skip);
|
||||
@ -450,7 +451,6 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const
|
||||
c.bind(fall);
|
||||
c.sar(x86::eax, 24);
|
||||
c.js(fail);
|
||||
c.lock().bts(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::wait));
|
||||
|
||||
// Touch memory if transaction failed without RETRY flag on the first attempt
|
||||
c.cmp(x86::r12, 1);
|
||||
@ -471,11 +471,14 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const
|
||||
c.lock().xadd(x86::qword_ptr(x86::rbx), x86::rax);
|
||||
c.test(x86::eax, vm::rsrv_unique_lock);
|
||||
c.jnz(fail3);
|
||||
c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
|
||||
c.jc(fail3);
|
||||
c.and_(x86::rax, -128);
|
||||
c.cmp(x86::rax, x86::r13);
|
||||
c.jne(fail2);
|
||||
|
||||
build_transaction_enter(c, fall2, x86::r12, 666);
|
||||
Label tx1 = build_transaction_enter(c, fall2, x86::r12, 666);
|
||||
c.xbegin(tx1);
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
@ -648,7 +651,8 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
|
||||
}
|
||||
|
||||
// Begin transaction
|
||||
build_transaction_enter(c, fall, x86::r12, 8);
|
||||
Label tx0 = build_transaction_enter(c, fall, x86::r12, 8);
|
||||
c.xbegin(tx0);
|
||||
c.test(x86::dword_ptr(x86::rbx), vm::rsrv_unique_lock);
|
||||
c.jnz(skip);
|
||||
|
||||
@ -683,7 +687,6 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
|
||||
//c.jmp(fall);
|
||||
|
||||
c.bind(fall);
|
||||
c.lock().bts(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::wait));
|
||||
|
||||
// Touch memory if transaction failed without RETRY flag on the first attempt
|
||||
c.cmp(x86::r12, 1);
|
||||
@ -703,7 +706,12 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
|
||||
c.test(x86::eax, vm::rsrv_unique_lock);
|
||||
c.jnz(fall2);
|
||||
|
||||
build_transaction_enter(c, fall2, x86::r12, 666);
|
||||
Label tx1 = build_transaction_enter(c, fall2, x86::r12, 666);
|
||||
|
||||
// Check pause flag
|
||||
c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
|
||||
c.jc(fall2);
|
||||
c.xbegin(tx1);
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
@ -1848,38 +1856,26 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
||||
|
||||
if (render) render->pause();
|
||||
|
||||
cpu_thread::suspend_all cpu_lock(this);
|
||||
|
||||
// Obtain unique lock
|
||||
while (res.bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
|
||||
const bool ok = cpu_thread::suspend_all(this, [&]()
|
||||
{
|
||||
busy_wait(100);
|
||||
|
||||
// Give up if reservation has been updated
|
||||
if ((res & -128) != rtime)
|
||||
if ((res & -128) == rtime)
|
||||
{
|
||||
res -= 1;
|
||||
if (render) render->unpause();
|
||||
return false;
|
||||
auto& data = vm::_ref<spu_rdata_t>(addr);
|
||||
|
||||
if (cmp_rdata(rdata, data))
|
||||
{
|
||||
mov_rdata(data, to_write);
|
||||
res += 127;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((res & -128) == rtime)
|
||||
{
|
||||
auto& data = vm::_ref<spu_rdata_t>(addr);
|
||||
res -= 1;
|
||||
return false;
|
||||
});
|
||||
|
||||
if (cmp_rdata(rdata, data))
|
||||
{
|
||||
mov_rdata(data, to_write);
|
||||
res += 63;
|
||||
if (render) render->unpause();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
res -= (vm::rsrv_unique_lock | 1);
|
||||
if (render) render->unpause();
|
||||
return false;
|
||||
return ok;
|
||||
}
|
||||
case 1: return true;
|
||||
case 0: return false;
|
||||
@ -1973,15 +1969,11 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
cpu_thread::suspend_all cpu_lock(cpu);
|
||||
|
||||
while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
|
||||
cpu_thread::suspend_all(cpu, [&]
|
||||
{
|
||||
busy_wait(100);
|
||||
}
|
||||
|
||||
mov_rdata(vm::_ref<spu_rdata_t>(addr), *static_cast<const spu_rdata_t*>(to_write));
|
||||
vm::reservation_acquire(addr, 128) += 63;
|
||||
mov_rdata(vm::_ref<spu_rdata_t>(addr), *static_cast<const spu_rdata_t*>(to_write));
|
||||
vm::reservation_acquire(addr, 128) += 127;
|
||||
});
|
||||
}
|
||||
|
||||
if (render) render->unpause();
|
||||
|
@ -497,33 +497,25 @@ namespace vm
|
||||
|
||||
void reservation_op_internal(u32 addr, std::function<bool()> func)
|
||||
{
|
||||
const auto _cpu = get_current_cpu_thread();
|
||||
|
||||
// Acknowledge contender if necessary (TODO: check)
|
||||
_cpu->state += cpu_flag::wait;
|
||||
|
||||
const bool ok = cpu_thread::suspend_all(get_current_cpu_thread(), [&]
|
||||
{
|
||||
cpu_thread::suspend_all cpu_lock(_cpu);
|
||||
|
||||
// Wait to acquire unique lock
|
||||
while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u32>(vm::rsrv_unique_lock)))
|
||||
{
|
||||
busy_wait(100);
|
||||
}
|
||||
|
||||
if (func())
|
||||
{
|
||||
// Success, release all locks if necessary
|
||||
vm::reservation_acquire(addr, 128) += 63;
|
||||
vm::reservation_acquire(addr, 128) += 127;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fake update (TODO)
|
||||
vm::reservation_acquire(addr, 128) += 63;
|
||||
vm::reservation_acquire(addr, 128) -= 1;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
vm::reservation_notifier(addr, 128).notify_all();
|
||||
if (ok)
|
||||
{
|
||||
vm::reservation_notifier(addr, 128).notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
void reservation_escape_internal()
|
||||
|
@ -319,35 +319,36 @@ std::vector<u32> cheat_engine::search(const T value, const std::vector<u32>& to_
|
||||
if (Emu.IsStopped())
|
||||
return {};
|
||||
|
||||
cpu_thread::suspend_all cpu_lock(nullptr);
|
||||
|
||||
if (!to_filter.empty())
|
||||
cpu_thread::suspend_all(nullptr, [&]
|
||||
{
|
||||
for (const auto& off : to_filter)
|
||||
if (!to_filter.empty())
|
||||
{
|
||||
if (vm::check_addr(off, sizeof(T)))
|
||||
for (const auto& off : to_filter)
|
||||
{
|
||||
if (*vm::get_super_ptr<T>(off) == value_swapped)
|
||||
results.push_back(off);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Looks through mapped memory
|
||||
for (u32 page_start = 0x10000; page_start < 0xF0000000; page_start += 4096)
|
||||
{
|
||||
if (vm::check_addr(page_start))
|
||||
{
|
||||
// Assumes the values are aligned
|
||||
for (u32 index = 0; index < 4096; index += sizeof(T))
|
||||
if (vm::check_addr(off, sizeof(T)))
|
||||
{
|
||||
if (*vm::get_super_ptr<T>(page_start + index) == value_swapped)
|
||||
results.push_back(page_start + index);
|
||||
if (*vm::get_super_ptr<T>(off) == value_swapped)
|
||||
results.push_back(off);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Looks through mapped memory
|
||||
for (u32 page_start = 0x10000; page_start < 0xF0000000; page_start += 4096)
|
||||
{
|
||||
if (vm::check_addr(page_start))
|
||||
{
|
||||
// Assumes the values are aligned
|
||||
for (u32 index = 0; index < 4096; index += sizeof(T))
|
||||
{
|
||||
if (*vm::get_super_ptr<T>(page_start + index) == value_swapped)
|
||||
results.push_back(page_start + index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return results;
|
||||
}
|
||||
@ -361,19 +362,17 @@ T cheat_engine::get_value(const u32 offset, bool& success)
|
||||
return 0;
|
||||
}
|
||||
|
||||
cpu_thread::suspend_all cpu_lock(nullptr);
|
||||
|
||||
if (!vm::check_addr(offset, sizeof(T)))
|
||||
return cpu_thread::suspend_all(nullptr, [&]() -> T
|
||||
{
|
||||
success = false;
|
||||
return 0;
|
||||
}
|
||||
if (!vm::check_addr(offset, sizeof(T)))
|
||||
{
|
||||
success = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
success = true;
|
||||
|
||||
T ret_value = *vm::get_super_ptr<T>(offset);
|
||||
|
||||
return ret_value;
|
||||
success = true;
|
||||
return *vm::get_super_ptr<T>(offset);
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -382,55 +381,61 @@ bool cheat_engine::set_value(const u32 offset, const T value)
|
||||
if (Emu.IsStopped())
|
||||
return false;
|
||||
|
||||
cpu_thread::suspend_all cpu_lock(nullptr);
|
||||
|
||||
if (!vm::check_addr(offset, sizeof(T)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
*vm::get_super_ptr<T>(offset) = value;
|
||||
|
||||
const bool exec_code_at_start = vm::check_addr(offset, 1, vm::page_executable);
|
||||
const bool exec_code_at_end = [&]()
|
||||
return cpu_thread::suspend_all(nullptr, [&]
|
||||
{
|
||||
if constexpr (sizeof(T) == 1)
|
||||
if (!vm::check_addr(offset, sizeof(T)))
|
||||
{
|
||||
return exec_code_at_start;
|
||||
}
|
||||
else
|
||||
{
|
||||
return vm::check_addr(offset + sizeof(T) - 1, 1, vm::page_executable);
|
||||
}
|
||||
}();
|
||||
|
||||
if (exec_code_at_end || exec_code_at_start)
|
||||
{
|
||||
extern void ppu_register_function_at(u32, u32, ppu_function_t);
|
||||
|
||||
u32 addr = offset, size = sizeof(T);
|
||||
|
||||
if (exec_code_at_end && exec_code_at_start)
|
||||
{
|
||||
size = align<u32>(addr + size, 4) - (addr & -4);
|
||||
addr &= -4;
|
||||
}
|
||||
else if (exec_code_at_end)
|
||||
{
|
||||
size -= align<u32>(size - 4096 + (addr & 4095), 4);
|
||||
addr = align<u32>(addr, 4096);
|
||||
}
|
||||
else if (exec_code_at_start)
|
||||
{
|
||||
size = align<u32>(4096 - (addr & 4095), 4);
|
||||
addr &= -4;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reinitialize executable code
|
||||
ppu_register_function_at(addr, size, nullptr);
|
||||
}
|
||||
*vm::get_super_ptr<T>(offset) = value;
|
||||
|
||||
return true;
|
||||
const bool exec_code_at_start = vm::check_addr(offset, 1, vm::page_executable);
|
||||
const bool exec_code_at_end = [&]()
|
||||
{
|
||||
if constexpr (sizeof(T) == 1)
|
||||
{
|
||||
return exec_code_at_start;
|
||||
}
|
||||
else
|
||||
{
|
||||
return vm::check_addr(offset + sizeof(T) - 1, 1, vm::page_executable);
|
||||
}
|
||||
}();
|
||||
|
||||
if (exec_code_at_end || exec_code_at_start)
|
||||
{
|
||||
extern void ppu_register_function_at(u32, u32, ppu_function_t);
|
||||
|
||||
u32 addr = offset, size = sizeof(T);
|
||||
|
||||
if (exec_code_at_end && exec_code_at_start)
|
||||
{
|
||||
size = align<u32>(addr + size, 4) - (addr & -4);
|
||||
addr &= -4;
|
||||
}
|
||||
else if (exec_code_at_end)
|
||||
{
|
||||
size -= align<u32>(size - 4096 + (addr & 4095), 4);
|
||||
addr = align<u32>(addr, 4096);
|
||||
}
|
||||
else if (exec_code_at_start)
|
||||
{
|
||||
size = align<u32>(4096 - (addr & 4095), 4);
|
||||
addr &= -4;
|
||||
}
|
||||
|
||||
// Reinitialize executable code
|
||||
ppu_register_function_at(addr, size, nullptr);
|
||||
}
|
||||
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
bool cheat_engine::is_addr_safe(const u32 offset)
|
||||
|
Loading…
Reference in New Issue
Block a user