Mirror of https://github.com/RPCS3/rpcs3.git
Implement cpu_thread::suspend_all
Remove Accurate PUTLLC option. Implement fallback path for SPU transactions.
This commit is contained in:
parent 17d0dcb7a2
commit 5d45a3e47d
@@ -190,18 +190,34 @@ asmjit::JitRuntime& asmjit::get_global_runtime()
	return g_rt;
}

-asmjit::Label asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback)
+void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than)
{
	Label fall = c.newLabel();
	Label begin = c.newLabel();
	c.jmp(begin);
	c.bind(fall);
-	c.test(x86::eax, _XABORT_RETRY);
-	c.jz(fallback);
+
+	if (less_than < 65)
+	{
+		c.add(ctr, 1);
+		c.test(x86::eax, _XABORT_RETRY);
+		c.jz(fallback);
+	}
+	else
+	{
+		// Count an attempt without RETRY flag as 65 normal attempts and continue
+		c.not_(x86::eax);
+		c.and_(x86::eax, _XABORT_RETRY);
+		c.shl(x86::eax, 5);
+		c.add(x86::eax, 1); // eax = RETRY ? 1 : 65
+		c.add(ctr, x86::rax);
+	}
+
+	c.cmp(ctr, less_than);
+	c.jae(fallback);
	c.align(kAlignCode, 16);
	c.bind(begin);
	c.xbegin(fall);
-	return begin;
}

void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code)
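For readers unfamiliar with Intel TSX, here is a minimal C++ sketch of the control flow the new helper emits. The function name and the body/fallback callbacks are hypothetical; the real code is generated with asmjit and keeps the counter in the register passed as ctr:

#include <immintrin.h>

// Hypothetical illustration of the emitted retry loop: an abort without
// _XABORT_RETRY is weighted as 65 attempts, so "hopeless" aborts exhaust
// the budget almost immediately while transient conflicts keep retrying.
template <typename Body, typename Fallback>
auto transact_with_budget(unsigned less_than, Body body, Fallback fallback)
{
    for (unsigned ctr = 0; ctr < less_than;)
    {
        const unsigned status = _xbegin();

        if (status == _XBEGIN_STARTED)
        {
            auto result = body(); // transactional region
            _xend();
            return result;
        }

        // Weighted attempt counting, as in the emitted code above
        ctr += (status & _XABORT_RETRY) ? 1 : 65;
    }

    return fallback();
}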
@@ -43,7 +43,7 @@ namespace asmjit
	asmjit::JitRuntime& get_global_runtime();

	// Emit xbegin and adjacent loop, return label at xbegin
-	Label build_transaction_enter(X86Assembler& c, Label fallback);
+	void build_transaction_enter(X86Assembler& c, Label fallback, const X86Gp& ctr, uint less_than);

	// Emit xabort
	void build_transaction_abort(X86Assembler& c, unsigned char code);
@@ -3,6 +3,7 @@
#include "Emu/System.h"
#include "Emu/IdManager.h"
+#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/PPUThread.h"
#include "Emu/Cell/RawSPUThread.h"
#include "Emu/Cell/lv2/sys_mmapper.h"
#include "Emu/Cell/lv2/sys_event.h"
@@ -1101,6 +1102,11 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)

	try
	{
+		if (cpu)
+		{
+			vm::temporary_unlock(*cpu);
+		}
+
		handled = rsx::g_access_violation_handler(addr, is_writing);
	}
	catch (const std::exception& e)
@@ -1109,7 +1115,6 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)

		if (cpu)
		{
-			vm::temporary_unlock(*cpu);
			cpu->state += cpu_flag::dbg_pause;

			if (cpu->test_stopped())
@@ -1131,6 +1136,10 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)

		return true;
	}

+	if (cpu && cpu->test_stopped())
+	{
+	}
+}

	auto code = (const u8*)RIP(context);
@@ -273,6 +273,177 @@ void shared_cond::imp_notify() noexcept
	balanced_awaken<true>(m_cvx32, utils::popcnt32(wait_mask));
}

void shared_cond::wait_all() noexcept
{
	// Try to acquire waiting state without locking but only if there are other locks
	const auto [old_, result] = m_cvx32.fetch_op([](u64& cvx32) -> u64
	{
		// Check waiting alone
		if ((cvx32 & 0xffffffff) == 0)
		{
			return 0;
		}

		// Combine used bits and invert to find least significant bit unused
		const u32 slot = utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true);

		// Set waiting bit (does nothing if all slots are used)
		cvx32 |= (1ull << slot) & 0xffffffff;
		return 1ull << slot;
	});

	if (!result)
	{
		return;
	}

	if (result > 0xffffffffu)
	{
		// All slots are used, fallback to spin wait
		while (m_cvx32 & 0xffffffff)
		{
			busy_wait();
		}

		return;
	}

	const u64 wait_bit = result;
	const u64 lock_bit = wait_bit | (wait_bit << 32);

	balanced_wait_until(m_cvx32, -1, [&](u64& cvx32, auto... ret) -> int
	{
		if ((cvx32 & wait_bit) == 0)
		{
			// Remove signal and unlock at once
			cvx32 &= ~lock_bit;
			return +1;
		}

		if constexpr (sizeof...(ret))
		{
			cvx32 &= ~lock_bit;
			return -1;
		}

		return 0;
	});
}

bool shared_cond::wait_all(shared_cond::shared_lock& lock) noexcept
{
	AUDIT(lock.m_this == this);

	if (lock.m_slot >= 32)
	{
		// Invalid argument, assume notified
		return true;
	}

	const u64 wait_bit = c_wait << lock.m_slot;
	const u64 lock_bit = c_lock << lock.m_slot;

	// Try to acquire waiting state only if there are other locks
	const auto [old_, not_alone] = m_cvx32.fetch_op([&](u64& cvx32)
	{
		// Check locking alone
		if (((cvx32 >> 32) & cvx32) == (lock_bit >> 32))
		{
			return false;
		}

		// c_lock -> c_wait, c_sig -> unlock
		cvx32 &= ~(lock_bit & ~wait_bit);
		return true;
	});

	if (!not_alone)
	{
		return false;
	}
	else
	{
		// Set invalid slot to acknowledge unlocking
		lock.m_slot = 33;
	}

	if ((old_ & wait_bit) == 0)
	{
		// Already signaled, return without waiting
		return true;
	}

	balanced_wait_until(m_cvx32, -1, [&](u64& cvx32, auto... ret) -> int
	{
		if ((cvx32 & wait_bit) == 0)
		{
			// Remove signal and unlock at once
			cvx32 &= ~lock_bit;
			return +1;
		}

		if constexpr (sizeof...(ret))
		{
			cvx32 &= ~lock_bit;
			return -1;
		}

		return 0;
	});

	return true;
}

bool shared_cond::notify_all(shared_cond::shared_lock& lock) noexcept
{
	AUDIT(lock.m_this == this);

	if (lock.m_slot >= 32)
	{
		// Invalid argument
		return false;
	}

	const u64 slot_mask = c_sig << lock.m_slot;

	auto [old, ok] = m_cvx32.fetch_op([&](u64& cvx32)
	{
		if (((cvx32 << 32) & cvx32) != slot_mask)
		{
			return false;
		}

		if (const u64 sig_mask = cvx32 & 0xffffffff)
		{
			cvx32 &= (0xffffffffull << 32) & ~slot_mask;
			cvx32 |= (sig_mask << 32) & ~slot_mask;
			return true;
		}

		return false;
	});

	if (!ok)
	{
		// Not an exclusive reader
		return false;
	}

	// Set invalid slot to acknowledge unlocking
	lock.m_slot = 34;

	// Determine if some waiters need a syscall notification
	const u64 wait_mask = old & (~old >> 32);

	if (UNLIKELY(!wait_mask))
	{
		return true;
	}

	balanced_awaken<true>(m_cvx32, utils::popcnt32(wait_mask));
	return true;
}

bool lf_queue_base::wait(u64 _timeout)
{
	auto _old = m_head.compare_and_swap(0, 1);
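The fetch_op pattern above is used throughout this file. A minimal stand-in with std::atomic, assuming CAS-loop semantics (fetch_op itself belongs to RPCS3's atomic_t API and is not shown in this diff):

#include <atomic>
#include <utility>

// Assumed semantics: apply `op` to a local copy in a CAS loop; return the
// pair {value observed before the successful exchange, op's return value}.
// `op` may modify its argument in place; a false-ish return can be used by
// callers to detect that nothing was (or should be) changed.
template <typename T, typename F>
auto fetch_op(std::atomic<T>& var, F op)
{
    T old = var.load();

    while (true)
    {
        T value = old;
        auto ret = op(value);

        if (var.compare_exchange_weak(old, value))
        {
            return std::make_pair(old, ret);
        }
    }
}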
@@ -206,7 +206,7 @@ class shared_cond
		m_slot = m_this->m_cvx32.atomic_op([](u64& cvx32)
		{
			// Combine used bits and invert to find least significant bit unused
-			const u32 slot = utils::cnttz32(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true);
+			const u32 slot = utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true);

			// Set lock bits (does nothing if all slots are used)
			const u64 bit = (1ull << slot) & 0xffffffff;
@@ -217,6 +217,13 @@ class shared_cond

	shared_lock(const shared_lock&) = delete;

+	shared_lock(shared_lock&& rhs)
+		: m_this(rhs.m_this)
+		, m_slot(rhs.m_slot)
+	{
+		rhs.m_slot = 32;
+	}
+
	shared_lock& operator=(const shared_lock&) = delete;

	~shared_lock()
@@ -261,6 +268,10 @@ public:
		return imp_wait(lock.m_slot, usec_timeout);
	}

+	void wait_all() noexcept;
+
+	bool wait_all(shared_lock& lock) noexcept;
+
	void notify_all() noexcept
	{
		if (LIKELY(!m_cvx32))
@@ -268,4 +279,6 @@ public:

		imp_notify();
	}

+	bool notify_all(shared_lock& lock) noexcept;
};
llvm
@@ -1 +1 @@
-Subproject commit b860b5e8f4ee90d6eb567d83ce8ed1a3e71e496f
+Subproject commit 99b5284463025849c59067e79a3c08899049757e
@@ -19,10 +19,13 @@ void fmt_class_string<cpu_flag>::format(std::string& out, u64 arg)
	{
	case cpu_flag::stop: return "STOP";
	case cpu_flag::exit: return "EXIT";
	case cpu_flag::wait: return "w";
	case cpu_flag::pause: return "p";
	case cpu_flag::suspend: return "s";
	case cpu_flag::ret: return "ret";
	case cpu_flag::signal: return "sig";
	case cpu_flag::memory: return "mem";
	case cpu_flag::jit_return: return "JIT";
	case cpu_flag::dbg_global_pause: return "G-PAUSE";
	case cpu_flag::dbg_global_stop: return "G-EXIT";
	case cpu_flag::dbg_pause: return "PAUSE";
@@ -42,10 +45,43 @@ void fmt_class_string<bs_t<cpu_flag>>::format(std::string& out, u64 arg)

thread_local cpu_thread* g_tls_current_cpu_thread = nullptr;

// For coordination and notification
alignas(64) shared_cond g_cpu_array_lock;

// For cpu_flag::pause bit setting/removing
alignas(64) shared_mutex g_cpu_pause_lock;

// For cpu_flag::pause
alignas(64) atomic_t<u64> g_cpu_pause_ctr{0};

// Semaphore for global thread array (global counter)
alignas(64) atomic_t<u32> g_cpu_array_sema{0};

// Semaphore subdivision for each array slot (64 x N in total)
atomic_t<u64> g_cpu_array_bits[6]{};

// All registered threads
atomic_t<cpu_thread*> g_cpu_array[sizeof(g_cpu_array_bits) * 8]{};

template <typename F>
void for_all_cpu(F&& func) noexcept
{
	for (u32 i = 0; i < ::size32(g_cpu_array_bits); i++)
	{
		for (u64 bits = g_cpu_array_bits[i]; bits; bits &= bits - 1)
		{
			const u64 index = i * 64 + utils::cnttz64(bits, true);

			if (cpu_thread* cpu = g_cpu_array[index].load())
			{
				func(cpu);
			}
		}
	}
}
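The inner loop of for_all_cpu relies on the classic set-bit iteration idiom: bits &= bits - 1 clears the lowest set bit each pass, so the loop visits exactly the occupied slots. A self-contained demonstration, with utils::cnttz64 approximated by a portable loop:

#include <cstdint>
#include <cstdio>

int main()
{
    std::uint64_t bits = 0b101001; // slots 0, 3 and 5 occupied

    for (; bits; bits &= bits - 1)
    {
        // portable stand-in for utils::cnttz64 (count trailing zeros)
        unsigned slot = 0;
        for (std::uint64_t b = bits; !(b & 1); b >>= 1)
            slot++;

        std::printf("visiting slot %u\n", slot); // prints 0, 3, 5
    }
}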
void cpu_thread::operator()()
{
	state -= cpu_flag::exit;

	g_tls_current_cpu_thread = this;

	if (g_cfg.core.thread_scheduler_enabled)
@@ -58,6 +94,48 @@ void cpu_thread::operator()()
		thread_ctrl::set_native_priority(-1);
	}

	// Register thread in g_cpu_array
	if (!g_cpu_array_sema.try_inc(sizeof(g_cpu_array_bits) * 8))
	{
		LOG_FATAL(GENERAL, "Too many threads");
		Emu.Pause();
		return;
	}

	u64 array_slot = -1;

	for (u32 i = 0;; i = (i + 1) % ::size32(g_cpu_array_bits))
	{
		if (LIKELY(~g_cpu_array_bits[i]))
		{
			const u64 found = g_cpu_array_bits[i].atomic_op([](u64& bits) -> u64
			{
				// Find empty array slot and set its bit
				if (LIKELY(~bits))
				{
					const u64 bit = utils::cnttz64(~bits, true);
					bits |= 1ull << bit;
					return bit;
				}

				return 64;
			});

			if (LIKELY(found < 64))
			{
				// Fixup
				array_slot = i * 64 + found;
				break;
			}
		}
	}

	// Register and wait if necessary
	verify("g_cpu_array[...] -> this" HERE), g_cpu_array[array_slot].exchange(this) == nullptr;

	state += cpu_flag::wait;
	g_cpu_array_lock.wait_all();

	// Check thread status
	while (!(state & (cpu_flag::exit + cpu_flag::dbg_global_stop)))
	{
@@ -86,6 +164,13 @@ void cpu_thread::operator()()
		thread_ctrl::wait();
	}

	// Unregister and wait if necessary
	state += cpu_flag::wait;
	verify("g_cpu_array[...] -> null" HERE), g_cpu_array[array_slot].exchange(nullptr) == this;
	g_cpu_array_bits[array_slot / 64] &= ~(1ull << (array_slot % 64));
	g_cpu_array_sema--;
	g_cpu_array_lock.wait_all();
}

void cpu_thread::on_abort()
@@ -105,7 +190,7 @@ cpu_thread::cpu_thread(u32 id)
	g_threads_created++;
}

-bool cpu_thread::check_state()
+bool cpu_thread::check_state() noexcept
{
#ifdef WITH_GDB_DEBUGGER
	if (state & cpu_flag::dbg_pause)
@@ -117,6 +202,11 @@ bool cpu_thread::check_state()
	bool cpu_sleep_called = false;
	bool cpu_flag_memory = false;

+	if (!(state & cpu_flag::wait))
+	{
+		state += cpu_flag::wait;
+	}
+
	while (true)
	{
		if (state & cpu_flag::memory)
@@ -131,8 +221,9 @@ bool cpu_thread::check_state()
			state -= cpu_flag::memory;
		}

-		if (state & cpu_flag::exit + cpu_flag::jit_return + cpu_flag::dbg_global_stop)
+		if (state & (cpu_flag::exit + cpu_flag::jit_return + cpu_flag::dbg_global_stop))
		{
			state += cpu_flag::wait;
			return true;
		}

@@ -141,7 +232,24 @@ bool cpu_thread::check_state()
			cpu_sleep_called = false;
		}

-		if (!is_paused())
+		const auto [state0, escape] = state.fetch_op([&](bs_t<cpu_flag>& flags)
		{
			// Check pause flags which hold thread inside check_state
			if (flags & (cpu_flag::pause + cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause))
			{
				return false;
			}

			// Atomically clean wait flag and escape
			if (!(flags & (cpu_flag::exit + cpu_flag::jit_return + cpu_flag::dbg_global_stop + cpu_flag::ret + cpu_flag::stop)))
			{
				flags -= cpu_flag::wait;
			}

			return true;
		});

		if (escape)
		{
			if (cpu_flag_memory)
			{
@@ -150,14 +258,43 @@ bool cpu_thread::check_state()

			break;
		}
-		else if (!cpu_sleep_called && state & cpu_flag::suspend)
+		else if (!cpu_sleep_called && state0 & cpu_flag::suspend)
		{
			cpu_sleep();
			cpu_sleep_called = true;
			continue;
		}

-		thread_ctrl::wait();
+		if (state & cpu_flag::wait)
		{
			// Spin wait once for a bit before resorting to thread_ctrl::wait
			for (u32 i = 0; i < 10; i++)
			{
				if (state0 & (cpu_flag::pause + cpu_flag::suspend))
				{
					busy_wait(500);
				}
				else
				{
					break;
				}
			}

			if (!(state0 & (cpu_flag::pause + cpu_flag::suspend)))
			{
				continue;
			}
		}

		if (state0 & (cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause))
		{
			thread_ctrl::wait();
		}
		else
		{
			// If only cpu_flag::pause was set, notification won't arrive
			g_cpu_array_lock.wait_all();
		}
	}

	const auto state_ = state.load();
@@ -196,3 +333,90 @@ std::string cpu_thread::dump() const
{
	return fmt::format("Type: %s\n" "State: %s\n", typeid(*this).name(), state.load());
}

cpu_thread::suspend_all::suspend_all(cpu_thread* _this) noexcept
	: m_lock(g_cpu_array_lock.try_shared_lock())
	, m_this(_this)
{
	// TODO
	if (!m_lock)
	{
		LOG_FATAL(GENERAL, "g_cpu_array_lock: too many concurrent accesses");
		Emu.Pause();
		return;
	}

	if (m_this)
	{
		m_this->state += cpu_flag::wait;
	}

	g_cpu_pause_ctr++;

	reader_lock lock(g_cpu_pause_lock);

	for_all_cpu([](cpu_thread* cpu)
	{
		cpu->state += cpu_flag::pause;
	});

	busy_wait(500);

	while (true)
	{
		bool ok = true;

		for_all_cpu([&](cpu_thread* cpu)
		{
			if (!(cpu->state & cpu_flag::wait))
			{
				ok = false;
			}
		});

		if (LIKELY(ok))
		{
			break;
		}

		busy_wait(500);
	}
}

cpu_thread::suspend_all::~suspend_all()
{
	// Make sure the latest thread does the cleanup and notifies others
	u64 pause_ctr = 0;

	while ((pause_ctr = g_cpu_pause_ctr), !g_cpu_array_lock.wait_all(m_lock))
	{
		if (pause_ctr)
		{
			std::lock_guard lock(g_cpu_pause_lock);

			// Detect possible unfortunate reordering of flag clearing after suspend_all's reader lock
			if (g_cpu_pause_ctr != pause_ctr)
			{
				continue;
			}

			for_all_cpu([&](cpu_thread* cpu)
			{
				if (g_cpu_pause_ctr == pause_ctr)
				{
					cpu->state -= cpu_flag::pause;
				}
			});
		}

		if (g_cpu_array_lock.notify_all(m_lock))
		{
			break;
		}
	}

	if (m_this)
	{
		m_this->check_state();
	}
}
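Schematic use of the new RAII guard, mirroring the call sites added further below in do_putlluc and process_mfc_cmd (the wrapper function and its name are hypothetical):

// While `cpu_lock` is alive, every registered cpu_thread is parked in
// check_state() with cpu_flag::pause set, so the non-transactional
// read-modify-write below cannot race with any emulated core.
void atomic_128_store_fallback(spu_thread* spu)
{
    cpu_thread::suspend_all cpu_lock(spu); // suspends all other threads

    // ... perform the 128-byte update here, no transaction needed ...

}   // destructor clears cpu_flag::pause and resumes the suspended threads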
@@ -2,12 +2,15 @@

#include "../Utilities/Thread.h"
#include "../Utilities/bit_set.h"
+#include "../Utilities/cond.h"

// Thread state flags
enum class cpu_flag : u32
{
	stop, // Thread not running (HLE, initial state)
	exit, // Irreversible exit
+	wait, // Indicates waiting state, set by the thread itself
+	pause, // Thread suspended by suspend_all technique
	suspend, // Thread suspended
	ret, // Callback return requested
	signal, // Thread received a signal (HLE)
@@ -39,15 +42,15 @@ public:
	const u32 id;

	// Public thread state
-	atomic_bs_t<cpu_flag> state{+cpu_flag::stop};
+	atomic_bs_t<cpu_flag> state{cpu_flag::stop + cpu_flag::wait};

	// Process thread state, return true if the checker must return
-	bool check_state();
+	bool check_state() noexcept;

	// Process thread state (pause)
	[[nodiscard]] bool test_stopped()
	{
-		if (UNLIKELY(state))
+		if (state)
		{
			if (check_state())
			{
@@ -99,6 +102,20 @@ public:

	// Callback for vm::temporary_unlock
	virtual void cpu_unmem() {}

+	// Thread locker
+	class suspend_all
+	{
+		decltype(std::declval<shared_cond&>().try_shared_lock()) m_lock;
+
+		cpu_thread* m_this;
+
+	public:
+		suspend_all(cpu_thread* _this) noexcept;
+		suspend_all(const suspend_all&) = delete;
+		suspend_all& operator=(const suspend_all&) = delete;
+		~suspend_all();
+	};
};

inline cpu_thread* get_current_cpu_thread() noexcept
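The cpu_flag::stop + cpu_flag::wait initializer works because state is a bit set of enum values. A minimal sketch of the assumed bs_t<> semantics (the real class lives in Utilities/bit_set.h and is considerably richer):

#include <cstdint>

enum class flag : unsigned { stop, exit, wait, pause };

// `a + b` unions flags, `s - f` clears one, `s & f`/`s & t` test membership,
// matching expressions like `state & (cpu_flag::exit + cpu_flag::jit_return)`.
struct flag_set
{
    std::uint32_t raw = 0;

    friend flag_set operator+(flag_set s, flag f) { s.raw |= 1u << unsigned(f); return s; }
    friend flag_set operator-(flag_set s, flag f) { s.raw &= ~(1u << unsigned(f)); return s; }
    friend bool operator&(flag_set s, flag f) { return s.raw & (1u << unsigned(f)); }
    friend bool operator&(flag_set s, flag_set t) { return s.raw & t.raw; }
};

inline flag_set operator+(flag a, flag b) { return flag_set{} + a + b; }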
@@ -1064,11 +1064,12 @@ const auto ppu_stwcx_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, u64 rd
	c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
	c.shr(args[0], 7);
	c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0], 3));
+	c.xor_(args[0].r32(), args[0].r32());
	c.bswap(args[2].r32());
	c.bswap(args[3].r32());

	// Begin transaction
-	Label begin = build_transaction_enter(c, fall);
+	build_transaction_enter(c, fall, args[0], 16);
	c.mov(x86::rax, x86::qword_ptr(x86::r10));
	c.and_(x86::rax, -128);
	c.cmp(x86::rax, args[1]);
@@ -1184,11 +1185,12 @@ const auto ppu_stdcx_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, u64 rd
	c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
	c.shr(args[0], 7);
	c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0], 3));
+	c.xor_(args[0].r32(), args[0].r32());
	c.bswap(args[2]);
	c.bswap(args[3]);

	// Begin transaction
-	Label begin = build_transaction_enter(c, fall);
+	build_transaction_enter(c, fall, args[0], 16);
	c.mov(x86::rax, x86::qword_ptr(x86::r10));
	c.and_(x86::rax, -128);
	c.cmp(x86::rax, args[1]);
@@ -1349,6 +1349,12 @@ void spu_stop(spu_thread* _spu, u32 code)
	{
		spu_runtime::g_escape(_spu);
	}

+	if (_spu->test_stopped())
+	{
+		_spu->pc += 4;
+		spu_runtime::g_escape(_spu);
+	}
}

void spu_recompiler::STOP(spu_opcode_t op)
@@ -1407,7 +1413,7 @@ void spu_recompiler::MFSPR(spu_opcode_t op)
	c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
}

-static s64 spu_rdch(spu_thread* _spu, u32 ch)
+static u32 spu_rdch(spu_thread* _spu, u32 ch)
{
	const s64 result = _spu->get_ch_value(ch);

@@ -1416,7 +1422,13 @@ static s64 spu_rdch(spu_thread* _spu, u32 ch)
		spu_runtime::g_escape(_spu);
	}

-	return result;
+	if (_spu->test_stopped())
+	{
+		_spu->pc += 4;
+		spu_runtime::g_escape(_spu);
+	}
+
+	return static_cast<u32>(result & 0xffffffff);
}

void spu_recompiler::RDCH(spu_opcode_t op)
@@ -2319,14 +2331,26 @@ static void spu_wrch(spu_thread* _spu, u32 ch, u32 value)
	{
		spu_runtime::g_escape(_spu);
	}

+	if (_spu->test_stopped())
+	{
+		_spu->pc += 4;
+		spu_runtime::g_escape(_spu);
+	}
}

-static void spu_wrch_mfc(spu_thread* _spu, spu_function_t _ret)
+static void spu_wrch_mfc(spu_thread* _spu)
{
	if (!_spu->process_mfc_cmd())
	{
		spu_runtime::g_escape(_spu);
	}

+	if (_spu->test_stopped())
+	{
+		_spu->pc += 4;
+		spu_runtime::g_escape(_spu);
+	}
}

void spu_recompiler::WRCH(spu_opcode_t op)
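The same epilogue now appears in every recompiler helper: if test_stopped() reports a pending pause or stop request, advance pc past the instruction that already completed and unwind to the dispatcher. A hypothetical shared helper expressing the pattern (the commit itself repeats the block inline):

static void escape_if_interrupted(spu_thread* _spu)
{
    if (_spu->test_stopped())
    {
        _spu->pc += 4;               // the instruction already executed
        spu_runtime::g_escape(_spu); // unwind to the SPU dispatcher so the
                                     // thread can service check_state()
    }
}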
@@ -167,6 +167,13 @@ bool spu_interpreter::RDCH(spu_thread& spu, spu_opcode_t op)
	}

	spu.gpr[op.rt] = v128::from32r(static_cast<u32>(result));

+	if (spu.state)
+	{
+		spu.pc += 4;
+		return false;
+	}
+
	return true;
}

@@ -414,7 +421,18 @@ bool spu_interpreter::MTSPR(spu_thread& spu, spu_opcode_t op)

bool spu_interpreter::WRCH(spu_thread& spu, spu_opcode_t op)
{
-	return spu.set_ch_value(op.ra, spu.gpr[op.rt]._u32[3]);
+	if (!spu.set_ch_value(op.ra, spu.gpr[op.rt]._u32[3]))
+	{
+		return false;
+	}
+
+	if (spu.state)
+	{
+		spu.pc += 4;
+		return false;
+	}
+
+	return true;
}

bool spu_interpreter::BIZ(spu_thread& spu, spu_opcode_t op)
@@ -5125,34 +5125,30 @@ public:
		call("spu_unknown", &exec_unk, m_thread, m_ir->getInt32(op_unk.opcode));
	}

-	static bool exec_stop(spu_thread* _spu, u32 code)
+	static void exec_stop(spu_thread* _spu, u32 code)
	{
-		return _spu->stop_and_signal(code);
+		if (!_spu->stop_and_signal(code))
+		{
+			spu_runtime::g_escape(_spu);
+		}
+
+		if (_spu->test_stopped())
+		{
+			_spu->pc += 4;
+			spu_runtime::g_escape(_spu);
+		}
	}

	void STOP(spu_opcode_t op) //
	{
		if (m_interp_magn)
		{
-			const auto succ = call("spu_syscall", &exec_stop, m_thread, m_ir->CreateAnd(m_interp_op, m_ir->getInt32(0x3fff)));
-			const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
-			const auto stop = llvm::BasicBlock::Create(m_context, "", m_function);
-			m_ir->CreateCondBr(succ, next, stop);
-			m_ir->SetInsertPoint(stop);
-			m_ir->CreateRetVoid();
-			m_ir->SetInsertPoint(next);
+			call("spu_syscall", &exec_stop, m_thread, m_ir->CreateAnd(m_interp_op, m_ir->getInt32(0x3fff)));
			return;
		}

		update_pc();
-		const auto succ = call("spu_syscall", &exec_stop, m_thread, m_ir->getInt32(op.opcode & 0x3fff));
-		const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
-		const auto stop = llvm::BasicBlock::Create(m_context, "", m_function);
-		m_ir->CreateCondBr(succ, next, stop);
-		m_ir->SetInsertPoint(stop);
-		m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true);
-		m_ir->CreateBr(next);
-		m_ir->SetInsertPoint(next);
+		call("spu_syscall", &exec_stop, m_thread, m_ir->getInt32(op.opcode & 0x3fff));

		if (g_cfg.core.spu_block_size == spu_block_size_type::safe)
		{
@@ -5167,28 +5163,35 @@ public:
	{
		if (m_interp_magn)
		{
-			const auto succ = call("spu_syscall", &exec_stop, m_thread, m_ir->getInt32(0x3fff));
-			const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
-			const auto stop = llvm::BasicBlock::Create(m_context, "", m_function);
-			m_ir->CreateCondBr(succ, next, stop);
-			m_ir->SetInsertPoint(stop);
-			m_ir->CreateRetVoid();
-			m_ir->SetInsertPoint(next);
+			call("spu_syscall", &exec_stop, m_thread, m_ir->getInt32(0x3fff));
			return;
		}

		STOP(spu_opcode_t{0x3fff});
	}

-	static s64 exec_rdch(spu_thread* _spu, u32 ch)
+	static u32 exec_rdch(spu_thread* _spu, u32 ch)
	{
-		return _spu->get_ch_value(ch);
+		const s64 result = _spu->get_ch_value(ch);
+
+		if (result < 0)
+		{
+			spu_runtime::g_escape(_spu);
+		}
+
+		if (_spu->test_stopped())
+		{
+			_spu->pc += 4;
+			spu_runtime::g_escape(_spu);
+		}
+
+		return static_cast<u32>(result & 0xffffffff);
	}

-	static s64 exec_read_in_mbox(spu_thread* _spu)
+	static u32 exec_read_in_mbox(spu_thread* _spu)
	{
		// TODO
-		return _spu->get_ch_value(SPU_RdInMbox);
+		return exec_rdch(_spu, SPU_RdInMbox);
	}

	static u32 exec_read_dec(spu_thread* _spu)
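The old helpers returned get_ch_value's raw s64 so JIT code could branch on the sign bit; the new ones consume that sentinel internally. A sketch of the encoding as it appears from these call sites (assumed, not an authoritative contract):

// result >= 0 : low 32 bits hold the channel value
// result <  0 : the read did not complete (thread is stopping)
static u32 unwrap_channel_value(spu_thread* _spu, s64 result)
{
    if (result < 0)
    {
        spu_runtime::g_escape(_spu); // no value produced, leave compiled code
    }

    return static_cast<u32>(result & 0xffffffff);
}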
@@ -5203,7 +5206,7 @@ public:
		return res;
	}

-	static s64 exec_read_events(spu_thread* _spu)
+	static u32 exec_read_events(spu_thread* _spu)
	{
		if (const u32 events = _spu->get_events())
		{
@@ -5211,7 +5214,7 @@ public:
		}

		// TODO
-		return _spu->get_ch_value(SPU_RdEventStat);
+		return exec_rdch(_spu, SPU_RdEventStat);
	}

	llvm::Value* get_rdch(spu_opcode_t op, u32 off, bool atomic)
@@ -5234,20 +5237,17 @@ public:
		const auto _cur = m_ir->GetInsertBlock();
		const auto done = llvm::BasicBlock::Create(m_context, "", m_function);
		const auto wait = llvm::BasicBlock::Create(m_context, "", m_function);
-		const auto stop = llvm::BasicBlock::Create(m_context, "", m_function);
-		m_ir->CreateCondBr(m_ir->CreateICmpSLT(val0, m_ir->getInt64(0)), done, wait);
+		const auto cond = m_ir->CreateICmpSLT(val0, m_ir->getInt64(0));
+		val0 = m_ir->CreateTrunc(val0, get_type<u32>());
+		m_ir->CreateCondBr(cond, done, wait);
		m_ir->SetInsertPoint(wait);
		const auto val1 = call("spu_read_channel", &exec_rdch, m_thread, m_ir->getInt32(op.ra));
-		m_ir->CreateCondBr(m_ir->CreateICmpSLT(val1, m_ir->getInt64(0)), stop, done);
-		m_ir->SetInsertPoint(stop);
-		m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true);
		m_ir->CreateBr(done);
		m_ir->SetInsertPoint(done);
-		const auto rval = m_ir->CreatePHI(get_type<u64>(), 2);
+		const auto rval = m_ir->CreatePHI(get_type<u32>(), 2);
		rval->addIncoming(val0, _cur);
		rval->addIncoming(val1, wait);
-		rval->addIncoming(m_ir->getInt64(0), stop);
-		return m_ir->CreateTrunc(rval, get_type<u32>());
+		return rval;
	}

	void RDCH(spu_opcode_t op) //
@@ -5257,13 +5257,6 @@ public:
		if (m_interp_magn)
		{
			res.value = call("spu_read_channel", &exec_rdch, m_thread, get_imm<u32>(op.ra).value);
-			const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
-			const auto stop = llvm::BasicBlock::Create(m_context, "", m_function);
-			m_ir->CreateCondBr(m_ir->CreateICmpSLT(res.value, m_ir->getInt64(0)), stop, next);
-			m_ir->SetInsertPoint(stop);
-			m_ir->CreateRetVoid();
-			m_ir->SetInsertPoint(next);
-			res.value = m_ir->CreateTrunc(res.value, get_type<u32>());
			set_vr(op.rt, insert(splat<u32[4]>(0), 3, res));
			return;
		}
@@ -5279,14 +5272,6 @@ public:
		{
			update_pc();
			res.value = call("spu_read_in_mbox", &exec_read_in_mbox, m_thread);
-			const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
-			const auto stop = llvm::BasicBlock::Create(m_context, "", m_function);
-			m_ir->CreateCondBr(m_ir->CreateICmpSLT(res.value, m_ir->getInt64(0)), stop, next);
-			m_ir->SetInsertPoint(stop);
-			m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true);
-			m_ir->CreateBr(next);
-			m_ir->SetInsertPoint(next);
-			res.value = m_ir->CreateTrunc(res.value, get_type<u32>());
			break;
		}
	case MFC_RdTagStat:
@@ -5333,14 +5318,6 @@ public:
		{
			update_pc();
			res.value = call("spu_read_events", &exec_read_events, m_thread);
-			const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
-			const auto stop = llvm::BasicBlock::Create(m_context, "", m_function);
-			m_ir->CreateCondBr(m_ir->CreateICmpSLT(res.value, m_ir->getInt64(0)), stop, next);
-			m_ir->SetInsertPoint(stop);
-			m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true);
-			m_ir->CreateBr(next);
-			m_ir->SetInsertPoint(next);
-			res.value = m_ir->CreateTrunc(res.value, get_type<u32>());
			break;
		}
	case SPU_RdMachStat:
@@ -5353,14 +5330,6 @@ public:
		{
			update_pc();
			res.value = call("spu_read_channel", &exec_rdch, m_thread, m_ir->getInt32(op.ra));
-			const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
-			const auto stop = llvm::BasicBlock::Create(m_context, "", m_function);
-			m_ir->CreateCondBr(m_ir->CreateICmpSLT(res.value, m_ir->getInt64(0)), stop, next);
-			m_ir->SetInsertPoint(stop);
-			m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true);
-			m_ir->CreateBr(next);
-			m_ir->SetInsertPoint(next);
-			res.value = m_ir->CreateTrunc(res.value, get_type<u32>());
			break;
		}
	}
@@ -5471,14 +5440,18 @@ public:
		set_vr(op.rt, insert(splat<u32[4]>(0), 3, res));
	}

-	static bool exec_wrch(spu_thread* _spu, u32 ch, u32 value)
+	static void exec_wrch(spu_thread* _spu, u32 ch, u32 value)
	{
-		return _spu->set_ch_value(ch, value);
-	}
+		if (!_spu->set_ch_value(ch, value))
+		{
+			spu_runtime::g_escape(_spu);
+		}

-	static void exec_mfc(spu_thread* _spu)
-	{
-		return _spu->do_mfc();
+		if (_spu->test_stopped())
+		{
+			_spu->pc += 4;
+			spu_runtime::g_escape(_spu);
+		}
	}

	static void exec_list_unstall(spu_thread* _spu, u32 tag)
@@ -5491,12 +5464,21 @@ public:
			}
		}

-		return exec_mfc(_spu);
+		_spu->do_mfc();
	}

-	static bool exec_mfc_cmd(spu_thread* _spu)
+	static void exec_mfc_cmd(spu_thread* _spu)
	{
-		return _spu->process_mfc_cmd();
+		if (!_spu->process_mfc_cmd())
+		{
+			spu_runtime::g_escape(_spu);
+		}
+
+		if (_spu->test_stopped())
+		{
+			_spu->pc += 4;
+			spu_runtime::g_escape(_spu);
+		}
	}

	void WRCH(spu_opcode_t op) //
@@ -5505,13 +5487,7 @@ public:

		if (m_interp_magn)
		{
-			const auto succ = call("spu_write_channel", &exec_wrch, m_thread, get_imm<u32>(op.ra).value, val.value);
-			const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
-			const auto stop = llvm::BasicBlock::Create(m_context, "", m_function);
-			m_ir->CreateCondBr(succ, next, stop);
-			m_ir->SetInsertPoint(stop);
-			m_ir->CreateRetVoid();
-			m_ir->SetInsertPoint(next);
+			call("spu_write_channel", &exec_wrch, m_thread, get_imm<u32>(op.ra).value, val.value);
			return;
		}

@@ -5922,14 +5898,7 @@ public:
	}

	update_pc();
-	const auto succ = call("spu_write_channel", &exec_wrch, m_thread, m_ir->getInt32(op.ra), val.value);
-	const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
-	const auto stop = llvm::BasicBlock::Create(m_context, "", m_function);
-	m_ir->CreateCondBr(succ, next, stop);
-	m_ir->SetInsertPoint(stop);
-	m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true);
-	m_ir->CreateBr(next);
-	m_ir->SetInsertPoint(next);
+	call("spu_write_channel", &exec_wrch, m_thread, m_ir->getInt32(op.ra), val.value);
}

void LNOP(spu_opcode_t op) //
@@ -29,36 +29,39 @@ static const bool s_tsx_avx = utils::has_avx();

// For special case
static const bool s_tsx_haswell = utils::has_rtm() && !utils::has_mpx();

-#ifdef _MSC_VER
-bool operator ==(const u128& lhs, const u128& rhs)
+static FORCE_INLINE bool cmp_rdata(const decltype(spu_thread::rdata)& lhs, const decltype(spu_thread::rdata)& rhs)
{
-	return lhs.lo == rhs.lo && lhs.hi == rhs.hi;
+	const v128 a = (lhs[0] ^ rhs[0]) | (lhs[1] ^ rhs[1]);
+	const v128 b = (lhs[2] ^ rhs[2]) | (lhs[3] ^ rhs[3]);
+	const v128 c = (lhs[4] ^ rhs[4]) | (lhs[5] ^ rhs[5]);
+	const v128 d = (lhs[6] ^ rhs[6]) | (lhs[7] ^ rhs[7]);
+	const v128 r = (a | b) | (c | d);
+	return !(r._u64[0] | r._u64[1]);
}
-#endif

-static FORCE_INLINE void mov_rdata(u128* const dst, const u128* const src)
+static FORCE_INLINE void mov_rdata(decltype(spu_thread::rdata)& dst, const decltype(spu_thread::rdata)& src)
{
	{
-		const u128 data0 = src[0];
-		const u128 data1 = src[1];
-		const u128 data2 = src[2];
+		const v128 data0 = src[0];
+		const v128 data1 = src[1];
+		const v128 data2 = src[2];
		dst[0] = data0;
		dst[1] = data1;
		dst[2] = data2;
	}

	{
-		const u128 data0 = src[3];
-		const u128 data1 = src[4];
-		const u128 data2 = src[5];
+		const v128 data0 = src[3];
+		const v128 data1 = src[4];
+		const v128 data2 = src[5];
		dst[3] = data0;
		dst[4] = data1;
		dst[5] = data2;
	}

	{
-		const u128 data0 = src[6];
-		const u128 data1 = src[7];
+		const v128 data0 = src[6];
+		const v128 data1 = src[7];
		dst[6] = data0;
		dst[7] = data1;
	}
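A sketch of the cmp_rdata idea with raw SSE2 intrinsics (v128 is RPCS3's vector wrapper; this standalone version tolerates unaligned input): XOR each 16-byte pair, OR-reduce the differences in a balanced tree, and test the accumulator once — one branch for the whole 128-byte reservation line instead of eight.

#include <emmintrin.h> // SSE2

static bool cmp_128_bytes(const __m128i* lhs, const __m128i* rhs)
{
    __m128i acc = _mm_setzero_si128();

    for (int i = 0; i < 8; i++)
    {
        acc = _mm_or_si128(acc, _mm_xor_si128(_mm_loadu_si128(lhs + i),
                                              _mm_loadu_si128(rhs + i)));
    }

    // 0xffff iff every byte of the accumulated difference is zero
    return _mm_movemask_epi8(_mm_cmpeq_epi8(acc, _mm_setzero_si128())) == 0xffff;
}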
@@ -182,13 +185,15 @@ namespace spu
	}
}

-const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, const void* _old, const void* _new)>([](asmjit::X86Assembler& c, auto& args)
+const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const void* _old, const void* _new)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

	Label fall = c.newLabel();
	Label fail = c.newLabel();
	Label _ret = c.newLabel();
+	Label skip = c.newLabel();
+	Label next = c.newLabel();

	if (utils::has_avx() && !s_tsx_avx)
	{
@@ -197,8 +202,6 @@ const auto spu_putllc_tx = ...

	// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
	c.push(x86::rbp);
	c.push(x86::r15);
	c.push(x86::r14);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rbx);
@@ -234,8 +237,6 @@ const auto spu_putllc_tx = ...
	c.lea(x86::rbx, x86::qword_ptr(x86::rbx, args[0]));
	c.xor_(x86::r12d, x86::r12d);
	c.mov(x86::r13, args[1]);
	c.mov(x86::r14, args[2]);
	c.mov(x86::r15, args[3]);

	// Prepare data
	if (s_tsx_avx)
@@ -270,10 +271,13 @@ const auto spu_putllc_tx = ...
	}

	// Begin transaction
-	build_transaction_enter(c, fall);
+	build_transaction_enter(c, fall, x86::r12, 4);
	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
	c.and_(x86::rax, -128);
	c.cmp(x86::rax, x86::r13);
	c.jne(fail);
+	c.test(x86::qword_ptr(x86::rbx), 127);
+	c.jnz(skip);

	if (s_tsx_avx)
	{
@@ -329,24 +333,34 @@ const auto spu_putllc_tx = ...

	c.sub(x86::qword_ptr(x86::rbx), -128);
	c.xend();
-	c.xor_(x86::eax, x86::eax);
+	c.mov(x86::eax, 1);
	c.jmp(_ret);

	// Touch memory after transaction failure
	c.bind(skip);
	c.xor_(x86::eax, x86::eax);
	c.xor_(x86::r12d, x86::r12d);
	build_transaction_abort(c, 0);
	//c.jmp(fall);

	c.bind(fall);
	c.sar(x86::eax, 24);
	c.js(fail);
	c.xor_(x86::rbp, 0xf80);
	c.lock().add(x86::qword_ptr(x86::rbp), 0);
	c.xor_(x86::rbp, 0xf80);
	c.lock().add(x86::qword_ptr(x86::rbx), 1);
	c.mov(x86::r12d, 1);
	c.lock().bts(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::wait));

	// Touch memory if transaction failed without RETRY flag on the first attempt
	c.cmp(x86::r12, 1);
	c.jne(next);
	c.xor_(x86::rbp, 0xf80);
	c.lock().add(x86::dword_ptr(x86::rbp), 0);
	c.xor_(x86::rbp, 0xf80);

	Label fall2 = c.newLabel();
	Label next2 = c.newLabel();
	Label fail2 = c.newLabel();

	// Lightened transaction: only compare and swap data
-	Label retry = build_transaction_enter(c, fall2);
+	c.bind(next);
+	build_transaction_enter(c, fall2, x86::r12, 666);

	if (s_tsx_avx)
	{
@@ -379,7 +393,7 @@ const auto spu_putllc_tx = ...
		c.ptest(x86::xmm0, x86::xmm0);
	}

-	c.jnz(fail);
+	c.jnz(fail2);

	if (s_tsx_avx)
	{
@@ -402,86 +416,24 @@ const auto spu_putllc_tx = ...

	c.xend();
	c.lock().add(x86::qword_ptr(x86::rbx), 127);
-	c.mov(x86::rax, x86::r12);
+	c.mov(x86::eax, 1);
	c.jmp(_ret);

	// Touch memory after transaction failure
	c.bind(fall2);
-	c.lea(x86::r12, x86::qword_ptr(x86::r12, 1));
-
-	if (s_tsx_haswell || std::thread::hardware_concurrency() < 12)
-	{
-		// Call yield and restore data
-		c.call(imm_ptr(&std::this_thread::yield));
-
-		if (s_tsx_avx)
-		{
-			c.vmovups(x86::ymm0, x86::yword_ptr(x86::r14, 0));
-			c.vmovups(x86::ymm1, x86::yword_ptr(x86::r14, 32));
-			c.vmovups(x86::ymm2, x86::yword_ptr(x86::r14, 64));
-			c.vmovups(x86::ymm3, x86::yword_ptr(x86::r14, 96));
-			c.vmovups(x86::ymm4, x86::yword_ptr(x86::r15, 0));
-			c.vmovups(x86::ymm5, x86::yword_ptr(x86::r15, 32));
-			c.vmovups(x86::ymm6, x86::yword_ptr(x86::r15, 64));
-			c.vmovups(x86::ymm7, x86::yword_ptr(x86::r15, 96));
-		}
-		else
-		{
-			c.movaps(x86::xmm0, x86::oword_ptr(x86::r14, 0));
-			c.movaps(x86::xmm1, x86::oword_ptr(x86::r14, 16));
-			c.movaps(x86::xmm2, x86::oword_ptr(x86::r14, 32));
-			c.movaps(x86::xmm3, x86::oword_ptr(x86::r14, 48));
-			c.movaps(x86::xmm4, x86::oword_ptr(x86::r14, 64));
-			c.movaps(x86::xmm5, x86::oword_ptr(x86::r14, 80));
-			c.movaps(x86::xmm6, x86::oword_ptr(x86::r14, 96));
-			c.movaps(x86::xmm7, x86::oword_ptr(x86::r14, 112));
-			c.movaps(x86::xmm8, x86::oword_ptr(x86::r15, 0));
-			c.movaps(x86::xmm9, x86::oword_ptr(x86::r15, 16));
-			c.movaps(x86::xmm10, x86::oword_ptr(x86::r15, 32));
-			c.movaps(x86::xmm11, x86::oword_ptr(x86::r15, 48));
-			c.movaps(x86::xmm12, x86::oword_ptr(x86::r15, 64));
-			c.movaps(x86::xmm13, x86::oword_ptr(x86::r15, 80));
-			c.movaps(x86::xmm14, x86::oword_ptr(x86::r15, 96));
-			c.movaps(x86::xmm15, x86::oword_ptr(x86::r15, 112));
-		}
-	}
-	else
-	{
-		Label loop1 = c.newLabel();
-		c.mov(x86::eax, x86::r12d);
-		c.and_(x86::eax, 0xf);
-		c.shl(x86::eax, 3);
-		c.or_(x86::eax, 1);
-		c.bind(loop1);
-		c.pause();
-		c.dec(x86::eax);
-		c.jnz(loop1);
-	}
-
-	c.movzx(x86::eax, x86::r12b);
-	c.not_(x86::al);
-	c.shl(x86::eax, 4);
-	c.xor_(x86::rbp, x86::rax);
-	c.lock().add(x86::qword_ptr(x86::rbp), 0);
-	c.xor_(x86::rbp, x86::rax);
-	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
-	c.and_(x86::rax, -128);
-	c.cmp(x86::rax, x86::r13);
-	c.jne(fail);
-	c.cmp(x86::r12, 16);
-	c.jb(retry);
-	c.mov(x86::rax, imm_ptr(&g_cfg.core.spu_accurate_putllc.get()));
-	c.test(x86::byte_ptr(x86::rax), 1);
-	c.jnz(retry);
+	c.sar(x86::eax, 24);
+	c.js(fail2);
+	c.mov(x86::eax, 2);
	c.jmp(_ret);

	c.bind(fail);
	build_transaction_abort(c, 0xff);
	c.test(x86::r12, x86::r12);
	c.jz(next2);
	c.xor_(x86::eax, x86::eax);
	c.jmp(_ret);

	c.bind(fail2);
	build_transaction_abort(c, 0xff);
	c.lock().sub(x86::qword_ptr(x86::rbx), 1);
	c.bind(next2);
-	c.mov(x86::rax, x86::r12);
-	c.not_(x86::rax);
+	c.xor_(x86::eax, x86::eax);
	//c.jmp(_ret);

	c.bind(_ret);
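Reading the call sites further below, spu_putllc_tx now returns a small status code rather than an attempt counter. A plausible decoding, inferred from this commit and labeled as such (there is no named enum in the source):

// 0 - reservation/data mismatch: the conditional store must fail
// 1 - the 128-byte store was committed transactionally
// 2 - TSX kept aborting: take the heavyweight fallback under suspend_all
switch (spu_putllc_tx(addr, rtime, rdata.data(), to_write.data()))
{
case 1: /* report MFC_PUTLLC_SUCCESS */ break;
case 0: /* report MFC_PUTLLC_FAILURE */ break;
case 2: /* run the cpu_thread::suspend_all fallback shown below */ break;
}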
@@ -516,13 +468,11 @@ const auto spu_putllc_tx = ...
	c.pop(x86::rbx);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::r14);
	c.pop(x86::r15);
	c.pop(x86::rbp);
	c.ret();
});

-const auto spu_getll_tx = build_function_asm<u64(*)(u32 raddr, void* rdata, u64* rtime)>([](asmjit::X86Assembler& c, auto& args)
+const auto spu_getll_tx = build_function_asm<u64(*)(u32 raddr, void* rdata)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

@@ -558,10 +508,9 @@ const auto spu_getll_tx = ...
	c.lea(x86::rbx, x86::qword_ptr(x86::rbx, args[0]));
	c.xor_(x86::r12d, x86::r12d);
	c.mov(x86::r13, args[1]);
-	c.mov(x86::qword_ptr(x86::rsp, 64), args[2]);

	// Begin transaction
-	Label begin = build_transaction_enter(c, fall);
+	build_transaction_enter(c, fall, x86::r12, 16);
	c.mov(x86::rax, x86::qword_ptr(x86::rbx));

	if (s_tsx_avx)
@@ -605,32 +554,12 @@ const auto spu_getll_tx = ...
	}

	c.and_(x86::rax, -128);
-	c.mov(args[2], x86::qword_ptr(x86::rsp, 64));
-	c.mov(x86::qword_ptr(args[2]), x86::rax);
-	c.mov(x86::rax, x86::r12);
	c.jmp(_ret);

	// Touch memory after transaction failure
	c.bind(fall);
-	c.lea(x86::r12, x86::qword_ptr(x86::r12, 1));
+	c.mov(x86::eax, 1);
	//c.jmp(_ret);

-	if (s_tsx_haswell || std::thread::hardware_concurrency() < 12)
-	{
-		c.call(imm_ptr(&std::this_thread::yield));
-	}
-	else
-	{
-		c.mov(args[0], 500);
-		c.call(imm_ptr(&::busy_wait));
-	}
-
-	c.xor_(x86::rbp, 0xf80);
+	c.xor_(x86::rbx, 0xf80);
-	c.mov(x86::rax, x86::qword_ptr(x86::rbp));
+	c.mov(x86::rax, x86::qword_ptr(x86::rbx));
-	c.xor_(x86::rbp, 0xf80);
+	c.xor_(x86::rbx, 0xf80);
-	c.jmp(begin);
	c.bind(_ret);

#ifdef _WIN32
@@ -654,7 +583,7 @@ const auto spu_getll_tx = ...
	c.ret();
});

-const auto spu_getll_fast = build_function_asm<u64(*)(u32 raddr, void* rdata, u64* rtime)>([](asmjit::X86Assembler& c, auto& args)
+const auto spu_getll_inexact = build_function_asm<u64(*)(u32 raddr, void* rdata)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

@@ -691,7 +620,6 @@ const auto spu_getll_inexact = ...
	c.lea(x86::rbx, x86::qword_ptr(x86::rbx, args[0]));
	c.xor_(x86::r12d, x86::r12d);
	c.mov(x86::r13, args[1]);
-	c.mov(x86::qword_ptr(x86::rsp, 64), args[2]);

	// Begin copying
	Label begin = c.newLabel();
@@ -719,14 +647,15 @@ const auto spu_getll_inexact = ...
	}

	// Verify and retry if necessary.
-	c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
-	c.je(test0);
-	c.pause();
+	c.mov(args[0], x86::rax);
+	c.xor_(args[0], x86::qword_ptr(x86::rbx));
+	c.test(args[0], -128);
+	c.jz(test0);
	c.lea(x86::r12, x86::qword_ptr(x86::r12, 1));
	c.jmp(begin);

	c.bind(test0);
-	c.test(x86::eax, 0x7f);
+	c.test(x86::eax, 127);
	c.jz(_ret);
	c.and_(x86::rax, -128);

@@ -774,8 +703,6 @@ const auto spu_getll_inexact = ...

	c.jz(_ret);
	c.lea(x86::r12, x86::qword_ptr(x86::r12, 2));
-	c.mov(args[0], 500);
-	c.call(imm_ptr(&::busy_wait));
	c.jmp(begin);

	c.bind(_ret);
@@ -799,10 +726,6 @@ const auto spu_getll_inexact = ...
	c.movaps(x86::oword_ptr(x86::r13, 112), x86::xmm7);
	}

-	c.mov(args[2], x86::qword_ptr(x86::rsp, 64));
-	c.mov(x86::qword_ptr(args[2]), x86::rax);
-	c.mov(x86::rax, x86::r12);

#ifdef _WIN32
	if (!s_tsx_avx)
	{
@@ -826,12 +749,14 @@ const auto spu_getll_inexact = ...
	c.ret();
});

-const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rdata)>([](asmjit::X86Assembler& c, auto& args)
+const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rdata, spu_thread* _spu)>([](asmjit::X86Assembler& c, auto& args)
{
	using namespace asmjit;

	Label fall = c.newLabel();
	Label _ret = c.newLabel();
+	Label skip = c.newLabel();
+	Label next = c.newLabel();

	if (utils::has_avx() && !s_tsx_avx)
	{
@@ -884,7 +809,9 @@ const auto spu_putlluc_tx = ...
	}

	// Begin transaction
-	build_transaction_enter(c, fall);
+	build_transaction_enter(c, fall, x86::r12, 8);
+	c.test(x86::dword_ptr(x86::rbx), 127);
+	c.jnz(skip);

	if (s_tsx_avx)
	{
@@ -907,21 +834,31 @@ const auto spu_putlluc_tx = ...

	c.sub(x86::qword_ptr(x86::rbx), -128);
	c.xend();
-	c.xor_(x86::eax, x86::eax);
+	c.mov(x86::eax, 1);
	c.jmp(_ret);

	// Touch memory after transaction failure
	c.bind(skip);
	c.xor_(x86::eax, x86::eax);
	c.xor_(x86::r12d, x86::r12d);
	build_transaction_abort(c, 0);
	//c.jmp(fall);

	c.bind(fall);
	c.xor_(x86::rbp, 0xf80);
	c.lock().add(x86::qword_ptr(x86::rbp), 0);
	c.xor_(x86::rbp, 0xf80);
	c.lock().add(x86::qword_ptr(x86::rbx), 1);
	c.mov(x86::r12d, 1);
	c.lock().bts(x86::dword_ptr(args[2], ::offset32(&spu_thread::state)), static_cast<u32>(cpu_flag::wait));

	// Touch memory if transaction failed without RETRY flag on the first attempt
	c.cmp(x86::r12, 1);
	c.jne(next);
	c.xor_(x86::rbp, 0xf80);
	c.lock().add(x86::dword_ptr(x86::rbp), 0);
	c.xor_(x86::rbp, 0xf80);

	Label fall2 = c.newLabel();

	// Lightened transaction
-	Label retry = build_transaction_enter(c, fall2);
+	c.bind(next);
+	build_transaction_enter(c, fall2, x86::r12, 666);

	if (s_tsx_avx)
	{
@@ -944,57 +881,12 @@ const auto spu_putlluc_tx = ...

	c.xend();
	c.lock().add(x86::qword_ptr(x86::rbx), 127);
-	c.mov(x86::rax, x86::r12);
+	c.mov(x86::eax, 1);
	c.jmp(_ret);

	// Touch memory after transaction failure
	c.bind(fall2);
-	c.lea(x86::r12, x86::qword_ptr(x86::r12, 1));
-
-	if (s_tsx_haswell || std::thread::hardware_concurrency() < 12)
-	{
-		// Call yield and restore data
-		c.call(imm_ptr(&std::this_thread::yield));
-
-		if (s_tsx_avx)
-		{
-			c.vmovups(x86::ymm0, x86::yword_ptr(x86::r13, 0));
-			c.vmovups(x86::ymm1, x86::yword_ptr(x86::r13, 32));
-			c.vmovups(x86::ymm2, x86::yword_ptr(x86::r13, 64));
-			c.vmovups(x86::ymm3, x86::yword_ptr(x86::r13, 96));
-		}
-		else
-		{
-			c.movaps(x86::xmm0, x86::oword_ptr(x86::r13, 0));
-			c.movaps(x86::xmm1, x86::oword_ptr(x86::r13, 16));
-			c.movaps(x86::xmm2, x86::oword_ptr(x86::r13, 32));
-			c.movaps(x86::xmm3, x86::oword_ptr(x86::r13, 48));
-			c.movaps(x86::xmm4, x86::oword_ptr(x86::r13, 64));
-			c.movaps(x86::xmm5, x86::oword_ptr(x86::r13, 80));
-			c.movaps(x86::xmm6, x86::oword_ptr(x86::r13, 96));
-			c.movaps(x86::xmm7, x86::oword_ptr(x86::r13, 112));
-		}
-	}
-	else
-	{
-		Label loop1 = c.newLabel();
-		c.mov(x86::eax, x86::r12d);
-		c.and_(x86::eax, 0xf);
-		c.shl(x86::eax, 3);
-		c.or_(x86::eax, 1);
-		c.bind(loop1);
-		c.pause();
-		c.dec(x86::eax);
-		c.jnz(loop1);
-	}
-
-	c.movzx(x86::eax, x86::r12b);
-	c.not_(x86::al);
-	c.shl(x86::eax, 4);
-	c.xor_(x86::rbp, x86::rax);
-	c.lock().add(x86::qword_ptr(x86::rbp), 0);
-	c.xor_(x86::rbp, x86::rax);
-	c.jmp(retry);
+	c.mov(x86::eax, 2);
	//c.jmp(_ret);

	c.bind(_ret);
@@ -1486,7 +1378,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)

	while (size)
	{
-		*reinterpret_cast<u128*>(dst) = *reinterpret_cast<const u128*>(src);
+		*reinterpret_cast<v128*>(dst) = *reinterpret_cast<const v128*>(src);

		dst += 16;
		src += 16;
@@ -1501,7 +1393,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)

	while (size >= 128)
	{
-		mov_rdata(reinterpret_cast<u128*>(dst), reinterpret_cast<const u128*>(src));
+		mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));

		dst += 128;
		src += 128;
@@ -1510,7 +1402,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)

	while (size)
	{
-		*reinterpret_cast<u128*>(dst) = *reinterpret_cast<const u128*>(src);
+		*reinterpret_cast<v128*>(dst) = *reinterpret_cast<const v128*>(src);

		dst += 16;
		src += 16;
@@ -1556,7 +1448,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
	{
		while (size >= 128)
		{
-			mov_rdata(reinterpret_cast<u128*>(dst), reinterpret_cast<const u128*>(src));
+			mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));

			dst += 128;
			src += 128;
@@ -1565,7 +1457,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)

		while (size)
		{
-			*reinterpret_cast<u128*>(dst) = *reinterpret_cast<const u128*>(src);
+			*reinterpret_cast<v128*>(dst) = *reinterpret_cast<const v128*>(src);

			dst += 16;
			src += 16;
@@ -1690,7 +1582,7 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
	if (raddr && addr == raddr)
	{
		// Last check for event before we clear the reservation
-		if ((vm::reservation_acquire(addr, 128) & -128) != rtime || rdata != vm::_ref<decltype(rdata)>(addr))
+		if ((vm::reservation_acquire(addr, 128) & -128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(addr)))
		{
			ch_event_stat |= SPU_EVENT_LR;
		}
@@ -1703,11 +1595,31 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
	// Store unconditionally
	if (LIKELY(g_use_rtm))
	{
-		const u64 count = spu_putlluc_tx(addr, to_write.data());
+		const u32 result = spu_putlluc_tx(addr, to_write.data(), this);

-		if (count >= 10)
+		if (result == 2)
		{
-			LOG_ERROR(SPU, "%s took too long: %u", args.cmd, count);
+			cpu_thread::suspend_all cpu_lock(this);
+
+			// Try to obtain bit 7 (+64)
+			if (!atomic_storage<u64>::bts(vm::reservation_acquire(addr, 128).raw(), 6))
+			{
+				auto& data = vm::_ref<decltype(rdata)>(addr);
+				mov_rdata(data, to_write);
+
+				// Keep checking written data against a rogue transaction sneaking in
+				while (std::atomic_thread_fence(std::memory_order_seq_cst), !cmp_rdata(data, to_write))
+				{
+					mov_rdata(data, to_write);
+				}
+
+				vm::reservation_acquire(addr, 128) += 63;
+			}
+			else
+			{
+				// Give up if another PUTLLUC command took precedence
+				vm::reservation_acquire(addr, 128) -= 1;
+			}
		}
	}
	else
@@ -1722,12 +1634,12 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
		// Full lock (heavyweight)
		// TODO: vm::check_addr
		vm::writer_lock lock(addr);
-		mov_rdata(data.data(), to_write.data());
+		mov_rdata(data, to_write);
		res.release(res.load() + 127);
	}
	else
	{
-		mov_rdata(data.data(), to_write.data());
+		mov_rdata(data, to_write);
		res.release(res.load() + 127);
	}
}
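The fallback composes its reservation bump from three pieces. The arithmetic below is inferred from the code above, not stated anywhere in the source: the low bits of the reservation word act as an in-progress marker, a full update advances the word by 128, and the +1 comes from the lock-add in the generated fallback path before this C++ code runs.

int main()
{
    constexpr unsigned announce = 1;  // lock add [res], 1 - store pending, low bits non-zero
    constexpr unsigned claim    = 64; // lock bts [res], 6 - unique heavyweight-writer slot
    constexpr unsigned finish   = 63; // res += 63         - completes the bump
    static_assert(announce + claim + finish == 128, "exactly one version increment");
}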
@@ -1847,6 +1759,8 @@ bool spu_thread::process_mfc_cmd()
	// Stall infinitely if MFC queue is full
	while (UNLIKELY(mfc_size >= 16))
	{
+		state += cpu_flag::wait;
+
		if (is_stopped())
		{
			return false;
@@ -1873,8 +1787,10 @@ bool spu_thread::process_mfc_cmd()
	{
		rtime = vm::reservation_acquire(addr, 128) & -128;

-		while (rdata == data && (vm::reservation_acquire(addr, 128)) == rtime)
+		while (cmp_rdata(rdata, data) && (vm::reservation_acquire(addr, 128)) == rtime)
		{
+			state += cpu_flag::wait;
+
			if (is_stopped())
			{
				break;
@@ -1882,15 +1798,40 @@ bool spu_thread::process_mfc_cmd()

			thread_ctrl::wait_for(100);
		}

+		if (test_stopped())
+		{
+			return false;
+		}
	}

-	if (LIKELY(g_use_rtm))
+	if (LIKELY(g_use_rtm && !g_cfg.core.spu_accurate_getllar && raddr != addr))
	{
-		const u64 count = g_cfg.core.spu_accurate_getllar ? spu_getll_tx(addr, dst.data(), &ntime) : spu_getll_fast(addr, dst.data(), &ntime);
+		// TODO: maybe always start from a transaction
+		ntime = spu_getll_inexact(addr, dst.data());
+	}
+	else if (g_use_rtm)
+	{
+		ntime = spu_getll_tx(addr, dst.data());

-		if (count >= 10)
+		if (ntime == 1)
		{
-			LOG_ERROR(SPU, "%s took too long: %u", ch_mfc_cmd.cmd, count);
+			if (!g_cfg.core.spu_accurate_getllar)
+			{
+				ntime = spu_getll_inexact(addr, dst.data());
+			}
+			else
+			{
+				cpu_thread::suspend_all cpu_lock(this);
+
+				while (vm::reservation_acquire(addr, 128) & 127)
+				{
+					busy_wait(100);
+				}
+
+				ntime = vm::reservation_acquire(addr, 128);
+				mov_rdata(dst, data);
+			}
		}
	}
	else
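The protocol here is again inferred from the call site: spu_getll_tx returns the reservation time stamp, which is a multiple of 128 on success, so the value 1 is free to act as a "transaction kept aborting" sentinel. A hypothetical wrapper making that explicit:

static bool getll_tx_succeeded(u64 ntime)
{
    // valid reservation time stamps are multiples of 128; 1 never is
    return ntime != 1;
}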
@@ -1907,37 +1848,37 @@ bool spu_thread::process_mfc_cmd()
		vm::writer_lock lock(addr);

		ntime = old_time;
-		mov_rdata(dst.data(), data.data());
+		mov_rdata(dst, data);
		res.release(old_time);
	}
	else
	{
		ntime = old_time;
-		mov_rdata(dst.data(), data.data());
+		mov_rdata(dst, data);
		res.release(old_time);
	}
}

-	if (const u32 _addr = raddr)
+	if (raddr && raddr != addr)
	{
		// Last check for event before we replace the reservation with a new one
-		if ((vm::reservation_acquire(_addr, 128) & -128) != rtime || rdata != vm::_ref<decltype(rdata)>(_addr))
+		if ((vm::reservation_acquire(raddr, 128) & -128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr)))
		{
			ch_event_stat |= SPU_EVENT_LR;
		}
	}
+	else if (raddr == addr)
+	{
+		// Lost previous reservation on polling
+		if (ntime != rtime || !cmp_rdata(rdata, dst))
+		{
			ch_event_stat |= SPU_EVENT_LR;

-			if (_addr == addr)
-			{
-				// Lost current reservation
-				raddr = 0;
-				ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
-				return true;
-			}
		}
	}

	raddr = addr;
	rtime = ntime;
-	mov_rdata(rdata.data(), dst.data());
+	mov_rdata(rdata, dst);

	ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
	return true;
@ -1949,29 +1890,39 @@ bool spu_thread::process_mfc_cmd()
|
||||
const u32 addr = ch_mfc_cmd.eal & -128u;
|
||||
u32 result = 0;
|
||||
|
||||
if (raddr == addr && rtime == (vm::reservation_acquire(raddr, 128) & -128))
|
||||
if (raddr == addr)
|
||||
{
|
||||
const auto& to_write = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ff80);
|
||||
|
||||
if (LIKELY(g_use_rtm))
|
||||
{
|
||||
u64 count = spu_putllc_tx(addr, rtime, rdata.data(), to_write.data());
|
||||
result = spu_putllc_tx(addr, rtime, rdata.data(), to_write.data());
|
||||
|
||||
if ((count >> 63) == 0)
|
||||
if (result == 2)
|
||||
{
|
||||
result = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
count = ~count;
|
||||
}
|
||||
result = 0;
|
||||
|
||||
if (count >= 10)
|
||||
{
|
||||
LOG_ERROR(SPU, "%s took too long: %u (r=%u)", ch_mfc_cmd.cmd, count, result);
|
||||
cpu_thread::suspend_all cpu_lock(this);
|
||||
|
||||
// Give up if other PUTLLC/PUTLLUC commands are in progress
|
||||
if (!vm::reservation_acquire(addr, 128).try_dec(rtime + 1))
|
||||
{
|
||||
auto& data = vm::_ref<decltype(rdata)>(addr);
|
||||
|
||||
if ((vm::reservation_acquire(addr, 128) & -128) == rtime && cmp_rdata(rdata, data))
|
||||
{
|
||||
mov_rdata(data, to_write);
|
||||
vm::reservation_acquire(addr, 128) += 127;
|
||||
result = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
vm::reservation_acquire(addr, 128) -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (auto& data = vm::_ref<decltype(rdata)>(addr); rdata == data)
|
||||
else if (auto& data = vm::_ref<decltype(rdata)>(addr); rtime == (vm::reservation_acquire(raddr, 128) & -128) && cmp_rdata(rdata, data))
|
||||
{
|
||||
auto& res = vm::reservation_lock(raddr, 128);
|
||||
const u64 old_time = res.load() & -128;
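
The non-transactional PUTLLC fallback gates itself on try_dec, a conditional atomic update. A sketch of how such a helper is typically built from a compare-and-swap loop (the exact predicate RPCS3's atomic type uses is not shown in this diff, so treat the condition as an assumption):

	#include <atomic>
	#include <cstdint>

	// Decrement only while the current value exceeds a floor; report success.
	bool try_dec(std::atomic<std::uint64_t>& v, std::uint64_t greater_than)
	{
		std::uint64_t old = v.load();

		while (old > greater_than)
		{
			// compare_exchange_weak reloads 'old' on failure, so the loop re-checks
			if (v.compare_exchange_weak(old, old - 1))
			{
				return true; // decremented
			}
		}

		return false; // value was not above the floor
	}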
@ -1984,9 +1935,9 @@ bool spu_thread::process_mfc_cmd()
			// TODO: vm::check_addr
			vm::writer_lock lock(addr);

			if (rdata == data)
			if (cmp_rdata(rdata, data))
			{
				mov_rdata(data.data(), to_write.data());
				mov_rdata(data, to_write);
				res.release(old_time + 128);
				result = 1;
			}
@ -2012,7 +1963,7 @@ bool spu_thread::process_mfc_cmd()
	if (raddr)
	{
		// Last check for event before we clear the reservation
		if (raddr == addr || rtime != (vm::reservation_acquire(raddr, 128) & -128) || rdata != vm::_ref<decltype(rdata)>(raddr))
		if (raddr == addr || rtime != (vm::reservation_acquire(raddr, 128) & -128) || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr)))
		{
			ch_event_stat |= SPU_EVENT_LR;
		}
@ -2164,7 +2115,7 @@ u32 spu_thread::get_events(bool waiting)
	}

	// Check reservation status and set SPU_EVENT_LR if lost
	if (raddr && ((vm::reservation_acquire(raddr, sizeof(rdata)) & -128) != rtime || rdata != vm::_ref<decltype(rdata)>(raddr)))
	if (raddr && ((vm::reservation_acquire(raddr, sizeof(rdata)) & -128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr))))
	{
		ch_event_stat |= SPU_EVENT_LR;
		raddr = 0;
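
The lost-reservation test now appears in the same shape in several places. Restated standalone (a sketch; masking with -128, i.e. ~127, keeps only the timestamp bits of the counter):

	#include <cstdint>
	#include <cstring>

	// True when the reservation is lost: the timestamp moved on, or the
	// guarded 128 bytes no longer match the snapshot taken at GETLLAR.
	bool reservation_lost(std::uint64_t res_value, std::uint64_t rtime,
	                      const void* current, const void* snapshot)
	{
		return (res_value & ~127ull) != rtime || std::memcmp(current, snapshot, 128) != 0;
	}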
@ -2256,6 +2207,11 @@ s64 spu_thread::get_ch_value(u32 ch)

	auto read_channel = [&](spu_channel& channel) -> s64
	{
		if (channel.get_count() == 0)
		{
			state += cpu_flag::wait;
		}

		for (int i = 0; i < 10 && channel.get_count() == 0; i++)
		{
			busy_wait();
@ -2273,6 +2229,7 @@ s64 spu_thread::get_ch_value(u32 ch)
			thread_ctrl::wait();
		}

		check_state();
		return out;
	};
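
The channel reads in this file all follow one wait discipline after this commit: advertise the thread as waiting, spin briefly, then block, and finally let check_state() clear the flag. A generic self-contained illustration (std::this_thread stands in for busy_wait/thread_ctrl, and a plain atomic flag stands in for cpu_flag::wait):

	#include <atomic>
	#include <chrono>
	#include <thread>

	template <typename Pred>
	void spin_then_block(std::atomic<bool>& wait_flag, Pred ready)
	{
		wait_flag = true; // like: state += cpu_flag::wait

		for (int i = 0; i < 10 && !ready(); i++)
		{
			std::this_thread::yield(); // cheap spin first, like busy_wait()
		}

		while (!ready())
		{
			std::this_thread::sleep_for(std::chrono::microseconds(100)); // like thread_ctrl::wait_for(100)
		}

		wait_flag = false; // like: check_state() clearing the flag before returning
	}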
@ -2284,6 +2241,11 @@ s64 spu_thread::get_ch_value(u32 ch)
	}
	case SPU_RdInMbox:
	{
		if (ch_in_mbox.get_count() == 0)
		{
			state += cpu_flag::wait;
		}

		while (true)
		{
			for (int i = 0; i < 10 && ch_in_mbox.get_count() == 0; i++)
@ -2300,6 +2262,7 @@ s64 spu_thread::get_ch_value(u32 ch)
				int_ctrl[2].set(SPU_INT2_STAT_SPU_MAILBOX_THRESHOLD_INT);
			}

			check_state();
			return out;
		}

@ -2410,6 +2373,8 @@ s64 spu_thread::get_ch_value(u32 ch)

		while (res = get_events(), !res)
		{
			state += cpu_flag::wait;

			if (is_stopped())
			{
				return -1;
@ -2418,11 +2383,14 @@ s64 spu_thread::get_ch_value(u32 ch)
			pseudo_lock.wait(100);
		}

		check_state();
		return res;
	}

	while (res = get_events(true), !res)
	{
		state += cpu_flag::wait;

		if (is_stopped())
		{
			return -1;
@ -2431,6 +2399,7 @@ s64 spu_thread::get_ch_value(u32 ch)
		thread_ctrl::wait_for(100);
	}

	check_state();
	return res;
}

@ -2463,6 +2432,8 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
	{
		while (!ch_out_intr_mbox.try_push(value))
		{
			state += cpu_flag::wait;

			if (is_stopped())
			{
				return false;
@ -2472,9 +2443,12 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
		}

		int_ctrl[2].set(SPU_INT2_STAT_MAILBOX_INT);
		check_state();
		return true;
	}

	state += cpu_flag::wait;

	const u32 code = value >> 24;
	{
		if (code < 64)
@ -2609,6 +2583,8 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
	{
		while (!ch_out_mbox.try_push(value))
		{
			state += cpu_flag::wait;

			if (is_stopped())
			{
				return false;
@ -2617,6 +2593,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
			thread_ctrl::wait();
		}

		check_state();
		return true;
	}

@ -2770,6 +2747,7 @@ bool spu_thread::stop_and_signal(u32 code)

	if (offset >= RAW_SPU_BASE_ADDR)
	{
		state += cpu_flag::wait;
		status.atomic_op([code](u32& status)
		{
			status = (status & 0xffff) | (code << 16);
@ -2779,6 +2757,7 @@ bool spu_thread::stop_and_signal(u32 code)

		int_ctrl[2].set(SPU_INT2_STAT_SPU_STOP_AND_SIGNAL_INT);
		state += cpu_flag::stop;
		check_state();
		return true;
	}

@ -2808,6 +2787,8 @@ bool spu_thread::stop_and_signal(u32 code)
		// HACK: wait for executable code
		while (!_ref<u32>(pc))
		{
			state += cpu_flag::wait;

			if (is_stopped())
			{
				return false;
@ -2816,12 +2797,15 @@ bool spu_thread::stop_and_signal(u32 code)
			thread_ctrl::wait_for(1000);
		}

		check_state();
		return false;
	}

	case 0x001:
	{
		state += cpu_flag::wait;
		thread_ctrl::wait_for(1000); // hack
		check_state();
		return true;
	}

@ -2857,6 +2841,8 @@ bool spu_thread::stop_and_signal(u32 code)

		std::shared_ptr<lv2_event_queue> queue;

		state += cpu_flag::wait;

		while (true)
		{
			queue.reset();
@ -2897,6 +2883,7 @@ bool spu_thread::stop_and_signal(u32 code)

			if (!queue)
			{
				check_state();
				return ch_in_mbox.set_values(1, CELL_EINVAL), true; // TODO: check error value
			}

@ -2927,6 +2914,7 @@ bool spu_thread::stop_and_signal(u32 code)
				const auto data3 = static_cast<u32>(std::get<3>(event));
				ch_in_mbox.set_values(4, CELL_OK, data1, data2, data3);
				queue->events.pop_front();
				check_state();
				return true;
			}
		}
@ -2972,6 +2960,7 @@ bool spu_thread::stop_and_signal(u32 code)
			}
		}

		check_state();
		return true;
	}

@ -3045,6 +3034,8 @@ bool spu_thread::stop_and_signal(u32 code)
	{
		/* ===== sys_spu_thread_group_exit ===== */

		state += cpu_flag::wait;

		u32 value = 0;

		if (!ch_out_mbox.try_pop(value))
@ -3069,6 +3060,7 @@ bool spu_thread::stop_and_signal(u32 code)
		group->join_state = SYS_SPU_THREAD_GROUP_JOIN_GROUP_EXIT;

		state += cpu_flag::stop;
		check_state();
		return true;
	}

@ -3076,6 +3068,8 @@ bool spu_thread::stop_and_signal(u32 code)
	{
		/* ===== sys_spu_thread_exit ===== */

		state += cpu_flag::wait;

		if (!ch_out_mbox.get_count())
		{
			fmt::throw_exception("sys_spu_thread_exit(): Out_MBox is empty" HERE);
@ -3084,6 +3078,7 @@ bool spu_thread::stop_and_signal(u32 code)
		LOG_TRACE(SPU, "sys_spu_thread_exit(status=0x%x)", ch_out_mbox.get_value());
		status |= SPU_STATUS_STOPPED_BY_STOP;
		state += cpu_flag::stop;
		check_state();
		return true;
	}
}

@ -529,7 +529,7 @@ public:

	// Reservation Data
	u64 rtime = 0;
	std::array<u128, 8> rdata{};
	std::array<v128, 8> rdata{};
	u32 raddr = 0;

	u32 srr0;
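
rdata changes here from std::array<u128, 8> to std::array<v128, 8>, which is why the cmp_rdata/mov_rdata call sites throughout the diff now take the arrays directly instead of .data() pointers. Plausible shapes for the two helpers (a sketch only; the real versions presumably use explicit vector loads/stores over the 8 x 16 bytes):

	#include <array>
	#include <cstring>

	using rdata_t = std::array<unsigned char, 128>; // stand-in for std::array<v128, 8>

	inline bool cmp_rdata(const rdata_t& a, const rdata_t& b)
	{
		return std::memcmp(a.data(), b.data(), 128) == 0;
	}

	inline void mov_rdata(rdata_t& dst, const rdata_t& src)
	{
		std::memcpy(dst.data(), src.data(), 128);
	}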
@ -357,6 +357,11 @@ s32 sys_net_bnet_accept(ppu_thread& ppu, s32 s, vm::ptr<sys_net_sockaddr> addr,
		}
	}

	if (ppu.is_stopped())
	{
		return 0;
	}

	auto newsock = std::make_shared<lv2_socket>(native_socket);

	result = idm::import_existing<lv2_socket>(newsock);
@ -975,6 +980,11 @@ s32 sys_net_bnet_recvfrom(ppu_thread& ppu, s32 s, vm::ptr<void> buf, u32 len, s3
		}
	}

	if (ppu.is_stopped())
	{
		return 0;
	}

	// TODO
	if (addr)
	{
@ -1796,6 +1806,11 @@ s32 sys_net_bnet_select(ppu_thread& ppu, s32 nfds, vm::ptr<sys_net_fd_set> readf
		}
	}

	if (ppu.is_stopped())
	{
		return 0;
	}

	if (readfds)
		*readfds = rread;
	if (writefds)
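
All three blocking net syscalls gain the same early exit after their wait loops: once the PPU thread is being stopped, the result is discarded, so the call bails out instead of touching guest state. Sketched with generic stand-ins ('stopped' plays the role of ppu.is_stopped()):

	#include <atomic>
	#include <thread>

	int blocking_call(std::atomic<bool>& ready, std::atomic<bool>& stopped)
	{
		while (!ready)
		{
			if (stopped)
			{
				return 0; // thread is being torn down; the result is discarded
			}

			std::this_thread::yield();
		}

		return 1; // normal completion
	}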
@ -172,6 +172,8 @@ namespace vm

	void temporary_unlock(cpu_thread& cpu) noexcept
	{
		cpu.state += cpu_flag::wait;

		if (g_tls_locked && g_tls_locked->compare_and_swap_test(&cpu, nullptr))
		{
			cpu.cpu_unmem();
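
temporary_unlock now flags the thread as waiting before releasing its memory lock. The compare_and_swap_test call, sketched with std::atomic (assumed semantics: clear the TLS slot only if this thread still owns it, and report whether it did):

	#include <atomic>

	struct cpu_stub {}; // stand-in for cpu_thread

	bool release_if_owned(std::atomic<cpu_stub*>& slot, cpu_stub* self)
	{
		cpu_stub* expected = self;
		return slot.compare_exchange_strong(expected, nullptr);
	}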
@ -936,11 +936,18 @@ void Emulator::Load(const std::string& title_id, bool add_only, bool force_globa

	// Set RTM usage
	g_use_rtm = utils::has_rtm() && ((utils::has_mpx() && g_cfg.core.enable_TSX == tsx_usage::enabled) || g_cfg.core.enable_TSX == tsx_usage::forced);

	if (g_use_rtm && !utils::has_mpx())
	{
		LOG_WARNING(GENERAL, "TSX forced by User");
	}

	if (g_use_rtm && g_cfg.core.preferred_spu_threads)
	{
		g_cfg.core.preferred_spu_threads.set(0);
		LOG_ERROR(GENERAL, "Preferred SPU Threads forcefully disabled - not compatible with TSX in this version.");
	}

	// Load patches from different locations
	fxm::check_unlocked<patch_engine>()->append(fs::get_config_dir() + "data/" + m_title_id + "/patch.yml");
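
The TSX gate above, restated as a standalone predicate (a sketch; the MPX check apparently serves as a proxy for CPUs whose TSX is not affected by the Haswell/Broadwell concerns mentioned in the config comment below, which is why 'forced' skips it and triggers the warning):

	enum class tsx_usage { disabled, enabled, forced };

	bool decide_use_rtm(bool has_rtm, bool has_mpx, tsx_usage cfg)
	{
		// 'enabled' (the default) requires MPX as well; 'forced' trusts the user
		return has_rtm && ((has_mpx && cfg == tsx_usage::enabled) || cfg == tsx_usage::forced);
	}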
@ -385,7 +385,6 @@ struct cfg_root : cfg::node
	cfg::_enum<spu_block_size_type> spu_block_size{this, "SPU Block Size", spu_block_size_type::safe};
	cfg::_bool spu_accurate_getllar{this, "Accurate GETLLAR", false};
	cfg::_bool spu_accurate_putlluc{this, "Accurate PUTLLUC", false};
	cfg::_bool spu_accurate_putllc{this, "Accurate PUTLLC", false};
	cfg::_bool spu_verification{this, "SPU Verification", true}; // Should be enabled
	cfg::_bool spu_cache{this, "SPU Cache", true};
	cfg::_enum<tsx_usage> enable_TSX{this, "Enable TSX", tsx_usage::enabled}; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully