
New reservations

Memory system cleanup
sys_memory_get_page_attribute
Nekotekina 2017-02-17 22:35:57 +03:00
parent 7cdb5f3123
commit 5e3bacbd9b
26 changed files with 1536 additions and 1531 deletions
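
The heart of the change is replacing the old buffer-based helpers (vm::reservation_acquire(&value, addr, size) / vm::reservation_update(addr, &value, size)) with per-thread reservation state — raddr, rtime, rdata — validated under vm::g_mutex. A condensed sketch of the new load-reserve / store-conditional pattern, taken from the ppu_lwarx/ppu_stwcx bodies in this diff (wrapper names hypothetical; cpu_flag bookkeeping and the 64-bit variants omitted):

u32 load_reserve(ppu_thread& ppu, u32 addr)
{
    ppu.rtime = vm::reservation_acquire(addr, sizeof(u32)); // snapshot the timestamp first
    _mm_lfence();                                           // then read the data
    ppu.raddr = addr;
    ppu.rdata = vm::_ref<const atomic_be_t<u32>>(addr);
    return static_cast<u32>(ppu.rdata);
}

bool store_conditional(ppu_thread& ppu, u32 addr, u32 value)
{
    auto& data = vm::_ref<atomic_be_t<u32>>(addr);
    if (ppu.raddr != addr || ppu.rdata != data.load())
    {
        ppu.raddr = 0; // reservation already lost
        return false;
    }
    writer_lock lock(vm::g_mutex); // serialize against all other reservation updates
    const bool ok = ppu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(static_cast<u32>(ppu.rdata), value);
    if (ok)
    {
        vm::reservation_update(addr, sizeof(u32)); // bump the timestamp
        vm::notify(addr, sizeof(u32));             // wake reservation waiters
    }
    ppu.raddr = 0;
    return ok;
}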


@ -24,6 +24,7 @@
#include "sync.h"
thread_local u64 g_tls_fault_all = 0;
thread_local u64 g_tls_fault_rsx = 0;
thread_local u64 g_tls_fault_spu = 0;
@ -974,7 +975,7 @@ size_t get_x64_access_size(x64_context* context, x64_op_t op, x64_reg_t reg, siz
{
if (EFLAGS(context) & 0x400 /* direction flag */)
{
// skip reservation bound check (TODO)
// TODO
return 0;
}
@ -990,22 +991,6 @@ size_t get_x64_access_size(x64_context* context, x64_op_t op, x64_reg_t reg, siz
}
}
if (op == X64OP_CMPXCHG)
{
// Detect whether the instruction can't actually modify memory to avoid breaking reservation
u64 cmp, exch;
if (!get_x64_reg_value(context, reg, d_size, i_size, cmp) || !get_x64_reg_value(context, X64R_RAX, d_size, i_size, exch))
{
return -1;
}
if (cmp == exch)
{
// skip reservation bound check
return 0;
}
}
return d_size;
}
@ -1016,9 +1001,24 @@ namespace rsx
bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
{
const auto cpu = get_current_cpu_thread();
if (cpu)
{
cpu->state += cpu_flag::is_waiting;
}
g_tls_fault_all++;
if (rsx::g_access_violation_handler && rsx::g_access_violation_handler(addr, is_writing))
{
g_tls_fault_rsx++;
if (cpu)
{
cpu->state -= cpu_flag::is_waiting;
}
return true;
}
@ -1147,456 +1147,26 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
}
}
if (cpu)
{
cpu->state -= cpu_flag::is_waiting;
}
// skip processed instruction
RIP(context) += i_size;
g_tls_fault_spu++;
return true;
}
// check if fault is caused by the reservation
return vm::reservation_query(addr, (u32)a_size, is_writing, [&]() -> bool
{
// write memory using "privileged" access to avoid breaking reservation
if (!d_size || !i_size)
{
LOG_ERROR(MEMORY, "Invalid or unsupported instruction (op=%d, reg=%d, d_size=%lld, a_size=0x%llx, i_size=%lld)", (u32)op, (u32)reg, d_size, a_size, i_size);
report_opcode();
return false;
}
switch (op)
{
case X64OP_STORE:
case X64OP_STORE_BE:
{
if (d_size == 16 && op == X64OP_STORE)
{
if (reg - X64R_XMM0 >= 16)
{
LOG_ERROR(MEMORY, "X64OP_STORE: d_size=16, reg=%d", (u32)reg);
return false;
}
std::memcpy(vm::base_priv(addr), XMMREG(context, reg - X64R_XMM0), 16);
break;
}
u64 reg_value;
if (!get_x64_reg_value(context, reg, d_size, i_size, reg_value))
{
return false;
}
if (op == X64OP_STORE_BE && d_size == 2)
{
reg_value = se_storage<u16>::swap((u16)reg_value);
}
else if (op == X64OP_STORE_BE && d_size == 4)
{
reg_value = se_storage<u32>::swap((u32)reg_value);
}
else if (op == X64OP_STORE_BE && d_size == 8)
{
reg_value = se_storage<u64>::swap(reg_value);
}
else if (op == X64OP_STORE_BE)
{
return false;
}
if (d_size == 1)
{
*(volatile u8*)vm::base_priv(addr) = (u8)reg_value;
}
else if (d_size == 2 && addr % 2 == 0)
{
*(volatile u16*)vm::base_priv(addr) = (u16)reg_value;
}
else if (d_size == 4 && addr % 4 == 0)
{
*(volatile u32*)vm::base_priv(addr) = (u32)reg_value;
}
else if (d_size == 8 && addr % 8 == 0)
{
*(volatile u64*)vm::base_priv(addr) = (u64)reg_value;
}
else
{
std::memcpy(vm::base_priv(addr), &reg_value, d_size);
}
break;
}
case X64OP_MOVS:
{
if (d_size > 8)
{
LOG_ERROR(MEMORY, "X64OP_MOVS: d_size=%lld", d_size);
return false;
}
if (vm::base(addr) != (void*)RDI(context))
{
LOG_ERROR(MEMORY, "X64OP_MOVS: rdi=0x%llx, rsi=0x%llx, addr=0x%x", (u64)RDI(context), (u64)RSI(context), addr);
return false;
}
u32 a_addr = addr;
while (a_addr >> 12 == addr >> 12)
{
u64 value;
// copy data
std::memcpy(&value, (void*)RSI(context), d_size);
std::memcpy(vm::base_priv(a_addr), &value, d_size);
// shift pointers
if (EFLAGS(context) & 0x400 /* direction flag */)
{
LOG_ERROR(MEMORY, "X64OP_MOVS TODO: reversed direction");
return false;
//RSI(context) -= d_size;
//RDI(context) -= d_size;
//a_addr -= (u32)d_size;
}
else
{
RSI(context) += d_size;
RDI(context) += d_size;
a_addr += (u32)d_size;
}
// decrement counter
if (reg == X64_NOT_SET || !--RCX(context))
{
break;
}
}
if (reg == X64_NOT_SET || !RCX(context))
{
break;
}
// don't skip partially processed instruction
return true;
}
case X64OP_STOS:
{
if (d_size > 8)
{
LOG_ERROR(MEMORY, "X64OP_STOS: d_size=%lld", d_size);
return false;
}
if (vm::base(addr) != (void*)RDI(context))
{
LOG_ERROR(MEMORY, "X64OP_STOS: rdi=0x%llx, addr=0x%x", (u64)RDI(context), addr);
return false;
}
u64 value;
if (!get_x64_reg_value(context, X64R_RAX, d_size, i_size, value))
{
return false;
}
u32 a_addr = addr;
while (a_addr >> 12 == addr >> 12)
{
// fill data with value
std::memcpy(vm::base_priv(a_addr), &value, d_size);
// shift pointers
if (EFLAGS(context) & 0x400 /* direction flag */)
{
LOG_ERROR(MEMORY, "X64OP_STOS TODO: reversed direction");
return false;
//RDI(context) -= d_size;
//a_addr -= (u32)d_size;
}
else
{
RDI(context) += d_size;
a_addr += (u32)d_size;
}
// decrement counter
if (reg == X64_NOT_SET || !--RCX(context))
{
break;
}
}
if (reg == X64_NOT_SET || !RCX(context))
{
break;
}
// don't skip partially processed instruction
return true;
}
case X64OP_XCHG:
{
u64 reg_value;
if (!get_x64_reg_value(context, reg, d_size, i_size, reg_value))
{
return false;
}
switch (d_size)
{
case 1: reg_value = ((atomic_t<u8>*)vm::base_priv(addr))->exchange((u8)reg_value); break;
case 2: reg_value = ((atomic_t<u16>*)vm::base_priv(addr))->exchange((u16)reg_value); break;
case 4: reg_value = ((atomic_t<u32>*)vm::base_priv(addr))->exchange((u32)reg_value); break;
case 8: reg_value = ((atomic_t<u64>*)vm::base_priv(addr))->exchange((u64)reg_value); break;
default: return false;
}
if (!put_x64_reg_value(context, reg, d_size, reg_value))
{
return false;
}
break;
}
case X64OP_CMPXCHG:
{
u64 reg_value, old_value, cmp_value;
if (!get_x64_reg_value(context, reg, d_size, i_size, reg_value) || !get_x64_reg_value(context, X64R_RAX, d_size, i_size, cmp_value))
{
return false;
}
switch (d_size)
{
case 1: old_value = ((atomic_t<u8>*)vm::base_priv(addr))->compare_and_swap((u8)cmp_value, (u8)reg_value); break;
case 2: old_value = ((atomic_t<u16>*)vm::base_priv(addr))->compare_and_swap((u16)cmp_value, (u16)reg_value); break;
case 4: old_value = ((atomic_t<u32>*)vm::base_priv(addr))->compare_and_swap((u32)cmp_value, (u32)reg_value); break;
case 8: old_value = ((atomic_t<u64>*)vm::base_priv(addr))->compare_and_swap((u64)cmp_value, (u64)reg_value); break;
default: return false;
}
if (!put_x64_reg_value(context, X64R_RAX, d_size, old_value) || !set_x64_cmp_flags(context, d_size, cmp_value, old_value))
{
return false;
}
break;
}
case X64OP_AND:
{
u64 value;
if (!get_x64_reg_value(context, reg, d_size, i_size, value))
{
return false;
}
switch (d_size)
{
case 1: value = *(atomic_t<u8>*)vm::base_priv(addr) &= (u8)value; break;
case 2: value = *(atomic_t<u16>*)vm::base_priv(addr) &= (u16)value; break;
case 4: value = *(atomic_t<u32>*)vm::base_priv(addr) &= (u32)value; break;
case 8: value = *(atomic_t<u64>*)vm::base_priv(addr) &= (u64)value; break;
default: return false;
}
if (!set_x64_cmp_flags(context, d_size, value, 0))
{
return false;
}
break;
}
case X64OP_OR:
{
u64 value;
if (!get_x64_reg_value(context, reg, d_size, i_size, value))
{
return false;
}
switch (d_size)
{
case 1: value = *(atomic_t<u8>*)vm::base_priv(addr) |= (u8)value; break;
case 2: value = *(atomic_t<u16>*)vm::base_priv(addr) |= (u16)value; break;
case 4: value = *(atomic_t<u32>*)vm::base_priv(addr) |= (u32)value; break;
case 8: value = *(atomic_t<u64>*)vm::base_priv(addr) |= (u64)value; break;
default: return false;
}
if (!set_x64_cmp_flags(context, d_size, value, 0))
{
return false;
}
break;
}
case X64OP_XOR:
{
u64 value;
if (!get_x64_reg_value(context, reg, d_size, i_size, value))
{
return false;
}
switch (d_size)
{
case 1: value = *(atomic_t<u8>*)vm::base_priv(addr) ^= (u8)value; break;
case 2: value = *(atomic_t<u16>*)vm::base_priv(addr) ^= (u16)value; break;
case 4: value = *(atomic_t<u32>*)vm::base_priv(addr) ^= (u32)value; break;
case 8: value = *(atomic_t<u64>*)vm::base_priv(addr) ^= (u64)value; break;
default: return false;
}
if (!set_x64_cmp_flags(context, d_size, value, 0))
{
return false;
}
break;
}
case X64OP_INC:
{
u64 value;
switch (d_size)
{
case 1: value = ++*(atomic_t<u8>*)vm::base_priv(addr); break;
case 2: value = ++*(atomic_t<u16>*)vm::base_priv(addr); break;
case 4: value = ++*(atomic_t<u32>*)vm::base_priv(addr); break;
case 8: value = ++*(atomic_t<u64>*)vm::base_priv(addr); break;
default: return false;
}
if (!set_x64_cmp_flags(context, d_size, value, 1, false)) // ???
{
return false;
}
break;
}
case X64OP_DEC:
{
u64 value;
switch (d_size)
{
case 1: value = --*(atomic_t<u8>*)vm::base_priv(addr); break;
case 2: value = --*(atomic_t<u16>*)vm::base_priv(addr); break;
case 4: value = --*(atomic_t<u32>*)vm::base_priv(addr); break;
case 8: value = --*(atomic_t<u64>*)vm::base_priv(addr); break;
default: return false;
}
if (!set_x64_cmp_flags(context, d_size, value, -1, false)) // ???
{
return false;
}
break;
}
case X64OP_ADD:
{
u64 value, new_value;
if (!get_x64_reg_value(context, reg, d_size, i_size, value))
{
return false;
}
switch (d_size)
{
case 1: new_value = *(atomic_t<u8>*)vm::base_priv(addr) += (u8)value; break;
case 2: new_value = *(atomic_t<u16>*)vm::base_priv(addr) += (u16)value; break;
case 4: new_value = *(atomic_t<u32>*)vm::base_priv(addr) += (u32)value; break;
case 8: new_value = *(atomic_t<u64>*)vm::base_priv(addr) += (u64)value; break;
default: return false;
}
if (!set_x64_cmp_flags(context, d_size, new_value, value)) // ???
{
return false;
}
break;
}
case X64OP_ADC:
{
u64 value, new_value;
if (!get_x64_reg_value(context, reg, d_size, i_size, value))
{
return false;
}
switch (d_size)
{
case 1: new_value = *(atomic_t<u8>*)vm::base_priv(addr) += (u8)(value + (EFLAGS(context) & 1)); break;
case 2: new_value = *(atomic_t<u16>*)vm::base_priv(addr) += (u16)(value + (EFLAGS(context) & 1)); break;
case 4: new_value = *(atomic_t<u32>*)vm::base_priv(addr) += (u32)(value + (EFLAGS(context) & 1)); break;
case 8: new_value = *(atomic_t<u64>*)vm::base_priv(addr) += (u64)(value + (EFLAGS(context) & 1)); break;
default: return false;
}
if (!set_x64_cmp_flags(context, d_size, new_value, value + (EFLAGS(context) & 1))) // ???
{
return false;
}
break;
}
case X64OP_SUB:
{
u64 value, new_value;
if (!get_x64_reg_value(context, reg, d_size, i_size, value))
{
return false;
}
switch (d_size)
{
case 1: new_value = *(atomic_t<u8>*)vm::base_priv(addr) -= (u8)value; break;
case 2: new_value = *(atomic_t<u16>*)vm::base_priv(addr) -= (u16)value; break;
case 4: new_value = *(atomic_t<u32>*)vm::base_priv(addr) -= (u32)value; break;
case 8: new_value = *(atomic_t<u64>*)vm::base_priv(addr) -= (u64)value; break;
default: return false;
}
if (!set_x64_cmp_flags(context, d_size, new_value, 0 - value)) // ???
{
return false;
}
break;
}
case X64OP_SBB:
{
u64 value, new_value;
if (!get_x64_reg_value(context, reg, d_size, i_size, value))
{
return false;
}
switch (d_size)
{
case 1: new_value = *(atomic_t<u8>*)vm::base_priv(addr) -= (u8)(value + (EFLAGS(context) & 1)); break;
case 2: new_value = *(atomic_t<u16>*)vm::base_priv(addr) -= (u16)(value + (EFLAGS(context) & 1)); break;
case 4: new_value = *(atomic_t<u32>*)vm::base_priv(addr) -= (u32)(value + (EFLAGS(context) & 1)); break;
case 8: new_value = *(atomic_t<u64>*)vm::base_priv(addr) -= (u64)(value + (EFLAGS(context) & 1)); break;
default: return false;
}
if (!set_x64_cmp_flags(context, d_size, new_value, 0 - (value + (EFLAGS(context) & 1)))) // ???
{
return false;
}
break;
}
default:
{
LOG_ERROR(MEMORY, "Invalid or unsupported operation (op=%d, reg=%d, d_size=%lld, a_size=0x%llx, i_size=%lld)", (u32)op, (u32)reg, d_size, a_size, i_size);
report_opcode();
return false;
}
}
// skip processed instruction
RIP(context) += i_size;
return true;
});
// TODO: allow recovering from a page fault as a feature of PS3 virtual memory
if (cpu)
{
LOG_FATAL(MEMORY, "Access violation %s location 0x%x", is_writing ? "writing" : "reading", addr);
cpu->state += cpu_flag::dbg_pause;
cpu->test_state();
}
return true;
}
#ifdef __linux__
@ -1655,29 +1225,6 @@ static bool is_leaf_function(u64 rip)
#endif
}
static thread_local u64 s_tls_ret_pos = 0;
static thread_local u64 s_tls_ret_addr = 0;
[[noreturn]] static void throw_access_violation(const char* cause, u64 addr)
{
if (s_tls_ret_pos) *(u64*)s_tls_ret_pos = s_tls_ret_addr; // Fix stack
vm::throw_access_violation(addr, cause);
std::abort();
}
// Modify context in order to convert hardware exception to C++ exception
static void prepare_throw_access_violation(x64_context* context, const char* cause, u32 address)
{
// Set throw_access_violation() call args (old register values are lost)
ARG1(context) = (u64)cause;
ARG2(context) = address;
// Push the exception address as a "return" address (throw_access_violation() shall not return)
s_tls_ret_addr = RIP(context);
s_tls_ret_pos = is_leaf_function(s_tls_ret_addr) ? 0 : RSP(context) -= sizeof(u64);
RIP(context) = (u64)std::addressof(throw_access_violation);
}
#ifdef _WIN32
static LONG exception_handler(PEXCEPTION_POINTERS pExp)
@ -1687,8 +1234,6 @@ static LONG exception_handler(PEXCEPTION_POINTERS pExp)
if (pExp->ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && addr64 < 0x100000000ull)
{
vm::g_tls_fault_count++;
if (thread_ctrl::get_current() && handle_access_violation((u32)addr64, is_writing, pExp->ContextRecord))
{
return EXCEPTION_CONTINUE_EXECUTION;
@ -1707,14 +1252,6 @@ static LONG exception_filter(PEXCEPTION_POINTERS pExp)
const u64 addr64 = pExp->ExceptionRecord->ExceptionInformation[1] - (u64)vm::base(0);
const auto cause = pExp->ExceptionRecord->ExceptionInformation[0] != 0 ? "writing" : "reading";
if (!(vm::g_tls_fault_count & (1ull << 63)) && addr64 < 0x100000000ull)
{
vm::g_tls_fault_count |= (1ull << 63);
// Setup throw_access_violation() call on the context
prepare_throw_access_violation(pExp->ContextRecord, cause, (u32)addr64);
return EXCEPTION_CONTINUE_EXECUTION;
}
msg += fmt::format("Segfault %s location %p at %p.\n", cause, pExp->ExceptionRecord->ExceptionInformation[1], pExp->ExceptionRecord->ExceptionAddress);
}
else
@ -1828,21 +1365,16 @@ static void signal_handler(int sig, siginfo_t* info, void* uct)
if (addr64 < 0x100000000ull)
{
vm::g_tls_fault_count++;
// Try to process access violation
if (!thread_ctrl::get_current() || !handle_access_violation((u32)addr64, is_writing, context))
if (thread_ctrl::get_current() && handle_access_violation((u32)addr64, is_writing, context))
{
// Setup throw_access_violation() call on the context
prepare_throw_access_violation(context, cause, (u32)addr64);
return;
}
}
else
{
// TODO (debugger interaction)
report_fatal_error(fmt::format("Segfault %s location %p at %p.", cause, info->si_addr, RIP(context)));
std::abort();
}
// TODO (debugger interaction)
report_fatal_error(fmt::format("Segfault %s location %p at %p.", cause, info->si_addr, RIP(context)));
std::abort();
}
const bool s_exception_handler_set = []() -> bool
@ -1863,17 +1395,6 @@ const bool s_exception_handler_set = []() -> bool
#endif
const bool s_self_test = []() -> bool
{
// Find ret instruction
if ((*(u8*)throw_access_violation & 0xF6) == 0xC2)
{
std::abort();
}
return true;
}();
// TODO
extern atomic_t<u32> g_thread_count(0);
@ -1972,9 +1493,6 @@ void thread_ctrl::initialize()
void thread_ctrl::finalize(std::exception_ptr eptr) noexcept
{
// TODO
vm::reservation_free();
// Run atexit functions
m_task.invoke();
m_task.reset();
@ -1998,7 +1516,7 @@ void thread_ctrl::finalize(std::exception_ptr eptr) noexcept
LOG_NOTICE(GENERAL, "Thread time: %fs (%fGc); Faults: %u [rsx:%u, spu:%u];",
time / 1000000000.,
cycles / 1000000000.,
vm::g_tls_fault_count,
g_tls_fault_all,
g_tls_fault_rsx,
g_tls_fault_spu);


@ -1,47 +1,322 @@
#include "stdafx.h"
#include "Emu/Memory/vm.h"
#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/lv2/sys_sync.h"
#include "MFC.h"
const char* get_mfc_cmd_name(u32 cmd)
template <>
void fmt_class_string<MFC>::format(std::string& out, u64 arg)
{
switch (cmd)
format_enum(out, arg, [](MFC cmd)
{
case MFC_PUT_CMD: return "PUT";
case MFC_PUTB_CMD: return "PUTB";
case MFC_PUTF_CMD: return "PUTF";
case MFC_PUTS_CMD: return "PUTS";
case MFC_PUTBS_CMD: return "PUTBS";
case MFC_PUTFS_CMD: return "PUTFS";
case MFC_PUTR_CMD: return "PUTR";
case MFC_PUTRB_CMD: return "PUTRB";
case MFC_PUTRF_CMD: return "PUTRF";
case MFC_GET_CMD: return "GET";
case MFC_GETB_CMD: return "GETB";
case MFC_GETF_CMD: return "GETF";
case MFC_GETS_CMD: return "GETS";
case MFC_GETBS_CMD: return "GETBS";
case MFC_GETFS_CMD: return "GETFS";
case MFC_PUTL_CMD: return "PUTL";
case MFC_PUTLB_CMD: return "PUTLB";
case MFC_PUTLF_CMD: return "PUTLF";
case MFC_PUTRL_CMD: return "PUTRL";
case MFC_PUTRLB_CMD: return "PUTRLB";
case MFC_PUTRLF_CMD: return "PUTRLF";
case MFC_GETL_CMD: return "GETL";
case MFC_GETLB_CMD: return "GETLB";
case MFC_GETLF_CMD: return "GETLF";
switch (cmd)
{
case MFC_PUT_CMD: return "PUT";
case MFC_PUTB_CMD: return "PUTB";
case MFC_PUTF_CMD: return "PUTF";
case MFC_PUTS_CMD: return "PUTS";
case MFC_PUTBS_CMD: return "PUTBS";
case MFC_PUTFS_CMD: return "PUTFS";
case MFC_PUTR_CMD: return "PUTR";
case MFC_PUTRB_CMD: return "PUTRB";
case MFC_PUTRF_CMD: return "PUTRF";
case MFC_GET_CMD: return "GET";
case MFC_GETB_CMD: return "GETB";
case MFC_GETF_CMD: return "GETF";
case MFC_GETS_CMD: return "GETS";
case MFC_GETBS_CMD: return "GETBS";
case MFC_GETFS_CMD: return "GETFS";
case MFC_PUTL_CMD: return "PUTL";
case MFC_PUTLB_CMD: return "PUTLB";
case MFC_PUTLF_CMD: return "PUTLF";
case MFC_PUTRL_CMD: return "PUTRL";
case MFC_PUTRLB_CMD: return "PUTRLB";
case MFC_PUTRLF_CMD: return "PUTRLF";
case MFC_GETL_CMD: return "GETL";
case MFC_GETLB_CMD: return "GETLB";
case MFC_GETLF_CMD: return "GETLF";
case MFC_GETLLAR_CMD: return "GETLLAR";
case MFC_PUTLLC_CMD: return "PUTLLC";
case MFC_PUTLLUC_CMD: return "PUTLLUC";
case MFC_PUTQLLUC_CMD: return "PUTQLLUC";
case MFC_GETLLAR_CMD: return "GETLLAR";
case MFC_PUTLLC_CMD: return "PUTLLC";
case MFC_PUTLLUC_CMD: return "PUTLLUC";
case MFC_PUTQLLUC_CMD: return "PUTQLLUC";
case MFC_SNDSIG_CMD: return "SNDSIG";
case MFC_SNDSIGB_CMD: return "SNDSIGB";
case MFC_SNDSIGF_CMD: return "SNDSIGF";
case MFC_BARRIER_CMD: return "BARRIER";
case MFC_EIEIO_CMD: return "EIEIO";
case MFC_SYNC_CMD: return "SYNC";
case MFC_SNDSIG_CMD: return "SNDSIG";
case MFC_SNDSIGB_CMD: return "SNDSIGB";
case MFC_SNDSIGF_CMD: return "SNDSIGF";
case MFC_BARRIER_CMD: return "BARRIER";
case MFC_EIEIO_CMD: return "EIEIO";
case MFC_SYNC_CMD: return "SYNC";
}
return unknown;
});
}
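
With this specialization, an MFC value formats by name through the emulator's logging facility, replacing the removed get_mfc_cmd_name(); illustrative use (the LOG_TRACE added to SPUThread.cpp below relies on it):

LOG_TRACE(SPU, "DMAC: cmd=%s", MFC_GETLLAR_CMD); // logs "DMAC: cmd=GETLLAR"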
mfc_thread::mfc_thread()
: cpu_thread(0)
{
}
mfc_thread::~mfc_thread()
{
}
std::string mfc_thread::get_name() const
{
return "MFC Thread";
}
void mfc_thread::cpu_task()
{
state -= cpu_flag::is_waiting;
u32 no_updates = 0;
while (!m_spus.empty() || m_spuq.size() != 0)
{
// Add or remove destroyed SPU threads
while (m_spuq.size())
{
state += cpu_flag::is_waiting;
auto& thread_ptr = m_spuq[0];
// Look for deleted threads if nullptr received
for (auto it = m_spus.cbegin(); !thread_ptr && it != m_spus.cend();)
{
if (test(it->get()->state, cpu_flag::exit))
{
it = m_spus.erase(it);
}
else
{
it++;
}
}
// Add thread
if (thread_ptr)
{
m_spus.emplace_back(std::move(thread_ptr));
}
m_spuq.end_pop();
no_updates = 0;
}
test_state();
// Process SPU threads
for (const auto& thread_ptr : m_spus)
{
SPUThread& spu = *thread_ptr;
const auto proxy_size = spu.mfc_proxy.size();
const auto queue_size = spu.mfc_queue.size();
if (proxy_size)
{
const auto& cmd = spu.mfc_proxy[0];
spu.do_dma_transfer(cmd);
if (cmd.cmd & MFC_START_MASK && !spu.status.test_and_set(SPU_STATUS_RUNNING))
{
spu.run();
}
spu.mfc_proxy.end_pop();
no_updates = 0;
}
test_state();
if (queue_size)
{
auto& cmd = spu.mfc_queue[0];
if ((cmd.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK)) == MFC_PUTQLLUC_CMD)
{
auto& data = vm::ps3::_ref<decltype(spu.rdata)>(cmd.eal);
const auto to_write = spu._ref<decltype(spu.rdata)>(cmd.lsa & 0x3ffff);
cmd.size = 0;
no_updates = 0;
// Store unconditionally
state += cpu_flag::is_waiting;
writer_lock lock(vm::g_mutex);
data = to_write;
vm::reservation_update(cmd.eal, 128);
vm::notify(cmd.eal, 128);
}
else if (cmd.cmd & MFC_LIST_MASK)
{
struct list_element
{
be_t<u16> sb; // Stall-and-Notify bit (0x8000)
be_t<u16> ts; // List Transfer Size
be_t<u32> ea; // External Address Low
};
if (cmd.size && (spu.ch_stall_mask & (1u << cmd.tag)) == 0)
{
cmd.lsa &= 0x3fff0;
const list_element item = spu._ref<list_element>(cmd.eal & 0x3fff8);
const u32 size = item.ts;
const u32 addr = item.ea;
if (size)
{
spu_mfc_cmd transfer;
transfer.eal = addr;
transfer.eah = 0;
transfer.lsa = cmd.lsa | (addr & 0xf);
transfer.tag = cmd.tag;
transfer.cmd = MFC(cmd.cmd & ~MFC_LIST_MASK);
transfer.size = size;
spu.do_dma_transfer(transfer);
cmd.lsa += std::max<u32>(size, 16);
}
cmd.eal += 8;
cmd.size -= 8;
no_updates = 0;
if (item.sb & 0x8000)
{
spu.ch_stall_stat.push_or(spu, 1 << cmd.tag);
const u32 evt = spu.ch_event_stat.fetch_or(SPU_EVENT_SN);
if (evt & SPU_EVENT_WAITING)
{
spu.notify();
}
else if (evt & SPU_EVENT_INTR_ENABLED)
{
spu.state += cpu_flag::suspend;
}
}
}
}
else if (LIKELY(cmd.size))
{
spu.do_dma_transfer(cmd);
cmd.size = 0;
}
else if (UNLIKELY((cmd.cmd & ~0xc) == MFC_BARRIER_CMD))
{
// TODO (MFC_BARRIER_CMD, MFC_EIEIO_CMD, MFC_SYNC_CMD)
_mm_mfence();
}
if (!cmd.size)
{
spu.mfc_queue.end_pop();
no_updates = 0;
}
}
test_state();
if (spu.ch_tag_upd)
{
// Mask incomplete transfers
u32 completed = spu.ch_tag_mask;
for (u32 i = 0; i < spu.mfc_queue.size(); i++)
{
const auto& _cmd = spu.mfc_queue[i];
if (_cmd.size)
{
if (spu.ch_tag_upd == 1)
{
completed &= ~(1u << _cmd.tag);
}
else
{
completed = 0;
break;
}
}
}
if (completed && spu.ch_tag_upd.exchange(0))
{
spu.ch_tag_stat.push(spu, completed);
no_updates = 0;
}
}
test_state();
}
if (no_updates++)
{
state += cpu_flag::is_waiting;
if (no_updates >= 3)
{
if (m_spuq.size())
{
no_updates = 0;
}
for (const auto& thread_ptr : m_spus)
{
SPUThread& spu = *thread_ptr;
if (spu.mfc_proxy.size())
{
no_updates = 0;
break;
}
if (spu.mfc_queue.size())
{
auto& cmd = spu.mfc_queue[0];
if ((cmd.cmd & MFC_LIST_MASK) == 0 || (spu.ch_stall_mask & (1u << cmd.tag)) == 0)
{
no_updates = 0;
break;
}
}
if (spu.ch_tag_upd)
{
no_updates = 0;
break;
}
}
if (no_updates)
{
thread_ctrl::wait_for(100);
}
}
else
{
reader_lock lock(vm::g_mutex);
vm::notify_all();
}
}
}
}
void mfc_thread::add_spu(spu_ptr _spu)
{
while (!m_spuq.try_push(std::move(_spu)))
{
busy_wait();
continue;
}
return "UNKNOWN";
run();
}


@ -1,8 +1,11 @@
#pragma once
const char* get_mfc_cmd_name(u32 cmd);
#include "Emu/CPU/CPUThread.h"
#include "Utilities/lockless.h"
enum : u32
#include <deque>
enum MFC : u8
{
MFC_PUT_CMD = 0x20, MFC_PUTB_CMD = 0x21, MFC_PUTF_CMD = 0x22,
MFC_PUTS_CMD = 0x28, MFC_PUTBS_CMD = 0x29, MFC_PUTFS_CMD = 0x2a,
@ -80,29 +83,34 @@ enum : u32
MFG_MULTISOURCE_SYNC_EVENT = 0x00001000,
};
struct spu_mfc_arg_t
struct alignas(16) spu_mfc_cmd
{
union
{
u64 ea;
struct
{
u32 eal;
u32 eah;
};
};
MFC cmd;
u8 tag;
u16 size;
u32 lsa;
union
{
struct
{
u16 tag;
u16 size;
};
u32 size_tag;
};
u32 eal;
u32 eah;
};
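
The replacement packs a complete command into one 16-byte, 16-byte-aligned value (cmd, tag, size, lsa, eal, eah), small enough to pass through the lock-free queues below by value. A quick layout check, assuming the field types shown above:

static_assert(sizeof(spu_mfc_cmd) == 16, "one queue slot");
static_assert(alignof(spu_mfc_cmd) == 16, "forced by alignas(16)");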
class mfc_thread : public cpu_thread
{
using spu_ptr = std::shared_ptr<class SPUThread>;
// SPU threads to poll
std::vector<spu_ptr> m_spus;
// SPU threads to enqueue
lf_mpsc<spu_ptr, 128> m_spuq;
public:
mfc_thread();
virtual ~mfc_thread() override;
virtual std::string get_name() const override;
virtual void cpu_task() override;
virtual void add_spu(spu_ptr _spu);
};


@ -217,7 +217,7 @@ struct vdec_thread : ppu_thread
cellVdec.trace("End sequence...");
}
while (true)
while (max_frames)
{
vdec_frame frame;
frame.avf.reset(av_frame_alloc());
@ -338,10 +338,13 @@ struct vdec_thread : ppu_thread
}
}
cb_func(*this, id, vcmd == vdec_cmd::decode ? CELL_VDEC_MSG_TYPE_AUDONE : CELL_VDEC_MSG_TYPE_SEQDONE, CELL_OK, cb_arg);
lv2_obj::sleep(*this);
while (std::lock_guard<std::mutex>{mutex}, out.size() > max_frames)
if (max_frames)
{
cb_func(*this, id, vcmd == vdec_cmd::decode ? CELL_VDEC_MSG_TYPE_AUDONE : CELL_VDEC_MSG_TYPE_SEQDONE, CELL_OK, cb_arg);
lv2_obj::sleep(*this);
}
while (std::lock_guard<std::mutex>{mutex}, max_frames && out.size() > max_frames)
{
thread_ctrl::wait();
}
@ -450,7 +453,7 @@ s32 cellVdecClose(ppu_thread& ppu, u32 handle)
{
std::lock_guard<std::mutex> lock(vdec->mutex);
vdec->cmd_push({vdec_cmd::close, 0});
vdec->out = decltype(vdec->out){};
vdec->max_frames = 0;
}
vdec->notify();


@ -263,6 +263,11 @@ extern u64 get_timebased_time();
extern void ppu_execute_syscall(ppu_thread& ppu, u64 code);
extern void ppu_execute_function(ppu_thread& ppu, u32 index);
extern u32 ppu_lwarx(ppu_thread& ppu, u32 addr);
extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr);
extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value);
extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value);
namespace vm { using namespace ps3; }
class ppu_scale_table_t
@ -2269,11 +2274,7 @@ bool ppu_interpreter::MFOCRF(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::LWARX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
be_t<u32> value;
vm::reservation_acquire(&value, vm::cast(addr, HERE), SIZE_32(value));
ppu.gpr[op.rd] = value;
ppu.gpr[op.rd] = ppu_lwarx(ppu, vm::cast(addr, HERE));
return true;
}
@ -2435,11 +2436,7 @@ bool ppu_interpreter::MULHW(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::LDARX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
be_t<u64> value;
vm::reservation_acquire(&value, vm::cast(addr, HERE), SIZE_32(value));
ppu.gpr[op.rd] = value;
ppu.gpr[op.rd] = ppu_ldarx(ppu, vm::cast(addr, HERE));
return true;
}
@ -2565,9 +2562,7 @@ bool ppu_interpreter::STDX(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::STWCX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
const be_t<u32> value = (u32)ppu.gpr[op.rs];
ppu_cr_set(ppu, 0, false, false, vm::reservation_update(vm::cast(addr, HERE), &value, SIZE_32(value)), ppu.xer.so);
ppu_cr_set(ppu, 0, false, false, ppu_stwcx(ppu, vm::cast(addr, HERE), (u32)ppu.gpr[op.rs]), ppu.xer.so);
return true;
}
@ -2635,9 +2630,7 @@ bool ppu_interpreter::ADDZE(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::STDCX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
const be_t<u64> value = ppu.gpr[op.rs];
ppu_cr_set(ppu, 0, false, false, vm::reservation_update(vm::cast(addr, HERE), &value, SIZE_32(value)), ppu.xer.so);
ppu_cr_set(ppu, 0, false, false, ppu_stdcx(ppu, vm::cast(addr, HERE), ppu.gpr[op.rs]), ppu.xer.so);
return true;
}


@ -684,28 +684,72 @@ static void ppu_trace(u64 addr)
extern u32 ppu_lwarx(ppu_thread& ppu, u32 addr)
{
be_t<u32> reg_value;
vm::reservation_acquire(&reg_value, addr, sizeof(reg_value));
return reg_value;
ppu.rtime = vm::reservation_acquire(addr, sizeof(u32));
_mm_lfence();
ppu.raddr = addr;
ppu.rdata = vm::_ref<const atomic_be_t<u32>>(addr);
return static_cast<u32>(ppu.rdata);
}
extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr)
{
be_t<u64> reg_value;
vm::reservation_acquire(&reg_value, addr, sizeof(reg_value));
return reg_value;
ppu.rtime = vm::reservation_acquire(addr, sizeof(u64));
_mm_lfence();
ppu.raddr = addr;
ppu.rdata = vm::_ref<const atomic_be_t<u64>>(addr);
return ppu.rdata;
}
extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
{
const be_t<u32> data = reg_value;
return vm::reservation_update(addr, &data, sizeof(data));
atomic_be_t<u32>& data = vm::_ref<atomic_be_t<u32>>(addr);
if (ppu.raddr != addr || ppu.rdata != data.load())
{
ppu.raddr = 0;
return false;
}
ppu.state += cpu_flag::is_waiting;
writer_lock lock(vm::g_mutex);
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(static_cast<u32>(ppu.rdata), reg_value);
if (result)
{
vm::reservation_update(addr, sizeof(u32));
vm::notify(addr, sizeof(u32));
}
ppu.raddr = 0;
ppu.state -= cpu_flag::is_waiting;
return result;
}
extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
{
const be_t<u64> data = reg_value;
return vm::reservation_update(addr, &data, sizeof(data));
atomic_be_t<u64>& data = vm::_ref<atomic_be_t<u64>>(addr);
if (ppu.raddr != addr || ppu.rdata != data.load())
{
ppu.raddr = 0;
return false;
}
ppu.state += cpu_flag::is_waiting;
writer_lock lock(vm::g_mutex);
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u64)) && data.compare_and_swap_test(ppu.rdata, reg_value);
if (result)
{
vm::reservation_update(addr, sizeof(u64));
vm::notify(addr, sizeof(u64));
}
ppu.raddr = 0;
ppu.state -= cpu_flag::is_waiting;
return result;
}
static bool adde_carry(u64 a, u64 b, bool c)


@ -114,11 +114,14 @@ public:
}
fpscr;
u32 cia{}; // Current Instruction Address
u32 raddr{0}; // Reservation addr
u64 rtime{0};
u64 rdata{0}; // Reservation data
u64 lr{}; // Link Register
u64 ctr{}; // Counter Register
u32 vrsave{0xffffffff}; // VR Save Register (almost unused)
u32 cia{}; // Current Instruction Address
atomic_t<u32> prio{0}; // Thread priority (0..3071)
const u32 stack_size; // Stack size
const u32 stack_addr; // Stack address


@ -7,7 +7,7 @@
#include "Emu/Cell/RawSPUThread.h"
// On real hardware, SPU MFC proxy registers are accessed externally (e.g. by the PPU), possibly concurrently; don't confuse them with SPU MFC channels, which are isolated per SPU
thread_local spu_mfc_arg_t raw_spu_mfc[8] = {};
thread_local spu_mfc_cmd g_tls_mfc[8] = {};
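
A sketch of the PPU-side sequence that write_reg() below accepts when enqueuing one proxy DMA command; the MMIO offsets and MFC_GET_CMD are real names from this file, while base (the problem-state register area of this raw SPU) and the operand values are placeholders:

write_reg(base + MFC_LSA_offs, lsa);                     // staged into g_tls_mfc[index]
write_reg(base + MFC_EAH_offs, 0);
write_reg(base + MFC_EAL_offs, eal);
write_reg(base + MFC_Size_Tag_offs, (size << 16) | tag); // size in the upper halfword
write_reg(base + MFC_Class_CMD_offs, MFC_GET_CMD);       // try_push into mfc_proxy and wake the MFC thread
u32 status;
read_reg(base + MFC_CMDStatus_offs, status);             // MFC_PPU_DMA_CMD_ENQUEUE_SUCCESSFUL or MFC_PPU_DMA_QUEUE_FULL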
void RawSPUThread::cpu_task()
{
@ -49,13 +49,14 @@ bool RawSPUThread::read_reg(const u32 addr, u32& value)
{
case MFC_CMDStatus_offs:
{
value = MFC_PPU_DMA_CMD_ENQUEUE_SUCCESSFUL;
value = g_tls_mfc[index].cmd;
return true;
}
case MFC_QStatus_offs:
{
value = MFC_PROXY_COMMAND_QUEUE_EMPTY_FLAG | MFC_PPU_MAX_QUEUE_SPACE;
const auto size = mfc_proxy.size();
value = (size ? 0 : MFC_PROXY_COMMAND_QUEUE_EMPTY_FLAG) | (8 - size);
return true;
}
@ -103,48 +104,55 @@ bool RawSPUThread::write_reg(const u32 addr, const u32 value)
break;
}
raw_spu_mfc[index].lsa = value;
g_tls_mfc[index].lsa = value;
return true;
}
case MFC_EAH_offs:
{
raw_spu_mfc[index].eah = value;
g_tls_mfc[index].eah = value;
return true;
}
case MFC_EAL_offs:
{
raw_spu_mfc[index].eal = value;
g_tls_mfc[index].eal = value;
return true;
}
case MFC_Size_Tag_offs:
{
if (value >> 16 > 16 * 1024 || (u16)value >= 32)
{
break;
}
raw_spu_mfc[index].size_tag = value;
g_tls_mfc[index].tag = value & 0xff;
g_tls_mfc[index].size = value >> 16;
return true;
}
case MFC_Class_CMD_offs:
{
do_dma_transfer(value & ~MFC_START_MASK, raw_spu_mfc[index]);
raw_spu_mfc[index] = {}; // clear non-persistent data
if (value & MFC_START_MASK)
g_tls_mfc[index].cmd = MFC(value & 0xff);
if (mfc_proxy.try_push(g_tls_mfc[index]))
{
try_start();
}
auto mfc = fxm::check_unlocked<mfc_thread>();
if (test(mfc->state, cpu_flag::stop) && mfc->state.test_and_reset(cpu_flag::stop))
{
mfc->notify();
}
g_tls_mfc[index] = {};
g_tls_mfc[index].cmd = MFC(MFC_PPU_DMA_CMD_ENQUEUE_SUCCESSFUL);
}
else
{
g_tls_mfc[index] = {};
g_tls_mfc[index].cmd = MFC(MFC_PPU_DMA_QUEUE_FULL);
}
return true;
}
case Prxy_QueryType_offs:
{
// TODO
// 0 - no query requested; cancel previous request
// 1 - set (interrupt) status upon completion of any enabled tag groups
// 2 - set (interrupt) status upon completion of all enabled tag groups
@ -235,4 +243,6 @@ void spu_load_exec(const spu_exec_object& elf)
spu->cpu_init();
spu->npc = elf.header.e_entry;
fxm::get_always<mfc_thread>()->add_spu(std::move(spu));
}


@ -408,7 +408,37 @@ void spu_recompiler::MFSPR(spu_opcode_t op)
void spu_recompiler::RDCH(spu_opcode_t op)
{
InterpreterCall(op); // TODO
switch (op.ra)
{
case SPU_RdSRR0:
{
const XmmLink& vr = XmmAlloc();
c->movd(vr, SPU_OFF_32(srr0));
c->pslldq(vr, 12);
c->movdqa(SPU_OFF_128(gpr[op.rt]), vr);
return;
}
case MFC_RdTagMask:
{
const XmmLink& vr = XmmAlloc();
c->movd(vr, SPU_OFF_32(ch_tag_mask));
c->pslldq(vr, 12);
c->movdqa(SPU_OFF_128(gpr[op.rt]), vr);
return;
}
case SPU_RdEventMask:
{
const XmmLink& vr = XmmAlloc();
c->movd(vr, SPU_OFF_32(ch_event_mask));
c->pslldq(vr, 12);
c->movdqa(SPU_OFF_128(gpr[op.rt]), vr);
return;
}
default:
{
InterpreterCall(op); // TODO
}
}
}
void spu_recompiler::RCHCNT(spu_opcode_t op)
@ -831,7 +861,62 @@ void spu_recompiler::MTSPR(spu_opcode_t op)
void spu_recompiler::WRCH(spu_opcode_t op)
{
InterpreterCall(op); // TODO
switch (op.ra)
{
case SPU_WrSRR0:
{
c->mov(*addr, SPU_OFF_32(gpr[op.rt]._u32[3]));
c->mov(SPU_OFF_32(srr0), *addr);
c->unuse(*addr);
return;
}
case MFC_WrTagMask:
{
c->mov(*addr, SPU_OFF_32(gpr[op.rt]._u32[3]));
c->mov(SPU_OFF_32(ch_tag_mask), *addr);
c->unuse(*addr);
return;
}
case MFC_LSA:
{
c->mov(*addr, SPU_OFF_32(gpr[op.rt]._u32[3]));
c->mov(SPU_OFF_32(ch_mfc_cmd.lsa), *addr);
c->unuse(*addr);
return;
}
case MFC_EAH:
{
c->mov(*addr, SPU_OFF_32(gpr[op.rt]._u32[3]));
c->mov(SPU_OFF_32(ch_mfc_cmd.eah), *addr);
c->unuse(*addr);
return;
}
case MFC_EAL:
{
c->mov(*addr, SPU_OFF_32(gpr[op.rt]._u32[3]));
c->mov(SPU_OFF_32(ch_mfc_cmd.eal), *addr);
c->unuse(*addr);
return;
}
case MFC_Size:
{
c->mov(*addr, SPU_OFF_32(gpr[op.rt]._u32[3]));
c->mov(SPU_OFF_16(ch_mfc_cmd.size), addr->r16());
c->unuse(*addr);
return;
}
case MFC_TagID:
{
c->mov(*addr, SPU_OFF_32(gpr[op.rt]._u32[3]));
c->mov(SPU_OFF_8(ch_mfc_cmd.tag), addr->r8());
c->unuse(*addr);
return;
}
default:
{
InterpreterCall(op); // TODO
}
}
}
void spu_recompiler::BIZ(spu_opcode_t op)


@ -16,14 +16,24 @@
#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/SPUInterpreter.h"
#include "Emu/Cell/SPURecompiler.h"
#include "Emu/Memory/wait_engine.h"
#include "Emu/Cell/RawSPUThread.h"
#include <cmath>
#include <cfenv>
#include <thread>
#include <shared_mutex>
#ifdef _MSC_VER
bool operator ==(const u128& lhs, const u128& rhs)
{
return lhs.lo == rhs.lo && lhs.hi == rhs.hi;
}
#endif
extern u64 get_timebased_time();
extern thread_local u64 g_tls_fault_spu;
enum class spu_decoder_type
{
precise,
@ -132,6 +142,7 @@ std::string SPUThread::get_name() const
std::string SPUThread::dump() const
{
std::string&& ret = cpu_thread::dump();
ret += fmt::format("\n" "Tag mask: 0x%08x\n" "MFC entries: %u\n", +ch_tag_mask, mfc_queue.size());
ret += "Registers:\n=========\n";
for (uint i = 0; i<128; ++i) ret += fmt::format("GPR[%d] = %s\n", i, gpr[i]);
@ -144,12 +155,13 @@ void SPUThread::cpu_init()
gpr = {};
fpscr.Reset();
ch_mfc_args = {};
mfc_queue.clear();
ch_mfc_cmd = {};
srr0 = 0;
ch_tag_upd = 0;
ch_tag_mask = 0;
ch_tag_stat.data.store({});
ch_stall_mask = 0;
ch_stall_stat.data.store({});
ch_atomic_stat.data.store({});
@ -165,7 +177,7 @@ void SPUThread::cpu_init()
ch_event_mask = 0;
ch_event_stat = 0;
last_raddr = 0;
raddr = 0;
ch_dec_start_timestamp = get_timebased_time(); // ???
ch_dec_value = 0;
@ -187,13 +199,6 @@ void SPUThread::cpu_task()
{
std::fesetround(FE_TOWARDZERO);
if (custom_task)
{
if (check_state()) return;
return custom_task(*this);
}
if (g_cfg_spu_decoder.get() == spu_decoder_type::asmjit)
{
if (!spu_db) spu_db = fxm::get_always<SPUDatabase>();
@ -275,14 +280,12 @@ void SPUThread::push_snr(u32 number, u32 value)
}
}
void SPUThread::do_dma_transfer(u32 cmd, spu_mfc_arg_t args)
void SPUThread::do_dma_transfer(const spu_mfc_cmd& args, bool from_mfc)
{
if (cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK))
{
_mm_mfence();
}
const bool is_get = (args.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK)) == MFC_GET_CMD;
u32 eal = vm::cast(args.ea, HERE);
u32 eal = args.eal;
u32 lsa = args.lsa & 0x3ffff;
if (eal >= SYS_SPU_THREAD_BASE_LOW && offset < RAW_SPU_BASE_ADDR) // SPU Thread Group MMIO (LS and SNR)
{
@ -297,102 +300,309 @@ void SPUThread::do_dma_transfer(u32 cmd, spu_mfc_arg_t args)
{
eal = spu.offset + offset; // redirect access
}
else if ((cmd & MFC_PUT_CMD) && args.size == 4 && (offset == SYS_SPU_THREAD_SNR1 || offset == SYS_SPU_THREAD_SNR2))
else if (!is_get && args.size == 4 && (offset == SYS_SPU_THREAD_SNR1 || offset == SYS_SPU_THREAD_SNR2))
{
spu.push_snr(SYS_SPU_THREAD_SNR2 == offset, _ref<u32>(args.lsa));
spu.push_snr(SYS_SPU_THREAD_SNR2 == offset, _ref<u32>(lsa));
return;
}
else
{
fmt::throw_exception("Invalid MMIO offset (cmd=0x%x, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)" HERE, cmd, args.lsa, args.ea, args.tag, args.size);
fmt::throw_exception("Invalid MMIO offset (cmd=0x%x, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)" HERE, args.cmd, args.lsa, args.eal, args.tag, args.size);
}
}
else
{
fmt::throw_exception("Invalid thread type (cmd=0x%x, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)" HERE, cmd, args.lsa, args.ea, args.tag, args.size);
fmt::throw_exception("Invalid thread type (cmd=0x%x, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)" HERE, args.cmd, args.lsa, args.eal, args.tag, args.size);
}
}
switch (cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK))
if (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK)) _mm_mfence();
void* dst = vm::base(eal);
void* src = vm::base(offset + lsa);
if (is_get)
{
case MFC_PUT_CMD:
case MFC_PUTR_CMD:
{
std::memcpy(vm::base(eal), vm::base(offset + args.lsa), args.size);
return;
std::swap(dst, src);
}
case MFC_GET_CMD:
switch (u32 size = args.size)
{
std::memcpy(vm::base(offset + args.lsa), vm::base(eal), args.size);
return;
case 1:
{
*static_cast<u8*>(dst) = *static_cast<const u8*>(src);
break;
}
case 2:
{
*static_cast<u16*>(dst) = *static_cast<const u16*>(src);
break;
}
case 4:
{
//if (is_get && !from_mfc)
{
*static_cast<u32*>(dst) = *static_cast<const u32*>(src);
break;
}
//_mm_stream_si32(static_cast<s32*>(dst), *static_cast<const s32*>(src));
break;
}
case 8:
{
//if (is_get && !from_mfc)
{
*static_cast<u64*>(dst) = *static_cast<const u64*>(src);
break;
}
//_mm_stream_si64(static_cast<s64*>(dst), *static_cast<const s64*>(src));
break;
}
default:
{
auto vdst = static_cast<__m128i*>(dst);
auto vsrc = static_cast<const __m128i*>(src);
auto vcnt = size / sizeof(__m128i);
//if (is_get && !from_mfc)
{
while (vcnt >= 8)
{
const __m128i data[]
{
_mm_load_si128(vsrc + 0),
_mm_load_si128(vsrc + 1),
_mm_load_si128(vsrc + 2),
_mm_load_si128(vsrc + 3),
_mm_load_si128(vsrc + 4),
_mm_load_si128(vsrc + 5),
_mm_load_si128(vsrc + 6),
_mm_load_si128(vsrc + 7),
};
_mm_store_si128(vdst + 0, data[0]);
_mm_store_si128(vdst + 1, data[1]);
_mm_store_si128(vdst + 2, data[2]);
_mm_store_si128(vdst + 3, data[3]);
_mm_store_si128(vdst + 4, data[4]);
_mm_store_si128(vdst + 5, data[5]);
_mm_store_si128(vdst + 6, data[6]);
_mm_store_si128(vdst + 7, data[7]);
vcnt -= 8;
vsrc += 8;
vdst += 8;
}
while (vcnt--)
{
_mm_store_si128(vdst++, _mm_load_si128(vsrc++));
}
break;
}
// Disabled
while (vcnt >= 8)
{
const __m128i data[]
{
_mm_load_si128(vsrc + 0),
_mm_load_si128(vsrc + 1),
_mm_load_si128(vsrc + 2),
_mm_load_si128(vsrc + 3),
_mm_load_si128(vsrc + 4),
_mm_load_si128(vsrc + 5),
_mm_load_si128(vsrc + 6),
_mm_load_si128(vsrc + 7),
};
_mm_stream_si128(vdst + 0, data[0]);
_mm_stream_si128(vdst + 1, data[1]);
_mm_stream_si128(vdst + 2, data[2]);
_mm_stream_si128(vdst + 3, data[3]);
_mm_stream_si128(vdst + 4, data[4]);
_mm_stream_si128(vdst + 5, data[5]);
_mm_stream_si128(vdst + 6, data[6]);
_mm_stream_si128(vdst + 7, data[7]);
vcnt -= 8;
vsrc += 8;
vdst += 8;
}
while (vcnt--)
{
_mm_stream_si128(vdst++, _mm_load_si128(vsrc++));
}
}
}
fmt::throw_exception("Invalid command %s (cmd=0x%x, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)" HERE, get_mfc_cmd_name(cmd), cmd, args.lsa, args.ea, args.tag, args.size);
if (is_get && from_mfc)
{
//_mm_sfence();
}
}
void SPUThread::do_dma_list_cmd(u32 cmd, spu_mfc_arg_t args)
void SPUThread::process_mfc_cmd()
{
if (!(cmd & MFC_LIST_MASK))
LOG_TRACE(SPU, "DMAC: cmd=%s, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x", ch_mfc_cmd.cmd, ch_mfc_cmd.lsa, ch_mfc_cmd.eal, ch_mfc_cmd.tag, ch_mfc_cmd.size);
const auto mfc = fxm::check_unlocked<mfc_thread>();
// Check queue size
while (mfc_queue.size() >= 16)
{
fmt::throw_exception("Invalid command %s (cmd=0x%x, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)" HERE, get_mfc_cmd_name(cmd), cmd, args.lsa, args.ea, args.tag, args.size);
}
const u32 list_addr = args.ea & 0x3ffff;
const u32 list_size = args.size / 8;
args.lsa &= 0x3fff0;
struct list_element
{
be_t<u16> sb; // Stall-and-Notify bit (0x8000)
be_t<u16> ts; // List Transfer Size
be_t<u32> ea; // External Address Low
};
for (u32 i = 0; i < list_size; i++)
{
auto rec = vm::ps3::ptr<list_element>::make(offset + list_addr + i * 8);
const u32 size = rec->ts;
const u32 addr = rec->ea;
if (size)
if (test(state, cpu_flag::stop + cpu_flag::dbg_global_stop))
{
spu_mfc_arg_t transfer;
transfer.ea = addr;
transfer.lsa = args.lsa | (addr & 0xf);
transfer.tag = args.tag;
transfer.size = size;
do_dma_transfer(cmd & ~MFC_LIST_MASK, transfer);
args.lsa += std::max<u32>(size, 16);
}
if (rec->sb & 0x8000)
{
ch_stall_stat.set_value((1 << args.tag) | ch_stall_stat.get_value());
ch_event_stat |= SPU_EVENT_SN;
spu_mfc_arg_t stalled;
stalled.ea = (args.ea & ~0xffffffff) | (list_addr + (i + 1) * 8);
stalled.lsa = args.lsa;
stalled.tag = args.tag;
stalled.size = (list_size - i - 1) * 8;
mfc_queue.emplace_back(cmd, stalled);
return;
}
// TODO: investigate lost notifications
busy_wait();
_mm_lfence();
}
}
void SPUThread::process_mfc_cmd(u32 cmd)
{
LOG_TRACE(SPU, "DMA %s: cmd=0x%x, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x", get_mfc_cmd_name(cmd), cmd, ch_mfc_args.lsa, ch_mfc_args.ea, ch_mfc_args.tag, ch_mfc_args.size);
switch (cmd)
switch (ch_mfc_cmd.cmd)
{
case MFC_GETLLAR_CMD:
{
auto& data = vm::ps3::_ref<decltype(rdata)>(ch_mfc_cmd.eal);
const u32 _addr = ch_mfc_cmd.eal;
const u64 _time = vm::reservation_acquire(raddr, 128);
if (raddr && raddr != ch_mfc_cmd.eal)
{
ch_event_stat |= SPU_EVENT_LR;
}
const bool is_polling = false; // raddr == _addr && rtime == _time; // TODO
_mm_lfence();
raddr = _addr;
rtime = _time;
if (is_polling)
{
vm::waiter waiter;
waiter.owner = this;
waiter.addr = raddr;
waiter.size = 128;
waiter.stamp = rtime;
waiter.data = rdata.data();
waiter.init();
while (vm::reservation_acquire(raddr, 128) == waiter.stamp && rdata == data)
{
if (test(state, cpu_flag::stop))
{
break;
}
thread_ctrl::wait_for(100);
}
}
else
{
// Fast path
rdata = data;
_mm_lfence();
}
// Hack: ensure no other atomic updates have happened while reading the data
if (is_polling || UNLIKELY(vm::reservation_acquire(raddr, 128) != rtime))
{
// TODO: vm::check_addr
reader_lock lock(vm::g_mutex);
rtime = vm::reservation_acquire(raddr, 128);
rdata = data;
}
// Copy to LS
_ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ffff) = rdata;
return ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
}
case MFC_PUTLLC_CMD:
{
// Store conditionally
auto& data = vm::ps3::_ref<decltype(rdata)>(ch_mfc_cmd.eal);
const auto to_write = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ffff);
bool result = false;
if (raddr == ch_mfc_cmd.eal && rtime == vm::reservation_acquire(raddr, 128) && rdata == data)
{
lv2_obj::lock_all();
// TODO: vm::check_addr
if (rtime == vm::reservation_acquire(raddr, 128) && rdata == data)
{
data = to_write;
result = true;
vm::reservation_update(raddr, 128);
vm::notify(raddr, 128);
}
lv2_obj::unlock_all();
}
if (result)
{
ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);
}
else
{
ch_atomic_stat.set_value(MFC_PUTLLC_FAILURE);
}
if (raddr && !result)
{
ch_event_stat |= SPU_EVENT_LR;
}
raddr = 0;
return;
}
case MFC_PUTLLUC_CMD:
{
if (raddr && ch_mfc_cmd.eal == raddr)
{
ch_event_stat |= SPU_EVENT_LR;
raddr = 0;
}
auto& data = vm::ps3::_ref<decltype(rdata)>(ch_mfc_cmd.eal);
const auto to_write = _ref<decltype(rdata)>(ch_mfc_cmd.lsa & 0x3ffff);
// Store unconditionally
// TODO: vm::check_addr
writer_lock lock(vm::g_mutex);
data = to_write;
vm::reservation_update(ch_mfc_cmd.eal, 128);
vm::notify(ch_mfc_cmd.eal, 128);
ch_atomic_stat.set_value(MFC_PUTLLUC_SUCCESS);
return;
}
case MFC_PUTQLLUC_CMD:
{
ch_mfc_cmd.size = 128;
break;
}
case MFC_SNDSIG_CMD:
case MFC_SNDSIGB_CMD:
case MFC_SNDSIGF_CMD:
{
ch_mfc_cmd.size = 4;
// Fallthrough
}
case MFC_PUT_CMD:
case MFC_PUTB_CMD:
case MFC_PUTF_CMD:
@ -403,9 +613,28 @@ void SPUThread::process_mfc_cmd(u32 cmd)
case MFC_GETB_CMD:
case MFC_GETF_CMD:
{
return do_dma_transfer(cmd, ch_mfc_args);
}
// Try to process small transfers immediately
if (ch_mfc_cmd.size <= 256 && mfc_queue.size() == 0)
{
std::shared_lock<shared_mutex> lock(vm::g_mutex, std::try_to_lock);
if (!lock)
{
break;
}
if (!vm::check_addr(ch_mfc_cmd.eal, ch_mfc_cmd.size, vm::page_readable | (ch_mfc_cmd.cmd & MFC_PUT_CMD ? vm::page_writable : 0)))
{
// TODO
break;
}
do_dma_transfer(ch_mfc_cmd, false);
return;
}
break;
}
case MFC_PUTL_CMD:
case MFC_PUTLB_CMD:
case MFC_PUTLF_CMD:
@ -416,87 +645,106 @@ void SPUThread::process_mfc_cmd(u32 cmd)
case MFC_GETLB_CMD:
case MFC_GETLF_CMD:
{
return do_dma_list_cmd(cmd, ch_mfc_args);
}
case MFC_GETLLAR_CMD: // acquire reservation
{
const u32 raddr = vm::cast(ch_mfc_args.ea, HERE);
vm::reservation_acquire(vm::base(offset + ch_mfc_args.lsa), raddr, 128);
if (std::exchange(last_raddr, raddr))
if (ch_mfc_cmd.size <= 16 * 8 && mfc_queue.size() == 0 && (ch_stall_mask & (1u << ch_mfc_cmd.tag)) == 0)
{
ch_event_stat |= SPU_EVENT_LR;
}
std::shared_lock<shared_mutex> lock(vm::g_mutex, std::try_to_lock);
return ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
}
case MFC_PUTLLC_CMD: // store conditionally
{
if (vm::reservation_update(vm::cast(ch_mfc_args.ea, HERE), vm::base(offset + ch_mfc_args.lsa), 128))
{
if (std::exchange(last_raddr, 0) == 0)
if (!lock)
{
fmt::throw_exception("PUTLLC succeeded without GETLLAR" HERE);
break;
}
return ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);
}
else
{
if (std::exchange(last_raddr, 0))
struct list_element
{
ch_event_stat |= SPU_EVENT_LR;
be_t<u16> sb;
be_t<u16> ts;
be_t<u32> ea;
};
u32 total_size = 0;
while (ch_mfc_cmd.size && total_size < 256)
{
ch_mfc_cmd.lsa &= 0x3fff0;
const list_element item = _ref<list_element>(ch_mfc_cmd.eal & 0x3fff8);
if (item.sb & 0x8000)
{
break;
}
const u32 size = item.ts;
const u32 addr = item.ea;
if (size)
{
if (total_size + size > 256)
{
break;
}
if (!vm::check_addr(addr, size, vm::page_readable | (ch_mfc_cmd.cmd & MFC_PUT_CMD ? vm::page_writable : 0)))
{
// TODO
break;
}
spu_mfc_cmd transfer;
transfer.eal = addr;
transfer.eah = 0;
transfer.lsa = ch_mfc_cmd.lsa | (addr & 0xf);
transfer.tag = ch_mfc_cmd.tag;
transfer.cmd = MFC(ch_mfc_cmd.cmd & ~MFC_LIST_MASK);
transfer.size = size;
do_dma_transfer(transfer);
const u32 add_size = std::max<u32>(size, 16);
ch_mfc_cmd.lsa += add_size;
total_size += add_size;
}
ch_mfc_cmd.eal += 8;
ch_mfc_cmd.size -= 8;
}
return ch_atomic_stat.set_value(MFC_PUTLLC_FAILURE);
if (ch_mfc_cmd.size == 0)
{
return;
}
}
break;
}
case MFC_PUTLLUC_CMD: // store unconditionally
case MFC_PUTQLLUC_CMD:
{
vm::reservation_op(vm::cast(ch_mfc_args.ea, HERE), 128, [this]()
{
std::memcpy(vm::base_priv(vm::cast(ch_mfc_args.ea, HERE)), vm::base(offset + ch_mfc_args.lsa), 128);
});
if (last_raddr != 0 && vm::g_tls_did_break_reservation)
{
ch_event_stat |= SPU_EVENT_LR;
last_raddr = 0;
}
if (cmd == MFC_PUTLLUC_CMD)
{
ch_atomic_stat.set_value(MFC_PUTLLUC_SUCCESS);
}
return;
}
case MFC_BARRIER_CMD:
case MFC_EIEIO_CMD:
case MFC_SYNC_CMD:
_mm_mfence();
return;
{
ch_mfc_cmd.size = 0;
break;
}
default:
{
fmt::throw_exception("Unknown command (cmd=%s, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)" HERE, ch_mfc_cmd.cmd, ch_mfc_cmd.lsa, ch_mfc_cmd.eal, ch_mfc_cmd.tag, ch_mfc_cmd.size);
}
}
fmt::throw_exception("Unknown command %s (cmd=0x%x, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)" HERE,
get_mfc_cmd_name(cmd), cmd, ch_mfc_args.lsa, ch_mfc_args.ea, ch_mfc_args.tag, ch_mfc_args.size);
// Enqueue
verify(HERE), mfc_queue.try_push(ch_mfc_cmd);
if (test(mfc->state, cpu_flag::is_waiting))
{
mfc->notify();
}
}
u32 SPUThread::get_events(bool waiting)
{
// check reservation status and set SPU_EVENT_LR if lost
if (last_raddr != 0 && !vm::reservation_test(this->get()))
// Check reservation status and set SPU_EVENT_LR if lost
if (raddr && (vm::reservation_acquire(raddr, sizeof(rdata)) != rtime || rdata != vm::ps3::_ref<decltype(rdata)>(raddr)))
{
ch_event_stat |= SPU_EVENT_LR;
last_raddr = 0;
raddr = 0;
}
// SPU Decrementer Event
@ -508,27 +756,18 @@ u32 SPUThread::get_events(bool waiting)
}
}
// initialize waiting
if (waiting)
// Simple polling or polling with atomically set/removed SPU_EVENT_WAITING flag
return !waiting ? ch_event_stat & ch_event_mask : ch_event_stat.atomic_op([&](u32& stat) -> u32
{
// polling with atomically set/removed SPU_EVENT_WAITING flag
return ch_event_stat.atomic_op([this](u32& stat) -> u32
if (u32 res = stat & ch_event_mask)
{
if (u32 res = stat & ch_event_mask)
{
stat &= ~SPU_EVENT_WAITING;
return res;
}
else
{
stat |= SPU_EVENT_WAITING;
return 0;
}
});
}
stat &= ~SPU_EVENT_WAITING;
return res;
}
// simple polling
return ch_event_stat & ch_event_mask;
stat |= SPU_EVENT_WAITING;
return 0;
});
}
void SPUThread::set_events(u32 mask)
@ -572,19 +811,16 @@ u32 SPUThread::get_ch_count(u32 ch)
switch (ch)
{
//case MFC_Cmd: return 16;
//case SPU_WrSRR0: return 1; break;
//case SPU_RdSRR0: return 1; break;
case SPU_WrOutMbox: return ch_out_mbox.get_count() ^ 1; break;
case SPU_WrOutIntrMbox: return ch_out_intr_mbox.get_count() ^ 1; break;
case SPU_RdInMbox: return ch_in_mbox.get_count(); break;
case MFC_RdTagStat: return ch_tag_stat.get_count(); break;
case MFC_RdListStallStat: return ch_stall_stat.get_count(); break;
case MFC_WrTagUpdate: return ch_tag_stat.get_count(); break; // hack
case SPU_RdSigNotify1: return ch_snr1.get_count(); break;
case SPU_RdSigNotify2: return ch_snr2.get_count(); break;
case MFC_RdAtomicStat: return ch_atomic_stat.get_count(); break;
case SPU_RdEventStat: return get_events() ? 1 : 0; break;
case SPU_WrOutMbox: return ch_out_mbox.get_count() ^ 1;
case SPU_WrOutIntrMbox: return ch_out_intr_mbox.get_count() ^ 1;
case SPU_RdInMbox: return ch_in_mbox.get_count();
case MFC_RdTagStat: return ch_tag_stat.get_count();
case MFC_RdListStallStat: return ch_stall_stat.get_count();
case MFC_WrTagUpdate: return ch_tag_upd == 0;
case SPU_RdSigNotify1: return ch_snr1.get_count();
case SPU_RdSigNotify2: return ch_snr2.get_count();
case MFC_RdAtomicStat: return ch_atomic_stat.get_count();
case SPU_RdEventStat: return get_events() != 0;
}
fmt::throw_exception("Unknown/illegal channel (ch=%d [%s])" HERE, ch, ch < 128 ? spu_ch_name[ch] : "???");
@ -596,11 +832,19 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
auto read_channel = [&](spu_channel_t& channel)
{
if (!channel.try_pop(out))
for (int i = 0; i < 10 && channel.get_count() == 0; i++)
{
thread_ctrl::wait([&] { return test(state & cpu_flag::stop) || channel.try_pop(out); });
busy_wait();
}
return !test(state & cpu_flag::stop);
while (!channel.try_pop(out))
{
if (test(state, cpu_flag::stop))
{
return false;
}
thread_ctrl::wait();
}
return true;
@ -617,6 +861,11 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
{
while (true)
{
for (int i = 0; i < 10 && ch_in_mbox.get_count() == 0; i++)
{
busy_wait();
}
if (const uint old_count = ch_in_mbox.try_pop(out))
{
if (old_count == 4 /* SPU_IN_MBOX_THRESHOLD */) // TODO: check this
@ -681,35 +930,37 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
case SPU_RdEventStat:
{
// start waiting or return immediately
if (u32 res = get_events(true))
u32 res = get_events();
if (res)
{
out = res;
return true;
}
vm::waiter waiter;
if (ch_event_mask & SPU_EVENT_LR)
{
// register waiter if polling reservation status is required
vm::wait_op(last_raddr, 128, [&] { return get_events(true) || test(state & cpu_flag::stop); });
waiter.owner = this;
waiter.addr = raddr;
waiter.size = 128;
waiter.stamp = rtime;
waiter.data = rdata.data();
waiter.init();
}
else
while (!(res = get_events(true)))
{
// simple waiting loop otherwise
while (!get_events(true) && !test(state & cpu_flag::stop))
if (test(state & cpu_flag::stop))
{
thread_ctrl::wait();
return false;
}
}
ch_event_stat &= ~SPU_EVENT_WAITING;
if (test(state & cpu_flag::stop))
{
return false;
thread_ctrl::wait_for(100);
}
out = get_events();
out = res;
return true;
}
@ -908,91 +1159,95 @@ bool SPUThread::set_ch_value(u32 ch, u32 value)
case MFC_WrTagUpdate:
{
ch_tag_stat.set_value(ch_tag_mask); // hack
if (value > 2)
{
break;
}
ch_tag_stat.set_value(0, false);
ch_tag_upd = value;
if (mfc_queue.size() == 0 && (!value || ch_tag_upd.exchange(0)))
{
ch_tag_stat.set_value(ch_tag_mask);
}
else if (!value)
{
u32 completed = ch_tag_mask;
for (u32 i = 0; completed && i < 16; i++)
{
const auto& _cmd = mfc_queue.get_push(i);
if (_cmd.size)
{
completed &= ~(1u << _cmd.tag);
}
}
ch_tag_stat.set_value(completed);
}
else
{
auto mfc = fxm::check_unlocked<mfc_thread>();
if (test(mfc->state, cpu_flag::is_waiting))
{
mfc->notify();
}
}
return true;
}
case MFC_LSA:
{
if (value >= 0x40000)
{
break;
}
ch_mfc_args.lsa = value;
ch_mfc_cmd.lsa = value;
return true;
}
case MFC_EAH:
{
ch_mfc_args.eah = value;
ch_mfc_cmd.eah = value;
return true;
}
case MFC_EAL:
{
ch_mfc_args.eal = value;
ch_mfc_cmd.eal = value;
return true;
}
case MFC_Size:
{
if (value > 16 * 1024)
{
break;
}
ch_mfc_args.size = (u16)value;
ch_mfc_cmd.size = value & 0xffff;
return true;
}
case MFC_TagID:
{
if (value >= 32)
{
break;
}
ch_mfc_args.tag = (u16)value;
ch_mfc_cmd.tag = value & 0xff;
return true;
}
case MFC_Cmd:
{
process_mfc_cmd(value);
ch_mfc_args = {}; // clear non-persistent data
ch_mfc_cmd.cmd = MFC(value & 0xff);
process_mfc_cmd();
ch_mfc_cmd = {}; // clear non-persistent data
return true;
}
case MFC_WrListStallAck:
{
if (value >= 32)
// Reset stall status for specified tag
if (atomic_storage<u32>::btr(ch_stall_mask.raw(), value))
{
break;
}
size_t processed = 0;
for (size_t i = 0; i < mfc_queue.size(); i++)
{
if (mfc_queue[i].second.tag == value)
auto mfc = fxm::check_unlocked<mfc_thread>();
if (test(mfc->state, cpu_flag::is_waiting))
{
do_dma_list_cmd(mfc_queue[i].first, mfc_queue[i].second);
mfc_queue[i].second.tag = 0xdead;
processed++;
}
}
while (processed)
{
for (size_t i = 0; i < mfc_queue.size(); i++)
{
if (mfc_queue[i].second.tag == 0xdead)
{
mfc_queue.erase(mfc_queue.begin() + i);
processed--;
break;
}
mfc->notify();
}
}
@ -1343,11 +1598,9 @@ void SPUThread::fast_call(u32 ls_addr)
auto old_pc = pc;
auto old_lr = gpr[0]._u32[3];
auto old_stack = gpr[1]._u32[3]; // only saved and restored (may be wrong)
auto old_task = std::move(custom_task);
pc = ls_addr;
gpr[0]._u32[3] = 0x0;
custom_task = nullptr;
try
{
@ -1364,31 +1617,4 @@ void SPUThread::fast_call(u32 ls_addr)
pc = old_pc;
gpr[0]._u32[3] = old_lr;
gpr[1]._u32[3] = old_stack;
custom_task = std::move(old_task);
}
void SPUThread::RegisterHleFunction(u32 addr, std::function<bool(SPUThread&)> function)
{
m_addr_to_hle_function_map[addr] = function;
_ref<u32>(addr) = 0x00000003; // STOP 3
}
void SPUThread::UnregisterHleFunction(u32 addr)
{
m_addr_to_hle_function_map.erase(addr);
}
void SPUThread::UnregisterHleFunctions(u32 start_addr, u32 end_addr)
{
for (auto iter = m_addr_to_hle_function_map.begin(); iter != m_addr_to_hle_function_map.end();)
{
if (iter->first >= start_addr && iter->first <= end_addr)
{
m_addr_to_hle_function_map.erase(iter++);
}
else
{
iter++;
}
}
}


@ -186,6 +186,16 @@ public:
if (old.wait) spu.notify();
}
bool push_and(u32 value)
{
const auto old = data.fetch_op([=](sync_var_t& data)
{
data.value &= ~value;
});
return (old.value & value) != 0;
}
// push unconditionally (overwriting previous value), may require notification
void push(cpu_thread& spu, u32 value)
{
@ -510,18 +520,29 @@ public:
SPUThread(const std::string& name, u32 index, lv2_spu_group* group);
std::array<v128, 128> gpr; // General-Purpose Registers
// General-Purpose Registers
std::array<v128, 128> gpr;
SPU_FPSCR fpscr;
std::unordered_map<u32, std::function<bool(SPUThread& SPU)>> m_addr_to_hle_function_map;
// MFC command data
spu_mfc_cmd ch_mfc_cmd;
spu_mfc_arg_t ch_mfc_args;
// MFC command queue (consumer: MFC thread)
lf_spsc<spu_mfc_cmd, 16> mfc_queue;
std::vector<std::pair<u32, spu_mfc_arg_t>> mfc_queue; // Only used for stalled list transfers
// MFC command proxy queue (consumer: MFC thread)
lf_mpsc<spu_mfc_cmd, 8> mfc_proxy;
// Reservation Data
u64 rtime = 0;
std::array<u128, 8> rdata{};
u32 raddr = 0;
u32 srr0;
u32 ch_tag_mask;
atomic_t<u32> ch_tag_upd;
atomic_t<u32> ch_tag_mask;
spu_channel_t ch_tag_stat;
atomic_t<u32> ch_stall_mask;
spu_channel_t ch_stall_stat;
spu_channel_t ch_atomic_stat;
@ -537,7 +558,6 @@ public:
atomic_t<u32> ch_event_mask;
atomic_t<u32> ch_event_stat;
u32 last_raddr; // Last Reservation Address (0 if not set)
u64 ch_dec_start_timestamp; // timestamp of writing decrementer value
u32 ch_dec_value; // written decrementer value
@ -558,7 +578,6 @@ public:
const std::string m_name; // Thread name
std::function<void(SPUThread&)> custom_task;
std::exception_ptr pending_exception;
std::shared_ptr<class SPUDatabase> spu_db;
@ -566,10 +585,9 @@ public:
u32 recursion_level = 0;
void push_snr(u32 number, u32 value);
void do_dma_transfer(u32 cmd, spu_mfc_arg_t args);
void do_dma_list_cmd(u32 cmd, spu_mfc_arg_t args);
void process_mfc_cmd(u32 cmd);
void do_dma_transfer(const spu_mfc_cmd& args, bool from_mfc = true);
void process_mfc_cmd();
u32 get_events(bool waiting = false);
void set_events(u32 mask);
void set_interrupt_status(bool enable);
@ -594,8 +612,4 @@ public:
{
return *_ptr<T>(lsa);
}
void RegisterHleFunction(u32 addr, std::function<bool(SPUThread&)> function);
void UnregisterHleFunction(u32 addr);
void UnregisterHleFunctions(u32 start_addr, u32 end_addr);
};


@ -5,6 +5,7 @@
#include "Emu/Cell/PPUFunction.h"
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/MFC.h"
#include "sys_sync.h"
#include "sys_lwmutex.h"
#include "sys_lwcond.h"
@ -1093,8 +1094,25 @@ void lv2_obj::awake(cpu_thread& cpu, u32 prio)
if (prio == -4)
{
// Yield command
const u64 start_time = get_system_time();
for (std::size_t i = 0, pos = -1; i < g_ppu.size(); i++)
{
if (g_ppu[i] == &cpu)
{
pos = i;
prio = g_ppu[i]->prio;
}
else if (i == pos + 1 && prio != -4 && g_ppu[i]->prio != prio)
{
return;
}
}
unqueue(g_ppu, &cpu);
unqueue(g_pending, &cpu);
static_cast<ppu_thread&>(cpu).start_time = start_time;
}
if (prio < INT32_MAX && !unqueue(g_ppu, &cpu))
@ -1153,6 +1171,84 @@ void lv2_obj::awake(cpu_thread& cpu, u32 prio)
schedule_all();
}
void lv2_obj::lock_all()
{
std::size_t count = 0;
std::array<cpu_thread*, 32> array;
{
semaphore_lock lock(g_mutex);
if (g_pending.empty() || g_pending.front())
{
if (auto mfc = fxm::check_unlocked<mfc_thread>())
{
if (!mfc->state.test_and_set(cpu_flag::suspend))
{
array.at(count++) = mfc;
}
}
}
for (std::size_t i = 0, x = g_ppu.size(); i < x; i++)
{
const auto target = g_ppu[i];
if (!target->state.test_and_set(cpu_flag::suspend))
{
g_pending.emplace_back(target);
}
}
for (cpu_thread* target : g_pending)
{
if (target && !test(target->state, cpu_flag::is_waiting))
{
array.at(count++) = target;
}
}
g_pending.emplace_front(nullptr);
}
vm::g_mutex.lock();
for (std::size_t i = 0; i < count; i++)
{
while (!test(array[i]->state, cpu_flag::is_waiting))
{
busy_wait();
}
}
}
void lv2_obj::unlock_all()
{
vm::g_mutex.unlock();
semaphore_lock lock(g_mutex);
if (!g_pending.empty() && !g_pending.front())
{
g_pending.pop_front();
if (g_pending.empty() || g_pending.front())
{
if (auto mfc = fxm::check_unlocked<mfc_thread>())
{
const auto old_state = mfc->state.fetch_sub(cpu_flag::suspend);
if (!test(old_state, cpu_flag::stop) && test(old_state, cpu_flag::is_waiting))
{
mfc->notify();
}
}
}
}
schedule_all();
}
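// Taken together, lock_all/unlock_all form a stop-the-world bracket around
// privileged memory operations; a hedged usage sketch (the call site is
// assumed, not shown in this commit):
//
//   lv2_obj::lock_all();   // park PPU/MFC threads, then take vm::g_mutex
//   // ... mutate memory while every guest thread is known to be waiting ...
//   lv2_obj::unlock_all(); // release vm::g_mutex, resume threads, reschedule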
void lv2_obj::cleanup()
{
g_ppu.clear();
@ -1199,6 +1295,10 @@ void lv2_obj::schedule_all()
break;
}
}
// Check memory
//reader_lock lock(vm::g_mutex);
//vm::notify(0, -1);
}
void ppu_thread::cpu_sleep()


@ -144,13 +144,21 @@ error_code sys_memory_free(u32 addr)
error_code sys_memory_get_page_attribute(u32 addr, vm::ptr<sys_page_attr_t> attr)
{
sys_memory.error("sys_memory_get_page_attribute(addr=0x%x, attr=*0x%x)", addr, attr);
sys_memory.trace("sys_memory_get_page_attribute(addr=0x%x, attr=*0x%x)", addr, attr);
// TODO: Implement per-thread page attribute setting.
attr->attribute = 0x40000ull; // SYS_MEMORY_PROT_READ_WRITE
attr->access_right = 0xFull; // SYS_MEMORY_ACCESS_RIGHT_ANY
if (!vm::check_addr(addr))
{
return CELL_EINVAL;
}
if (!vm::check_addr(attr.addr(), attr.size()))
{
return CELL_EFAULT;
}
attr->attribute = 0x40000ull; // SYS_MEMORY_PROT_READ_WRITE (TODO)
attr->access_right = 0xFull; // SYS_MEMORY_ACCESS_RIGHT_ANY (TODO)
attr->page_size = 4096;
return CELL_OK;
}
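// A hypothetical guest-side call illustrating the values reported above
// (variable names assumed, not part of this commit):
//
//   vm::var<sys_page_attr_t> attr;
//
//   if (sys_memory_get_page_attribute(addr, attr) == CELL_OK)
//   {
//       // attr->attribute    == 0x40000 (SYS_MEMORY_PROT_READ_WRITE)
//       // attr->access_right == 0xF     (SYS_MEMORY_ACCESS_RIGHT_ANY)
//       // attr->page_size    == 4096
//   }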


@ -113,6 +113,8 @@ error_code sys_spu_thread_initialize(vm::ptr<u32> thread, u32 group_id, u32 spu_
auto spu = idm::make_ptr<SPUThread>(thread_name, spu_num, group.get());
fxm::get_always<mfc_thread>()->add_spu(spu);
*thread = spu->id;
group->threads[spu_num] = std::move(spu);
@ -224,6 +226,8 @@ error_code sys_spu_thread_group_destroy(u32 id)
}
}
fxm::check_unlocked<mfc_thread>()->add_spu(nullptr);
return CELL_OK;
}
@ -1080,7 +1084,11 @@ error_code sys_raw_spu_create(vm::ptr<u32> id, vm::ptr<void> attr)
thread->cpu_init();
*id = thread->index;
const u32 _id = thread->index;
fxm::get_always<mfc_thread>()->add_spu(std::move(thread));
*id = _id;
return CELL_OK;
}
@ -1142,6 +1150,8 @@ error_code sys_raw_spu_destroy(ppu_thread& ppu, u32 id)
idm::remove<RawSPUThread>(thread->id);
fxm::check_unlocked<mfc_thread>()->add_spu(nullptr);
return CELL_OK;
}


@ -1,7 +1,10 @@
#include "stdafx.h"
#include "Memory.h"
#include "Emu/System.h"
#include "Utilities/mutex.h"
#include "Utilities/Thread.h"
#include "Utilities/VirtualMemory.h"
#include "Emu/CPU/CPUThread.h"
#ifdef _WIN32
#include <Windows.h>
@ -18,312 +21,173 @@
#endif
#endif
#include "wait_engine.h"
#include <mutex>
#include <atomic>
#include <deque>
namespace vm
{
thread_local u64 g_tls_fault_count{};
// Emulated virtual memory (4 GiB)
u8* const g_base_addr = static_cast<u8*>(memory_helper::reserve_memory(0x100000000));
template<std::size_t Size> struct mapped_ptr_deleter
// Memory locations
std::vector<std::shared_ptr<block_t>> g_locations;
// Reservations (lock lines) in a single memory page
using reservation_info = std::array<std::atomic<u64>, 4096 / 128>;
// Registered waiters
std::deque<vm::waiter*> g_waiters;
// Memory mutex
shared_mutex g_mutex;
// Page information
struct memory_page
{
void operator ()(void* ptr)
// Memory flags
atomic_t<u8> flags;
atomic_t<u32> waiters;
// Reservations
atomic_t<reservation_info*> reservations;
// Access reservation info
std::atomic<u64>& operator [](u32 addr)
{
#ifdef _WIN32
::UnmapViewOfFile(ptr);
#else
::munmap(ptr, Size);
#endif
auto ptr = reservations.load();
if (!ptr)
{
// Opportunistic memory allocation
ptr = new reservation_info{};
if (auto old_ptr = reservations.compare_and_swap(nullptr, ptr))
{
delete ptr;
ptr = old_ptr;
}
}
return (*ptr)[(addr & 0xfff) >> 7];
}
};
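// The operator above publishes the per-page reservation array lazily:
// allocate speculatively, install with a single compare-and-swap, and adopt
// the winner's array on failure. A self-contained sketch of the idiom using
// only <atomic> (hypothetical names, not part of this commit):
//
//   std::atomic<reservation_info*> slot{nullptr};
//
//   reservation_info& get_or_publish()
//   {
//       auto ptr = slot.load();
//
//       if (!ptr)
//       {
//           ptr = new reservation_info{}; // speculative allocation
//
//           reservation_info* expected = nullptr;
//
//           if (!slot.compare_exchange_strong(expected, ptr))
//           {
//               delete ptr;     // lost the race: discard our copy
//               ptr = expected; // adopt the published array
//           }
//       }
//
//       return *ptr;
//   }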
using mapped_ptr_t = std::unique_ptr<u8[], mapped_ptr_deleter<0x100000000>>;
std::array<mapped_ptr_t, 2> initialize()
template <typename T = writer_lock>
struct mem_lock
{
#ifdef _WIN32
const HANDLE memory_handle = ::CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE | SEC_RESERVE, 0x1, 0x0, NULL);
cpu_thread* thread;
T lock;
if (memory_handle == NULL)
template <typename X>
mem_lock(X&& mtx)
: thread(find_thread())
, lock(std::forward<X>(mtx))
{
MessageBoxA(0, fmt::format("CreateFileMapping() failed (0x%x).", GetLastError()).c_str(), "vm::initialize()", MB_ICONERROR);
std::abort();
}
mapped_ptr_t base_addr(static_cast<u8*>(::MapViewOfFile(memory_handle, FILE_MAP_WRITE, 0, 0, 0x100000000)));
mapped_ptr_t priv_addr(static_cast<u8*>(::MapViewOfFile(memory_handle, FILE_MAP_WRITE, 0, 0, 0x100000000)));
::CloseHandle(memory_handle);
#else
const int memory_handle = ::shm_open("/rpcs3_vm", O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
if (memory_handle == -1)
~mem_lock()
{
std::printf("shm_open('/rpcs3_vm') failed (%d).\n", errno);
std::abort();
}
if (::ftruncate(memory_handle, 0x100000000) == -1)
{
std::printf("ftruncate(memory_handle) failed (%d).\n", errno);
::shm_unlink("/rpcs3_vm");
std::abort();
}
mapped_ptr_t base_addr(static_cast<u8*>(::mmap(nullptr, 0x100000000, PROT_NONE, MAP_SHARED, memory_handle, 0)));
mapped_ptr_t priv_addr(static_cast<u8*>(::mmap(nullptr, 0x100000000, PROT_NONE, MAP_SHARED, memory_handle, 0)));
::shm_unlink("/rpcs3_vm");
#endif
std::printf("vm::g_base_addr = %p\nvm::g_priv_addr = %p\n", base_addr.get(), priv_addr.get());
return{ std::move(base_addr), std::move(priv_addr) };
}
const auto g_addr_set = vm::initialize();
u8* const g_base_addr = g_addr_set[0].get();
u8* const g_priv_addr = g_addr_set[1].get();
std::array<atomic_t<u8>, 0x100000000ull / 4096> g_pages{}; // information about every page
std::vector<std::shared_ptr<block_t>> g_locations; // memory locations
access_violation::access_violation(u64 addr, const char* cause)
: std::runtime_error(fmt::format("Access violation %s address 0x%llx", cause, addr))
{
g_tls_fault_count &= ~(1ull << 63);
}
using reservation_mutex_t = std::mutex;
thread_ctrl* volatile g_reservation_owner = nullptr;
u32 g_reservation_addr = 0;
u32 g_reservation_size = 0;
thread_local bool g_tls_did_break_reservation = false;
reservation_mutex_t g_reservation_mutex;
void _reservation_set(u32 addr, bool no_access = false)
{
#ifdef _WIN32
DWORD old;
if (!::VirtualProtect(vm::base(addr & ~0xfff), 4096, no_access ? PAGE_NOACCESS : PAGE_READONLY, &old))
#else
if (::mprotect(vm::base(addr & ~0xfff), 4096, no_access ? PROT_NONE : PROT_READ))
#endif
{
fmt::throw_exception("System failure (addr=0x%x)" HERE, addr);
}
}
bool _reservation_break(u32 addr)
{
if (g_reservation_addr >> 12 == addr >> 12)
{
#ifdef _WIN32
DWORD old;
if (!::VirtualProtect(vm::base(addr & ~0xfff), 4096, PAGE_READWRITE, &old))
#else
if (::mprotect(vm::base(addr & ~0xfff), 4096, PROT_READ | PROT_WRITE))
#endif
if (thread)
{
fmt::throw_exception("System failure (addr=0x%x)" HERE, addr);
thread->state -= cpu_flag::is_waiting;
}
}
static cpu_thread* find_thread()
{
if (auto cpu = get_current_cpu_thread())
{
cpu->state += cpu_flag::is_waiting;
return cpu;
}
g_reservation_addr = 0;
g_reservation_size = 0;
g_reservation_owner = nullptr;
return true;
return nullptr;
}
};
return false;
// Memory pages
std::array<memory_page, 0x100000000 / 4096> g_pages{};
u64 reservation_acquire(u32 addr, u32 _size)
{
// Access reservation info: stamp and the lock bit
return g_pages[addr >> 12][addr].load(std::memory_order_acquire);
}
void reservation_break(u32 addr)
void reservation_update(u32 addr, u32 _size)
{
std::unique_lock<reservation_mutex_t> lock(g_reservation_mutex);
// Update reservation info with new timestamp (unsafe: assumes the reservation array is already allocated)
(*g_pages[addr >> 12].reservations)[(addr & 0xfff) >> 7].store(__rdtsc(), std::memory_order_release);
}
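// reservation_acquire/reservation_update pair up into a load-reserve /
// store-conditional scheme; a condensed sketch of the intended pattern,
// mirroring the ARMv7 STREX handling later in this commit (store_conditional
// is a hypothetical helper, not part of this commit):
//
//   bool store_conditional(u32 addr, u32 expected, u32 value)
//   {
//       const u64 rtime = reservation_acquire(addr, sizeof(u32)); // stamp first
//
//       auto& data = vm::_ref<atomic_le_t<u32>>(addr);
//
//       writer_lock lock(g_mutex);
//
//       // Commit only if no reservation update hit this lock line meanwhile
//       if (rtime == reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(expected, value))
//       {
//           reservation_update(addr, sizeof(u32)); // publish a new timestamp
//           return true;
//       }
//
//       return false;
//   }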
const u32 raddr = g_reservation_addr;
const u32 rsize = g_reservation_size;
void waiter::init()
{
// Register waiter
writer_lock lock(g_mutex);
if ((g_tls_did_break_reservation = _reservation_break(addr)))
g_waiters.emplace_back(this);
}
void waiter::test() const
{
if (std::memcmp(data, vm::base(addr), size) == 0)
{
lock.unlock(), vm::notify_at(raddr, rsize);
return;
}
memory_page& page = g_pages[addr >> 12];
if (page.reservations == nullptr)
{
return;
}
if (stamp >= (*page.reservations)[(addr & 0xfff) >> 7].load())
{
return;
}
if (owner)
{
owner->notify();
}
}
void reservation_acquire(void* data, u32 addr, u32 size)
waiter::~waiter()
{
std::lock_guard<reservation_mutex_t> lock(g_reservation_mutex);
const u64 align = 0x80000000ull >> cntlz32(size, true);
if (!size || !addr || size > 4096 || size != align || addr & (align - 1))
if (owner)
{
fmt::throw_exception("Invalid arguments (addr=0x%x, size=0x%x)" HERE, addr, size);
}
// Unregister waiter
writer_lock lock(g_mutex);
const u8 flags = g_pages[addr >> 12];
// Find waiter
const auto found = std::find(g_waiters.cbegin(), g_waiters.cend(), this);
if (!(flags & page_writable) || !(flags & page_allocated) || (flags & page_no_reservations))
{
fmt::throw_exception("Invalid page flags (addr=0x%x, size=0x%x, flags=0x%x)" HERE, addr, size, flags);
}
// break the reservation
g_tls_did_break_reservation = g_reservation_owner && _reservation_break(g_reservation_addr);
// change memory protection to read-only
_reservation_set(addr);
// may not be necessary
_mm_mfence();
// set additional information
g_reservation_addr = addr;
g_reservation_size = size;
g_reservation_owner = thread_ctrl::get_current();
// copy data
std::memcpy(data, vm::base(addr), size);
}
bool reservation_update(u32 addr, const void* data, u32 size)
{
std::unique_lock<reservation_mutex_t> lock(g_reservation_mutex);
const u64 align = 0x80000000ull >> cntlz32(size, true);
if (!size || !addr || size > 4096 || size != align || addr & (align - 1))
{
fmt::throw_exception("Invalid arguments (addr=0x%x, size=0x%x)" HERE, addr, size);
}
if (g_reservation_owner != thread_ctrl::get_current() || g_reservation_addr != addr || g_reservation_size != size)
{
// atomic update failed
return false;
}
// change memory protection to no access
_reservation_set(addr, true);
// update memory using privileged access
std::memcpy(vm::base_priv(addr), data, size);
// free the reservation and restore memory protection
_reservation_break(addr);
// notify waiter
lock.unlock(), vm::notify_at(addr, size);
// atomic update succeeded
return true;
}
bool reservation_query(u32 addr, u32 size, bool is_writing, std::function<bool()> callback)
{
std::unique_lock<reservation_mutex_t> lock(g_reservation_mutex);
if (!check_addr(addr))
{
return false;
}
// check if current reservation and address may overlap
if (g_reservation_addr >> 12 == addr >> 12 && is_writing)
{
const bool result = callback();
if (result && size && addr + size - 1 >= g_reservation_addr && g_reservation_addr + g_reservation_size - 1 >= addr)
if (found != g_waiters.cend())
{
const u32 raddr = g_reservation_addr;
const u32 rsize = g_reservation_size;
// break the reservation if overlap
if ((g_tls_did_break_reservation = _reservation_break(addr)))
{
lock.unlock(), vm::notify_at(raddr, rsize);
}
}
return result;
}
return true;
}
bool reservation_test(thread_ctrl* current)
{
const auto owner = g_reservation_owner;
return owner && owner == current;
}
void reservation_free()
{
auto thread = thread_ctrl::get_current();
if (reservation_test(thread))
{
std::lock_guard<reservation_mutex_t> lock(g_reservation_mutex);
if (g_reservation_owner && g_reservation_owner == thread)
{
g_tls_did_break_reservation = _reservation_break(g_reservation_addr);
g_waiters.erase(found);
}
}
}
void reservation_op(u32 addr, u32 size, std::function<void()> proc)
void notify(u32 addr, u32 size)
{
std::unique_lock<reservation_mutex_t> lock(g_reservation_mutex);
const u64 align = 0x80000000ull >> cntlz32(size, true);
if (!size || !addr || size > 4096 || size != align || addr & (align - 1))
for (const waiter* ptr : g_waiters)
{
fmt::throw_exception("Invalid arguments (addr=0x%x, size=0x%x)" HERE, addr, size);
}
g_tls_did_break_reservation = false;
// check and possibly break previous reservation
if (g_reservation_owner != thread_ctrl::get_current() || g_reservation_addr != addr || g_reservation_size != size)
{
if (g_reservation_owner)
if (ptr->addr / 128 == addr / 128)
{
_reservation_break(g_reservation_addr);
ptr->test();
}
g_tls_did_break_reservation = true;
}
}
// change memory protection to no access
_reservation_set(addr, true);
// set additional information
g_reservation_addr = addr;
g_reservation_size = size;
g_reservation_owner = thread_ctrl::get_current();
// may not be necessary
_mm_mfence();
// do the operation
proc();
// remove the reservation
_reservation_break(addr);
// notify waiter
lock.unlock(), vm::notify_at(addr, size);
void notify_all()
{
for (const waiter* ptr : g_waiters)
{
ptr->test();
}
}
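// notify() above matches waiters at 128-byte lock-line granularity
// (addr / 128), the reservation granule; a tiny self-contained check of that
// math (same_line is a hypothetical name):
//
//   constexpr bool same_line(u32 a, u32 b) { return a / 128 == b / 128; }
//
//   static_assert(same_line(0x10000, 0x1007f), "one lock line");
//   static_assert(!same_line(0x1007f, 0x10080), "adjacent lock lines");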
void _page_map(u32 addr, u32 size, u8 flags)
@ -335,21 +199,20 @@ namespace vm
for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++)
{
if (g_pages[i])
if (g_pages[i].flags)
{
fmt::throw_exception("Memory already mapped (addr=0x%x, size=0x%x, flags=0x%x, current_addr=0x%x)" HERE, addr, size, flags, i * 4096);
}
}
void* real_addr = vm::base(addr);
void* priv_addr = vm::base_priv(addr);
#ifdef _WIN32
auto protection = flags & page_writable ? PAGE_READWRITE : (flags & page_readable ? PAGE_READONLY : PAGE_NOACCESS);
if (!::VirtualAlloc(priv_addr, size, MEM_COMMIT, PAGE_READWRITE) || !::VirtualAlloc(real_addr, size, MEM_COMMIT, protection))
if (!::VirtualAlloc(real_addr, size, MEM_COMMIT, protection))
#else
auto protection = flags & page_writable ? PROT_WRITE | PROT_READ : (flags & page_readable ? PROT_READ : PROT_NONE);
if (::mprotect(priv_addr, size, PROT_READ | PROT_WRITE) || ::mprotect(real_addr, size, protection))
if (::mprotect(real_addr, size, protection))
#endif
{
fmt::throw_exception("System failure (addr=0x%x, size=0x%x, flags=0x%x)" HERE, addr, size, flags);
@ -357,65 +220,75 @@ namespace vm
for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++)
{
if (g_pages[i].exchange(flags | page_allocated))
if (g_pages[i].flags.exchange(flags | page_allocated))
{
fmt::throw_exception("Concurrent access (addr=0x%x, size=0x%x, flags=0x%x, current_addr=0x%x)" HERE, addr, size, flags, i * 4096);
}
}
std::memset(priv_addr, 0, size); // ???
}
bool page_protect(u32 addr, u32 size, u8 flags_test, u8 flags_set, u8 flags_clear)
{
std::lock_guard<reservation_mutex_t> lock(g_reservation_mutex);
mem_lock<writer_lock> lock(g_mutex);
if (!size || (size | addr) % 4096)
{
fmt::throw_exception("Invalid arguments (addr=0x%x, size=0x%x)" HERE, addr, size);
}
const u8 flags_inv = flags_set & flags_clear;
const u8 flags_both = flags_set & flags_clear;
flags_test |= page_allocated;
flags_set &= ~flags_both;
flags_clear &= ~flags_both;
for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++)
{
if ((g_pages[i] & flags_test) != (flags_test | page_allocated))
if ((g_pages[i].flags & flags_test) != (flags_test | page_allocated))
{
return false;
}
}
if (!flags_inv && !flags_set && !flags_clear)
if (!flags_set && !flags_clear)
{
return true;
}
for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++)
u8 start_value = 0xff;
for (u32 start = addr / 4096, end = start + size / 4096, i = start; i < end + 1; i++)
{
_reservation_break(i * 4096);
u8 new_val = 0xff;
const u8 f1 = g_pages[i].fetch_or(flags_set & ~flags_inv) & (page_writable | page_readable);
g_pages[i].fetch_and(~(flags_clear & ~flags_inv));
const u8 f2 = (g_pages[i] ^= flags_inv) & (page_writable | page_readable);
if (f1 != f2)
if (i < end)
{
void* real_addr = vm::base(i * 4096);
g_pages[i].flags |= flags_set;
g_pages[i].flags &= ~flags_clear;
#ifdef _WIN32
DWORD old;
new_val = g_pages[i].flags & (page_readable | page_writable);
}
auto protection = f2 & page_writable ? PAGE_READWRITE : (f2 & page_readable ? PAGE_READONLY : PAGE_NOACCESS);
if (!::VirtualProtect(real_addr, 4096, protection, &old))
#else
auto protection = f2 & page_writable ? PROT_WRITE | PROT_READ : (f2 & page_readable ? PROT_READ : PROT_NONE);
if (::mprotect(real_addr, 4096, protection))
#endif
if (new_val != start_value)
{
if (u32 page_size = (i - start) * 4096)
{
fmt::throw_exception("System failure (addr=0x%x, size=0x%x, flags_test=0x%x, flags_set=0x%x, flags_clear=0x%x)" HERE, addr, size, flags_test, flags_set, flags_clear);
#ifdef _WIN32
DWORD old;
auto protection = start_value & page_writable ? PAGE_READWRITE : (start_value & page_readable ? PAGE_READONLY : PAGE_NOACCESS);
if (!::VirtualProtect(vm::base(start * 4096), page_size, protection, &old))
#else
auto protection = start_value & page_writable ? PROT_WRITE | PROT_READ : (start_value & page_readable ? PROT_READ : PROT_NONE);
if (::mprotect(vm::base(start * 4096), page_size, protection))
#endif
{
fmt::throw_exception("System failure (addr=0x%x, size=0x%x, flags_test=0x%x, flags_set=0x%x, flags_clear=0x%x)" HERE, addr, size, flags_test, flags_set, flags_clear);
}
}
start_value = new_val;
start = i;
}
}
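// The loop above coalesces contiguous pages whose final protection matches,
// issuing one VirtualProtect/mprotect call per run instead of one per page.
// The run-batching shape, distilled (hypothetical names, standard library only):
//
//   template <typename F>
//   void for_each_run(const std::vector<u8>& prot, F flush)
//   {
//       std::size_t start = 0;
//
//       for (std::size_t i = 1; i <= prot.size(); i++)
//       {
//           if (i == prot.size() || prot[i] != prot[start])
//           {
//               flush(start, i - start, prot[start]); // offset, count, value
//               start = i;
//           }
//       }
//   }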
@ -431,7 +304,7 @@ namespace vm
for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++)
{
if ((g_pages[i] & page_allocated) == 0)
if ((g_pages[i].flags & page_allocated) == 0)
{
fmt::throw_exception("Memory not mapped (addr=0x%x, size=0x%x, current_addr=0x%x)" HERE, addr, size, i * 4096);
}
@ -439,39 +312,29 @@ namespace vm
for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++)
{
_reservation_break(i * 4096);
if (!(g_pages[i].exchange(0) & page_allocated))
if (!(g_pages[i].flags.exchange(0) & page_allocated))
{
fmt::throw_exception("Concurrent access (addr=0x%x, size=0x%x, current_addr=0x%x)" HERE, addr, size, i * 4096);
}
}
void* real_addr = vm::base(addr);
void* priv_addr = vm::base_priv(addr);
#ifdef _WIN32
DWORD old;
if (!::VirtualProtect(real_addr, size, PAGE_NOACCESS, &old) || !::VirtualProtect(priv_addr, size, PAGE_NOACCESS, &old))
if (!::VirtualFree(real_addr, size, MEM_DECOMMIT))
#else
if (::mprotect(real_addr, size, PROT_NONE) || ::mprotect(priv_addr, size, PROT_NONE))
if (::madvise(real_addr, size, MADV_REMOVE) || ::mprotect(real_addr, size, PROT_NONE))
#endif
{
fmt::throw_exception("System failure (addr=0x%x, size=0x%x)" HERE, addr, size);
}
}
bool check_addr(u32 addr, u32 size)
bool check_addr(u32 addr, u32 size, u8 flags)
{
if (addr + (size - 1) < addr)
{
return false;
}
for (u32 i = addr / 4096; i <= (addr + size - 1) / 4096; i++)
{
if ((g_pages[i] & page_allocated) == 0)
if (UNLIKELY((g_pages[i % g_pages.size()].flags & flags) != flags))
{
return false;
}
@ -533,19 +396,19 @@ namespace vm
}
}
bool block_t::try_alloc(u32 addr, u32 size, u32 sup)
bool block_t::try_alloc(u32 addr, u32 size, u8 flags, u32 sup)
{
// Check if memory area is already mapped
for (u32 i = addr / 4096; i <= (addr + size - 1) / 4096; i++)
{
if (g_pages[i])
if (g_pages[i].flags)
{
return false;
}
}
// Map "real" memory pages
_page_map(addr, size, page_readable | page_writable);
_page_map(addr, size, flags);
// Add entry
m_map[addr] = size;
@ -565,7 +428,7 @@ namespace vm
block_t::~block_t()
{
std::lock_guard<reservation_mutex_t> lock(g_reservation_mutex);
mem_lock<writer_lock> lock(g_mutex);
// Deallocate all memory
for (auto& entry : m_map)
@ -576,7 +439,7 @@ namespace vm
u32 block_t::alloc(u32 size, u32 align, u32 sup)
{
std::lock_guard<reservation_mutex_t> lock(g_reservation_mutex);
mem_lock<writer_lock> lock(g_mutex);
// Align to minimal page size
size = ::align(size, 4096);
@ -593,10 +456,21 @@ namespace vm
return 0;
}
u8 pflags = page_readable | page_writable;
if (align >= 0x100000)
{
pflags |= page_1m_size;
}
else if (align >= 0x10000)
{
pflags |= page_64k_size;
}
// Search for an appropriate place (unoptimized)
for (u32 addr = ::align(this->addr, align); addr < this->addr + this->size - 1; addr += align)
{
if (try_alloc(addr, size, sup))
if (try_alloc(addr, size, pflags, sup))
{
return addr;
}
@ -607,7 +481,7 @@ namespace vm
u32 block_t::falloc(u32 addr, u32 size, u32 sup)
{
std::lock_guard<reservation_mutex_t> lock(g_reservation_mutex);
mem_lock<writer_lock> lock(g_mutex);
// Align to minimal page size
size = ::align(size, 4096);
@ -618,7 +492,7 @@ namespace vm
return 0;
}
if (!try_alloc(addr, size, sup))
if (!try_alloc(addr, size, page_readable | page_writable, sup))
{
return 0;
}
@ -628,7 +502,7 @@ namespace vm
u32 block_t::dealloc(u32 addr, u32* sup_out)
{
std::lock_guard<reservation_mutex_t> lock(g_reservation_mutex);
mem_lock<writer_lock> lock(g_mutex);
const auto found = m_map.find(addr);
@ -656,7 +530,7 @@ namespace vm
u32 block_t::used()
{
std::lock_guard<reservation_mutex_t> lock(g_reservation_mutex);
mem_lock<reader_lock> lock(g_mutex);
u32 result = 0;
@ -670,7 +544,7 @@ namespace vm
std::shared_ptr<block_t> map(u32 addr, u32 size, u64 flags)
{
std::lock_guard<reservation_mutex_t> lock(g_reservation_mutex);
mem_lock<writer_lock> lock(g_mutex);
if (!size || (size | addr) % 4096)
{
@ -692,7 +566,7 @@ namespace vm
for (u32 i = addr / 4096; i < addr / 4096 + size / 4096; i++)
{
if (g_pages[i])
if (g_pages[i].flags)
{
fmt::throw_exception("Unexpected pages allocated (current_addr=0x%x)" HERE, i * 4096);
}
@ -707,7 +581,7 @@ namespace vm
std::shared_ptr<block_t> unmap(u32 addr, bool must_be_empty)
{
std::lock_guard<reservation_mutex_t> lock(g_reservation_mutex);
mem_lock<writer_lock> lock(g_mutex);
for (auto it = g_locations.begin(); it != g_locations.end(); it++)
{
@ -729,7 +603,7 @@ namespace vm
std::shared_ptr<block_t> get(memory_location_t location, u32 addr)
{
std::lock_guard<reservation_mutex_t> lock(g_reservation_mutex);
mem_lock<reader_lock> lock(g_mutex);
if (location != any)
{
@ -754,8 +628,6 @@ namespace vm
return nullptr;
}
extern void start();
namespace ps3
{
void init()
@ -768,8 +640,6 @@ namespace vm
std::make_shared<block_t>(0xD0000000, 0x10000000), // stack
std::make_shared<block_t>(0xE0000000, 0x20000000), // SPU reserved
};
vm::start();
}
}
@ -784,8 +654,6 @@ namespace vm
std::make_shared<block_t>(0xC0000000, 0x10000000), // video (arbitrarily)
std::make_shared<block_t>(0xD0000000, 0x10000000), // stack (arbitrarily)
};
vm::start();
}
}
@ -803,19 +671,14 @@ namespace vm
std::make_shared<block_t>(0x00010000, 0x00004000), // scratchpad
std::make_shared<block_t>(0x88000000, 0x00800000), // kernel
};
vm::start();
}
}
void close()
{
g_locations.clear();
}
[[noreturn]] void throw_access_violation(u64 addr, const char* cause)
{
throw access_violation(addr, cause);
memory_helper::free_reserved_memory(g_base_addr, 0x100000000);
}
}
@ -844,30 +707,29 @@ void fmt_class_string<vm::_ptr_base<const char>>::format(std::string& out, u64 a
const auto start = out.size();
try
{
out += u8"“";
for (vm::_ptr_base<const volatile char> ptr = vm::cast(arg);; ptr++)
{
if (!vm::check_addr(ptr.addr()))
{
if (const char ch = *ptr)
{
out += ch;
}
else
{
break;
}
// TODO: optimize checks
out.resize(start);
out += u8"«INVALID_ADDRESS:";
fmt_class_string<u32>::format(out, arg);
out += u8"»";
return;
}
out += u8"”";
}
catch (const vm::access_violation&)
{
// Recover from invalid memory access
out.resize(start);
out += u8"«INVALID_ADDRESS:";
fmt_class_string<u32>::format(out, arg);
out += u8"»";
if (const char ch = *ptr)
{
out += ch;
}
else
{
break;
}
}
out += u8"”";
}


@ -4,12 +4,15 @@
#include <functional>
#include <memory>
class thread_ctrl;
#include "Utilities/mutex.h"
class named_thread;
namespace vm
{
extern u8* const g_base_addr;
extern u8* const g_priv_addr;
extern shared_mutex g_mutex;
enum memory_location_t : uint
{
@ -30,55 +33,50 @@ namespace vm
page_fault_notification = (1 << 3),
page_no_reservations = (1 << 4),
page_64k_size = (1 << 5),
page_1m_size = (1 << 6),
page_allocated = (1 << 7),
};
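// Waiter registered in vm.cpp: 'owner' is notified when the watched bytes
// change, 'addr'/'size' select the range, 'stamp' holds the reservation
// timestamp at registration, and 'data' points to a snapshot that test()
// compares against vm::base(addr).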
struct waiter
{
named_thread* owner;
u32 addr;
u32 size;
u64 stamp;
const void* data;
waiter() = default;
waiter(const waiter&) = delete;
void init();
void test() const;
~waiter();
};
// Address type
enum addr_t : u32 {};
struct access_violation : std::runtime_error
{
access_violation(u64 addr, const char* cause);
};
// Get reservation status for further atomic update: last update timestamp
u64 reservation_acquire(u32 addr, u32 size);
[[noreturn]] void throw_access_violation(u64 addr, const char* cause);
// End atomic update
void reservation_update(u32 addr, u32 size);
// This flag is changed by various reservation functions and may have a different meaning in each:
// reservation_break() - true if the reservation was successfully broken.
// reservation_acquire() - true if another existing reservation was broken.
// reservation_free() - true if this thread's reservation was successfully removed.
// reservation_op() - false if reservation_update() would succeed if called instead.
// Write access to reserved memory - only set to true if the reservation was broken.
extern thread_local bool g_tls_did_break_reservation;
// Check and notify memory change at address
void notify(u32 addr, u32 size);
// Unconditionally break the reservation at specified address
void reservation_break(u32 addr);
// Reserve memory at the specified address for further atomic update
void reservation_acquire(void* data, u32 addr, u32 size);
// Attempt to atomically update previously reserved memory
bool reservation_update(u32 addr, const void* data, u32 size);
// Process a memory access error if it's caused by the reservation
bool reservation_query(u32 addr, u32 size, bool is_writing, std::function<bool()> callback);
// Returns true if the current thread owns reservation
bool reservation_test(thread_ctrl* current);
// Break all reservations created by the current thread
void reservation_free();
// Perform atomic operation unconditionally
void reservation_op(u32 addr, u32 size, std::function<void()> proc);
void notify_all();
// Change memory protection of specified memory region
bool page_protect(u32 addr, u32 size, u8 flags_test = 0, u8 flags_set = 0, u8 flags_clear = 0);
// Check if an existing memory range is allocated. Checking an address before using it is inherently unsafe:
// the result may be stale, and even when it is true, the actual memory protection may still be read-only or no-access.
bool check_addr(u32 addr, u32 size = 1);
bool check_addr(u32 addr, u32 size = 1, u8 flags = page_allocated);
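// Hypothetical call sites for the new 'flags' parameter:
//   check_addr(addr);                                        // page allocated at all
//   check_addr(addr, size, page_allocated | page_writable);  // allocated and writable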
// Search and map memory in specified memory location (don't pass alignment smaller than 4096)
u32 alloc(u32 size, memory_location_t location, u32 align = 4096, u32 sup = 0);
@ -98,7 +96,7 @@ namespace vm
std::map<u32, u32> m_map; // Mapped memory: addr -> size
std::unordered_map<u32, u32> m_sup; // Supplementary info for allocations
bool try_alloc(u32 addr, u32 size, u32 sup);
bool try_alloc(u32 addr, u32 size, u8 flags, u32 sup);
public:
block_t(u32 addr, u32 size, u64 flags = 0);
@ -223,12 +221,6 @@ namespace vm
return g_base_addr + addr;
}
// Convert specified PS3/PSV virtual memory address to a pointer for privileged access (always readable/writable if allocated)
inline void* base_priv(u32 addr)
{
return g_priv_addr + addr;
}
inline const u8& read8(u32 addr)
{
return g_base_addr[addr];
@ -247,22 +239,12 @@ namespace vm
return static_cast<to_be_t<T>*>(base(addr));
}
template<typename T> inline to_be_t<T>* _ptr_priv(u32 addr)
{
return static_cast<to_be_t<T>*>(base_priv(addr));
}
// Convert specified PS3 address to a reference of specified (possibly converted to BE) type
template<typename T> inline to_be_t<T>& _ref(u32 addr)
{
return *_ptr<T>(addr);
}
template<typename T> inline to_be_t<T>& _ref_priv(u32 addr)
{
return *_ptr_priv<T>(addr);
}
inline const be_t<u16>& read16(u32 addr)
{
return _ref<u16>(addr);
@ -303,21 +285,11 @@ namespace vm
return static_cast<to_le_t<T>*>(base(addr));
}
template<typename T> inline to_le_t<T>* _ptr_priv(u32 addr)
{
return static_cast<to_le_t<T>*>(base_priv(addr));
}
template<typename T> inline to_le_t<T>& _ref(u32 addr)
{
return *_ptr<T>(addr);
}
template<typename T> inline to_le_t<T>& _ref_priv(u32 addr)
{
return *_ptr_priv<T>(addr);
}
inline const le_t<u16>& read16(u32 addr)
{
return _ref<u16>(addr);
@ -359,8 +331,6 @@ namespace vm
}
void close();
extern thread_local u64 g_tls_fault_count;
}
#include "vm_var.h"


@ -98,11 +98,6 @@ namespace vm
return static_cast<T*>(vm::base(vm::cast(m_addr, HERE)));
}
T* get_ptr_priv() const
{
return static_cast<T*>(vm::base_priv(vm::cast(m_addr, HERE)));
}
T* operator ->() const
{
return get_ptr();


@ -1,129 +1 @@
#include "stdafx.h"
#include "Emu/System.h"
#include "vm.h"
#include "wait_engine.h"
#include "Utilities/Thread.h"
#include "Utilities/mutex.h"
#include <unordered_set>
namespace vm
{
static shared_mutex s_mutex;
static std::unordered_set<waiter_base*, pointer_hash<waiter_base>> s_waiters(256);
void waiter_base::initialize(u32 addr, u32 size)
{
verify(HERE), addr, (size & (~size + 1)) == size, (addr & (size - 1)) == 0;
this->addr = addr;
this->mask = ~(size - 1);
this->thread = thread_ctrl::get_current();
struct waiter final
{
waiter_base* m_ptr;
thread_ctrl* m_thread;
waiter(waiter_base* ptr)
: m_ptr(ptr)
, m_thread(ptr->thread)
{
// Initialize waiter
writer_lock lock(s_mutex);
s_waiters.emplace(m_ptr);
}
~waiter()
{
// Reset thread
m_ptr->thread = nullptr;
// Remove waiter
writer_lock lock(s_mutex);
s_waiters.erase(m_ptr);
}
};
// Wait until thread == nullptr
waiter{this}, thread_ctrl::wait([&] { return !thread || test(); });
}
bool waiter_base::try_notify()
{
const auto _t = thread.load();
try
{
// Test predicate
if (UNLIKELY(!_t || !test()))
{
return false;
}
}
catch (...)
{
// Capture any exception thrown by the predicate
_t->set_exception(std::current_exception());
}
// Signal the thread with nullptr
if (auto _t = thread.exchange(nullptr))
{
_t->notify();
}
return true;
}
void notify_at(u32 addr, u32 size)
{
reader_lock lock(s_mutex);
for (const auto _w : s_waiters)
{
// Check address range overlapping using masks generated from size (power of 2)
if (((_w->addr ^ addr) & (_w->mask & ~(size - 1))) == 0)
{
_w->try_notify();
}
}
}
// Return the number of threads that were not notified
static std::size_t notify_all()
{
reader_lock lock(s_mutex);
std::size_t signaled = 0;
for (const auto _w : s_waiters)
{
if (_w->try_notify())
{
signaled++;
}
}
return s_waiters.size() - signaled;
}
void start()
{
thread_ctrl::spawn("vm::wait", []()
{
while (!Emu.IsStopped())
{
// Poll waiters periodically (TODO)
while (notify_all() && !Emu.IsPaused() && !Emu.IsStopped())
{
thread_ctrl::wait_for(50);
}
thread_ctrl::wait_for(1000);
}
});
}
}


@ -1,50 +1 @@
#pragma once
#include "Utilities/types.h"
#include "Utilities/Atomic.h"
class thread_ctrl;
namespace vm
{
struct waiter_base
{
u32 addr;
u32 mask;
atomic_t<thread_ctrl*> thread{};
void initialize(u32 addr, u32 size);
bool try_notify();
protected:
virtual bool test() = 0;
};
// Wait until pred() returns true; addr must be aligned to size, which must be a power of 2.
// It's possible for pred() to be called from any thread once the waiter is registered.
template<typename F>
auto wait_op(u32 addr, u32 size, F&& pred) -> decltype(static_cast<void>(pred()))
{
if (LIKELY(pred())) return;
struct waiter : waiter_base
{
std::conditional_t<sizeof(F) <= sizeof(void*), std::remove_reference_t<F>, F&&> func;
waiter(F&& func)
: func(std::forward<F>(func))
{
}
bool test() override
{
return func();
}
};
waiter(std::forward<F>(pred)).initialize(addr, size);
}
// Notify waiters at a specific addr; addr must be aligned to size, which must be a power of 2
void notify_at(u32 addr, u32 size);
}


@ -994,10 +994,12 @@ void arm_interpreter::LDREX(ARMv7Thread& cpu, const u32 op, const u32 cond)
{
const u32 addr = cpu.read_gpr(n) + imm32;
u32 value;
vm::reservation_acquire(&value, addr, sizeof(value));
cpu.rtime = vm::reservation_acquire(addr, sizeof(u32));
_mm_lfence();
cpu.raddr = addr;
cpu.rdata = vm::_ref<const atomic_le_t<u32>>(addr);
cpu.write_gpr(t, value, 4);
cpu.write_gpr(t, cpu.rdata, 4);
}
}
@ -2078,7 +2080,28 @@ void arm_interpreter::STREX(ARMv7Thread& cpu, const u32 op, const u32 cond)
{
const u32 addr = cpu.read_gpr(n) + imm32;
const u32 value = cpu.read_gpr(t);
cpu.write_gpr(d, !vm::reservation_update(addr, &value, sizeof(value)), 4);
atomic_le_t<u32>& data = vm::_ref<atomic_le_t<u32>>(addr);
if (cpu.raddr != addr || cpu.rdata != data.load())
{
// Failure
cpu.raddr = 0;
cpu.write_gpr(d, true, 4);
return;
}
writer_lock lock(vm::g_mutex);
const bool result = cpu.rtime == vm::reservation_acquire(addr, cpu.rtime) && data.compare_and_swap_test(cpu.rdata, value);
if (result)
{
vm::reservation_update(addr, sizeof(u32));
}
cpu.raddr = 0;
cpu.write_gpr(d, !result, 4);
}
}


@ -123,6 +123,9 @@ public:
} ITSTATE;
u32 TLS = 0;
u64 rtime = 0;
u32 raddr = 0;
u32 rdata = 0;
struct perf_counter
{


@ -435,6 +435,12 @@ bool Emulator::Pause()
idm::select<ARMv7Thread>(on_select);
idm::select<RawSPUThread>(on_select);
idm::select<SPUThread>(on_select);
if (auto mfc = fxm::check<mfc_thread>())
{
on_select(0, *mfc);
}
return true;
}
@ -471,6 +477,11 @@ void Emulator::Resume()
idm::select<RawSPUThread>(on_select);
idm::select<SPUThread>(on_select);
if (auto mfc = fxm::check<mfc_thread>())
{
on_select(0, *mfc);
}
rpcs3::on_resume()();
}
@ -498,6 +509,11 @@ void Emulator::Stop()
idm::select<RawSPUThread>(on_select);
idm::select<SPUThread>(on_select);
if (auto mfc = fxm::check<mfc_thread>())
{
on_select(0, *mfc);
}
LOG_NOTICE(GENERAL, "All threads signaled...");
while (g_thread_count)


@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup />
</Project>


@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup />
</Project>


@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup />
</Project>


@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup />
</Project>