
Implement vm::reservation_op

Implement vm::reservation_peek (memory load)
Implement vm::unsafe_ptr_cast helper
Example use in cellSpurs.cpp
Fix dma_lockb value and description
Nekotekina 2020-10-07 01:14:35 +03:00
parent 59be63167f
commit 89f1248140
5 changed files with 388 additions and 71 deletions
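For orientation, a minimal usage sketch combining the two new helpers with vm::unsafe_ptr_cast, modeled on the cellSpurs.cpp changes below. This is an assumption-laden sketch, not part of the commit: it presumes the usual RPCS3 headers (vm_ptr.h, vm_reservation.h, <cstring>) are included, that ppu is the calling ppu_thread, and that queue_x00, queue_ptr and new_value are illustrative names. The target type must be at most 128 bytes, trivially copyable, and have alignment equal to its size.

    // Hypothetical 128-byte, 128-byte-aligned guest structure (illustrative)
    struct alignas(128) queue_x00
    {
        be_t<u64> slots[4]; // command slots
        u8 pad[0x60];       // pad so sizeof == alignof == 128
    };

    // Pseudo-atomic 128-byte read: retries until a stable snapshot is observed
    queue_x00 snapshot{};
    vm::reservation_peek(ppu, vm::unsafe_ptr_cast<queue_x00>(queue_ptr), [&](const queue_x00& q)
    {
        std::memcpy(&snapshot, &q, sizeof(q));
    });

    // Atomic 128-byte read-modify-write: returning false aborts without touching memory
    const bool ok = vm::reservation_op(vm::unsafe_ptr_cast<queue_x00>(queue_ptr), [&](queue_x00& q)
    {
        if (!q.slots[0])
        {
            q.slots[0] = new_value; // new_value: the 64-bit command to insert (illustrative)
            return true;
        }

        return false;
    });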

View File

@@ -160,32 +160,6 @@ extern u32 ppu_lwarx(ppu_thread&, u32);
extern bool ppu_stwcx(ppu_thread&, u32, u32);
extern bool ppu_stdcx(ppu_thread&, u32, u64);
bool do_atomic_128_load(cpu_thread& cpu, u32 addr, void* dst)
{
verify(HERE), (addr % 128) == 0;
while (!cpu.test_stopped())
{
const u64 rtime = vm::reservation_acquire(addr, 128);
if (rtime % 128)
{
continue;
}
std::memcpy(dst, vm::base(addr), 128);
if (rtime != vm::reservation_acquire(addr, 128))
{
continue;
}
return true;
}
return false;
}
error_code sys_spu_image_close(ppu_thread&, vm::ptr<sys_spu_image> img);
//----------------------------------------------------------------------------
@@ -2516,7 +2490,7 @@ s32 cellSpursShutdownWorkload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid
if (wid >= (spurs->flags1 & SF1_32_WORKLOADS ? CELL_SPURS_MAX_WORKLOAD2 : CELL_SPURS_MAX_WORKLOAD))
return CELL_SPURS_POLICY_MODULE_ERROR_INVAL;
if (spurs->exception)
return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
@@ -4393,7 +4367,7 @@ s32 _spurs::check_job_chain_attribute(u32 sdkVer, vm::cptr<u64> jcEntry, u16 siz
{
if (!jcEntry)
return CELL_SPURS_JOB_ERROR_NULL_POINTER;
if (!jcEntry.aligned())
return CELL_SPURS_JOB_ERROR_ALIGN;
@@ -4592,13 +4566,12 @@ s32 cellSpursGetJobChainInfo(ppu_thread& ppu, vm::ptr<CellSpursJobChain> jobChai
return err;
}
CellSpursJobChain data;
// Read the commands queue atomically
if (!do_atomic_128_load(ppu, jobChain.addr(), &data))
CellSpursJobChain data;
vm::reservation_peek(ppu, vm::unsafe_ptr_cast<CellSpursJobChain_x00>(jobChain), [&](const CellSpursJobChain_x00& jch)
{
return 0;
}
std::memcpy(&data, &jch, sizeof(jch));
});
info->linkRegister[0] = +data.linkRegister[0];
info->linkRegister[1] = +data.linkRegister[1];
@@ -4896,48 +4869,25 @@ s32 cellSpursAddUrgentCommand(ppu_thread& ppu, vm::ptr<CellSpursJobChain> jobCha
if (jobChain->workloadId >= CELL_SPURS_MAX_WORKLOAD2)
return CELL_SPURS_JOB_ERROR_INVAL;
for (u32 i = 0;;)
s32 result = CELL_OK;
vm::reservation_op(vm::unsafe_ptr_cast<CellSpursJobChain_x00>(jobChain), [&](CellSpursJobChain_x00& jch)
{
if (i >= std::size(jobChain->urgentCmds))
for (auto& cmd : jch.urgentCmds)
{
// Exhausted all slots
return CELL_SPURS_JOB_ERROR_BUSY;
}
u64 currCmd = ppu_ldarx(ppu, jobChain.ptr(&CellSpursJobChain::urgentCmds, i).addr());
std::atomic_thread_fence(std::memory_order_acq_rel);
bool found = false;
bool reset = false;
if (!currCmd)
{
if (i != 0 && !jobChain->urgentCmds[i - 1])
if (!cmd)
{
// Restart search, someone emptied out the previous one
reset = true;
}
else
{
found = true;
currCmd = newCmd;
cmd = newCmd;
return true;
}
}
if (reset || !ppu_stdcx(ppu, jobChain.ptr(&CellSpursJobChain::urgentCmds, i).addr(), currCmd))
{
// Someone modified the job chain or the previous slot is empty, restart search
i = 0;
continue;
}
// Considered unlikely so unoptimized
result = CELL_SPURS_JOB_ERROR_BUSY;
return false;
});
if (found)
break;
i++;
}
return CELL_OK;
return result;
}
s32 cellSpursAddUrgentCall(ppu_thread& ppu, vm::ptr<CellSpursJobChain> jobChain, vm::ptr<u64> commandList)

View File

@@ -465,6 +465,25 @@ struct alignas(128) CellSpursJobChain
u8 unk5[0x100 - 0xA8];
};
struct alignas(128) CellSpursJobChain_x00
{
vm::bcptr<u64, u64> pc; // 0x00
vm::bcptr<u64, u64> linkRegister[3]; // 0x08
u8 unk0[0x3]; // 0x20
b8 isHalted; // 0x23
b8 autoReadyCount; // 0x24
u8 unk1[0x7]; // 0x25
u8 val2C; // 0x2C
u8 val2D; // 0x2D
u8 val2E; // 0x2E
u8 val2F; // 0x2F
be_t<u64> urgentCmds[4]; // 0x30
u8 unk2[0x22]; // 0x50
be_t<u16> maxGrabbedJob; // 0x72
be_t<u32> workloadId; // 0x74
vm::bptr<CellSpurs, u64> spurs; // 0x78
};
struct CellSpursJobChainInfo
{
be_t<u64> urgentCommandSlot[4]; // 0x00
@@ -494,7 +513,7 @@ struct alignas(8) CellSpursJobChainAttribute
be_t<u32> maxGrabbedJob; // 0x0E
u8 priorities[8]; // 0x10
be_t<u32> maxContention; // 0x18
b8 autoSpuCount; // 0x1C
b8 autoSpuCount; // 0x1C
u8 padding[3]; // 0x1D
be_t<u32> tag1; // 0x20
be_t<u32> tag2; // 0x24
@@ -1031,7 +1050,7 @@ struct alignas(16) CellSpursTaskBinInfo
struct alignas(128) CellSpursBarrier
{
be_t<u32> zero; // 0x00
be_t<u32> zero; // 0x00
be_t<u32> remained; // 0x04
u8 unk0[0x34 - 0x8];
vm::bptr<CellSpursTaskset> taskset; // 0x34

View File

@@ -13,6 +13,7 @@
#include "Emu/CPU/CPUThread.h"
#include "Emu/Cell/lv2/sys_memory.h"
#include "Emu/RSX/GSRender.h"
#include "Emu/Cell/SPURecompiler.h"
#include <atomic>
#include <thread>
#include <deque>
@@ -470,6 +471,52 @@ namespace vm
}
}
void reservation_op_internal(u32 addr, std::function<bool()> func)
{
const auto _cpu = get_current_cpu_thread();
// Acknowledge contender if necessary (TODO: check)
_cpu->state += cpu_flag::wait;
{
cpu_thread::suspend_all cpu_lock(_cpu);
// Wait to acquire PUTLLUC lock
while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u32>(vm::putlluc_lockb)))
{
busy_wait(100);
}
if (func())
{
// Success, release PUTLLUC and PUTLLC locks if necessary
vm::reservation_acquire(addr, 128) += 63;
}
else
{
// Fake update (TODO)
vm::reservation_acquire(addr, 128) += 63;
}
}
vm::reservation_notifier(addr, 128).notify_all();
}
void reservation_escape_internal()
{
const auto _cpu = get_current_cpu_thread();
if (_cpu && _cpu->id_type() == 1)
{
thread_ctrl::emergency_exit("vm::reservation_escape");
}
if (_cpu && _cpu->id_type() == 2)
{
spu_runtime::g_escape(static_cast<spu_thread*>(_cpu));
}
}
static void _page_map(u32 addr, u8 flags, u32 size, utils::shm* shm)
{
if (!size || (size | addr) % 4096 || flags & page_allocated)

View File

@@ -323,6 +323,13 @@ namespace vm
{
return vm::cast(other.addr(), HERE);
}
// Perform reinterpret cast
template <typename CT, typename T, typename AT, typename = decltype(reinterpret_cast<to_be_t<CT>*>(std::declval<T*>()))>
inline _ptr_base<to_be_t<CT>, u32> unsafe_ptr_cast(const _ptr_base<T, AT>& other)
{
return vm::cast(other.addr(), HERE);
}
}
struct null_t

View File

@@ -1,15 +1,19 @@
#pragma once
#include "vm.h"
#include "vm_locking.h"
#include "Utilities/cond.h"
#include "util/atomic.hpp"
#include <functional>
extern bool g_use_rtm;
namespace vm
{
enum reservation_lock_bit : u64
{
stcx_lockb = 1 << 0, // Exclusive conditional reservation lock
dma_lockb = 1 << 1, // Inexclusive unconditional reservation lock
dma_lockb = 1 << 5, // Exclusive unconditional reservation lock
putlluc_lockb = 1 << 6, // Exclusive unconditional reservation lock
};
@@ -69,4 +73,294 @@ namespace vm
return {*res, rtime};
}
void reservation_op_internal(u32 addr, std::function<bool()> func);
template <typename T, typename AT = u32, typename F>
SAFE_BUFFERS inline auto reservation_op(_ptr_base<T, AT> ptr, F op)
{
// Atomic operation will be performed on aligned 128 bytes of data, so the data size and alignment must comply
static_assert(sizeof(T) <= 128 && alignof(T) == sizeof(T), "vm::reservation_op: unsupported type");
static_assert(std::is_trivially_copyable_v<T>, "vm::reservation_op: not triv copyable (optimization)");
// Use "super" pointer to prevent access violation handling during atomic op
const auto sptr = vm::get_super_ptr<T>(static_cast<u32>(ptr.addr()));
// Use 128-byte aligned addr
const u32 addr = static_cast<u32>(ptr.addr()) & -128;
if (g_use_rtm)
{
auto& res = vm::reservation_acquire(addr, 128);
// Stage 1: single optimistic transaction attempt
unsigned status = _XBEGIN_STARTED;
#ifndef _MSC_VER
__asm__ goto ("xbegin %l[stage2];" ::: "memory" : stage2);
#else
status = _xbegin();
if (status == _XBEGIN_STARTED)
#endif
{
if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
{
res += 128;
std::invoke(op, *sptr);
#ifndef _MSC_VER
__asm__ volatile ("xend;" ::: "memory");
#else
_xend();
#endif
res.notify_all();
return;
}
else
{
if (auto result = std::invoke(op, *sptr))
{
res += 128;
#ifndef _MSC_VER
__asm__ volatile ("xend;" ::: "memory");
#else
_xend();
#endif
res.notify_all();
return result;
}
else
{
#ifndef _MSC_VER
__asm__ volatile ("xabort $1;" ::: "memory");
#else
_xabort(1);
#endif
// Unreachable code
return std::invoke_result_t<F, T&>();
}
}
}
stage2:
#ifndef _MSC_VER
__asm__ volatile ("movl %%eax, %0;" : "=r" (status) :: "memory");
#endif
if constexpr (!std::is_void_v<std::invoke_result_t<F, T&>>)
{
if (_XABORT_CODE(status))
{
// Unfortunately, actual function result is not recoverable in this case
return std::invoke_result_t<F, T&>();
}
}
// Touch memory if transaction failed without RETRY flag on the first attempt (TODO)
if (!(status & _XABORT_RETRY))
{
reinterpret_cast<atomic_t<u8>*>(sptr)->fetch_add(0);
}
// Stage 2: try to lock reservation first
res += stcx_lockb;
// Start lightened transaction (TODO: tweaking)
while (true)
{
#ifndef _MSC_VER
__asm__ goto ("xbegin %l[retry];" ::: "memory" : retry);
#else
status = _xbegin();
if (status != _XBEGIN_STARTED) [[unlikely]]
{
goto retry;
}
#endif
if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
{
std::invoke(op, *sptr);
#ifndef _MSC_VER
__asm__ volatile ("xend;" ::: "memory");
#else
_xend();
#endif
res += 127;
res.notify_all();
return;
}
else
{
if (auto result = std::invoke(op, *sptr))
{
#ifndef _MSC_VER
__asm__ volatile ("xend;" ::: "memory");
#else
_xend();
#endif
res += 127;
res.notify_all();
return result;
}
else
{
#ifndef _MSC_VER
__asm__ volatile ("xabort $1;" ::: "memory");
#else
_xabort(1);
#endif
return std::invoke_result_t<F, T&>();
}
}
retry:
#ifndef _MSC_VER
__asm__ volatile ("movl %%eax, %0;" : "=r" (status) :: "memory");
#endif
if (!(status & _XABORT_RETRY)) [[unlikely]]
{
if constexpr (!std::is_void_v<std::invoke_result_t<F, T&>>)
{
if (_XABORT_CODE(status))
{
res -= 1;
return std::invoke_result_t<F, T&>();
}
}
break;
}
}
// Stage 3: all failed, heavyweight fallback (see comments at the bottom)
if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
{
return vm::reservation_op_internal(addr, [&]
{
std::invoke(op, *sptr);
return true;
});
}
else
{
auto result = std::invoke_result_t<F, T&>();
vm::reservation_op_internal(addr, [&]
{
T buf = *sptr;
if ((result = std::invoke(op, buf)))
{
*sptr = buf;
return true;
}
else
{
return false;
}
});
return result;
}
}
// Perform under heavyweight lock
auto& res = vm::reservation_acquire(addr, 128);
res += stcx_lockb;
// Write directly if the op cannot fail
if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
{
{
vm::writer_lock lock(addr);
std::invoke(op, *sptr);
res += 127;
}
res.notify_all();
return;
}
else
{
// Make an operational copy of data (TODO: volatile storage?)
auto result = std::invoke_result_t<F, T&>();
{
vm::writer_lock lock(addr);
T buf = *sptr;
if ((result = std::invoke(op, buf)))
{
// If operation succeeds, write the data back
*sptr = buf;
res += 127;
}
else
{
// Operation failed, no memory has been modified
res -= 1;
return std::invoke_result_t<F, T&>();
}
}
res.notify_all();
return result;
}
}
// For internal usage
void reservation_escape_internal();
// Read memory value in pseudo-atomic manner
template <typename CPU, typename T, typename AT = u32, typename F>
SAFE_BUFFERS inline auto reservation_peek(CPU&& cpu, _ptr_base<T, AT> ptr, F op)
{
// Atomic operation will be performed on aligned 128 bytes of data, so the data size and alignment must comply
static_assert(sizeof(T) <= 128 && alignof(T) == sizeof(T), "vm::reservation_peek: unsupported type");
// Use "super" pointer to prevent access violation handling during atomic op
const auto sptr = vm::get_super_ptr<const T>(static_cast<u32>(ptr.addr()));
// Use 128-byte aligned addr
const u32 addr = static_cast<u32>(ptr.addr()) & -128;
while (true)
{
if constexpr (std::is_class_v<std::remove_cvref_t<CPU>>)
{
if (cpu.test_stopped())
{
reservation_escape_internal();
}
}
const u64 rtime = vm::reservation_acquire(addr, 128);
if (rtime & 127)
{
continue;
}
// Observe data non-atomically and make sure no reservation updates were made
if constexpr (std::is_void_v<std::invoke_result_t<F, const T&>>)
{
std::invoke(op, *sptr);
if (rtime == vm::reservation_acquire(addr, 128))
{
return;
}
}
else
{
auto res = std::invoke(op, *sptr);
if (rtime == vm::reservation_acquire(addr, 128))
{
return res;
}
}
}
}
} // namespace vm
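
Closing note: the cellSpurs.cpp example above exercises only the bool-returning form of vm::reservation_op. Below is a hedged sketch of the void-returning form, which the template treats as unconditionally successful: memory is written in place, the reservation counter receives a full increment, and waiters are notified. counter_x00 and ctr are illustrative names, not part of the commit.

    // Hypothetical 128-byte guest counter block (illustrative)
    struct alignas(128) counter_x00
    {
        be_t<u64> value; // counter at offset 0x00
        u8 pad[0x78];    // pad to 128 bytes
    };

    // No failure path: the lambda returns void, so the update always commits
    vm::reservation_op(vm::unsafe_ptr_cast<counter_x00>(ctr), [](counter_x00& c)
    {
        c.value = c.value + 1;
    });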