Mirror of https://github.com/RPCS3/rpcs3.git, synced 2024-11-25 12:12:50 +01:00

TSX tweaks

Allow more work to be done in first-chance transactions.
Give PUTLLC +1 priority (minor change).
Nekotekina 2020-10-28 23:12:35 +03:00
parent 59b3a3d26b
commit 688a456642
2 changed files with 22 additions and 7 deletions
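The "first-chance transaction" part of the change is visible at the top of both ppu_stcx_accurate_tx and spu_putllc_tx below: the pause-flag test is hoisted in front of c.xbegin, with eax seeded with _XABORT_EXPLICIT so that a jump to the fall label looks to the fallback code like an explicitly aborted transaction. Checks performed before xbegin cannot abort the transaction itself, which is presumably what lets the first attempt do more real work. Below is a minimal C++ sketch of the same pattern written with the RTM intrinsics instead of asmjit; every name in it (try_store_first_chance, the 127 lock mask, pause_flag) is hypothetical and not RPCS3 code.

// Illustrative only: a generic "first-chance" RTM transaction in plain C++
// intrinsics. The reservation layout and names are assumptions for the sketch.
#include <immintrin.h> // _xbegin/_xend/_xabort (compile with -mrtm)
#include <atomic>
#include <cstdint>
#include <cstring>

bool try_store_first_chance(std::atomic<std::uint64_t>& rtime, std::uint64_t expected,
                            unsigned char* data, const unsigned char* cmp,
                            const unsigned char* next, std::size_t size,
                            const std::atomic<bool>& pause_flag)
{
    // Pre-checks done *outside* the transaction: they cannot cause an abort,
    // so the first attempt can spend its whole capacity on the real work.
    if (pause_flag.load(std::memory_order_relaxed))
    {
        return false; // caller falls back to the slow path immediately
    }
    if (rtime.load(std::memory_order_relaxed) & 127)
    {
        return false; // low bits act as lock/contention bits in this sketch
    }

    if (_xbegin() == _XBEGIN_STARTED)
    {
        // Re-read the counter transactionally; any concurrent writer aborts us.
        if (rtime.load(std::memory_order_relaxed) != expected || std::memcmp(data, cmp, size) != 0)
        {
            _xabort(0); // explicit abort: reservation lost or old data mismatch
        }
        std::memcpy(data, next, size);                           // publish new data
        rtime.store(expected + 128, std::memory_order_relaxed);  // bump the version
        _xend();
        return true;
    }

    return false; // aborted: caller retries or takes the locked fall-back path
}

Keeping the cheap rejections outside the transaction also avoids burning a hardware attempt on conditions that a plain load can already detect.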

View File

@@ -1378,10 +1378,14 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 // Begin transaction
 Label tx0 = build_transaction_enter(c, fall, x86::r12d, 4);
+c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
+c.mov(x86::eax, _XABORT_EXPLICIT);
+c.jc(fall);
 c.xbegin(tx0);
 c.mov(x86::rax, x86::qword_ptr(x86::rbx));
-c.test(x86::eax, 127);
+c.test(x86::eax, vm::rsrv_unique_lock);
 c.jnz(skip);
+c.and_(x86::rax, -128);
 c.cmp(x86::rax, x86::r13);
 c.jne(fail);

@@ -1479,7 +1483,7 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 c.mov(x86::eax, 1);
 c.lock().xadd(x86::qword_ptr(x86::rbx), x86::rax);
 c.test(x86::eax, vm::rsrv_unique_lock);
-c.jnz(fall2);
+c.jnz(fail2);
 // Allow only first shared lock to proceed
 c.cmp(x86::rax, x86::r13);

@@ -1493,6 +1497,8 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
 c.bt(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
 c.jc(fall2);
 c.mov(x86::rax, x86::qword_ptr(x86::rbx));
+c.test(x86::rax, 127 - 1);
+c.jnz(fall2);
 c.and_(x86::rax, -128);
 c.cmp(x86::rax, x86::r13);
 c.jne(fail2);

@@ -1681,7 +1687,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
 }
 }
-return cpu_thread::suspend_all(&ppu, [&]
+return cpu_thread::suspend_all<+1>(&ppu, [&]
 {
 if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref<spu_rdata_t>(addr & -128)))
 {

View File

@@ -398,10 +398,14 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
 // Begin transaction
 Label tx0 = build_transaction_enter(c, fall, x86::r12d, 4);
+c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
+c.mov(x86::eax, _XABORT_EXPLICIT);
+c.jc(fall);
 c.xbegin(tx0);
 c.mov(x86::rax, x86::qword_ptr(x86::rbx));
-c.test(x86::eax, 127);
+c.test(x86::eax, vm::rsrv_unique_lock);
 c.jnz(skip);
+c.and_(x86::rax, -128);
 c.cmp(x86::rax, x86::r13);
 c.jne(fail);

@@ -514,7 +518,7 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
 c.mov(x86::eax, 1);
 c.lock().xadd(x86::qword_ptr(x86::rbx), x86::rax);
 c.test(x86::eax, vm::rsrv_unique_lock);
-c.jnz(fail3);
+c.jnz(fail2);
 // Allow only first shared lock to proceed
 c.cmp(x86::rax, x86::r13);

@@ -528,6 +532,8 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
 c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
 c.jc(fall2);
 c.mov(x86::rax, x86::qword_ptr(x86::rbx));
+c.test(x86::rax, 127 - 1);
+c.jnz(fall2);
 c.and_(x86::rax, -128);
 c.cmp(x86::rax, x86::r13);
 c.jne(fail2);

@@ -784,6 +790,9 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
 // Check pause flag
 c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
 c.jc(fall2);
+// Check contention
+c.test(x86::qword_ptr(x86::rbx), 127 - 1);
+c.jc(fall2);
 c.xbegin(tx1);
 if (s_tsx_avx)

@@ -2283,7 +2292,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
 {
 case UINT32_MAX:
 {
-const bool ok = cpu_thread::suspend_all(this, [&]()
+const bool ok = cpu_thread::suspend_all<+1>(this, [&]()
 {
 if ((res & -128) == rtime)
 {

@@ -2397,7 +2406,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
 if (result == 0)
 {
 // Execute with increased priority
-cpu_thread::suspend_all<+1>(cpu, [&]
+cpu_thread::suspend_all<0>(cpu, [&]
 {
 mov_rdata(vm::_ref<spu_rdata_t>(addr), *static_cast<const spu_rdata_t*>(to_write));
 vm::reservation_acquire(addr, 128) += 127;
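A note on the masks that recur in these hunks (test against 127, test against 127 - 1, and_ with -128): they read naturally if the 64-bit reservation word is a version counter in its upper bits with lock/contention bits in the low 7 bits, of which vm::rsrv_unique_lock is one. The sketch below spells out that interpretation; the concrete values are illustrative assumptions, not RPCS3's actual vm:: definitions.

// Interpretation sketch only; the real constants live in RPCS3's vm namespace
// and may differ. It just shows why the diff tests 127 and 127 - 1, and masks -128.
#include <cstdint>

constexpr std::uint64_t lock_mask   = 127;        // low 7 bits: any lock/contention bit set
constexpr std::uint64_t shared_mask = 127 - 1;    // same, excluding the lowest bit
constexpr std::uint64_t time_mask   = ~lock_mask; // equivalent to the & -128 in the diff

constexpr bool any_lock_held(std::uint64_t res)    { return (res & lock_mask) != 0; }
constexpr bool contended(std::uint64_t res)        { return (res & shared_mask) != 0; }
constexpr std::uint64_t version(std::uint64_t res) { return res & time_mask; }

static_assert(version(0x1234'5678'0000'007Full) == 0x1234'5678'0000'0000ull);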