1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-25 04:02:42 +01:00

SPU: add stx/ftx counters

Just count pure transaction successes and failures.
This commit is contained in:
Nekotekina 2020-10-29 05:01:45 +03:00
parent 688a456642
commit 8ce0819b42
5 changed files with 61 additions and 31 deletions

View File

@ -266,29 +266,6 @@ asmjit::Runtime& asmjit::get_global_runtime()
return g_rt; return g_rt;
} }
// Emits the retry path that precedes a transaction and returns the label bound
// at the start of that path. No xbegin is emitted here; the caller issues it.
//   c         - assembler receiving the emitted instructions
//   fallback  - label jumped to when the retry path decides not to try again
//   ctr       - register used as the attempt counter (incremented on every pass
//               through the retry path)
//   less_than - the retry path jumps to fallback once ctr >= less_than
//               (jae, i.e. an unsigned comparison)
// Returns the label bound at the start of the retry path.
asmjit::Label asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than)
{
Label fall = c.newLabel();
Label begin = c.newLabel();
// Initial entry jumps over the retry path straight to `begin`
c.jmp(begin);
// Retry path starts here; this is the label handed back to the caller
c.bind(fall);
c.add(ctr, 1);
// Don't repeat on zero status (may indicate syscall or interrupt)
// NOTE(review): eax is presumably the transaction abort status on arrival here - confirm
c.test(x86::eax, x86::eax);
c.jz(fallback);
// Other bad statuses are ignored regardless of repeat flag (TODO)
c.cmp(ctr, less_than);
c.jae(fallback);
// Align the point where the caller's code (emitted after this call) begins
c.align(kAlignCode, 16);
c.bind(begin);
return fall;
// xbegin should be issued manually, allows to add more check before entering transaction
//c.xbegin(fall);
}
#ifdef LLVM_AVAILABLE #ifdef LLVM_AVAILABLE
#include <unordered_map> #include <unordered_map>

View File

@ -56,7 +56,32 @@ namespace asmjit
asmjit::Runtime& get_global_runtime(); asmjit::Runtime& get_global_runtime();
// Emit the retry loop adjacent to xbegin and return the label to pass to xbegin; xbegin itself must be issued by the caller (don't use xabort please)
// Template form taking an extra callable:
//   c         - assembler receiving the emitted instructions
//   fallback  - label jumped to when the retry path decides not to try again
//   ctr       - register used as the attempt counter (incremented on every pass
//               through the retry path)
//   less_than - the retry path jumps to fallback once ctr >= less_than
//               (jae, i.e. an unsigned comparison)
//   func      - callable invoked once while the code is being generated; whatever
//               it emits is placed at the very start of the retry path
// Returns the label bound at the start of the retry path.
[[nodiscard]] asmjit::Label build_transaction_enter(X86Assembler& c, Label fallback, const X86Gp& ctr, uint less_than); template <typename F>
[[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than, F func)
{
Label fall = c.newLabel();
Label begin = c.newLabel();
// Initial entry jumps over the retry path straight to `begin`
c.jmp(begin);
// Retry path starts here; this is the label handed back to the caller
c.bind(fall);
// First invoked after failure
// (code emitted by func must leave eax intact: eax is tested just below)
func();
c.add(ctr, 1);
// Don't repeat on zero status (may indicate syscall or interrupt)
// NOTE(review): eax is presumably the transaction abort status on arrival here - confirm
c.test(x86::eax, x86::eax);
c.jz(fallback);
// Other bad statuses are ignored regardless of repeat flag (TODO)
c.cmp(ctr, less_than);
c.jae(fallback);
// Align the point where the caller's code (emitted after this call) begins
c.align(kAlignCode, 16);
c.bind(begin);
return fall;
// xbegin should be issued manually, allows to add more check before entering transaction
}
} }
// Build runtime function with asmjit::X86Assembler // Build runtime function with asmjit::X86Assembler

View File

@ -1377,7 +1377,7 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
} }
// Begin transaction // Begin transaction
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 4); Label tx0 = build_transaction_enter(c, fall, x86::r12d, 4, []{});
c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause)); c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
c.mov(x86::eax, _XABORT_EXPLICIT); c.mov(x86::eax, _XABORT_EXPLICIT);
c.jc(fall); c.jc(fall);
@ -1489,7 +1489,7 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
c.cmp(x86::rax, x86::r13); c.cmp(x86::rax, x86::r13);
c.jne(fail2); c.jne(fail2);
Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666); Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666, []{});
c.prefetchw(x86::byte_ptr(x86::rbp, 0)); c.prefetchw(x86::byte_ptr(x86::rbp, 0));
c.prefetchw(x86::byte_ptr(x86::rbp, 64)); c.prefetchw(x86::byte_ptr(x86::rbp, 64));

View File

@ -397,7 +397,10 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
} }
// Begin transaction // Begin transaction
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 4); Label tx0 = build_transaction_enter(c, fall, x86::r12d, 4, [&]()
{
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx) - ::offset32(&spu_thread::rdata)), 1);
});
c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause)); c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
c.mov(x86::eax, _XABORT_EXPLICIT); c.mov(x86::eax, _XABORT_EXPLICIT);
c.jc(fall); c.jc(fall);
@ -463,6 +466,7 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
c.sub(x86::qword_ptr(x86::rbx), -128); c.sub(x86::qword_ptr(x86::rbx), -128);
c.xend(); c.xend();
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
c.mov(x86::eax, x86::r12d); c.mov(x86::eax, x86::r12d);
c.jmp(_ret); c.jmp(_ret);
@ -491,10 +495,12 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
c.xend(); c.xend();
c.xor_(x86::eax, x86::eax); c.xor_(x86::eax, x86::eax);
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
c.jmp(_ret); c.jmp(_ret);
c.bind(skip); c.bind(skip);
c.xend(); c.xend();
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
c.mov(x86::eax, _XABORT_EXPLICIT); c.mov(x86::eax, _XABORT_EXPLICIT);
//c.jmp(fall); //c.jmp(fall);
@ -524,7 +530,10 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
c.cmp(x86::rax, x86::r13); c.cmp(x86::rax, x86::r13);
c.jne(fail2); c.jne(fail2);
Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666); Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666, [&]()
{
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx) - ::offset32(&spu_thread::rdata)), 1);
});
c.prefetchw(x86::byte_ptr(x86::rbp, 0)); c.prefetchw(x86::byte_ptr(x86::rbp, 0));
c.prefetchw(x86::byte_ptr(x86::rbp, 64)); c.prefetchw(x86::byte_ptr(x86::rbp, 64));
@ -592,6 +601,7 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
} }
c.xend(); c.xend();
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
c.lock().add(x86::qword_ptr(x86::rbx), 127); c.lock().add(x86::qword_ptr(x86::rbx), 127);
c.mov(x86::eax, x86::r12d); c.mov(x86::eax, x86::r12d);
c.jmp(_ret); c.jmp(_ret);
@ -620,6 +630,7 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
} }
c.xend(); c.xend();
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
c.jmp(fail2); c.jmp(fail2);
c.bind(fall2); c.bind(fall2);
@ -730,7 +741,10 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
} }
// Begin transaction // Begin transaction
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 8); Label tx0 = build_transaction_enter(c, fall, x86::r12d, 8, [&]()
{
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
});
c.xbegin(tx0); c.xbegin(tx0);
c.test(x86::qword_ptr(x86::rbx), vm::rsrv_unique_lock); c.test(x86::qword_ptr(x86::rbx), vm::rsrv_unique_lock);
c.jnz(skip); c.jnz(skip);
@ -756,11 +770,13 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
c.sub(x86::qword_ptr(x86::rbx), -128); c.sub(x86::qword_ptr(x86::rbx), -128);
c.xend(); c.xend();
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
c.mov(x86::eax, 1); c.mov(x86::eax, 1);
c.jmp(_ret); c.jmp(_ret);
c.bind(skip); c.bind(skip);
c.xend(); c.xend();
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
//c.jmp(fall); //c.jmp(fall);
c.bind(fall); c.bind(fall);
@ -783,7 +799,11 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
c.test(x86::eax, vm::rsrv_unique_lock); c.test(x86::eax, vm::rsrv_unique_lock);
c.jnz(fall2); c.jnz(fall2);
Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666); Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666, [&]()
{
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
});
c.prefetchw(x86::byte_ptr(x86::rbp, 0)); c.prefetchw(x86::byte_ptr(x86::rbp, 0));
c.prefetchw(x86::byte_ptr(x86::rbp, 64)); c.prefetchw(x86::byte_ptr(x86::rbp, 64));
@ -815,6 +835,7 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
} }
c.xend(); c.xend();
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
c.lock().add(x86::qword_ptr(x86::rbx), 127); c.lock().add(x86::qword_ptr(x86::rbx), 127);
c.mov(x86::eax, x86::r12d); c.mov(x86::eax, x86::r12d);
c.jmp(_ret); c.jmp(_ret);
@ -884,7 +905,10 @@ const extern auto spu_getllar_tx = build_function_asm<u32(*)(u32 raddr, void* rd
c.mov(x86::r13, args[1]); c.mov(x86::r13, args[1]);
// Begin transaction // Begin transaction
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 8); Label tx0 = build_transaction_enter(c, fall, x86::r12d, 8, [&]()
{
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
});
// Check pause flag // Check pause flag
c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause)); c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
@ -916,6 +940,7 @@ const extern auto spu_getllar_tx = build_function_asm<u32(*)(u32 raddr, void* rd
} }
c.xend(); c.xend();
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
// Store data // Store data
if (s_tsx_avx) if (s_tsx_avx)

View File

@ -753,6 +753,9 @@ public:
u64 saved_native_sp = 0; // Host thread's stack pointer for emulated longjmp u64 saved_native_sp = 0; // Host thread's stack pointer for emulated longjmp
u64 ftx = 0; // Failed transactions
u64 stx = 0; // Succeeded transactions (pure counters)
std::array<v128, 0x4000> stack_mirror; // Return address information std::array<v128, 0x4000> stack_mirror; // Return address information
const char* current_func{}; // Current STOP or RDCH blocking function const char* current_func{}; // Current STOP or RDCH blocking function