mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-25 04:02:42 +01:00
SPU: add stx/ftx counters
Just count pure transaction successes and failures.
This commit is contained in:
parent
688a456642
commit
8ce0819b42
@ -266,29 +266,6 @@ asmjit::Runtime& asmjit::get_global_runtime()
|
|||||||
return g_rt;
|
return g_rt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emits the retry prologue that precedes a transaction and returns the label of
// its retry path (the label the caller later hands to xbegin).
//
// c         - assembler receiving the code
// fallback  - label jumped to when the transaction must not be retried
// ctr       - register incremented once per pass through the retry path
// less_than - retry limit: once ctr reaches this value, control goes to fallback
//
// On the retry path eax is tested as the failure status.
asmjit::Label asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than)
{
	Label retry = c.newLabel();
	Label entry = c.newLabel();

	// The first attempt jumps straight over the retry bookkeeping
	c.jmp(entry);

	// Retry path: account for one more attempt
	c.bind(retry);
	c.add(ctr, 1);

	// A zero status is never retried (may indicate syscall or interrupt)
	c.test(x86::eax, x86::eax);
	c.jz(fallback);

	// Any other bad status is retried until the limit is hit, regardless of the repeat flag (TODO)
	c.cmp(ctr, less_than);
	c.jae(fallback);

	c.align(kAlignCode, 16);
	c.bind(entry);

	// xbegin is deliberately left to the caller, so that additional checks
	// can be placed before the transaction is actually entered
	//c.xbegin(retry);
	return retry;
}
|
|
||||||
|
|
||||||
#ifdef LLVM_AVAILABLE
|
#ifdef LLVM_AVAILABLE
|
||||||
|
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
@ -56,7 +56,32 @@ namespace asmjit
|
|||||||
asmjit::Runtime& get_global_runtime();
|
asmjit::Runtime& get_global_runtime();
|
||||||
|
|
||||||
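// Emit the transaction retry loop and return the label to pass to xbegin (xbegin itself is issued by the caller; don't use xabort please)
|
// Emit the transaction retry loop and return the label to pass to xbegin (xbegin itself is issued by the caller; don't use xabort please)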
|
||||||
[[nodiscard]] asmjit::Label build_transaction_enter(X86Assembler& c, Label fallback, const X86Gp& ctr, uint less_than);
|
template <typename F>
|
||||||
|
[[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, const asmjit::X86Gp& ctr, uint less_than, F func)
|
||||||
|
{
|
||||||
|
Label fall = c.newLabel();
|
||||||
|
Label begin = c.newLabel();
|
||||||
|
c.jmp(begin);
|
||||||
|
c.bind(fall);
|
||||||
|
|
||||||
|
// First invoked after failure
|
||||||
|
func();
|
||||||
|
|
||||||
|
c.add(ctr, 1);
|
||||||
|
|
||||||
|
// Don't repeat on zero status (may indicate syscall or interrupt)
|
||||||
|
c.test(x86::eax, x86::eax);
|
||||||
|
c.jz(fallback);
|
||||||
|
|
||||||
|
// Other bad statuses are ignored regardless of repeat flag (TODO)
|
||||||
|
c.cmp(ctr, less_than);
|
||||||
|
c.jae(fallback);
|
||||||
|
c.align(kAlignCode, 16);
|
||||||
|
c.bind(begin);
|
||||||
|
return fall;
|
||||||
|
|
||||||
|
// xbegin should be issued manually, allows to add more check before entering transaction
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build runtime function with asmjit::X86Assembler
|
// Build runtime function with asmjit::X86Assembler
|
||||||
|
@ -1377,7 +1377,7 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Begin transaction
|
// Begin transaction
|
||||||
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 4);
|
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 4, []{});
|
||||||
c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
|
c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
|
||||||
c.mov(x86::eax, _XABORT_EXPLICIT);
|
c.mov(x86::eax, _XABORT_EXPLICIT);
|
||||||
c.jc(fall);
|
c.jc(fall);
|
||||||
@ -1489,7 +1489,7 @@ const auto ppu_stcx_accurate_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime
|
|||||||
c.cmp(x86::rax, x86::r13);
|
c.cmp(x86::rax, x86::r13);
|
||||||
c.jne(fail2);
|
c.jne(fail2);
|
||||||
|
|
||||||
Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666);
|
Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666, []{});
|
||||||
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
||||||
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
||||||
|
|
||||||
|
@ -397,7 +397,10 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Begin transaction
|
// Begin transaction
|
||||||
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 4);
|
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 4, [&]()
|
||||||
|
{
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx) - ::offset32(&spu_thread::rdata)), 1);
|
||||||
|
});
|
||||||
c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
|
c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
|
||||||
c.mov(x86::eax, _XABORT_EXPLICIT);
|
c.mov(x86::eax, _XABORT_EXPLICIT);
|
||||||
c.jc(fall);
|
c.jc(fall);
|
||||||
@ -463,6 +466,7 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
|
|||||||
|
|
||||||
c.sub(x86::qword_ptr(x86::rbx), -128);
|
c.sub(x86::qword_ptr(x86::rbx), -128);
|
||||||
c.xend();
|
c.xend();
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
|
||||||
c.mov(x86::eax, x86::r12d);
|
c.mov(x86::eax, x86::r12d);
|
||||||
c.jmp(_ret);
|
c.jmp(_ret);
|
||||||
|
|
||||||
@ -491,10 +495,12 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
|
|||||||
|
|
||||||
c.xend();
|
c.xend();
|
||||||
c.xor_(x86::eax, x86::eax);
|
c.xor_(x86::eax, x86::eax);
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
|
||||||
c.jmp(_ret);
|
c.jmp(_ret);
|
||||||
|
|
||||||
c.bind(skip);
|
c.bind(skip);
|
||||||
c.xend();
|
c.xend();
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
|
||||||
c.mov(x86::eax, _XABORT_EXPLICIT);
|
c.mov(x86::eax, _XABORT_EXPLICIT);
|
||||||
//c.jmp(fall);
|
//c.jmp(fall);
|
||||||
|
|
||||||
@ -524,7 +530,10 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
|
|||||||
c.cmp(x86::rax, x86::r13);
|
c.cmp(x86::rax, x86::r13);
|
||||||
c.jne(fail2);
|
c.jne(fail2);
|
||||||
|
|
||||||
Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666);
|
Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666, [&]()
|
||||||
|
{
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx) - ::offset32(&spu_thread::rdata)), 1);
|
||||||
|
});
|
||||||
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
||||||
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
||||||
|
|
||||||
@ -592,6 +601,7 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
|
|||||||
}
|
}
|
||||||
|
|
||||||
c.xend();
|
c.xend();
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
|
||||||
c.lock().add(x86::qword_ptr(x86::rbx), 127);
|
c.lock().add(x86::qword_ptr(x86::rbx), 127);
|
||||||
c.mov(x86::eax, x86::r12d);
|
c.mov(x86::eax, x86::r12d);
|
||||||
c.jmp(_ret);
|
c.jmp(_ret);
|
||||||
@ -620,6 +630,7 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
|
|||||||
}
|
}
|
||||||
|
|
||||||
c.xend();
|
c.xend();
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
|
||||||
c.jmp(fail2);
|
c.jmp(fail2);
|
||||||
|
|
||||||
c.bind(fall2);
|
c.bind(fall2);
|
||||||
@ -730,7 +741,10 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Begin transaction
|
// Begin transaction
|
||||||
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 8);
|
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 8, [&]()
|
||||||
|
{
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
|
||||||
|
});
|
||||||
c.xbegin(tx0);
|
c.xbegin(tx0);
|
||||||
c.test(x86::qword_ptr(x86::rbx), vm::rsrv_unique_lock);
|
c.test(x86::qword_ptr(x86::rbx), vm::rsrv_unique_lock);
|
||||||
c.jnz(skip);
|
c.jnz(skip);
|
||||||
@ -756,11 +770,13 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
|
|||||||
|
|
||||||
c.sub(x86::qword_ptr(x86::rbx), -128);
|
c.sub(x86::qword_ptr(x86::rbx), -128);
|
||||||
c.xend();
|
c.xend();
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
|
||||||
c.mov(x86::eax, 1);
|
c.mov(x86::eax, 1);
|
||||||
c.jmp(_ret);
|
c.jmp(_ret);
|
||||||
|
|
||||||
c.bind(skip);
|
c.bind(skip);
|
||||||
c.xend();
|
c.xend();
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
|
||||||
//c.jmp(fall);
|
//c.jmp(fall);
|
||||||
|
|
||||||
c.bind(fall);
|
c.bind(fall);
|
||||||
@ -783,7 +799,11 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
|
|||||||
c.test(x86::eax, vm::rsrv_unique_lock);
|
c.test(x86::eax, vm::rsrv_unique_lock);
|
||||||
c.jnz(fall2);
|
c.jnz(fall2);
|
||||||
|
|
||||||
Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666);
|
Label tx1 = build_transaction_enter(c, fall2, x86::r12d, 666, [&]()
|
||||||
|
{
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
|
||||||
|
});
|
||||||
|
|
||||||
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
||||||
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
||||||
|
|
||||||
@ -815,6 +835,7 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
|
|||||||
}
|
}
|
||||||
|
|
||||||
c.xend();
|
c.xend();
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
|
||||||
c.lock().add(x86::qword_ptr(x86::rbx), 127);
|
c.lock().add(x86::qword_ptr(x86::rbx), 127);
|
||||||
c.mov(x86::eax, x86::r12d);
|
c.mov(x86::eax, x86::r12d);
|
||||||
c.jmp(_ret);
|
c.jmp(_ret);
|
||||||
@ -884,7 +905,10 @@ const extern auto spu_getllar_tx = build_function_asm<u32(*)(u32 raddr, void* rd
|
|||||||
c.mov(x86::r13, args[1]);
|
c.mov(x86::r13, args[1]);
|
||||||
|
|
||||||
// Begin transaction
|
// Begin transaction
|
||||||
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 8);
|
Label tx0 = build_transaction_enter(c, fall, x86::r12d, 8, [&]()
|
||||||
|
{
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
|
||||||
|
});
|
||||||
|
|
||||||
// Check pause flag
|
// Check pause flag
|
||||||
c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
|
c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
|
||||||
@ -916,6 +940,7 @@ const extern auto spu_getllar_tx = build_function_asm<u32(*)(u32 raddr, void* rd
|
|||||||
}
|
}
|
||||||
|
|
||||||
c.xend();
|
c.xend();
|
||||||
|
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
|
||||||
|
|
||||||
// Store data
|
// Store data
|
||||||
if (s_tsx_avx)
|
if (s_tsx_avx)
|
||||||
|
@ -753,6 +753,9 @@ public:
|
|||||||
|
|
||||||
u64 saved_native_sp = 0; // Host thread's stack pointer for emulated longjmp
|
u64 saved_native_sp = 0; // Host thread's stack pointer for emulated longjmp
|
||||||
|
|
||||||
|
u64 ftx = 0; // Failed transactions
|
||||||
|
u64 stx = 0; // Succeeded transactions (pure counters)
|
||||||
|
|
||||||
std::array<v128, 0x4000> stack_mirror; // Return address information
|
std::array<v128, 0x4000> stack_mirror; // Return address information
|
||||||
|
|
||||||
const char* current_func{}; // Current STOP or RDCH blocking function
|
const char* current_func{}; // Current STOP or RDCH blocking function
|
||||||
|
Loading…
Reference in New Issue
Block a user