From a626ccfcad5b63cacbc0abe4686b81bc8e3f28d0 Mon Sep 17 00:00:00 2001 From: Eladash Date: Fri, 1 Sep 2023 19:38:06 +0300 Subject: [PATCH] SPU LLVM: Initial precompilation of tail-calls --- rpcs3/Emu/Cell/SPUAnalyser.h | 34 +++++++----- rpcs3/Emu/Cell/SPUDisAsm.h | 7 +++ rpcs3/Emu/Cell/SPURecompiler.cpp | 88 +++++++++++++++++++++++++++++++- rpcs3/Emu/Cell/SPUThread.cpp | 10 ++++ 4 files changed, 124 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUAnalyser.h b/rpcs3/Emu/Cell/SPUAnalyser.h index d448fca473..1d33fc7156 100644 --- a/rpcs3/Emu/Cell/SPUAnalyser.h +++ b/rpcs3/Emu/Cell/SPUAnalyser.h @@ -14,12 +14,13 @@ struct spu_itype static constexpr struct floating_tag{} floating{}; // Floating-Point Instructions static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values + static constexpr struct zregmod_tag{} zregmod{}; // Instructions not modifying any GPR enum type : unsigned char { UNK = 0, - HEQ, + HEQ, // zregmod_tag first HEQI, HGT, HGTI, @@ -36,11 +37,21 @@ struct spu_itype NOP, SYNC, DSYNC, - MFSPR, MTSPR, + WRCH, + + STQD, // memory_tag first + STQX, + STQA, + STQR, // zregmod_tag last + LQD, + LQX, + LQA, + LQR, // memory_tag last + + MFSPR, RDCH, RCHCNT, - WRCH, BR, // branch_tag first BRA, @@ -59,15 +70,6 @@ struct spu_itype BIHZ, BIHNZ, // branch_tag last - LQD, // memory_tag first - LQX, - LQA, - LQR, - STQD, - STQX, - STQA, - STQR, // memory_tag last - ILH, // constant_tag_first ILHU, IL, @@ -267,7 +269,7 @@ struct spu_itype // Test for memory instruction friend constexpr bool operator &(type value, memory_tag) { - return value >= LQD && value <= STQR; + return value >= STQD && value <= LQR; } // Test for compare instruction @@ -293,6 +295,12 @@ struct spu_itype { return value >= ILH && value <= FSMBI; } + + // Test for non register-modifying instruction + friend constexpr bool operator &(type value, zregmod_tag) + { + return value >= HEQ && value <= STQR; + } }; struct spu_iflag diff --git a/rpcs3/Emu/Cell/SPUDisAsm.h b/rpcs3/Emu/Cell/SPUDisAsm.h index e891ed6fcc..0d5862025b 100644 --- a/rpcs3/Emu/Cell/SPUDisAsm.h +++ b/rpcs3/Emu/Cell/SPUDisAsm.h @@ -851,6 +851,13 @@ public: } void BR(spu_opcode_t op) { + if (op.rt && op.rt != 127u) + { + // Valid but makes no sense + DisAsm("br??", DisAsmBranchTarget(op.i16)); + return; + } + DisAsm("br", DisAsmBranchTarget(op.i16)); } void FSMBI(spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 28e2f330c5..7ca2143bd2 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -2091,21 +2091,25 @@ void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* /*rip*/ std::vector spu_thread::discover_functions(u32 base_addr, std::span ls, bool is_known_addr, u32 /*entry*/) { std::vector calls; + std::vector branches; + calls.reserve(100); // Discover functions // Use the most simple method: search for instructions that calls them - // And then filter invalid cases (does not detect tail calls) + // And then filter invalid cases + // TODO: Does not detect jumptables or fixed-addr indirect calls const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax); for (u32 i = utils::align(base_addr, 0x10); i < std::min(base_addr + ls.size(), 0x3FFF0); i += 0x10) { - // Search for BRSL LR and BRASL LR + // Search for BRSL LR and BRASL LR or BR // TODO: BISL const v128 inst = read_from_ptr>(ls.data(), i - base_addr); const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7))); const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23)); const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask); + const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23)); const v128 result = eq_brsl | eq_brasl; if (!gv_testz(result)) @@ -2118,6 +2122,17 @@ std::vector spu_thread::discover_functions(u32 base_addr, std::span spu_thread::discover_functions(u32 base_addr, std::span addrs; for (u32 addr : calls) @@ -2142,6 +2163,69 @@ std::vector spu_thread::discover_functions(u32 base_addr, std::span>(ls, addr - base_addr)}; + + const u32 func = op_branch_targets(addr, op)[0]; + + if (func == umax || addr + 4 == func || func == addr || !addr) + { + continue; + } + + // Search for AI R1, +x or OR R3/4, Rx, 0 + // Reasoning: AI R1, +x means stack pointer restoration, branch after that is likely a tail call + // R3 and R4 are common function arguments because they are the first two + for (u32 back = addr - 4, it = 5; it && back >= base_addr; back -= 4) + { + const spu_opcode_t test_op{read_from_ptr>(ls, back - base_addr)}; + const auto type = g_spu_itype.decode(test_op.opcode); + + if (type & spu_itype::branch) + { + break; + } + + bool is_tail = false; + + if (type == spu_itype::AI && test_op.rt == 1u && test_op.ra == 1u) + { + if (test_op.si10 <= 0) + { + break; + } + + is_tail = true; + } + else if (!(type & spu_itype::zregmod)) + { + const u32 op_rt = type & spu_itype::_quadrop ? +test_op.rt4 : +test_op.rt; + + if (op_rt >= 80u && (type != spu_itype::LQD || test_op.ra != 1u)) + { + // Modifying non-volatile registers, not a call (and not context restoration) + break; + } + + //is_tail = op_rt == 3u || op_rt == 4u; + } + + if (!is_tail) + { + continue; + } + + if (std::count(addrs.begin(), addrs.end(), func)) + { + break; + } + + addrs.push_back(func); + break; + } + } + std::sort(addrs.begin(), addrs.end()); return addrs; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index c6fce77462..6daba511b4 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -4042,8 +4042,18 @@ bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_add return false; } + if (type == spu_itype::STOP && op.rb) + { + return false; + } + if (type & spu_itype::branch) { + if (type == spu_itype::BR && op.rt && op.rt != 127u) + { + return false; + } + const auto results = op_branch_targets(addr, spu_opcode_t{op}); if (results[0] == umax)