mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 10:42:36 +01:00
SPU LLVM: Initial precompilation of tail-calls
This commit is contained in:
parent
a9810ccb72
commit
a626ccfcad
@ -14,12 +14,13 @@ struct spu_itype
|
||||
static constexpr struct floating_tag{} floating{}; // Floating-Point Instructions
|
||||
static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions
|
||||
static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values
|
||||
static constexpr struct zregmod_tag{} zregmod{}; // Instructions not modifying any GPR
|
||||
|
||||
enum type : unsigned char
|
||||
{
|
||||
UNK = 0,
|
||||
|
||||
HEQ,
|
||||
HEQ, // zregmod_tag first
|
||||
HEQI,
|
||||
HGT,
|
||||
HGTI,
|
||||
@ -36,11 +37,21 @@ struct spu_itype
|
||||
NOP,
|
||||
SYNC,
|
||||
DSYNC,
|
||||
MFSPR,
|
||||
MTSPR,
|
||||
WRCH,
|
||||
|
||||
STQD, // memory_tag first
|
||||
STQX,
|
||||
STQA,
|
||||
STQR, // zregmod_tag last
|
||||
LQD,
|
||||
LQX,
|
||||
LQA,
|
||||
LQR, // memory_tag last
|
||||
|
||||
MFSPR,
|
||||
RDCH,
|
||||
RCHCNT,
|
||||
WRCH,
|
||||
|
||||
BR, // branch_tag first
|
||||
BRA,
|
||||
@ -59,15 +70,6 @@ struct spu_itype
|
||||
BIHZ,
|
||||
BIHNZ, // branch_tag last
|
||||
|
||||
LQD, // memory_tag first
|
||||
LQX,
|
||||
LQA,
|
||||
LQR,
|
||||
STQD,
|
||||
STQX,
|
||||
STQA,
|
||||
STQR, // memory_tag last
|
||||
|
||||
ILH, // constant_tag_first
|
||||
ILHU,
|
||||
IL,
|
||||
@ -267,7 +269,7 @@ struct spu_itype
|
||||
// Test for memory instruction
|
||||
friend constexpr bool operator &(type value, memory_tag)
|
||||
{
|
||||
return value >= LQD && value <= STQR;
|
||||
return value >= STQD && value <= LQR;
|
||||
}
|
||||
|
||||
// Test for compare instruction
|
||||
@ -293,6 +295,12 @@ struct spu_itype
|
||||
{
|
||||
return value >= ILH && value <= FSMBI;
|
||||
}
|
||||
|
||||
// Test for non register-modifying instruction
|
||||
friend constexpr bool operator &(type value, zregmod_tag)
|
||||
{
|
||||
return value >= HEQ && value <= STQR;
|
||||
}
|
||||
};
|
||||
|
||||
struct spu_iflag
|
||||
|
@ -851,6 +851,13 @@ public:
|
||||
}
|
||||
void BR(spu_opcode_t op)
|
||||
{
|
||||
if (op.rt && op.rt != 127u)
|
||||
{
|
||||
// Valid but makes no sense
|
||||
DisAsm("br??", DisAsmBranchTarget(op.i16));
|
||||
return;
|
||||
}
|
||||
|
||||
DisAsm("br", DisAsmBranchTarget(op.i16));
|
||||
}
|
||||
void FSMBI(spu_opcode_t op)
|
||||
|
@ -2091,21 +2091,25 @@ void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* /*rip*/
|
||||
std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/)
|
||||
{
|
||||
std::vector<u32> calls;
|
||||
std::vector<u32> branches;
|
||||
|
||||
calls.reserve(100);
|
||||
|
||||
// Discover functions
|
||||
// Use the most simple method: search for instructions that calls them
|
||||
// And then filter invalid cases (does not detect tail calls)
|
||||
// And then filter invalid cases
|
||||
// TODO: Does not detect jumptables or fixed-addr indirect calls
|
||||
const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax);
|
||||
|
||||
for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ls.size(), 0x3FFF0); i += 0x10)
|
||||
{
|
||||
// Search for BRSL LR and BRASL LR
|
||||
// Search for BRSL LR and BRASL LR or BR
|
||||
// TODO: BISL
|
||||
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
|
||||
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
|
||||
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
|
||||
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
|
||||
const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
|
||||
const v128 result = eq_brsl | eq_brasl;
|
||||
|
||||
if (!gv_testz(result))
|
||||
@ -2118,6 +2122,17 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!gv_testz(eq_br))
|
||||
{
|
||||
for (u32 j = 0; j < 4; j++)
|
||||
{
|
||||
if (eq_br.u32r[j])
|
||||
{
|
||||
branches.push_back(i + j * 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
calls.erase(std::remove_if(calls.begin(), calls.end(), [&](u32 caller)
|
||||
@ -2126,6 +2141,12 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
|
||||
return !is_exec_code(caller, ls, base_addr) || !is_exec_code(caller + 4, ls, base_addr);
|
||||
}), calls.end());
|
||||
|
||||
branches.erase(std::remove_if(branches.begin(), branches.end(), [&](u32 caller)
|
||||
{
|
||||
// Check the validity of the callee code
|
||||
return !is_exec_code(caller, ls, base_addr);
|
||||
}), branches.end());
|
||||
|
||||
std::vector<u32> addrs;
|
||||
|
||||
for (u32 addr : calls)
|
||||
@ -2142,6 +2163,69 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
|
||||
addrs.push_back(func);
|
||||
}
|
||||
|
||||
for (u32 addr : branches)
|
||||
{
|
||||
const spu_opcode_t op{read_from_ptr<be_t<u32>>(ls, addr - base_addr)};
|
||||
|
||||
const u32 func = op_branch_targets(addr, op)[0];
|
||||
|
||||
if (func == umax || addr + 4 == func || func == addr || !addr)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Search for AI R1, +x or OR R3/4, Rx, 0
|
||||
// Reasoning: AI R1, +x means stack pointer restoration, branch after that is likely a tail call
|
||||
// R3 and R4 are common function arguments because they are the first two
|
||||
for (u32 back = addr - 4, it = 5; it && back >= base_addr; back -= 4)
|
||||
{
|
||||
const spu_opcode_t test_op{read_from_ptr<be_t<u32>>(ls, back - base_addr)};
|
||||
const auto type = g_spu_itype.decode(test_op.opcode);
|
||||
|
||||
if (type & spu_itype::branch)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
bool is_tail = false;
|
||||
|
||||
if (type == spu_itype::AI && test_op.rt == 1u && test_op.ra == 1u)
|
||||
{
|
||||
if (test_op.si10 <= 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
is_tail = true;
|
||||
}
|
||||
else if (!(type & spu_itype::zregmod))
|
||||
{
|
||||
const u32 op_rt = type & spu_itype::_quadrop ? +test_op.rt4 : +test_op.rt;
|
||||
|
||||
if (op_rt >= 80u && (type != spu_itype::LQD || test_op.ra != 1u))
|
||||
{
|
||||
// Modifying non-volatile registers, not a call (and not context restoration)
|
||||
break;
|
||||
}
|
||||
|
||||
//is_tail = op_rt == 3u || op_rt == 4u;
|
||||
}
|
||||
|
||||
if (!is_tail)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (std::count(addrs.begin(), addrs.end(), func))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
addrs.push_back(func);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(addrs.begin(), addrs.end());
|
||||
|
||||
return addrs;
|
||||
|
@ -4042,8 +4042,18 @@ bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_add
|
||||
return false;
|
||||
}
|
||||
|
||||
if (type == spu_itype::STOP && op.rb)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (type & spu_itype::branch)
|
||||
{
|
||||
if (type == spu_itype::BR && op.rt && op.rt != 127u)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto results = op_branch_targets(addr, spu_opcode_t{op});
|
||||
|
||||
if (results[0] == umax)
|
||||
|
Loading…
Reference in New Issue
Block a user