mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 10:42:36 +01:00
PPU Analyzer: Analyze whole segment 0 when patches are applied
Improve greedy instruction search.
This commit is contained in:
parent
68c70dd1b9
commit
82c5c4d285
@ -67,6 +67,8 @@ void pad_info::save(utils::serial& ar)
|
||||
USING_SERIALIZATION_VERSION(sys_io);
|
||||
|
||||
ar(max_connect, port_setting);
|
||||
|
||||
sys_io_serialize(ar);
|
||||
}
|
||||
|
||||
extern void send_sys_io_connect_event(u32 index, u32 state);
|
||||
|
@ -1390,9 +1390,9 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
block.second = _ptr.addr() - block.first;
|
||||
break;
|
||||
}
|
||||
else if (type == ppu_itype::TW || type == ppu_itype::TWI || type == ppu_itype::TD || type == ppu_itype::TDI)
|
||||
else if (type & ppu_itype::trap)
|
||||
{
|
||||
if (op.opcode != ppu_instructions::TRAP())
|
||||
if (op.bo != 31)
|
||||
{
|
||||
add_block(_ptr.addr());
|
||||
}
|
||||
@ -1618,6 +1618,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
end = 0;
|
||||
}
|
||||
|
||||
u32 per_instruction_bytes = 0;
|
||||
|
||||
for (auto&& [_, func] : as_rvalue(fmap))
|
||||
{
|
||||
if (func.attr & ppu_attr::no_size && entry)
|
||||
@ -1636,6 +1638,7 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
block.attr = ppu_attr::no_size;
|
||||
}
|
||||
|
||||
per_instruction_bytes += utils::sub_saturate<u32>(lim, func.addr);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1716,11 +1719,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
u32 exp = start;
|
||||
u32 lim = end;
|
||||
|
||||
// Start with full scan (disabled for PRX for now)
|
||||
if (entry)
|
||||
{
|
||||
block_queue.emplace_back(exp, lim);
|
||||
}
|
||||
// Start with full scan
|
||||
block_queue.emplace_back(exp, lim);
|
||||
|
||||
// Add entries from patches (on per-instruction basis)
|
||||
for (u32 addr : applied)
|
||||
@ -1754,14 +1754,17 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
{
|
||||
u32 i_pos = exp;
|
||||
|
||||
u32 block_edges[16];
|
||||
u32 edge_count = 0;
|
||||
|
||||
bool is_good = true;
|
||||
bool is_fallback = true;
|
||||
|
||||
for (; i_pos < lim; i_pos += 4)
|
||||
{
|
||||
const u32 opc = get_ref<u32>(i_pos);
|
||||
const ppu_opcode_t op{get_ref<u32>(i_pos)};
|
||||
|
||||
switch (auto type = s_ppu_itype.decode(opc))
|
||||
switch (auto type = s_ppu_itype.decode(op.opcode))
|
||||
{
|
||||
case ppu_itype::UNK:
|
||||
case ppu_itype::ECIWX:
|
||||
@ -1771,10 +1774,20 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
is_good = false;
|
||||
break;
|
||||
}
|
||||
case ppu_itype::TD:
|
||||
case ppu_itype::TDI:
|
||||
case ppu_itype::TW:
|
||||
case ppu_itype::TWI:
|
||||
{
|
||||
if (op.ra == 1u || op.ra == 13u || op.ra == 2u)
|
||||
{
|
||||
// Non-user registers, checking them against a constant value makes no sense
|
||||
is_good = false;
|
||||
break;
|
||||
}
|
||||
|
||||
[[fallthrough]];
|
||||
}
|
||||
case ppu_itype::TD:
|
||||
case ppu_itype::TW:
|
||||
case ppu_itype::B:
|
||||
case ppu_itype::BC:
|
||||
{
|
||||
@ -1785,14 +1798,14 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
|
||||
if (type == ppu_itype::B || type == ppu_itype::BC)
|
||||
{
|
||||
if (entry == 0 && ppu_opcode_t{opc}.aa)
|
||||
if (entry == 0 && op.aa)
|
||||
{
|
||||
// Ignore absolute branches in PIC (PRX)
|
||||
is_good = false;
|
||||
break;
|
||||
}
|
||||
|
||||
const u32 target = (opc & 2 ? 0 : i_pos) + (type == ppu_itype::B ? +ppu_opcode_t{opc}.bt24 : +ppu_opcode_t{opc}.bt14);
|
||||
const u32 target = (op.aa ? 0 : i_pos) + (type == ppu_itype::B ? +op.bt24 : +op.bt14);
|
||||
|
||||
if (target < segs[0].addr || target >= segs[0].addr + segs[0].size)
|
||||
{
|
||||
@ -1801,9 +1814,43 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
break;
|
||||
}
|
||||
|
||||
const ppu_opcode_t test_op{get_ref<u32>(target)};
|
||||
const auto type0 = s_ppu_itype.decode(test_op.opcode);
|
||||
|
||||
if (type0 == ppu_itype::UNK)
|
||||
{
|
||||
is_good = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// Test another instruction just in case (testing more is unlikely to improve results by much)
|
||||
if (!(type0 & ppu_itype::branch))
|
||||
{
|
||||
if (target + 4 >= segs[0].addr + segs[0].size)
|
||||
{
|
||||
is_good = false;
|
||||
break;
|
||||
}
|
||||
|
||||
const auto type1 = s_ppu_itype.decode(get_ref<u32>(target + 4));
|
||||
|
||||
if (type1 == ppu_itype::UNK)
|
||||
{
|
||||
is_good = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (u32 target0 = (test_op.aa ? 0 : target) + (type == ppu_itype::B ? +test_op.bt24 : +test_op.bt14);
|
||||
target0 < segs[0].addr || target0 >= segs[0].addr + segs[0].size)
|
||||
{
|
||||
// Sanity check
|
||||
is_good = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (target != i_pos && !fmap.contains(target))
|
||||
{
|
||||
if (block_set.count(target) == 0)
|
||||
if (block_set.count(target) == 0 && std::count(block_edges, block_edges + edge_count, target) == 0)
|
||||
{
|
||||
ppu_log.trace("Block target found: 0x%x (i_pos=0x%x)", target, i_pos);
|
||||
block_queue.emplace_back(target, 0);
|
||||
@ -1818,27 +1865,38 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
case ppu_itype::BCLR:
|
||||
case ppu_itype::SC:
|
||||
{
|
||||
if (type == ppu_itype::SC && opc != ppu_instructions::SC(0))
|
||||
if (type == ppu_itype::SC && op.opcode != ppu_instructions::SC(0))
|
||||
{
|
||||
// Strict garbage filter
|
||||
is_good = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (type == ppu_itype::BCCTR && opc & 0xe000)
|
||||
if (type == ppu_itype::BCCTR && op.opcode & 0xe000)
|
||||
{
|
||||
// Garbage filter
|
||||
is_good = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (type == ppu_itype::BCLR && opc & 0xe000)
|
||||
if (type == ppu_itype::BCLR && op.opcode & 0xe000)
|
||||
{
|
||||
// Garbage filter
|
||||
is_good = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if ((type & ppu_itype::branch && op.lk) || type & ppu_itype::trap || type == ppu_itype::BC)
|
||||
{
|
||||
// If the instructions that follow are valid: register all blocks
|
||||
// Otherwise, register none (all or nothing)
|
||||
if (edge_count < std::size(block_edges))
|
||||
{
|
||||
block_edges[edge_count++] = i_pos + 4;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Good block terminator found, add single block
|
||||
break;
|
||||
}
|
||||
@ -1869,17 +1927,23 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
|
||||
if (is_good)
|
||||
{
|
||||
auto& block = fmap[exp];
|
||||
|
||||
if (!block.addr)
|
||||
for (u32 it = 0, prev_addr = exp; it <= edge_count; it++)
|
||||
{
|
||||
block.addr = exp;
|
||||
block.size = i_pos - exp;
|
||||
ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size);
|
||||
const u32 block_end = it < edge_count ? block_edges[it] : i_pos;
|
||||
const u32 block_begin = std::exchange(prev_addr, block_end);
|
||||
|
||||
if (get_limit(exp) == end)
|
||||
auto& block = fmap[block_begin];
|
||||
|
||||
if (!block.addr)
|
||||
{
|
||||
block.attr += ppu_attr::no_size;
|
||||
block.addr = block_begin;
|
||||
block.size = block_end - block_begin;
|
||||
ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size);
|
||||
|
||||
if (get_limit(block_begin) == end)
|
||||
{
|
||||
block.attr += ppu_attr::no_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1902,9 +1966,8 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
// Convert map to vector (destructive)
|
||||
for (auto&& [_, block] : as_rvalue(std::move(fmap)))
|
||||
{
|
||||
if (block.attr & ppu_attr::no_size && block.size > 4 && entry)
|
||||
if (block.attr & ppu_attr::no_size && block.size > 4)
|
||||
{
|
||||
// Disabled for PRX for now
|
||||
ppu_log.warning("Block 0x%x will be compiled on per-instruction basis (size=0x%x)", block.addr, block.size);
|
||||
|
||||
for (u32 addr = block.addr; addr < block.addr + block.size; addr += 4)
|
||||
@ -1916,12 +1979,19 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
|
||||
i.attr = ppu_attr::no_size;
|
||||
}
|
||||
|
||||
per_instruction_bytes += block.size;
|
||||
continue;
|
||||
}
|
||||
|
||||
funcs.emplace_back(std::move(block));
|
||||
}
|
||||
|
||||
if (per_instruction_bytes)
|
||||
{
|
||||
const bool error = per_instruction_bytes >= 200 && per_instruction_bytes / 4 >= utils::aligned_div<u32>(funcs.size(), 128);
|
||||
(error ? ppu_log.error : ppu_log.notice)("%d instructions will be compiled on per-instruction basis in total", per_instruction_bytes / 4);
|
||||
}
|
||||
|
||||
ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size());
|
||||
return true;
|
||||
}
|
||||
|
@ -263,6 +263,7 @@ struct ppu_pattern_matrix
|
||||
struct ppu_itype
|
||||
{
|
||||
static constexpr struct branch_tag{} branch{}; // Branch Instructions
|
||||
static constexpr struct trap_tag{} trap{}; // Trap Instructions
|
||||
|
||||
enum type
|
||||
{
|
||||
@ -425,8 +426,6 @@ struct ppu_itype
|
||||
VUPKLSB,
|
||||
VUPKLSH,
|
||||
VXOR,
|
||||
TDI,
|
||||
TWI,
|
||||
MULLI,
|
||||
SUBFIC,
|
||||
CMPLI,
|
||||
@ -461,7 +460,6 @@ struct ppu_itype
|
||||
RLDCL,
|
||||
RLDCR,
|
||||
CMP,
|
||||
TW,
|
||||
LVSL,
|
||||
LVEBX,
|
||||
SUBFC,
|
||||
@ -488,7 +486,6 @@ struct ppu_itype
|
||||
LWZUX,
|
||||
CNTLZD,
|
||||
ANDC,
|
||||
TD,
|
||||
LVEWX,
|
||||
MULHD,
|
||||
MULHW,
|
||||
@ -784,6 +781,11 @@ struct ppu_itype
|
||||
BC,
|
||||
BCLR,
|
||||
BCCTR, // branch_tag last
|
||||
|
||||
TD, // trap_tag first
|
||||
TW,
|
||||
TDI,
|
||||
TWI, // trap_tag last
|
||||
};
|
||||
|
||||
// Enable address-of operator for ppu_decoder<>
|
||||
@ -796,6 +798,11 @@ struct ppu_itype
|
||||
{
|
||||
return value >= B && value <= BCCTR;
|
||||
}
|
||||
|
||||
// Tag test: `value & ppu_itype::trap` is true when `value` lies in the enumerator
// range TD..TWI (inclusive), i.e. the entries marked "trap_tag first"/"trap_tag last".
friend constexpr bool operator &(type value, trap_tag)
|
||||
{
|
||||
return value >= TD && value <= TWI;
|
||||
}
|
||||
};
|
||||
|
||||
struct ppu_iname
|
||||
|
@ -1356,12 +1356,6 @@ std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_lo
|
||||
// Initialize executable code if necessary
|
||||
if (prog.p_flags & 0x1 && !virtual_load)
|
||||
{
|
||||
if (ar)
|
||||
{
|
||||
// Disable analysis optimization for savestates (it's not compatible with savestate with patches applied)
|
||||
end = std::max(end, utils::align<u32>(addr + mem_size, 0x10000));
|
||||
}
|
||||
|
||||
ppu_register_range(addr, mem_size);
|
||||
}
|
||||
}
|
||||
@ -1651,6 +1645,36 @@ std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_lo
|
||||
}
|
||||
}
|
||||
|
||||
// Disabled for PRX for now (problematic and does not seem to have any benefit)
|
||||
end = 0;
|
||||
|
||||
if (!applied.empty() || ar)
|
||||
{
|
||||
// Compare loaded memory past the end of the executable code sections against the original file contents
|
||||
if (end >= prx->segs[0].addr && end < prx->segs[0].addr + prx->segs[0].size)
|
||||
{
|
||||
for (const auto& prog : elf.progs)
|
||||
{
|
||||
// Find the first segment
|
||||
if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz)
|
||||
{
|
||||
std::basic_string_view<uchar> elf_memory{prog.bin.data(), prog.bin.size()};
|
||||
elf_memory.remove_prefix(end - prx->segs[0].addr);
|
||||
|
||||
if (elf_memory != std::basic_string_view<uchar>{&prx->get_ref<uchar>(end), elf_memory.size()})
|
||||
{
|
||||
// There are changes, disable analysis optimization
|
||||
ppu_loader.notice("Disabling analysis optimization due to memory changes from original file");
|
||||
|
||||
end = 0;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Embedded SPU elf patching
|
||||
for (const auto& seg : prx->segs)
|
||||
{
|
||||
@ -1910,12 +1934,6 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
|
||||
// Initialize executable code if necessary
|
||||
if (prog.p_flags & 0x1 && !virtual_load)
|
||||
{
|
||||
if (already_loaded && ar)
|
||||
{
|
||||
// Disable analysis optimization for savestates (it's not compatible with savestate with patches applied)
|
||||
end = std::max(end, utils::align<u32>(addr + size, 0x10000));
|
||||
}
|
||||
|
||||
ppu_register_range(addr, size);
|
||||
}
|
||||
}
|
||||
@ -1969,6 +1987,33 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
|
||||
applied += g_fxo->get<patch_engine>().apply(Emu.GetTitleID() + '-' + hash, [&](u32 addr, u32 size) { return _main.get_ptr<u8>(addr, size); });
|
||||
}
|
||||
|
||||
if (!applied.empty() || ar)
|
||||
{
|
||||
// Compare loaded memory past the end of the executable code sections against the original file contents
|
||||
if (end >= _main.segs[0].addr && end < _main.segs[0].addr + _main.segs[0].size)
|
||||
{
|
||||
for (const auto& prog : elf.progs)
|
||||
{
|
||||
// Find the first segment
|
||||
if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz)
|
||||
{
|
||||
std::basic_string_view<uchar> elf_memory{prog.bin.data(), prog.bin.size()};
|
||||
elf_memory.remove_prefix(end - _main.segs[0].addr);
|
||||
|
||||
if (elf_memory != std::basic_string_view<uchar>{&_main.get_ref<u8>(end), elf_memory.size()})
|
||||
{
|
||||
// There are changes, disable analysis optimization
|
||||
ppu_loader.notice("Disabling analysis optimization due to memory changes from original file");
|
||||
|
||||
end = 0;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (applied.empty())
|
||||
{
|
||||
ppu_loader.warning("PPU executable hash: %s", hash);
|
||||
@ -2574,12 +2619,6 @@ std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_ex
|
||||
// Initialize executable code if necessary
|
||||
if (prog.p_flags & 0x1 && !virtual_load)
|
||||
{
|
||||
if (ar)
|
||||
{
|
||||
// Disable analysis optimization for savestates (it's not compatible with savestate with patches applied)
|
||||
end = std::max(end, utils::align<u32>(addr + size, 0x10000));
|
||||
}
|
||||
|
||||
ppu_register_range(addr, size);
|
||||
}
|
||||
}
|
||||
@ -2631,6 +2670,33 @@ std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_ex
|
||||
applied += g_fxo->get<patch_engine>().apply(Emu.GetTitleID() + '-' + hash, [ovlm](u32 addr, u32 size) { return ovlm->get_ptr<u8>(addr, size); });
|
||||
}
|
||||
|
||||
if (!applied.empty() || ar)
|
||||
{
|
||||
// Compare loaded memory past the end of the executable code sections against the original file contents
|
||||
if (end >= ovlm->segs[0].addr && end < ovlm->segs[0].addr + ovlm->segs[0].size)
|
||||
{
|
||||
for (const auto& prog : elf.progs)
|
||||
{
|
||||
// Find the first segment
|
||||
if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz)
|
||||
{
|
||||
std::basic_string_view<uchar> elf_memory{prog.bin.data(), prog.bin.size()};
|
||||
elf_memory.remove_prefix(end - ovlm->segs[0].addr);
|
||||
|
||||
if (elf_memory != std::basic_string_view<uchar>{&ovlm->get_ref<u8>(end), elf_memory.size()})
|
||||
{
|
||||
// There are changes, disable analysis optimization
|
||||
ppu_loader.notice("Disabling analysis optimization due to memory changes from original file");
|
||||
|
||||
end = 0;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Embedded SPU elf patching
|
||||
for (const auto& seg : ovlm->segs)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user