
SPU Analyzer: Fix dereferencing freed reference

Author: Elad Ashkenazi, 2024-09-13 16:05:38 +03:00 (committed by Elad)
parent 743f9a9f51
commit 8131f40269
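The bug class fixed here: `atomic16` and `rchcnt_loop` were references bound to an element of the `reg_state_it` container, which the analyser can reallocate while those names are still in use, leaving them dangling. The commit rebinds them as pointers and reads the backed-up copies (`atomic16_info`, `rchcnt_loop_info`) where the originals may already be stale. A minimal standalone sketch of the hazard, assuming a plain std::vector purely for illustration (not rpcs3 code):

#include <vector>

// Illustrative only: a reference bound to a std::vector element dangles
// once the vector reallocates, which is the class of bug this commit fixes.
struct reg_state { int atomic16 = 0; };

int main()
{
    std::vector<reg_state> states(1);

    reg_state& ref = states[0]; // reference bound to the element's current storage
    states.resize(4096);        // may reallocate: 'ref' now dangles
    // ref.atomic16 = 1;        // undefined behaviour if reallocation happened

    // Pattern used by the fix: hold a pointer and re-derive it from the
    // container after any operation that can invalidate element addresses.
    reg_state* ptr = &states[0]; // re-read after the mutation
    ptr->atomic16 = 1;           // OK: points at the element's current address
    return 0;
}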


@@ -5028,8 +5028,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 }
 auto& vregs = is_form_block ? infos[bpc]->local_state : *true_state_walkby;
-auto& atomic16 = is_pattern_match ? ::at32(reg_state_it, wi).atomic16 : dummy16;
-auto& rchcnt_loop = is_pattern_match ? ::at32(reg_state_it, wi).rchcnt_loop : dummy_loop;
+const auto atomic16 = is_pattern_match ? &::at32(reg_state_it, wi).atomic16 : &dummy16;
+const auto rchcnt_loop = is_pattern_match ? &::at32(reg_state_it, wi).rchcnt_loop : &dummy_loop;
 const u32 pos = wa;
@@ -5153,8 +5153,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 const auto break_all_patterns = [&](u32 cause)
 {
-break_putllc16(cause, atomic16.discard());
-break_channel_pattern(cause, rchcnt_loop.discard());
+break_putllc16(cause, atomic16->discard());
+break_channel_pattern(cause, rchcnt_loop->discard());
 };
 const auto calculate_absolute_ls_difference = [](u32 addr1, u32 addr2)
@@ -5429,6 +5429,18 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 }
 }
+if (wi != stackframe_it || may_return || !insert_entry)
+{
+// Possible function end
+if (rchcnt_loop->active)
+{
+// Does not post-dominate channel reads
+auto& pair = rchcnt_loop_all[rchcnt_loop->read_pc];
+pair.failed = true;
+pair.active = false;
+}
+}
 // Backup analyser information
 const auto atomic16_info = reg_state_it[stackframe_it].atomic16;
 const auto rchcnt_loop_info = reg_state_it[stackframe_it].rchcnt_loop;
@@ -5450,18 +5462,6 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 }
 }
-if (wi != stackframe_it || may_return || !insert_entry)
-{
-// Possible function end
-if (rchcnt_loop.active)
-{
-// Does not post-dominate channel reads
-auto& pair = rchcnt_loop_all[rchcnt_loop.read_pc];
-pair.failed = true;
-pair.active = false;
-}
-}
 if (insert_entry)
 {
 const usz target_size = get_block_targets(stackframe_pc).size();
@@ -5484,10 +5484,10 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 break_putllc16(39, atomic16_info);
 }
-if (rchcnt_loop.active)
+if (rchcnt_loop_info.active)
 {
 // Does not post-dominate channel read
-auto& pair = rchcnt_loop_all[rchcnt_loop.read_pc];
+auto& pair = rchcnt_loop_all[rchcnt_loop_info.read_pc];
 pair.failed = true;
 pair.active = false;
 }
@@ -5648,20 +5648,20 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 }
 }
-if (atomic16.active)
+if (atomic16->active)
 {
-for (auto state : {&atomic16.lsa, &atomic16.ls, &atomic16.ls_offs})
+for (auto state : {&atomic16->lsa, &atomic16->ls, &atomic16->ls_offs})
 {
 state->invalidate_if_created(pos);
 }
 }
-if (rchcnt_loop.active)
+if (rchcnt_loop->active)
 {
-if (rchcnt_loop.origins.find_first_of(pos) != umax)
+if (rchcnt_loop->origins.find_first_of(pos) != umax)
 {
-rchcnt_loop.failed = true;
-rchcnt_loop.active = false;
+rchcnt_loop->failed = true;
+rchcnt_loop->active = false;
 }
 }
@@ -5719,7 +5719,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 dis_asm.last_opcode.pop_back();
 }
-spu_log.always()("[SPU=0%x, it=%d] %s%s [%d]", pos, reg_state_it[wi].iterator_id, dis_asm.last_opcode, consts, atomic16.active);
+spu_log.always()("[SPU=0%x, it=%d] %s%s [%d]", pos, reg_state_it[wi].iterator_id, dis_asm.last_opcode, consts, atomic16->active);
 }
 // Analyse instruction
@@ -5798,22 +5798,22 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 const u32 next_pc = spu_branch_target(pos, 1);
 const u32 target = spu_branch_target(pos, op.i16);
-if (rchcnt_loop.active)
+if (rchcnt_loop->active)
 {
 const reg_state_t& rt = vregs[op.rt];
-if (rt.is_instruction && (rchcnt_loop.ch_state.origin == rt.origin || rchcnt_loop.ch_product.origin == rt.origin))
+if (rt.is_instruction && (rchcnt_loop->ch_state.origin == rt.origin || rchcnt_loop->ch_product.origin == rt.origin))
 {
-if (rchcnt_loop.conditioned)
+if (rchcnt_loop->conditioned)
 {
 // Let's not make it complicated, have a single branch determining the condition
-break_channel_pattern(54, rchcnt_loop.discard());
+break_channel_pattern(54, rchcnt_loop->discard());
 break;
 }
-rchcnt_loop.conditioned = true;
-rchcnt_loop.branch_pc = pos;
-rchcnt_loop.branch_target = rchcnt_loop.product_test_negate != (type == spu_itype::BRZ) ? target : next_pc;
+rchcnt_loop->conditioned = true;
+rchcnt_loop->branch_pc = pos;
+rchcnt_loop->branch_target = rchcnt_loop->product_test_negate != (type == spu_itype::BRZ) ? target : next_pc;
 break;
 }
 }
@@ -5845,7 +5845,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 case spu_itype::WRCH:
 {
-break_channel_pattern(56, rchcnt_loop.discard());
+break_channel_pattern(56, rchcnt_loop->discard());
 switch (op.ra)
 {
@@ -5861,7 +5861,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 if (is_pattern_match)
 {
-atomic16.lsa_last_pc = pos;
+atomic16->lsa_last_pc = pos;
 }
 break;
@@ -5897,27 +5897,27 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 auto lsa = get_reg(s_reg_mfc_lsa);
 vregs[s_reg_mfc_lsa] = old_lsa;
-const u32 lsa_pc = atomic16.lsa_last_pc == SPU_LS_SIZE ? bpc : atomic16.lsa_last_pc;
-if (atomic16.active)
+const u32 lsa_pc = atomic16->lsa_last_pc == SPU_LS_SIZE ? bpc : atomic16->lsa_last_pc;
+if (atomic16->active)
 {
-if (atomic16.lsa_pc != lsa_pc || atomic16.get_pc != pos || atomic16.lsa != lsa)
+if (atomic16->lsa_pc != lsa_pc || atomic16->get_pc != pos || atomic16->lsa != lsa)
 {
-break_putllc16(30, atomic16.discard());
+break_putllc16(30, atomic16->discard());
 }
 }
 // If LSA write has not happened, use block start
-atomic16.lsa_pc = lsa_pc;
-atomic16.get_pc = pos;
-atomic16.active = true;
-atomic16.lsa = lsa;
+atomic16->lsa_pc = lsa_pc;
+atomic16->get_pc = pos;
+atomic16->active = true;
+atomic16->lsa = lsa;
 if (likely_putllc_loop)
 {
 // Register loop entry
-if (getllar_starts.emplace(atomic16.lsa_pc, false).second)
+if (getllar_starts.emplace(atomic16->lsa_pc, false).second)
 {
 g_fxo->get<putllc16_statistics_t>().all++;
 spu_log.notice("[0x%05x] GETLLAR pattern entry point", pos);
@@ -5928,17 +5928,17 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 }
 case MFC_PUTLLC_CMD:
 {
-if (atomic16.active)
+if (atomic16->active)
 {
 const auto _lsa = get_reg(s_reg_mfc_lsa);
 // Search the value of LS address store/load in latest register file
-if (atomic16.ls_access && atomic16.ls_write && !atomic16.ls_pc_rel && !atomic16.ls.is_const())
+if (atomic16->ls_access && atomic16->ls_write && !atomic16->ls_pc_rel && !atomic16->ls.is_const())
 {
 usz reg_it = umax;
 u32 regs[2]{s_reg_max, s_reg_max};
-for (auto val : {&atomic16.ls, &atomic16.ls_offs})
+for (auto val : {&atomic16->ls, &atomic16->ls_offs})
 {
 reg_it++;
@@ -5968,68 +5968,68 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 if (regs[0] == s_reg_max || regs[1] == s_reg_max)
 {
-break_putllc16(3, atomic16.discard());
+break_putllc16(3, atomic16->discard());
 break;
 }
-atomic16.reg = regs[0];
-if (!atomic16.ls_offs.is_const())
+atomic16->reg = regs[0];
+if (!atomic16->ls_offs.is_const())
 {
-atomic16.reg2 = regs[1];
+atomic16->reg2 = regs[1];
 }
 }
-if (atomic16.ls_access && atomic16.ls_write && !atomic16.lsa.compare_with_mask_indifference(_lsa, SPU_LS_MASK_128))
+if (atomic16->ls_access && atomic16->ls_write && !atomic16->lsa.compare_with_mask_indifference(_lsa, SPU_LS_MASK_128))
 {
 // LSA latest value mismatches with the one written with GETLLAR
-if (atomic16.lsa.flag != _lsa.flag)
+if (atomic16->lsa.flag != _lsa.flag)
 {
-break_putllc16(1, atomic16.discard());
+break_putllc16(1, atomic16->discard());
 }
 else
 {
-break_putllc16(2, atomic16.discard());
+break_putllc16(2, atomic16->discard());
 }
 break;
 }
-if (atomic16.ls_access && atomic16.ls_write)
+if (atomic16->ls_access && atomic16->ls_write)
 {
-atomic16.select_16_or_0_at_runtime = false;
+atomic16->select_16_or_0_at_runtime = false;
 bool ok = false;
-if (atomic16.ls_pc_rel || !atomic16.ls_offs.is_const())
+if (atomic16->ls_pc_rel || !atomic16->ls_offs.is_const())
 {
 //
 }
-else if (atomic16.lsa.is_const())
+else if (atomic16->lsa.is_const())
 {
-if (atomic16.ls.is_const())
+if (atomic16->ls.is_const())
 {
-if (atomic16.ls_offs.value != 0)
+if (atomic16->ls_offs.value != 0)
 {
 // Rebase constant so we can get rid of ls_offs
-atomic16.ls.value = spu_ls_target(atomic16.ls_offs.value + atomic16.ls.value);
-atomic16.ls_offs = reg_state_t::from_value(0);
+atomic16->ls.value = spu_ls_target(atomic16->ls_offs.value + atomic16->ls.value);
+atomic16->ls_offs = reg_state_t::from_value(0);
 }
-if (atomic16.ls.compare_with_mask_indifference(atomic16.lsa, SPU_LS_MASK_128))
+if (atomic16->ls.compare_with_mask_indifference(atomic16->lsa, SPU_LS_MASK_128))
 {
 ok = true;
 }
 }
-else if (atomic16.ls_offs.compare_with_mask_indifference(atomic16.lsa, SPU_LS_MASK_128) && atomic16.ls.is_less_than(128 - (atomic16.ls_offs.value & 127)))
+else if (atomic16->ls_offs.compare_with_mask_indifference(atomic16->lsa, SPU_LS_MASK_128) && atomic16->ls.is_less_than(128 - (atomic16->ls_offs.value & 127)))
 {
 // Relative memory access with offset less than 128 bytes
 // Common around SPU utilities which have less strict restrictions about memory alignment
 ok = true;
 }
 }
-else if (atomic16.lsa.compare_with_mask_indifference(atomic16.ls, SPU_LS_MASK_128) && atomic16.ls_offs == 0)
+else if (atomic16->lsa.compare_with_mask_indifference(atomic16->ls, SPU_LS_MASK_128) && atomic16->ls_offs == 0)
 {
 // Unknown value with known offset of less than 128 bytes
 ok = true;
@@ -6039,37 +6039,37 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 {
 // This is quite common.. let's try to select between putllc16 and putllc0 at runtime!
 // break_putllc16(100);
-// atomic16.discard();
+// atomic16->discard();
 // break;
-atomic16.select_16_or_0_at_runtime = true;
+atomic16->select_16_or_0_at_runtime = true;
 }
 }
-if (!atomic16.get_rdatomic)
+if (!atomic16->get_rdatomic)
 {
 // MFC_RdAtomicStat must have been read, otherwise GETLLAR may not be executed (according to HW tests)
-break_putllc16(21, atomic16.discard());
+break_putllc16(21, atomic16->discard());
 }
-atomic16.put_pc = pos;
-atomic16.put_active = true;
+atomic16->put_pc = pos;
+atomic16->put_active = true;
 }
 break;
 }
 default:
 {
-break_putllc16(4, atomic16.discard());
+break_putllc16(4, atomic16->discard());
 break;
 }
 }
 }
 else
 {
-break_putllc16(5, atomic16.discard());
+break_putllc16(5, atomic16->discard());
 }
-if (!atomic16.active)
+if (!atomic16->active)
 {
 // Propagate failure
 for (auto& atm : atomic16_all)
@@ -6113,7 +6113,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 if (it != rchcnt_loop_all.end())
 {
-if (rchcnt_loop.failed || !rchcnt_loop.conditioned || rchcnt_loop.read_pc != pos)
+if (rchcnt_loop->failed || !rchcnt_loop->conditioned || rchcnt_loop->read_pc != pos)
 {
 // Propagate failure
 it->second.failed = true;
@@ -6125,14 +6125,14 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 it->second.active = false;
 }
-rchcnt_loop.active = false;
+rchcnt_loop->active = false;
 }
-if (rchcnt_loop.active)
+if (rchcnt_loop->active)
 {
-if (rchcnt_loop.read_pc != pos)
+if (rchcnt_loop->read_pc != pos)
 {
-break_channel_pattern(53, rchcnt_loop.discard());
+break_channel_pattern(53, rchcnt_loop->discard());
 }
 }
@@ -6145,13 +6145,13 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 break;
 }
-if (atomic16.active)
+if (atomic16->active)
 {
-if (atomic16.put_active)
+if (atomic16->put_active)
 {
-if (getllar_starts.contains(atomic16.lsa_pc) && getllar_starts[atomic16.lsa_pc])
+if (getllar_starts.contains(atomic16->lsa_pc) && getllar_starts[atomic16->lsa_pc])
 {
-break_putllc16(24, atomic16.discard());
+break_putllc16(24, atomic16->discard());
 break;
 }
@@ -6178,53 +6178,53 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 return a.second == b.second;
 };
-if (existing.lsa_pc != atomic16.lsa_pc || existing.put_pc != atomic16.put_pc || !existing.lsa.compare_with_mask_indifference(atomic16.lsa, SPU_LS_MASK_128))
+if (existing.lsa_pc != atomic16->lsa_pc || existing.put_pc != atomic16->put_pc || !existing.lsa.compare_with_mask_indifference(atomic16->lsa, SPU_LS_MASK_128))
 {
 // Register twice
-break_putllc16(22, atomic16.discard());
+break_putllc16(22, atomic16->discard());
 break_putllc16(22, existing.discard());
 }
-if (existing.active && existing.ls_access && atomic16.ls_access && (!compare_tag_and_reg({&existing.ls, existing.reg}, {&atomic16.ls, atomic16.reg}) || existing.ls_offs != atomic16.ls_offs || existing.reg2 != atomic16.reg2))
+if (existing.active && existing.ls_access && atomic16->ls_access && (!compare_tag_and_reg({&existing.ls, existing.reg}, {&atomic16->ls, atomic16->reg}) || existing.ls_offs != atomic16->ls_offs || existing.reg2 != atomic16->reg2))
 {
 // Conflicting loads with stores in more than one code path
-break_putllc16(27, atomic16.set_invalid_ls(existing.ls_write || atomic16.ls_write));
-if (!atomic16.active)
+break_putllc16(27, atomic16->set_invalid_ls(existing.ls_write || atomic16->ls_write));
+if (!atomic16->active)
 {
 existing.active = false;
 }
 }
-if (existing.active && (existing.ls_write || atomic16.ls_write) && (existing.ls_invalid || atomic16.ls_invalid))
+if (existing.active && (existing.ls_write || atomic16->ls_write) && (existing.ls_invalid || atomic16->ls_invalid))
 {
 // Conflicting loads with stores in more than one code path
-break_putllc16(33, atomic16.discard());
+break_putllc16(33, atomic16->discard());
 existing.active = false;
 existing.ls_invalid = true;
 }
-if (existing.active && !existing.ls_access && atomic16.ls_access)
+if (existing.active && !existing.ls_access && atomic16->ls_access)
 {
 // Propagate LS access
-existing.ls = atomic16.ls;
-existing.reg = atomic16.reg;
-existing.reg2 = atomic16.reg2;
-existing.ls_offs = atomic16.ls_offs;
+existing.ls = atomic16->ls;
+existing.reg = atomic16->reg;
+existing.reg2 = atomic16->reg2;
+existing.ls_offs = atomic16->ls_offs;
 }
-existing.ls_write |= atomic16.ls_write;
-existing.ls_invalid |= atomic16.ls_invalid;
-existing.ls_access |= atomic16.ls_access;
-existing.mem_count = std::max<u32>(existing.mem_count, atomic16.mem_count);
-existing.select_16_or_0_at_runtime |= atomic16.select_16_or_0_at_runtime;
+existing.ls_write |= atomic16->ls_write;
+existing.ls_invalid |= atomic16->ls_invalid;
+existing.ls_access |= atomic16->ls_access;
+existing.mem_count = std::max<u32>(existing.mem_count, atomic16->mem_count);
+existing.select_16_or_0_at_runtime |= atomic16->select_16_or_0_at_runtime;
 }
-atomic16.discard();
+atomic16->discard();
 }
-else if (!atomic16.get_rdatomic)
+else if (!atomic16->get_rdatomic)
 {
-atomic16.get_rdatomic = true;
+atomic16->get_rdatomic = true;
 // Go above and beyond and also set the constant for it
 set_const_value(op.rt, MFC_GETLLAR_SUCCESS);
@@ -6244,30 +6244,30 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 {
 if (is_read)
 {
-break_putllc16(28, atomic16.discard());
+break_putllc16(28, atomic16->discard());
 }
 else
 {
-break_putllc16(29, atomic16.discard());
+break_putllc16(29, atomic16->discard());
 }
 if (!is_pattern_match || is_read)
 {
 //
 }
-else if (!rchcnt_loop.active && it == rchcnt_loop_all.end())
+else if (!rchcnt_loop->active && it == rchcnt_loop_all.end())
 {
-rchcnt_loop.read_pc = pos;
-rchcnt_loop.channel = op.ra;
-rchcnt_loop.active = true;
+rchcnt_loop->read_pc = pos;
+rchcnt_loop->channel = op.ra;
+rchcnt_loop->active = true;
 unconst(op.rt, pos);
-rchcnt_loop.ch_state = vregs[op.rt];
+rchcnt_loop->ch_state = vregs[op.rt];
 invalidate = false;
 }
-else if (rchcnt_loop.active)
+else if (rchcnt_loop->active)
 {
 // Success
-rchcnt_loop.active = false;
+rchcnt_loop->active = false;
 if (it == rchcnt_loop_all.end())
 {
@@ -6295,20 +6295,20 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 {
 const bool is_store = type == spu_itype::STQR;
-if (atomic16.active)
+if (atomic16->active)
 {
-atomic16.mem_count++;
+atomic16->mem_count++;
 // Do not clear lower 16 bytes addressing because the program can move on 4-byte basis
 const u32 offs = spu_branch_target(pos - result.lower_bound, op.si16);
-if (atomic16.lsa.is_const() && [&]()
+if (atomic16->lsa.is_const() && [&]()
 {
 bool hack = false;
 if (offs % 16 == 0 && (pos - result.lower_bound + op.si16 * 4) == offs)
 {
-const u32 reservation_bound = (atomic16.lsa.value | 127);
+const u32 reservation_bound = (atomic16->lsa.value | 127);
 const u32 min_offs = offs;
 // Hack: assume there is no overflow in relative instruction offset
@@ -6325,37 +6325,37 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 {
 // Ignore memory access in this case
 }
-else if (atomic16.ls_invalid && is_store)
+else if (atomic16->ls_invalid && is_store)
 {
-break_putllc16(35, atomic16.set_invalid_ls(is_store));
+break_putllc16(35, atomic16->set_invalid_ls(is_store));
 }
-else if (atomic16.ls_access && atomic16.ls != start_program_count)
+else if (atomic16->ls_access && atomic16->ls != start_program_count)
 {
-break_putllc16(7, atomic16.set_invalid_ls(is_store));
+break_putllc16(7, atomic16->set_invalid_ls(is_store));
 }
-else if (atomic16.ls_access && offs != atomic16.ls_offs)
+else if (atomic16->ls_access && offs != atomic16->ls_offs)
 {
-if (atomic16.ls_offs.compare_with_mask_indifference(offs, SPU_LS_MASK_1))
+if (atomic16->ls_offs.compare_with_mask_indifference(offs, SPU_LS_MASK_1))
 {
-atomic16.ls_write |= is_store;
+atomic16->ls_write |= is_store;
 }
 else
 {
 // Sad
-break_putllc16(8, atomic16.set_invalid_ls(is_store));
+break_putllc16(8, atomic16->set_invalid_ls(is_store));
 }
 }
 else
 {
-atomic16.ls = start_program_count;
-atomic16.ls_offs = reg_state_t::from_value(offs);
-atomic16.ls_pc_rel = true;
-atomic16.ls_write |= is_store;
-atomic16.ls_access = true;
+atomic16->ls = start_program_count;
+atomic16->ls_offs = reg_state_t::from_value(offs);
+atomic16->ls_pc_rel = true;
+atomic16->ls_write |= is_store;
+atomic16->ls_access = true;
 }
-// atomic16.ls_reg[offs % 128 / 16] = start_program_count;
-// atomic16.ls_offs[offs % 128 / 16] = offs;
+// atomic16->ls_reg[offs % 128 / 16] = start_program_count;
+// atomic16->ls_offs[offs % 128 / 16] = offs;
 }
 if (is_store)
@@ -6373,9 +6373,9 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 {
 const bool is_store = type == spu_itype::STQX;
-if (atomic16.active)
+if (atomic16->active)
 {
-atomic16.mem_count++;
+atomic16->mem_count++;
 auto ra = get_reg(op.ra);
 ra.value &= SPU_LS_MASK_1;
@@ -6393,37 +6393,37 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 add_res.value &= SPU_LS_MASK_16;
 add_res.tag = umax;
-if (atomic16.lsa.unequal_with_mask_indifference(add_res, SPU_LS_MASK_128))
+if (atomic16->lsa.unequal_with_mask_indifference(add_res, SPU_LS_MASK_128))
 {
 // Unrelated, ignore
 }
-else if (atomic16.ls_invalid && is_store)
+else if (atomic16->ls_invalid && is_store)
 {
-break_putllc16(20, atomic16.set_invalid_ls(is_store));
+break_putllc16(20, atomic16->set_invalid_ls(is_store));
 }
-else if (atomic16.ls_access && add_res != atomic16.ls)
+else if (atomic16->ls_access && add_res != atomic16->ls)
 {
-if (atomic16.ls.unequal_with_mask_indifference(add_res, SPU_LS_MASK_128) && atomic16.ls_offs == 0)
+if (atomic16->ls.unequal_with_mask_indifference(add_res, SPU_LS_MASK_128) && atomic16->ls_offs == 0)
 {
 // Ok
 }
-else if (atomic16.ls_pc_rel)
+else if (atomic16->ls_pc_rel)
 {
-break_putllc16(8, atomic16.set_invalid_ls(is_store));
+break_putllc16(8, atomic16->set_invalid_ls(is_store));
 }
 else
 {
 // Sad
-break_putllc16(9, atomic16.set_invalid_ls(is_store));
+break_putllc16(9, atomic16->set_invalid_ls(is_store));
 }
 }
 else
 {
-atomic16.ls = reg_state_t::from_value(add_res.value);
-atomic16.ls_offs = reg_state_t::from_value(0);
-atomic16.ls_pc_rel = false;
-atomic16.ls_write |= is_store;
-atomic16.ls_access = true;
+atomic16->ls = reg_state_t::from_value(add_res.value);
+atomic16->ls_offs = reg_state_t::from_value(0);
+atomic16->ls_pc_rel = false;
+atomic16->ls_write |= is_store;
+atomic16->ls_access = true;
 }
 break;
@@ -6431,7 +6431,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 case 1:
 {
 const auto& state = ra.is_const() ? rb : ra;
-const auto& _lsa = atomic16.lsa;
+const auto& _lsa = atomic16->lsa;
 const u32 offs = (ra.is_const() ? ra.value : rb.value) & SPU_LS_MASK_1;
 const u32 abs_diff = calculate_absolute_ls_difference(offs, 0);
@@ -6443,82 +6443,82 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 // We already know it's an unrelated load/store
 // The reason for SPU_LS_SIZE - 128 check is that in case LSA is not aligned, it detects the possible wraparound
 }
-else if (atomic16.ls_invalid && is_store)
+else if (atomic16->ls_invalid && is_store)
 {
-break_putllc16(23, atomic16.set_invalid_ls(is_store));
+break_putllc16(23, atomic16->set_invalid_ls(is_store));
 }
-else if (atomic16.ls_access && atomic16.ls != state)
+else if (atomic16->ls_access && atomic16->ls != state)
 {
-if (atomic16.ls.unequal_with_mask_indifference(state, SPU_LS_MASK_128) && offs == 0)
+if (atomic16->ls.unequal_with_mask_indifference(state, SPU_LS_MASK_128) && offs == 0)
 {
 // Ok
 }
-else if (atomic16.ls_pc_rel)
+else if (atomic16->ls_pc_rel)
 {
-break_putllc16(36, atomic16.set_invalid_ls(is_store));
+break_putllc16(36, atomic16->set_invalid_ls(is_store));
 }
 else
 {
 // Sad
-break_putllc16(11, atomic16.set_invalid_ls(is_store));
+break_putllc16(11, atomic16->set_invalid_ls(is_store));
 }
 }
-else if (atomic16.ls_access)
+else if (atomic16->ls_access)
 {
-ensure(!atomic16.ls.is_const());
-if (atomic16.ls_offs.compare_with_mask_indifference(offs, SPU_LS_MASK_1))
+ensure(!atomic16->ls.is_const());
+if (atomic16->ls_offs.compare_with_mask_indifference(offs, SPU_LS_MASK_1))
 {
 // Ok
-atomic16.ls_write |= is_store;
+atomic16->ls_write |= is_store;
 }
-else if (atomic16.ls_offs.is_const() && atomic16.ls_offs.value / 16 == offs / 16 && state.get_known_zeroes() % 16 >= std::max<u32>(offs % 16, atomic16.ls_offs.value % 16))
+else if (atomic16->ls_offs.is_const() && atomic16->ls_offs.value / 16 == offs / 16 && state.get_known_zeroes() % 16 >= std::max<u32>(offs % 16, atomic16->ls_offs.value % 16))
 {
 // For special case observed in games (offset cannot cause the address to roll over the next 16 bytes)
-atomic16.ls_write |= is_store;
+atomic16->ls_write |= is_store;
 }
 else
 {
-break_putllc16(12, atomic16.set_invalid_ls(is_store));
+break_putllc16(12, atomic16->set_invalid_ls(is_store));
 }
 }
 else
 {
-atomic16.ls = state;
-atomic16.ls_offs = reg_state_t::from_value(offs);
-atomic16.ls_pc_rel = false;
-atomic16.ls_write |= is_store;
-atomic16.ls_access = true;
+atomic16->ls = state;
+atomic16->ls_offs = reg_state_t::from_value(offs);
+atomic16->ls_pc_rel = false;
+atomic16->ls_write |= is_store;
+atomic16->ls_access = true;
 }
 break;
 }
 case 0:
 {
-const bool is_ra_first = atomic16.ls_access ? ra == atomic16.ls : op.ra <= op.rb;
+const bool is_ra_first = atomic16->ls_access ? ra == atomic16->ls : op.ra <= op.rb;
 const auto& state1 = is_ra_first ? ra : rb;
 const auto& state2 = is_ra_first ? rb : ra;
-if (atomic16.ls_access && (atomic16.ls != state1 || atomic16.ls_offs != state2))
+if (atomic16->ls_access && (atomic16->ls != state1 || atomic16->ls_offs != state2))
 {
-if (atomic16.ls_pc_rel)
+if (atomic16->ls_pc_rel)
 {
-break_putllc16(32, atomic16.set_invalid_ls(is_store));
+break_putllc16(32, atomic16->set_invalid_ls(is_store));
 }
 else
 {
 // Sad
-break_putllc16(13, atomic16.set_invalid_ls(is_store));
+break_putllc16(13, atomic16->set_invalid_ls(is_store));
 }
 }
 else
 {
-atomic16.ls = state1;
-atomic16.ls_offs = state2;
-atomic16.ls_pc_rel = false;
-atomic16.ls_write |= is_store;
-atomic16.ls_access = true;
+atomic16->ls = state1;
+atomic16->ls_offs = state2;
+atomic16->ls_pc_rel = false;
+atomic16->ls_write |= is_store;
+atomic16->ls_access = true;
 }
 break;
@@ -6541,43 +6541,43 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 {
 const bool is_store = type == spu_itype::STQA;
-if (atomic16.active)
+if (atomic16->active)
 {
-atomic16.mem_count++;
+atomic16->mem_count++;
 const reg_state_t ca = reg_state_t::from_value(spu_ls_target(0, op.i16));
-if (atomic16.lsa.unequal_with_mask_indifference(ca, SPU_LS_MASK_128))
+if (atomic16->lsa.unequal_with_mask_indifference(ca, SPU_LS_MASK_128))
 {
 // We already know it's an unrelated load/store
 }
-else if (atomic16.ls_invalid && is_store)
+else if (atomic16->ls_invalid && is_store)
 {
-break_putllc16(37, atomic16.set_invalid_ls(is_store));
+break_putllc16(37, atomic16->set_invalid_ls(is_store));
 }
-else if (atomic16.ls_access && ca != atomic16.ls)
+else if (atomic16->ls_access && ca != atomic16->ls)
 {
-if (atomic16.ls.unequal_with_mask_indifference(ca, SPU_LS_MASK_128) && atomic16.ls_offs == 0)
+if (atomic16->ls.unequal_with_mask_indifference(ca, SPU_LS_MASK_128) && atomic16->ls_offs == 0)
 {
 // Ok
 }
-else if (atomic16.ls_pc_rel)
+else if (atomic16->ls_pc_rel)
 {
-break_putllc16(14, atomic16.set_invalid_ls(is_store));
+break_putllc16(14, atomic16->set_invalid_ls(is_store));
 }
 else
 {
 // Sad
-break_putllc16(15, atomic16.set_invalid_ls(is_store));
+break_putllc16(15, atomic16->set_invalid_ls(is_store));
 }
 }
 else
 {
-atomic16.ls = ca;
-atomic16.ls_offs = reg_state_t::from_value(0);
-atomic16.ls_pc_rel = false;
-atomic16.ls_write |= is_store;
-atomic16.ls_access = true;
+atomic16->ls = ca;
+atomic16->ls_offs = reg_state_t::from_value(0);
+atomic16->ls_pc_rel = false;
+atomic16->ls_write |= is_store;
+atomic16->ls_access = true;
 }
 }
@@ -6596,12 +6596,12 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 {
 const bool is_store = type == spu_itype::STQD;
-if (atomic16.active)
+if (atomic16->active)
 {
-atomic16.mem_count++;
+atomic16->mem_count++;
 auto ra = get_reg(op.ra);
-const auto& _lsa = atomic16.lsa;
+const auto& _lsa = atomic16->lsa;
 ra.value = ra.is_const() ? spu_ls_target(ra.value, op.si10 * 4) : 0;
 const u32 offs = ra.is_const() ? 0 : spu_ls_target(0, op.si10 * 4);
@@ -6615,49 +6615,49 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 // We already know it's an unrelated load/store
 // The reason for SPU_LS_SIZE - 128 check is that in case LSA is not aligned, it detects the possible wraparound
 }
-else if (atomic16.ls_invalid && is_store)
+else if (atomic16->ls_invalid && is_store)
 {
-break_putllc16(34, atomic16.set_invalid_ls(is_store));
+break_putllc16(34, atomic16->set_invalid_ls(is_store));
 }
-else if (atomic16.ls_access && atomic16.ls != ra)
+else if (atomic16->ls_access && atomic16->ls != ra)
 {
-if (atomic16.ls.unequal_with_mask_indifference(ra, SPU_LS_MASK_128) && (offs == 0 && atomic16.ls_offs == 0))
+if (atomic16->ls.unequal_with_mask_indifference(ra, SPU_LS_MASK_128) && (offs == 0 && atomic16->ls_offs == 0))
 {
 // Ok
 }
-else if (atomic16.ls_pc_rel)
+else if (atomic16->ls_pc_rel)
 {
-break_putllc16(16, atomic16.set_invalid_ls(is_store));
+break_putllc16(16, atomic16->set_invalid_ls(is_store));
 }
 else
 {
 // Sad
-break_putllc16(17, atomic16.set_invalid_ls(is_store));
+break_putllc16(17, atomic16->set_invalid_ls(is_store));
 }
 }
-else if (atomic16.ls_access)
+else if (atomic16->ls_access)
 {
-if (atomic16.ls_offs.compare_with_mask_indifference(offs, SPU_LS_MASK_1))
+if (atomic16->ls_offs.compare_with_mask_indifference(offs, SPU_LS_MASK_1))
 {
-atomic16.ls_write |= is_store;
+atomic16->ls_write |= is_store;
 }
-else if (atomic16.ls_offs.is_const() && atomic16.ls_offs.value / 16 == offs / 16 && ra.get_known_zeroes() % 16 >= std::max<u32>(offs % 16, atomic16.ls_offs.value % 16))
+else if (atomic16->ls_offs.is_const() && atomic16->ls_offs.value / 16 == offs / 16 && ra.get_known_zeroes() % 16 >= std::max<u32>(offs % 16, atomic16->ls_offs.value % 16))
 {
 // For special case observed in games (offset cannot cause the address to roll over the next 16 bytes)
-atomic16.ls_write |= is_store;
+atomic16->ls_write |= is_store;
 }
 else
 {
-break_putllc16(18, atomic16.set_invalid_ls(is_store));
+break_putllc16(18, atomic16->set_invalid_ls(is_store));
 }
 }
 else
 {
-atomic16.ls = ra;
-atomic16.ls_offs = reg_state_t::from_value(offs);
-atomic16.ls_pc_rel = false;
-atomic16.ls_write |= is_store;
-atomic16.ls_access = true;
+atomic16->ls = ra;
+atomic16->ls_offs = reg_state_t::from_value(offs);
+atomic16->ls_pc_rel = false;
+atomic16->ls_write |= is_store;
+atomic16->ls_access = true;
 }
 }
@@ -6947,18 +6947,18 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 inherit_const_value(op.rt, ra, ra, av == op.si10 + 0u, pos);
-if (rchcnt_loop.active)
+if (rchcnt_loop->active)
 {
-if (ra.is_instruction && ra.origin == rchcnt_loop.ch_state.origin)
+if (ra.is_instruction && ra.origin == rchcnt_loop->ch_state.origin)
 {
 if (op.si10 != 0 && op.si10 != 1)
 {
-break_channel_pattern(55, rchcnt_loop.discard());
+break_channel_pattern(55, rchcnt_loop->discard());
 break;
 }
-rchcnt_loop.ch_product = vregs[op.rt];
-rchcnt_loop.product_test_negate = op.si10 == 1;
+rchcnt_loop->ch_product = vregs[op.rt];
+rchcnt_loop->product_test_negate = op.si10 == 1;
 }
 }
@@ -7030,11 +7030,11 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
 unconst(op_rt, reg_pos == SPU_LS_SIZE ? pos : reg_pos);
-if (rchcnt_loop.active)
+if (rchcnt_loop->active)
 {
-if (rchcnt_loop.origins.find_first_of(vregs[op_rt].origin) == umax)
+if (rchcnt_loop->origins.find_first_of(vregs[op_rt].origin) == umax)
 {
-rchcnt_loop.origins.push_back(vregs[op_rt].origin);
+rchcnt_loop->origins.push_back(vregs[op_rt].origin);
 }
 }
 }