mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 02:32:36 +01:00
PPU LLVM/RawSPU: Implement MMIO violation elimination pass
This commit is contained in:
parent
e7ac84fca3
commit
598fe85243
@ -674,6 +674,7 @@ namespace ppu_instructions
|
||||
|
||||
// Encodes ADDI: 0x0e goes into the top six bits, then rt is stored in the rd field,
// ra in the ra field and si in the signed 16-bit immediate field. Returns the raw word.
inline u32 ADDI(u32 rt, u32 ra, s32 si)
{
	ppu_opcode_t word{ 0x0eu << 26 };
	word.rd = rt;
	word.ra = ra;
	word.simm16 = si;
	return word.opcode;
}
|
||||
// Encodes ADDIS: 0x0f goes into the top six bits, then rt is stored in the rd field,
// ra in the ra field and si in the signed 16-bit immediate field. Returns the raw word.
inline u32 ADDIS(u32 rt, u32 ra, s32 si)
{
	ppu_opcode_t word{ 0x0fu << 26 };
	word.rd = rt;
	word.ra = ra;
	word.simm16 = si;
	return word.opcode;
}
|
||||
// Encodes XORIS: 0x1b goes into the top six bits, then rt is stored in the rd field,
// ra in the ra field and si in the simm16 field. Returns the raw word.
inline u32 XORIS(u32 rt, u32 ra, s32 si)
{
	ppu_opcode_t word{ 0x1bu << 26 };
	word.rd = rt;
	word.ra = ra;
	word.simm16 = si;
	return word.opcode;
}
|
||||
// Encodes ORI: 0x18 goes into the top six bits, then rt is stored in the rd field,
// ra in the ra field and ui in the unsigned 16-bit immediate field. Returns the raw word.
inline u32 ORI(u32 rt, u32 ra, u32 ui)
{
	ppu_opcode_t word{ 0x18u << 26 };
	word.rd = rt;
	word.ra = ra;
	word.uimm16 = ui;
	return word.opcode;
}
|
||||
// Encodes ORIS: 0x19 goes into the top six bits, then rt is stored in the rd field,
// ra in the ra field and ui in the unsigned 16-bit immediate field. Returns the raw word.
inline u32 ORIS(u32 rt, u32 ra, u32 ui)
{
	ppu_opcode_t word{ 0x19u << 26 };
	word.rd = rt;
	word.ra = ra;
	word.uimm16 = ui;
	return word.opcode;
}
|
||||
// Encodes OR: starts from (0x1f << 26 | 0x1bc << 1), then fills the rs, ra and rb fields.
// rc (default false) is written to the rc field. Returns the raw word.
inline u32 OR(u32 ra, u32 rs, u32 rb, bool rc = false)
{
	ppu_opcode_t word{ 0x1fu << 26 | 0x1bcu << 1 };
	word.rs = rs;
	word.ra = ra;
	word.rb = rb;
	word.rc = rc;
	return word.opcode;
}
|
||||
@ -685,6 +686,7 @@ namespace ppu_instructions
|
||||
// Encodes MFSPR: starts from (0x1f << 26 | 0x153 << 1), stores rt in the rd field
// and spr in the spr field. Returns the raw word.
inline u32 MFSPR(u32 rt, u32 spr)
{
	ppu_opcode_t word{ 0x1fu << 26 | 0x153u << 1 };
	word.rd = rt;
	word.spr = spr;
	return word.opcode;
}
|
||||
// Encodes MTSPR: starts from (0x1f << 26 | 0x1d3 << 1), stores rs in the rs field
// and spr in the spr field. Returns the raw word.
inline u32 MTSPR(u32 spr, u32 rs)
{
	ppu_opcode_t word{ 0x1fu << 26 | 0x1d3u << 1 };
	word.rs = rs;
	word.spr = spr;
	return word.opcode;
}
|
||||
// Encodes LWZ: 0x20 goes into the top six bits, then rt is stored in the rd field,
// ra in the ra field and si in the signed 16-bit immediate field. Returns the raw word.
inline u32 LWZ(u32 rt, u32 ra, s32 si)
{
	ppu_opcode_t word{ 0x20u << 26 };
	word.rd = rt;
	word.ra = ra;
	word.simm16 = si;
	return word.opcode;
}
|
||||
// Encodes STW: 0x24 goes into the top six bits, then rt is stored in the rd field,
// ra in the ra field and si in the signed 16-bit immediate field. Returns the raw word.
inline u32 STW(u32 rt, u32 ra, s32 si)
{
	ppu_opcode_t word{ 0x24u << 26 };
	word.rd = rt;
	word.ra = ra;
	word.simm16 = si;
	return word.opcode;
}
|
||||
// Encodes STD: 0x3e goes into the top six bits, then the rs and ra fields are filled.
// The displacement is stored in the ds field as si / 4. Returns the raw word.
inline u32 STD(u32 rs, u32 ra, s32 si)
{
	ppu_opcode_t word{ 0x3eu << 26 };
	word.rs = rs;
	word.ra = ra;
	word.ds = si / 4;
	return word.opcode;
}
|
||||
// Encodes STDU: same layout as STD but the initial word is (0x3e << 26 | 1).
// The displacement is stored in the ds field as si / 4. Returns the raw word.
inline u32 STDU(u32 rs, u32 ra, s32 si)
{
	ppu_opcode_t word{ 0x3eu << 26 | 1 };
	word.rs = rs;
	word.ra = ra;
	word.ds = si / 4;
	return word.opcode;
}
|
||||
// Encodes LD: 0x3a goes into the top six bits, rt is stored in the rd field, ra in the ra field.
// The displacement is stored in the ds field as si / 4. Returns the raw word.
inline u32 LD(u32 rt, u32 ra, s32 si)
{
	ppu_opcode_t word{ 0x3au << 26 };
	word.rd = rt;
	word.ra = ra;
	word.ds = si / 4;
	return word.opcode;
}
|
||||
@ -697,6 +699,7 @@ namespace ppu_instructions
|
||||
// Encodes STVX: starts from (31 << 26 | 231 << 1), then fills the vs, ra and rb fields.
// Returns the raw word.
inline u32 STVX(u32 vs, u32 ra, u32 rb)
{
	ppu_opcode_t word{ 31 << 26 | 231 << 1 };
	word.vs = vs;
	word.ra = ra;
	word.rb = rb;
	return word.opcode;
}
|
||||
// Encodes LFD: 50 goes into the top six bits, then the frd and ra fields are filled
// and si is stored in the signed 16-bit immediate field. Returns the raw word.
inline u32 LFD(u32 frd, u32 ra, s32 si)
{
	ppu_opcode_t word{ 50u << 26 };
	word.frd = frd;
	word.ra = ra;
	word.simm16 = si;
	return word.opcode;
}
|
||||
// Encodes LVX: starts from (31 << 26 | 103 << 1), then fills the vd, ra and rb fields.
// Returns the raw word.
inline u32 LVX(u32 vd, u32 ra, u32 rb)
{
	ppu_opcode_t word{ 31 << 26 | 103 << 1 };
	word.vd = vd;
	word.ra = ra;
	word.rb = rb;
	return word.opcode;
}
|
||||
// Returns the fixed instruction word used for EIEIO (takes no operands).
inline constexpr u32 EIEIO()
{
	constexpr u32 encoding = 0x7c0006ac;
	return encoding;
}
|
||||
|
||||
namespace implicts
|
||||
{
|
||||
@ -734,7 +737,7 @@ namespace ppu_instructions
|
||||
// CLRLDI shorthand: forwards to RLDICL with a zero shift amount, n as the
// following argument and the trailing flag set to false.
inline u32 CLRLDI(u32 x, u32 y, u32 n)
{
	const u32 encoded = RLDICL(x, y, 0, n, false);
	return encoded;
}
|
||||
// CLRRDI shorthand: forwards to RLDICR with a zero shift amount, 63 - n as the
// following argument and the trailing flag set to false.
inline u32 CLRRDI(u32 x, u32 y, u32 n)
{
	const u32 encoded = RLDICR(x, y, 0, 63 - n, false);
	return encoded;
}
|
||||
|
||||
// Returns the fixed instruction word for: tw 31,r0,r0
inline u32 TRAP()
{
	const u32 encoding = 0x7FE00008;
	return encoding;
}
|
||||
// Returns the fixed instruction word for: tw 31,r0,r0 (usable in constant expressions)
inline constexpr u32 TRAP()
{
	constexpr u32 encoding = 0x7FE00008;
	return encoding;
}
|
||||
}
|
||||
|
||||
using namespace implicts;
|
||||
|
@ -62,7 +62,9 @@
|
||||
#include <thread>
|
||||
#include <cfenv>
|
||||
#include <cctype>
|
||||
#include <span>
|
||||
#include <optional>
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "util/vm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
@ -485,6 +487,167 @@ void ppu_reservation_fallback(ppu_thread& ppu)
|
||||
}
|
||||
}
|
||||
|
||||
// 32-bit load helper for PPU code whose address may fall inside the RawSPU MMIO window.
//
// vm_base: base pointer; the plain (non-register) read is performed at vm_base + eal.
// eal:     32-bit effective address of the access.
//
// Addresses below RAW_SPU_BASE_ADDR, addresses whose RawSPU thread cannot be found, and
// addresses that land inside the SPU's local storage are all served by a plain memory read.
// Everything else is forwarded to spu_thread::read_reg; if that rejects the address,
// fmt::throw_exception is raised.
//
// The returned word follows the "already swapped" convention of the plain read path, which is
// why a register value is reinterpreted through be_t<u32> before being returned.
u32 ppu_read_mmio_aware_u32(u8* vm_base, u32 eal)
{
	if (eal >= RAW_SPU_BASE_ADDR)
	{
		// RawSPU MMIO
		const auto area_offset = eal - RAW_SPU_BASE_ADDR;
		const auto spu = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(area_offset / RAW_SPU_OFFSET));

		// The whole 32-bit access must fit below SPU_LS_SIZE to count as an LS access
		const bool targets_ls = area_offset % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE;

		// A missing thread (access violation) and LS accesses fall through to the plain read
		if (spu && !targets_ls)
		{
			if (u32 reg_value{}; spu->read_reg(eal, reg_value))
			{
				return std::bit_cast<be_t<u32>>(reg_value);
			}

			fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal);
		}
	}

	// Value is assumed to be swapped
	return read_from_ptr<u32>(vm_base + eal);
}
|
||||
|
||||
// 32-bit store helper for PPU code whose address may fall inside the RawSPU MMIO window.
//
// vm_base: base pointer; the plain (non-register) write is performed at vm_base + eal.
// eal:     32-bit effective address of the access.
// value:   word to store, assumed to be already swapped (it is reinterpreted through
//          be_t<u32> before being handed to the register write).
//
// Addresses below RAW_SPU_BASE_ADDR, addresses whose RawSPU thread cannot be found, and
// addresses that land inside the SPU's local storage are all served by a plain memory write.
// Everything else is forwarded to spu_thread::write_reg; if that rejects the address,
// fmt::throw_exception is raised.
void ppu_write_mmio_aware_u32(u8* vm_base, u32 eal, u32 value)
{
	if (eal >= RAW_SPU_BASE_ADDR)
	{
		// RawSPU MMIO
		const auto area_offset = eal - RAW_SPU_BASE_ADDR;
		const auto spu = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu(area_offset / RAW_SPU_OFFSET));

		// The whole 32-bit access must fit below SPU_LS_SIZE to count as an LS access
		const bool targets_ls = area_offset % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE;

		// A missing thread (access violation) and LS accesses fall through to the plain write
		if (spu && !targets_ls)
		{
			if (spu->write_reg(eal, std::bit_cast<be_t<u32>>(value)))
			{
				return;
			}

			fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal);
		}
	}

	// Value is assumed swapped
	write_to_ptr<u32>(vm_base + eal, value);
}
|
||||
|
||||
// Heuristic scan over a window of PPU instructions.
// Returns true as soon as the window contains something that suggests RawSPU MMIO is being
// addressed nearby, false if the whole window was scanned without such a hint.
//
// Hints, in the order they are tested for each instruction:
//  - an EIEIO instruction (common around MMIO, it orders IO);
//  - an ADDIS/ORIS/XORIS that forms the RawSPU MMIO base, but only together with an
//    ORIS/ADDIS 0x10 somewhere in the window (the base alone is a common constant);
//  - an ADDIS/ORIS/XORIS that forms the base with the problem state offset already applied;
//  - two distinct LWZ/STW/ADDI immediates that are known problem state register offsets.
extern bool ppu_test_address_may_be_mmio(std::span<const be_t<u32>> insts)
{
	namespace ppi = ppu_instructions;

	// Every template is encoded with zeroed register fields, so it can be compared against
	// an instruction reduced to its top six bits plus its low 16-bit immediate.

	// RawSPU MMIO base
	const u32 base_forms[]
	{
		ppi::ADDIS({}, {}, -0x2000),
		ppi::ORIS({}, {}, 0xe000),
		ppi::XORIS({}, {}, 0xe000),
	};

	// Companion constant looked for near the base (for multiplying the SPU ID by it)
	const u32 area_forms[]
	{
		ppi::ORIS({}, {}, 0x10),
		ppi::ADDIS({}, {}, 0x10),
	};

	const u32 direct_forms[]
	{
		// RawSPU MMIO base + problem state offset
		ppi::ADDIS({}, {}, -0x1ffc),
		ppi::ORIS({}, {}, 0xe004),
		ppi::XORIS({}, {}, 0xe004),

		// RawSPU MMIO base + problem state offset + 64k of SNR1 offset
		ppi::ADDIS({}, {}, -0x1ffb),
		ppi::ORIS({}, {}, 0xe005),
		ppi::XORIS({}, {}, 0xe005),

		// RawSPU MMIO base + problem state offset + SNR2 offset
		// (STW only reaches +-32K, so accessing SNR2 requires adding another 64k first)
		ppi::ADDIS({}, {}, -0x1ffa),
		ppi::ORIS({}, {}, 0xe006),
		ppi::XORIS({}, {}, 0xe006),
	};

	const u32 lwz_form = ppi::LWZ({}, {}, 0);
	const u32 stw_form = ppi::STW({}, {}, 0);
	const u32 addi_form = ppi::ADDI({}, {}, 0);

	const auto matches_any = [](u32 word, const auto& forms)
	{
		for (const u32 form : forms)
		{
			if (word == form)
			{
				return true;
			}
		}

		return false;
	};

	std::set<u32> seen_offsets;
	bool saw_base = false;
	bool saw_area = false;

	for (u32 word : insts)
	{
		// Common around MMIO (orders IO)
		if (word == ppi::EIEIO())
		{
			return true;
		}

		const u32 masked = (word & 0xfc00ffff);

		if (matches_any(masked, base_forms))
		{
			saw_base = true;

			if (saw_area)
			{
				// Found both
				return true;
			}

			continue;
		}

		if (matches_any(masked, area_forms))
		{
			saw_area = true;

			if (saw_base)
			{
				// Found both
				return true;
			}

			continue;
		}

		if (matches_any(masked, direct_forms))
		{
			return true;
		}

		// Try to detect a function that receives the RawSPU problem state base pointer as an argument
		const u32 top_bits = masked & ~0xffff;
		const bool is_load = top_bits == lwz_form;
		const bool is_store = top_bits == stw_form;

		if (!is_load && !is_store && top_bits != addi_form)
		{
			continue;
		}

		// ADDI: may precede either kind of access, so it is tested as both
		const bool is_neither = !is_store && !is_load;
		const u32 imm = masked & 0xffff;
		const bool is_snr = (is_store || is_neither) && (imm == (SPU_RdSigNotify2_offs & 0xffff) || imm == (SPU_RdSigNotify1_offs & 0xffff));

		if (!is_snr && !spu_thread::test_is_problem_state_register_offset(imm, is_load || is_neither, is_store || is_neither))
		{
			continue;
		}

		seen_offsets.insert(imm);

		if (seen_offsets.size() >= 2)
		{
			// Assume high MMIO likelihood if more than one offset appears in nearby code
			// Such as common IN_MBOX + OUT_MBOX
			return true;
		}
	}

	return false;
}
|
||||
|
||||
struct ppu_toc_manager
|
||||
{
|
||||
std::unordered_map<u32, u32> toc_map;
|
||||
@ -3529,6 +3692,8 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
{ "__resupdate", reinterpret_cast<u64>(vm::reservation_update) },
|
||||
{ "__resinterp", reinterpret_cast<u64>(ppu_reservation_fallback) },
|
||||
{ "__escape", reinterpret_cast<u64>(+ppu_escape) },
|
||||
{ "__read_maybe_mmio32", reinterpret_cast<u64>(+ppu_read_mmio_aware_u32) },
|
||||
{ "__write_maybe_mmio32", reinterpret_cast<u64>(+ppu_write_mmio_aware_u32) },
|
||||
};
|
||||
|
||||
for (u64 index = 0; index < 1024; index++)
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "Emu/Cell/Common.h"
|
||||
#include "PPUTranslator.h"
|
||||
#include "PPUThread.h"
|
||||
#include "SPUThread.h"
|
||||
|
||||
#include "util/types.hpp"
|
||||
#include "util/endian.hpp"
|
||||
@ -12,6 +13,7 @@
|
||||
#include "util/v128.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include <algorithm>
|
||||
#include <span>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -129,6 +131,7 @@ Type* PPUTranslator::GetContextType()
|
||||
}
|
||||
|
||||
u32 ppu_get_far_jump(u32 pc);
|
||||
bool ppu_test_address_may_be_mmio(std::span<const be_t<u32>> insts);
|
||||
|
||||
Function* PPUTranslator::Translate(const ppu_function& info)
|
||||
{
|
||||
@ -239,6 +242,9 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
||||
m_rel = nullptr;
|
||||
}
|
||||
|
||||
// Reset MMIO hint
|
||||
m_may_be_mmio = true;
|
||||
|
||||
const u32 op = *ensure(m_info.get_ptr<u32>(m_addr + base));
|
||||
|
||||
(this->*(s_ppu_decoder.decode(op)))({op});
|
||||
@ -600,15 +606,53 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
|
||||
{
|
||||
const u32 size = ::narrow<u32>(+type->getPrimitiveSizeInBits());
|
||||
|
||||
if (m_may_be_mmio && size == 32)
|
||||
{
|
||||
// Test for MMIO patterns
|
||||
struct instructions_to_test
|
||||
{
|
||||
be_t<u32> insts[128];
|
||||
};
|
||||
|
||||
m_may_be_mmio = false;
|
||||
|
||||
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + utils::sub_saturate<u32>(m_addr, sizeof(instructions_to_test) / 2))))
|
||||
{
|
||||
if (ppu_test_address_may_be_mmio(std::span(ptr->insts)))
|
||||
{
|
||||
m_may_be_mmio = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (is_be ^ m_is_be && size > 8)
|
||||
{
|
||||
llvm::Value* value{};
|
||||
|
||||
// Read, byteswap, bitcast
|
||||
const auto int_type = m_ir->getIntNTy(size);
|
||||
const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr), llvm::MaybeAlign{align});
|
||||
value->setVolatile(true);
|
||||
|
||||
if (m_may_be_mmio && size == 32)
|
||||
{
|
||||
ppu_log.notice("LLVM: Detected potential MMIO32 read at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
|
||||
value = Call(GetType<u32>(), "__read_maybe_mmio32", m_base, addr);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto inst = m_ir->CreateAlignedLoad(int_type, GetMemory(addr), llvm::MaybeAlign{align});
|
||||
inst->setVolatile(true);
|
||||
value = inst;
|
||||
}
|
||||
|
||||
return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
|
||||
}
|
||||
|
||||
if (m_may_be_mmio && size == 32)
|
||||
{
|
||||
ppu_log.notice("LLVM: Detected potential MMIO32 read at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
|
||||
return Call(GetType<u32>(), "__read_maybe_mmio32", m_base, addr);
|
||||
}
|
||||
|
||||
// Read normally
|
||||
const auto r = m_ir->CreateAlignedLoad(type, GetMemory(addr), llvm::MaybeAlign{align});
|
||||
r->setVolatile(true);
|
||||
@ -627,6 +671,25 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
|
||||
value = Call(int_type, fmt::format("llvm.bswap.i%u", size), bitcast(value, int_type));
|
||||
}
|
||||
|
||||
if (m_may_be_mmio && size == 32)
|
||||
{
|
||||
// Test for MMIO patterns
|
||||
struct instructions_to_test
|
||||
{
|
||||
be_t<u32> insts[128];
|
||||
};
|
||||
|
||||
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + utils::sub_saturate<u32>(m_addr, sizeof(instructions_to_test) / 2))))
|
||||
{
|
||||
if (ppu_test_address_may_be_mmio(std::span(ptr->insts)))
|
||||
{
|
||||
ppu_log.notice("LLVM: Detected potential MMIO32 write at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
|
||||
Call(GetType<void>(), "__write_maybe_mmio32", m_base, addr, value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write
|
||||
m_ir->CreateAlignedStore(value, GetMemory(addr), llvm::MaybeAlign{align})->setVolatile(true);
|
||||
}
|
||||
@ -2527,6 +2590,7 @@ void PPUTranslator::LDX(ppu_opcode_t op)
|
||||
|
||||
// LWZX: reads a u32 from (ra ? ra + rb : rb) and stores it into GPR rd.
void PPUTranslator::LWZX(ppu_opcode_t op)
{
	// Stack register and TLS address register are unlikely to be used in MMIO address calculation
	if (op.ra == 1u || op.ra == 13u || op.rb == 1u || op.rb == 13u)
	{
		m_may_be_mmio = false;
	}

	const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
	SetGpr(op.rd, ReadMemory(addr, GetType<u32>()));
}
|
||||
|
||||
@ -2601,6 +2665,9 @@ void PPUTranslator::DCBST(ppu_opcode_t)
|
||||
|
||||
void PPUTranslator::LWZUX(ppu_opcode_t op)
|
||||
{
|
||||
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u && op.rb != 1u && op.rb != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
|
||||
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset
|
||||
|
||||
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
|
||||
SetGpr(op.rd, ReadMemory(addr, GetType<u32>()));
|
||||
SetGpr(op.ra, addr);
|
||||
@ -2811,6 +2878,7 @@ void PPUTranslator::STWCX(ppu_opcode_t op)
|
||||
|
||||
// STWX: writes the 32-bit view of GPR rs to (ra ? ra + rb : rb).
void PPUTranslator::STWX(ppu_opcode_t op)
{
	// Stack register and TLS address register are unlikely to be used in MMIO address calculation
	if (op.ra == 1u || op.ra == 13u || op.rb == 1u || op.rb == 13u)
	{
		m_may_be_mmio = false;
	}

	// Kept as a single call so operand evaluation stays exactly as before
	WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32));
}
|
||||
|
||||
@ -2830,6 +2898,7 @@ void PPUTranslator::STDUX(ppu_opcode_t op)
|
||||
// STWUX: writes the 32-bit view of GPR rs to ra + rb, then stores that address back into GPR ra.
void PPUTranslator::STWUX(ppu_opcode_t op)
{
	const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));

	// Stack register and TLS address register are unlikely to be used in MMIO address calculation
	if (op.ra == 1u || op.ra == 13u || op.rb == 1u || op.rb == 13u)
	{
		m_may_be_mmio = false;
	}

	WriteMemory(addr, GetGpr(op.rs, 32));
	SetGpr(op.ra, addr);
}
|
||||
@ -3213,6 +3282,7 @@ void PPUTranslator::LWBRX(ppu_opcode_t op)
|
||||
|
||||
// LFSX: reads an f32 from (ra ? ra + rb : rb) and stores it into FPR frd.
void PPUTranslator::LFSX(ppu_opcode_t op)
{
	// Stack register and TLS address register are unlikely to be used in MMIO address calculation
	if (op.ra == 1u || op.ra == 13u || op.rb == 1u || op.rb == 13u)
	{
		m_may_be_mmio = false;
	}

	const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
	SetFpr(op.frd, ReadMemory(addr, GetType<f32>()));
}
|
||||
|
||||
@ -3541,6 +3611,44 @@ void PPUTranslator::LWZ(ppu_opcode_t op)
|
||||
m_rel = nullptr;
|
||||
}
|
||||
|
||||
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
|
||||
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset
|
||||
|
||||
if (m_may_be_mmio && !op.simm16)
|
||||
{
|
||||
struct instructions_data
|
||||
{
|
||||
be_t<u32> insts[2];
|
||||
};
|
||||
|
||||
// Quick invalidation: expect exact MMIO address, so if the register is being reused with a different offset then it's likely not MMIO
|
||||
if (auto ptr = m_info.get_ptr<instructions_data>(m_addr + 4 + (m_reloc ? m_reloc->addr : 0)))
|
||||
{
|
||||
for (u32 inst : ptr->insts)
|
||||
{
|
||||
ppu_opcode_t test_op{inst};
|
||||
|
||||
if (test_op.simm16 == op.simm16 || test_op.ra != op.ra)
|
||||
{
|
||||
// Same offset (at least according to this test) or different register
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (g_ppu_itype.decode(inst))
|
||||
{
|
||||
case ppu_itype::LWZ:
|
||||
case ppu_itype::STW:
|
||||
{
|
||||
// Not MMIO
|
||||
m_may_be_mmio = false;
|
||||
break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<u32>()));
|
||||
}
|
||||
|
||||
@ -3554,6 +3662,9 @@ void PPUTranslator::LWZU(ppu_opcode_t op)
|
||||
m_rel = nullptr;
|
||||
}
|
||||
|
||||
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
|
||||
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset
|
||||
|
||||
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
|
||||
SetGpr(op.rd, ReadMemory(addr, GetType<u32>()));
|
||||
SetGpr(op.ra, addr);
|
||||
@ -3597,6 +3708,44 @@ void PPUTranslator::STW(ppu_opcode_t op)
|
||||
m_rel = nullptr;
|
||||
}
|
||||
|
||||
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
|
||||
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset
|
||||
|
||||
if (m_may_be_mmio && !op.simm16)
|
||||
{
|
||||
struct instructions_data
|
||||
{
|
||||
be_t<u32> insts[3];
|
||||
};
|
||||
|
||||
// Quick invalidation: expect exact MMIO address, so if the register is being reused with a different offset then it's likely not MMIO
|
||||
if (auto ptr = m_info.get_ptr<instructions_data>(m_addr + 4 + (m_reloc ? m_reloc->addr : 0)))
|
||||
{
|
||||
for (u32 inst : ptr->insts)
|
||||
{
|
||||
ppu_opcode_t test_op{inst};
|
||||
|
||||
if (test_op.simm16 == op.simm16 || test_op.ra != op.ra)
|
||||
{
|
||||
// Same offset (at least according to this test) or different register
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (g_ppu_itype.decode(inst))
|
||||
{
|
||||
case ppu_itype::LWZ:
|
||||
case ppu_itype::STW:
|
||||
{
|
||||
// Not MMIO
|
||||
m_may_be_mmio = false;
|
||||
break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const auto value = GetGpr(op.rs, 32);
|
||||
const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm;
|
||||
WriteMemory(addr, value);
|
||||
@ -3621,6 +3770,9 @@ void PPUTranslator::STWU(ppu_opcode_t op)
|
||||
m_rel = nullptr;
|
||||
}
|
||||
|
||||
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
|
||||
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset
|
||||
|
||||
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
|
||||
WriteMemory(addr, GetGpr(op.rs, 32));
|
||||
SetGpr(op.ra, addr);
|
||||
@ -3740,6 +3892,8 @@ void PPUTranslator::STHU(ppu_opcode_t op)
|
||||
|
||||
void PPUTranslator::LMW(ppu_opcode_t op)
|
||||
{
|
||||
m_may_be_mmio &= op.rd == 31u && (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
|
||||
|
||||
for (u32 i = 0; i < 32 - op.rd; i++)
|
||||
{
|
||||
SetGpr(i + op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(m_ir->getInt64(op.simm16 + i * 4), GetGpr(op.ra)) : m_ir->getInt64(op.simm16 + i * 4), GetType<u32>()));
|
||||
@ -3748,6 +3902,8 @@ void PPUTranslator::LMW(ppu_opcode_t op)
|
||||
|
||||
void PPUTranslator::STMW(ppu_opcode_t op)
|
||||
{
|
||||
m_may_be_mmio &= op.rs == 31u && (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
|
||||
|
||||
for (u32 i = 0; i < 32 - op.rs; i++)
|
||||
{
|
||||
WriteMemory(op.ra ? m_ir->CreateAdd(m_ir->getInt64(op.simm16 + i * 4), GetGpr(op.ra)) : m_ir->getInt64(op.simm16 + i * 4), GetGpr(i + op.rs, 32));
|
||||
@ -3764,6 +3920,9 @@ void PPUTranslator::LFS(ppu_opcode_t op)
|
||||
m_rel = nullptr;
|
||||
}
|
||||
|
||||
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
|
||||
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset
|
||||
|
||||
SetFpr(op.frd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<f32>()));
|
||||
}
|
||||
|
||||
@ -3777,6 +3936,9 @@ void PPUTranslator::LFSU(ppu_opcode_t op)
|
||||
m_rel = nullptr;
|
||||
}
|
||||
|
||||
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
|
||||
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset
|
||||
|
||||
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
|
||||
SetFpr(op.frd, ReadMemory(addr, GetType<f32>()));
|
||||
SetGpr(op.ra, addr);
|
||||
@ -3819,7 +3981,12 @@ void PPUTranslator::STFS(ppu_opcode_t op)
|
||||
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
|
||||
m_rel = nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset
|
||||
}
|
||||
|
||||
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
|
||||
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetFpr(op.frs, 32));
|
||||
}
|
||||
|
||||
@ -3832,6 +3999,12 @@ void PPUTranslator::STFSU(ppu_opcode_t op)
|
||||
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
|
||||
m_rel = nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset
|
||||
}
|
||||
|
||||
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
|
||||
|
||||
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
|
||||
WriteMemory(addr, GetFpr(op.frs, 32));
|
||||
|
@ -68,6 +68,7 @@ class PPUTranslator final : public cpu_translator
|
||||
llvm::Value** const m_fc = m_locals + 131; // FPSCR bits (used partially)
|
||||
|
||||
llvm::Value* nan_vec4;
|
||||
bool m_may_be_mmio = false;
|
||||
|
||||
#define DEF_VALUE(loc, glb, pos)\
|
||||
llvm::Value*& loc = m_locals[pos];\
|
||||
|
@ -318,6 +318,50 @@ bool spu_thread::write_reg(const u32 addr, const u32 value)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Tells whether `offset` is one of the problem state register offsets recognised for the
// requested access direction(s).
//
// offset:    register offset to classify.
// for_read:  when set, offsets from the read list are accepted.
// for_write: when set, offsets from the write list are accepted.
//
// Returns true if the offset is accepted by at least one requested direction, false otherwise
// (including when both flags are false).
//
// The two lists are kept in separate switches on purpose: merging them would only be valid if
// no read label and write label share a numeric value, which this function cannot assume.
bool spu_thread::test_is_problem_state_register_offset(u32 offset, bool for_read, bool for_write) noexcept
{
	const auto in_read_list = [offset]() -> bool
	{
		switch (offset)
		{
		case MFC_CMDStatus_offs:
		case MFC_QStatus_offs:
		case SPU_Out_MBox_offs:
		case SPU_MBox_Status_offs:
		case SPU_Status_offs:
		case Prxy_TagStatus_offs:
		case SPU_NPC_offs:
		case SPU_RunCntl_offs:
			return true;
		default:
			return false;
		}
	};

	const auto in_write_list = [offset]() -> bool
	{
		switch (offset)
		{
		case MFC_LSA_offs:
		case MFC_EAH_offs:
		case MFC_EAL_offs:
		case MFC_Size_Tag_offs:
		case MFC_Class_CMD_offs:
		case Prxy_QueryType_offs:
		case Prxy_QueryMask_offs:
		case SPU_In_MBox_offs:
		case SPU_RunCntl_offs:
		case SPU_NPC_offs:
		case SPU_RdSigNotify1_offs:
		case SPU_RdSigNotify2_offs:
		case (SPU_RdSigNotify2_offs & 0xffff): // For now accept both (this is used for an optimization so it can be imperfect)
			return true;
		default:
			return false;
		}
	};

	// The read list is consulted first; the write list only if that did not match
	return (for_read && in_read_list()) || (for_write && in_write_list());
}
|
||||
|
||||
void spu_load_exec(const spu_exec_object& elf)
|
||||
{
|
||||
spu_thread::g_raw_spu_ctr++;
|
||||
|
@ -865,6 +865,7 @@ public:
|
||||
|
||||
bool read_reg(const u32 addr, u32& value);
|
||||
bool write_reg(const u32 addr, const u32 value);
|
||||
static bool test_is_problem_state_register_offset(u32 offset, bool for_read, bool for_write) noexcept;
|
||||
|
||||
static atomic_t<u32> g_raw_spu_ctr;
|
||||
static atomic_t<u32> g_raw_spu_id[5];
|
||||
|
Loading…
Reference in New Issue
Block a user