1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-21 18:22:33 +01:00

SPU: analyser v4 and fixes

Build SPU cache after PPU, fix mixing progress
SPU ASMJIT: add support for Giga mode
SPU ASMJIT: use the same spu.log location as SPU LLVM
SPU: improve spu.log disasm
SPU: improve trampolines, unify with SPU ASMJIT
SPU: decode interrupt handler address from BR/BRA at 0x0
SPU LLVM: support Mega/Giga modes
SPU LLVM: implement function chunks
SPU LLVM: use PHI nodes, value visibility across basic blocks
SPU LLVM: implement function chunk table
New simple memory manager for LLVM (bugfix)
This commit is contained in:
Nekotekina 2018-06-10 15:46:01 +03:00
parent 3e433ef05c
commit e4da284176
7 changed files with 1577 additions and 572 deletions

View File

@ -308,6 +308,83 @@ struct MemoryManager : llvm::RTDyldMemoryManager
}
};
// Simple memory manager: bump-pointer allocator over one large reservation
struct MemoryManager2 : llvm::RTDyldMemoryManager
{
	// Reserve 2 GiB of address space up front; commit pages lazily as sections grow
	void* const m_memory = utils::memory_reserve(0x80000000);

	// Lower half holds code, upper half holds data
	u8* const m_code = static_cast<u8*>(m_memory) + 0x00000000;
	u8* const m_data = static_cast<u8*>(m_memory) + 0x40000000;

	// Bump pointers (offsets into the code/data halves)
	u64 m_code_pos = 0;
	u64 m_data_pos = 0;

	MemoryManager2() = default;

	~MemoryManager2() override
	{
		// Return the whole reservation in one call
		utils::memory_release(m_memory, 0x80000000);
	}

	// Allocate an executable section; returns nullptr when the 1 GiB code half is exhausted
	u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
	{
		// Bump allocation: advance the cursor, keeping both ends aligned
		const u64 prev = m_code_pos;
		const u64 base = ::align(m_code_pos, align);
		m_code_pos = ::align(base + size, align);

		if (m_code_pos > 0x40000000)
		{
			LOG_FATAL(GENERAL, "LLVM: Out of code memory (size=0x%x, align=0x%x)", size, align);
			return nullptr;
		}

		// Commit any newly-touched 64 KiB pages as writable+executable
		const u64 page_lo = ::align(prev, 0x10000);
		const u64 page_hi = ::align(m_code_pos, 0x10000);

		if (page_hi != page_lo)
		{
			utils::memory_commit(m_code + page_lo, page_hi - page_lo, utils::protection::wx);
		}

		LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%x, align=0x%x)", sec_id, sec_name.data(), m_code + base, size, align);
		return m_code + base;
	}

	// Allocate a data section; returns nullptr when the 1 GiB data half is exhausted
	u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override
	{
		// Bump allocation: advance the cursor, keeping both ends aligned
		const u64 prev = m_data_pos;
		const u64 base = ::align(m_data_pos, align);
		m_data_pos = ::align(base + size, align);

		if (m_data_pos > 0x40000000)
		{
			LOG_FATAL(GENERAL, "LLVM: Out of data memory (size=0x%x, align=0x%x)", size, align);
			return nullptr;
		}

		// Commit any newly-touched 64 KiB pages (default read/write protection;
		// NOTE(review): is_ro is only logged, not enforced — presumably intentional for this simple manager)
		const u64 page_lo = ::align(prev, 0x10000);
		const u64 page_hi = ::align(m_data_pos, 0x10000);

		if (page_hi != page_lo)
		{
			utils::memory_commit(m_data + page_lo, page_hi - page_lo);
		}

		LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%x, align=0x%x, %s)", sec_id, sec_name.data(), m_data + base, size, align, is_ro ? "ro" : "rw");
		return m_data + base;
	}

	// Nothing to finalize: pages are committed with their final protection on allocation
	bool finalizeMemory(std::string* = nullptr) override
	{
		return false;
	}
};
// Helper class
struct EventListener : llvm::JITEventListener
{
@ -383,7 +460,7 @@ public:
std::string name = m_path;
name.append(module->getName());
fs::file(name, fs::rewrite).write(obj.getBufferStart(), obj.getBufferSize());
LOG_SUCCESS(GENERAL, "LLVM: Created module: %s", module->getName().data());
LOG_NOTICE(GENERAL, "LLVM: Created module: %s", module->getName().data());
}
static std::unique_ptr<llvm::MemoryBuffer> load(const std::string& path)
@ -405,7 +482,7 @@ public:
if (auto buf = load(path))
{
LOG_SUCCESS(GENERAL, "LLVM: Loaded module: %s", module->getName().data());
LOG_NOTICE(GENERAL, "LLVM: Loaded module: %s", module->getName().data());
return buf;
}
@ -464,6 +541,7 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
m_engine.reset(llvm::EngineBuilder(std::make_unique<llvm::Module>("null_", m_context))
.setErrorStr(&result)
.setEngineKind(llvm::EngineKind::JIT)
.setMCJITMemoryManager(std::make_unique<MemoryManager2>())
.setOptLevel(llvm::CodeGenOpt::Aggressive)
.setCodeModel(large ? llvm::CodeModel::Large : llvm::CodeModel::Small)
.setMCPU(m_cpu)

View File

@ -1225,9 +1225,6 @@ extern void ppu_initialize()
fmt::throw_exception("Failed to create cache directory: %s (%s)", _main->cache, fs::g_tls_error);
}
// Initialize SPU cache
spu_cache::initialize();
if (Emu.IsStopped())
{
return;
@ -1248,6 +1245,9 @@ extern void ppu_initialize()
{
ppu_initialize(*ptr);
}
// Initialize SPU cache
spu_cache::initialize();
}
extern void ppu_initialize(const ppu_module& info)

View File

@ -7,6 +7,7 @@
#include "SPUThread.h"
#include "SPUInterpreter.h"
#include "Utilities/sysinfo.h"
#include "PPUAnalyser.h"
#include <cmath>
#include <mutex>
@ -32,6 +33,13 @@ std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_asmjit_recompiler
spu_runtime::spu_runtime()
{
m_cache_path = fxm::check_unlocked<ppu_module>()->cache;
if (g_cfg.core.spu_debug)
{
fs::file(m_cache_path + "spu.log", fs::rewrite);
}
LOG_SUCCESS(SPU, "SPU Recompiler Runtime (ASMJIT) initialized...");
// Initialize lookup table
@ -97,7 +105,12 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
using namespace asmjit;
SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode);
dis_asm.offset = reinterpret_cast<const u8*>(func.data() + 1) - func[0];
dis_asm.offset = reinterpret_cast<const u8*>(func.data() + 1);
if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
{
dis_asm.offset -= func[0];
}
StringLogger logger;
logger.addOptions(Logger::kOptionBinaryForm);
@ -163,15 +176,16 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
// Start compilation
m_pos = func[0];
const u32 start = m_pos;
const u32 end = m_pos + (func.size() - 1) * 4;
m_size = ::size32(func) * 4 - 4;
const u32 start = m_pos * (g_cfg.core.spu_block_size != spu_block_size_type::giga);
const u32 end = start + m_size;
// Create instruction labels (TODO: some of them are unnecessary)
for (u32 i = 1; i < func.size(); i++)
{
if (func[i])
{
instr_labels[i * 4 - 4 + m_pos] = c->newLabel();
instr_labels[i * 4 - 4 + start] = c->newLabel();
}
}
@ -210,15 +224,15 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
{
// Disable check (unsafe)
}
else if (func.size() - 1 == 1)
else if (m_size == 4)
{
c->cmp(x86::dword_ptr(*ls, m_pos), func[1]);
c->cmp(x86::dword_ptr(*ls, start), func[1]);
c->jnz(label_diff);
}
else if (func.size() - 1 == 2)
else if (m_size == 8)
{
c->mov(*qw1, static_cast<u64>(func[2]) << 32 | func[1]);
c->cmp(*qw1, x86::qword_ptr(*ls, m_pos));
c->cmp(*qw1, x86::qword_ptr(*ls, start));
c->jnz(label_diff);
}
else if (utils::has_512() && false)
@ -226,16 +240,15 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
// AVX-512 optimized check using 512-bit registers (disabled)
words_align = 64;
const u32 starta = m_pos & -64;
const u32 starta = start & -64;
const u32 enda = ::align(end, 64);
const u32 sizea = (enda - starta) / 64;
verify(HERE), sizea;
// Initialize pointers
c->lea(x86::rax, x86::qword_ptr(label_code));
c->lea(*qw1, x86::qword_ptr(*ls, starta));
u32 code_off = 0;
u32 ls_off = starta;
u32 ls_off = -8192;
for (u32 j = starta; j < enda; j += 64)
{
@ -246,6 +259,8 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
continue;
}
const bool first = ls_off == -8192;
// Ensure small distance for disp8*N
if (j - ls_off >= 8192)
{
@ -279,7 +294,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
c->vmovdqa32(x86::zmm0, x86::zword_ptr(*qw1, j - ls_off));
}
if (j == starta)
if (first)
{
c->vpcmpud(x86::k1, x86::zmm0, x86::zword_ptr(x86::rax, code_off), 4);
}
@ -291,7 +306,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
for (u32 i = j; i < j + 64; i += 4)
{
words.push_back(i >= m_pos && i < end ? func[(i - m_pos) / 4 + 1] : 0);
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0);
}
code_off += 64;
@ -305,7 +320,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
// AVX-512 optimized check using 256-bit registers
words_align = 32;
const u32 starta = m_pos & -32;
const u32 starta = start & -32;
const u32 enda = ::align(end, 32);
const u32 sizea = (enda - starta) / 32;
verify(HERE), sizea;
@ -330,10 +345,10 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
for (u32 i = starta; i < enda; i += 4)
{
words.push_back(i >= m_pos && i < end ? func[(i - m_pos) / 4 + 1] : 0);
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0);
}
}
else if (sizea == 2 && (end - m_pos) <= 32)
else if (sizea == 2 && (end - start) <= 32)
{
const u32 cmask0 = get_code_mask(starta, starta + 32);
const u32 cmask1 = get_code_mask(starta + 32, enda);
@ -347,7 +362,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
for (u32 i = starta; i < starta + 32; i += 4)
{
words.push_back(i >= m_pos ? func[(i - m_pos) / 4 + 1] : i + 32 < end ? func[(i + 32 - m_pos) / 4 + 1] : 0);
words.push_back(i >= start ? func[(i - start) / 4 + 1] : i + 32 < end ? func[(i + 32 - start) / 4 + 1] : 0);
}
}
else
@ -356,9 +371,8 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
// Initialize pointers
c->lea(x86::rax, x86::qword_ptr(label_code));
c->lea(*qw1, x86::qword_ptr(*ls, starta));
u32 code_off = 0;
u32 ls_off = starta;
u32 ls_off = -4096;
for (u32 j = starta; j < enda; j += 32)
{
@ -369,6 +383,8 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
continue;
}
const bool first = ls_off == -4096;
// Ensure small distance for disp8*N
if (j - ls_off >= 4096)
{
@ -398,7 +414,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
}
// Perform bitwise comparison and accumulate
if (j == starta)
if (first)
{
c->vpxor(x86::ymm0, x86::ymm1, x86::yword_ptr(x86::rax, code_off));
}
@ -409,7 +425,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
for (u32 i = j; i < j + 32; i += 4)
{
words.push_back(i >= m_pos && i < end ? func[(i - m_pos) / 4 + 1] : 0);
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0);
}
code_off += 32;
@ -424,7 +440,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
// Mainstream AVX
words_align = 32;
const u32 starta = m_pos & -32;
const u32 starta = start & -32;
const u32 enda = ::align(end, 32);
const u32 sizea = (enda - starta) / 32;
verify(HERE), sizea;
@ -449,10 +465,10 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
for (u32 i = starta; i < enda; i += 4)
{
words.push_back(i >= m_pos && i < end ? func[(i - m_pos) / 4 + 1] : 0);
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0);
}
}
else if (sizea == 2 && (end - m_pos) <= 32)
else if (sizea == 2 && (end - start) <= 32)
{
const u32 cmask0 = get_code_mask(starta, starta + 32);
const u32 cmask1 = get_code_mask(starta + 32, enda);
@ -466,7 +482,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
for (u32 i = starta; i < starta + 32; i += 4)
{
words.push_back(i >= m_pos ? func[(i - m_pos) / 4 + 1] : i + 32 < end ? func[(i + 32 - m_pos) / 4 + 1] : 0);
words.push_back(i >= start ? func[(i - start) / 4 + 1] : i + 32 < end ? func[(i + 32 - start) / 4 + 1] : 0);
}
}
else
@ -541,7 +557,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
for (u32 i = j; i < j + 32; i += 4)
{
words.push_back(i >= m_pos && i < end ? func[(i - m_pos) / 4 + 1] : 0);
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0);
}
code_off += 32;
@ -568,7 +584,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
// Compatible SSE2
words_align = 16;
const u32 starta = m_pos & -16;
const u32 starta = start & -16;
const u32 enda = ::align(end, 16);
const u32 sizea = (enda - starta) / 16;
verify(HERE), sizea;
@ -614,10 +630,10 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
}
// Determine which value will be duplicated at hole positions
const u32 w3 = func.at((j - m_pos + ~::cntlz32(cmask, true) % 4 * 4) / 4 + 1);
words.push_back(cmask & 1 ? func[(j - m_pos + 0) / 4 + 1] : w3);
words.push_back(cmask & 2 ? func[(j - m_pos + 4) / 4 + 1] : w3);
words.push_back(cmask & 4 ? func[(j - m_pos + 8) / 4 + 1] : w3);
const u32 w3 = func.at((j - start + ~::cntlz32(cmask, true) % 4 * 4) / 4 + 1);
words.push_back(cmask & 1 ? func[(j - start + 0) / 4 + 1] : w3);
words.push_back(cmask & 2 ? func[(j - start + 4) / 4 + 1] : w3);
words.push_back(cmask & 4 ? func[(j - start + 8) / 4 + 1] : w3);
words.push_back(w3);
// PSHUFD immediate table for all possible hole mask values, holes repeat highest valid word
@ -641,7 +657,9 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
0b11100100, // full
};
const auto& dest = !order++ ? reg0 : reg1;
const bool first = !order++;
const auto& dest = first ? reg0 : reg1;
// Load aligned code block from LS
if (cmask != 0xf)
@ -656,7 +674,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
// Perform bitwise comparison and accumulate
c->xorps(dest, x86::dqword_ptr(x86::rax, code_off));
if (j != starta && j != starta + 16)
if (first)
{
c->orps(reg0, dest);
}
@ -690,24 +708,38 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
c->vzeroupper();
}
c->inc(SPU_OFF_64(block_counter));
// Acknowledge success and add statistics
c->add(SPU_OFF_64(block_counter), ::size32(words) / (words_align / 4));
if (g_cfg.core.spu_block_size == spu_block_size_type::giga && m_pos != start)
{
// Jump to the entry point if necessary
c->jmp(instr_labels[m_pos]);
m_pos = -1;
}
for (u32 i = 1; i < func.size(); i++)
{
const u32 pos = start + (i - 1) * 4;
const u32 op = se_storage<u32>::swap(func[i]);
if (g_cfg.core.spu_debug)
{
// Disasm
dis_asm.dump_pc = pos;
dis_asm.disasm(pos);
compiler.comment(dis_asm.last_opcode.c_str());
log += dis_asm.last_opcode;
log += '\n';
}
// Get opcode
const u32 op = se_storage<u32>::swap(func[i]);
if (op)
{
log += '>';
log += dis_asm.last_opcode;
log += '\n';
}
else
{
fmt::append(log, ">[%08x] xx xx xx xx: <hole>\n", pos);
}
}
if (!op)
{
@ -738,6 +770,12 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
c->bind(found->second);
}
if (g_cfg.core.spu_debug)
{
// Disasm inside the ASMJIT log
compiler.comment(dis_asm.last_opcode.c_str());
}
// Execute recompiler function
(this->*s_spu_decoder.decode(op))({op});
@ -751,6 +789,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
if (g_cfg.core.spu_debug)
{
log += '\n';
this->dump(log);
}
// Make fallthrough if necessary
@ -784,6 +823,10 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
c->align(kAlignData, 8);
c->bind(instr_table);
// Get actual instruction table bounds
const u32 start = instr_labels.begin()->first;
const u32 end = instr_labels.rbegin()->first + 4;
for (u32 addr = start; addr < end; addr += 4)
{
const auto found = instr_labels.find(addr);
@ -825,6 +868,22 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
// Register function
fn_location = fn;
if (g_cfg.core.spu_debug)
{
// Add ASMJIT logs
fmt::append(log, "Address: %p\n\n", fn);
log += logger.getString();
log += "\n\n\n";
// Append log file
fs::file(m_spurt->m_cache_path + "spu.log", fs::write + fs::append).write(log);
}
if (m_cache && g_cfg.core.spu_cache)
{
m_cache->add(func);
}
// Generate a dispatcher (übertrampoline)
std::vector<u32> addrv{func[0]};
const auto beg = m_spurt->m_map.lower_bound(addrv);
@ -886,6 +945,12 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
it = it2;
size1 = w.size - size2;
if (w.level >= w.beg->first.size())
{
// Cannot split: smallest function is a prefix of bigger ones (TODO)
break;
}
const u32 x1 = w.beg->first.at(w.level);
if (!x1)
@ -914,6 +979,20 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
}
}
if (w.label.isValid())
{
c->align(kAlignCode, 16);
c->bind(w.label);
}
if (w.level >= w.beg->first.size())
{
// If functions cannot be compared, assume smallest function
LOG_ERROR(SPU, "Trampoline simplified at 0x%x (level=%u)", func[0], w.level);
c->jmp(imm_ptr(w.beg->second ? w.beg->second : &dispatch));
continue;
}
// Value for comparison
const u32 x = it->first.at(w.level);
@ -933,13 +1012,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
size2++;
}
if (w.label.isValid())
{
c->align(kAlignCode, 16);
c->bind(w.label);
}
c->cmp(x86::dword_ptr(*ls, func[0] + (w.level - 1) * 4), x);
c->cmp(x86::dword_ptr(*ls, start + (w.level - 1) * 4), x);
// Low subrange target label
Label label_below;
@ -1044,22 +1117,6 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
m_spurt->m_dispatcher[func[0] / 4] = tr;
}
if (g_cfg.core.spu_debug)
{
// Add ASMJIT logs
fmt::append(log, "Address: %p (%p)\n\n", fn, +m_spurt->m_dispatcher[func[0] / 4]);
log += logger.getString();
log += "\n\n\n";
// Append log file
fs::file(Emu.GetCachePath() + "SPUJIT.log", fs::write + fs::append).write(log);
}
if (m_cache && g_cfg.core.spu_cache)
{
m_cache->add(func);
}
return fn;
}
@ -1131,17 +1188,6 @@ static void check_state(SPUThread* _spu, spu_function_t _ret)
_ret = &check_state_ret;
}
if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
{
// Get stack pointer, try to use native return address (check SPU return address)
const auto x = _spu->stack_mirror[(_spu->gpr[1]._u32[3] & 0x3fff0) >> 4];
if (x._u32[2] == _spu->pc)
{
_ret = reinterpret_cast<spu_function_t>(x._u64[0]);
}
}
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
}
@ -1195,36 +1241,12 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
{
using namespace asmjit;
if (g_cfg.core.spu_block_size != spu_block_size_type::giga && !jt)
{
// Simply external call (return or indirect call)
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
c->xor_(qw0->r32(), qw0->r32());
}
else
{
if (!instr_table.isValid())
{
// Request instruction table
instr_table = c->newLabel();
}
const u32 start = instr_labels.begin()->first;
const u32 end = instr_labels.rbegin()->first + 4;
// Load indirect jump address, choose between local and external
c->lea(x86::r10, x86::qword_ptr(instr_table));
c->lea(*qw1, x86::qword_ptr(*addr, 0 - start));
c->xor_(qw0->r32(), qw0->r32());
c->cmp(qw1->r32(), end - start);
c->cmovae(qw1->r32(), qw0->r32());
c->cmovb(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
c->cmovae(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
}
// Initialize third arg to zero
c->xor_(qw0->r32(), qw0->r32());
if (op.d)
{
c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
c->mov(SPU_OFF_8(interrupts_enabled), 0);
}
else if (op.e)
{
@ -1232,7 +1254,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
Label intr = c->newLabel();
Label fail = c->newLabel();
c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);
c->mov(SPU_OFF_8(interrupts_enabled), 1);
c->mov(qw1->r32(), SPU_OFF_32(ch_event_mask));
c->test(qw1->r32(), ~SPU_EVENT_INTR_IMPLEMENTED);
c->jnz(fail);
@ -1244,19 +1266,50 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
c->mov(SPU_OFF_32(pc), *addr);
c->mov(addr->r64(), reinterpret_cast<u64>(vm::base(0xffdead00)));
c->mov(asmjit::x86::dword_ptr(addr->r64()), "INTR"_u32);
// Save addr in srr0 and disable interrupts
c->bind(intr);
c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
c->mov(SPU_OFF_8(interrupts_enabled), 0);
c->mov(SPU_OFF_32(srr0), *addr);
c->mov(*addr, qw0->r32());
c->mov(x86::r10, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher)));
// Test for BR/BRA instructions (they are equivalent at zero pc)
c->mov(*addr, x86::dword_ptr(*ls));
c->and_(*addr, 0xfffffffd);
c->xor_(*addr, 0x30);
c->bswap(*addr);
c->test(*addr, 0xff80007f);
c->cmovnz(*addr, qw0->r32());
c->shr(*addr, 5);
c->align(kAlignCode, 16);
c->bind(no_intr);
}
Label label_check = c->newLabel();
c->mov(SPU_OFF_32(pc), *addr);
c->cmp(SPU_OFF_32(state), 0);
c->jnz(label_check);
if (!jt && g_cfg.core.spu_block_size != spu_block_size_type::giga)
{
// Simply external call (return or indirect call)
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
}
else
{
if (!instr_table.isValid())
{
// Request instruction table
instr_table = c->newLabel();
}
// Get actual instruction table bounds
const u32 start = instr_labels.begin()->first;
const u32 end = instr_labels.rbegin()->first + 4;
// Load indirect jump address, choose between local and external
c->lea(*qw1, x86::qword_ptr(addr->r64(), 0 - start));
c->lea(x86::r10, x86::qword_ptr(instr_table));
c->cmp(qw1->r32(), end - start);
c->lea(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
c->lea(*qw1, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
c->cmovae(x86::r10, *qw1);
c->mov(x86::r10, x86::qword_ptr(x86::r10));
}
if (g_cfg.core.spu_block_size != spu_block_size_type::safe && ret)
{
@ -1268,6 +1321,10 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
c->cmove(x86::r10, x86::qword_ptr(*qw1));
}
Label label_check = c->newLabel();
c->mov(SPU_OFF_32(pc), *addr);
c->cmp(SPU_OFF_32(state), 0);
c->jnz(label_check);
c->jmp(x86::r10);
c->bind(label_check);
c->mov(*ls, x86::r10);
@ -2856,9 +2913,9 @@ void spu_recompiler::STQX(spu_opcode_t op)
void spu_recompiler::BI(spu_opcode_t op)
{
const auto found = m_targets.find(m_pos);
const auto is_jt = found == m_targets.end() || found->second.size() != 1 || found->second.front() != -1;
const auto is_jt = found == m_targets.end() || found->second.size() > 1;
if (found == m_targets.end() || found->second.empty())
if (found == m_targets.end())
{
LOG_ERROR(SPU, "[0x%x] BI: no targets", m_pos);
}

View File

@ -19,6 +19,9 @@ class spu_runtime
// All dispatchers
std::array<atomic_t<spu_function_t>, 0x10000> m_dispatcher;
// Debug module output location
std::string m_cache_path;
friend class spu_recompiler;
public:

File diff suppressed because it is too large Load Diff

View File

@ -42,12 +42,18 @@ protected:
// GPR modified by the instruction (-1 = not set)
std::array<u8, 0x10000> m_regmod;
// List of possible targets for the instruction ({} = next instruction, {-1} = no targets)
// List of possible targets for the instruction (entry shouldn't exist for simple instructions)
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_targets;
// List of block predecessors (incomplete, doesn't include all fallthrough predecessors)
// List of block predecessors
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_preds;
// List of function entry points and return points (set after BRSL, BRASL, BISL, BISLED)
std::bitset<0x10000> m_entry_info;
// Compressed address of unique entry point for each instruction
std::array<u16, 0x10000> m_entry_map{};
std::shared_ptr<spu_cache> m_cache;
private:
@ -77,9 +83,15 @@ public:
// Get the block at specified address
std::vector<u32> block(const be_t<u32>* ls, u32 lsa);
// Print analyser internal state
void dump(std::string& out);
// Create recompiler instance (ASMJIT)
static std::unique_ptr<spu_recompiler_base> make_asmjit_recompiler();
// Create recompiler instance (LLVM)
static std::unique_ptr<spu_recompiler_base> make_llvm_recompiler();
// Max number of registers (for m_regmod)
static constexpr u8 s_reg_max = 128;
};

View File

@ -527,6 +527,8 @@ void SPUThread::cpu_task()
jit_dispatcher[pc / 4](*this, vm::_ptr<u8>(offset), nullptr);
}
// Print some stats
LOG_NOTICE(SPU, "Stats: block %u (fails: %u);", block_counter, block_failure);
return;
}