mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 02:32:36 +01:00
SPU: analyser v4 and fixes
- Build SPU cache after PPU, fix mixing progress
- SPU ASMJIT: add support for Giga mode
- SPU ASMJIT: use the same spu.log location as SPU LLVM
- SPU: improve spu.log disasm
- SPU: improve trampolines, unify with SPU ASMJIT
- SPU: decode interrupt handler address from BR/BRA at 0x0
- SPU LLVM: support Mega/Giga modes
- SPU LLVM: implement function chunks
- SPU LLVM: use PHI nodes, value visibility across basic blocks
- SPU LLVM: implement function chunk table
- New simple memory manager for LLVM (bugfix)
This commit is contained in:
parent
3e433ef05c
commit
e4da284176
@ -308,6 +308,83 @@ struct MemoryManager : llvm::RTDyldMemoryManager
|
||||
}
|
||||
};
|
||||
|
||||
// Simple memory manager
|
||||
struct MemoryManager2 : llvm::RTDyldMemoryManager
|
||||
{
|
||||
// Reserve 2 GiB
|
||||
void* const m_memory = utils::memory_reserve(0x80000000);
|
||||
|
||||
u8* const m_code = static_cast<u8*>(m_memory) + 0x00000000;
|
||||
u8* const m_data = static_cast<u8*>(m_memory) + 0x40000000;
|
||||
|
||||
u64 m_code_pos = 0;
|
||||
u64 m_data_pos = 0;
|
||||
|
||||
MemoryManager2() = default;
|
||||
|
||||
~MemoryManager2() override
|
||||
{
|
||||
utils::memory_release(m_memory, 0x80000000);
|
||||
}
|
||||
|
||||
u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
|
||||
{
|
||||
// Simple allocation
|
||||
const u64 old = m_code_pos;
|
||||
const u64 pos = ::align(m_code_pos, align);
|
||||
m_code_pos = ::align(pos + size, align);
|
||||
|
||||
if (m_code_pos > 0x40000000)
|
||||
{
|
||||
LOG_FATAL(GENERAL, "LLVM: Out of code memory (size=0x%x, align=0x%x)", size, align);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const u64 olda = ::align(old, 0x10000);
|
||||
const u64 newa = ::align(m_code_pos, 0x10000);
|
||||
|
||||
if (olda != newa)
|
||||
{
|
||||
// Commit more memory
|
||||
utils::memory_commit(m_code + olda, newa - olda, utils::protection::wx);
|
||||
}
|
||||
|
||||
LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%x, align=0x%x)", sec_id, sec_name.data(), m_code + pos, size, align);
|
||||
return m_code + pos;
|
||||
}
|
||||
|
||||
u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override
|
||||
{
|
||||
// Simple allocation
|
||||
const u64 old = m_data_pos;
|
||||
const u64 pos = ::align(m_data_pos, align);
|
||||
m_data_pos = ::align(pos + size, align);
|
||||
|
||||
if (m_data_pos > 0x40000000)
|
||||
{
|
||||
LOG_FATAL(GENERAL, "LLVM: Out of data memory (size=0x%x, align=0x%x)", size, align);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const u64 olda = ::align(old, 0x10000);
|
||||
const u64 newa = ::align(m_data_pos, 0x10000);
|
||||
|
||||
if (olda != newa)
|
||||
{
|
||||
// Commit more memory
|
||||
utils::memory_commit(m_data + olda, newa - olda);
|
||||
}
|
||||
|
||||
LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%x, align=0x%x, %s)", sec_id, sec_name.data(), m_data + pos, size, align, is_ro ? "ro" : "rw");
|
||||
return m_data + pos;
|
||||
}
|
||||
|
||||
bool finalizeMemory(std::string* = nullptr) override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Helper class
|
||||
struct EventListener : llvm::JITEventListener
|
||||
{
|
||||
@ -383,7 +460,7 @@ public:
|
||||
std::string name = m_path;
|
||||
name.append(module->getName());
|
||||
fs::file(name, fs::rewrite).write(obj.getBufferStart(), obj.getBufferSize());
|
||||
LOG_SUCCESS(GENERAL, "LLVM: Created module: %s", module->getName().data());
|
||||
LOG_NOTICE(GENERAL, "LLVM: Created module: %s", module->getName().data());
|
||||
}
|
||||
|
||||
static std::unique_ptr<llvm::MemoryBuffer> load(const std::string& path)
|
||||
@ -405,7 +482,7 @@ public:
|
||||
|
||||
if (auto buf = load(path))
|
||||
{
|
||||
LOG_SUCCESS(GENERAL, "LLVM: Loaded module: %s", module->getName().data());
|
||||
LOG_NOTICE(GENERAL, "LLVM: Loaded module: %s", module->getName().data());
|
||||
return buf;
|
||||
}
|
||||
|
||||
@ -464,6 +541,7 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
|
||||
m_engine.reset(llvm::EngineBuilder(std::make_unique<llvm::Module>("null_", m_context))
|
||||
.setErrorStr(&result)
|
||||
.setEngineKind(llvm::EngineKind::JIT)
|
||||
.setMCJITMemoryManager(std::make_unique<MemoryManager2>())
|
||||
.setOptLevel(llvm::CodeGenOpt::Aggressive)
|
||||
.setCodeModel(large ? llvm::CodeModel::Large : llvm::CodeModel::Small)
|
||||
.setMCPU(m_cpu)
|
||||
|
@ -1225,9 +1225,6 @@ extern void ppu_initialize()
|
||||
fmt::throw_exception("Failed to create cache directory: %s (%s)", _main->cache, fs::g_tls_error);
|
||||
}
|
||||
|
||||
// Initialize SPU cache
|
||||
spu_cache::initialize();
|
||||
|
||||
if (Emu.IsStopped())
|
||||
{
|
||||
return;
|
||||
@ -1248,6 +1245,9 @@ extern void ppu_initialize()
|
||||
{
|
||||
ppu_initialize(*ptr);
|
||||
}
|
||||
|
||||
// Initialize SPU cache
|
||||
spu_cache::initialize();
|
||||
}
|
||||
|
||||
extern void ppu_initialize(const ppu_module& info)
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "SPUThread.h"
|
||||
#include "SPUInterpreter.h"
|
||||
#include "Utilities/sysinfo.h"
|
||||
#include "PPUAnalyser.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <mutex>
|
||||
@ -32,6 +33,13 @@ std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_asmjit_recompiler
|
||||
|
||||
spu_runtime::spu_runtime()
|
||||
{
|
||||
m_cache_path = fxm::check_unlocked<ppu_module>()->cache;
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
{
|
||||
fs::file(m_cache_path + "spu.log", fs::rewrite);
|
||||
}
|
||||
|
||||
LOG_SUCCESS(SPU, "SPU Recompiler Runtime (ASMJIT) initialized...");
|
||||
|
||||
// Initialize lookup table
|
||||
@ -97,7 +105,12 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
using namespace asmjit;
|
||||
|
||||
SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode);
|
||||
dis_asm.offset = reinterpret_cast<const u8*>(func.data() + 1) - func[0];
|
||||
dis_asm.offset = reinterpret_cast<const u8*>(func.data() + 1);
|
||||
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
|
||||
{
|
||||
dis_asm.offset -= func[0];
|
||||
}
|
||||
|
||||
StringLogger logger;
|
||||
logger.addOptions(Logger::kOptionBinaryForm);
|
||||
@ -163,15 +176,16 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
|
||||
// Start compilation
|
||||
m_pos = func[0];
|
||||
const u32 start = m_pos;
|
||||
const u32 end = m_pos + (func.size() - 1) * 4;
|
||||
m_size = ::size32(func) * 4 - 4;
|
||||
const u32 start = m_pos * (g_cfg.core.spu_block_size != spu_block_size_type::giga);
|
||||
const u32 end = start + m_size;
|
||||
|
||||
// Create instruction labels (TODO: some of them are unnecessary)
|
||||
for (u32 i = 1; i < func.size(); i++)
|
||||
{
|
||||
if (func[i])
|
||||
{
|
||||
instr_labels[i * 4 - 4 + m_pos] = c->newLabel();
|
||||
instr_labels[i * 4 - 4 + start] = c->newLabel();
|
||||
}
|
||||
}
|
||||
|
||||
@ -210,15 +224,15 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
{
|
||||
// Disable check (unsafe)
|
||||
}
|
||||
else if (func.size() - 1 == 1)
|
||||
else if (m_size == 4)
|
||||
{
|
||||
c->cmp(x86::dword_ptr(*ls, m_pos), func[1]);
|
||||
c->cmp(x86::dword_ptr(*ls, start), func[1]);
|
||||
c->jnz(label_diff);
|
||||
}
|
||||
else if (func.size() - 1 == 2)
|
||||
else if (m_size == 8)
|
||||
{
|
||||
c->mov(*qw1, static_cast<u64>(func[2]) << 32 | func[1]);
|
||||
c->cmp(*qw1, x86::qword_ptr(*ls, m_pos));
|
||||
c->cmp(*qw1, x86::qword_ptr(*ls, start));
|
||||
c->jnz(label_diff);
|
||||
}
|
||||
else if (utils::has_512() && false)
|
||||
@ -226,16 +240,15 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
// AVX-512 optimized check using 512-bit registers (disabled)
|
||||
words_align = 64;
|
||||
|
||||
const u32 starta = m_pos & -64;
|
||||
const u32 starta = start & -64;
|
||||
const u32 enda = ::align(end, 64);
|
||||
const u32 sizea = (enda - starta) / 64;
|
||||
verify(HERE), sizea;
|
||||
|
||||
// Initialize pointers
|
||||
c->lea(x86::rax, x86::qword_ptr(label_code));
|
||||
c->lea(*qw1, x86::qword_ptr(*ls, starta));
|
||||
u32 code_off = 0;
|
||||
u32 ls_off = starta;
|
||||
u32 ls_off = -8192;
|
||||
|
||||
for (u32 j = starta; j < enda; j += 64)
|
||||
{
|
||||
@ -246,6 +259,8 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
continue;
|
||||
}
|
||||
|
||||
const bool first = ls_off == -8192;
|
||||
|
||||
// Ensure small distance for disp8*N
|
||||
if (j - ls_off >= 8192)
|
||||
{
|
||||
@ -279,7 +294,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
c->vmovdqa32(x86::zmm0, x86::zword_ptr(*qw1, j - ls_off));
|
||||
}
|
||||
|
||||
if (j == starta)
|
||||
if (first)
|
||||
{
|
||||
c->vpcmpud(x86::k1, x86::zmm0, x86::zword_ptr(x86::rax, code_off), 4);
|
||||
}
|
||||
@ -291,7 +306,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
|
||||
for (u32 i = j; i < j + 64; i += 4)
|
||||
{
|
||||
words.push_back(i >= m_pos && i < end ? func[(i - m_pos) / 4 + 1] : 0);
|
||||
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0);
|
||||
}
|
||||
|
||||
code_off += 64;
|
||||
@ -305,7 +320,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
// AVX-512 optimized check using 256-bit registers
|
||||
words_align = 32;
|
||||
|
||||
const u32 starta = m_pos & -32;
|
||||
const u32 starta = start & -32;
|
||||
const u32 enda = ::align(end, 32);
|
||||
const u32 sizea = (enda - starta) / 32;
|
||||
verify(HERE), sizea;
|
||||
@ -330,10 +345,10 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
|
||||
for (u32 i = starta; i < enda; i += 4)
|
||||
{
|
||||
words.push_back(i >= m_pos && i < end ? func[(i - m_pos) / 4 + 1] : 0);
|
||||
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0);
|
||||
}
|
||||
}
|
||||
else if (sizea == 2 && (end - m_pos) <= 32)
|
||||
else if (sizea == 2 && (end - start) <= 32)
|
||||
{
|
||||
const u32 cmask0 = get_code_mask(starta, starta + 32);
|
||||
const u32 cmask1 = get_code_mask(starta + 32, enda);
|
||||
@ -347,7 +362,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
|
||||
for (u32 i = starta; i < starta + 32; i += 4)
|
||||
{
|
||||
words.push_back(i >= m_pos ? func[(i - m_pos) / 4 + 1] : i + 32 < end ? func[(i + 32 - m_pos) / 4 + 1] : 0);
|
||||
words.push_back(i >= start ? func[(i - start) / 4 + 1] : i + 32 < end ? func[(i + 32 - start) / 4 + 1] : 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -356,9 +371,8 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
|
||||
// Initialize pointers
|
||||
c->lea(x86::rax, x86::qword_ptr(label_code));
|
||||
c->lea(*qw1, x86::qword_ptr(*ls, starta));
|
||||
u32 code_off = 0;
|
||||
u32 ls_off = starta;
|
||||
u32 ls_off = -4096;
|
||||
|
||||
for (u32 j = starta; j < enda; j += 32)
|
||||
{
|
||||
@ -369,6 +383,8 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
continue;
|
||||
}
|
||||
|
||||
const bool first = ls_off == -4096;
|
||||
|
||||
// Ensure small distance for disp8*N
|
||||
if (j - ls_off >= 4096)
|
||||
{
|
||||
@ -398,7 +414,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
}
|
||||
|
||||
// Perform bitwise comparison and accumulate
|
||||
if (j == starta)
|
||||
if (first)
|
||||
{
|
||||
c->vpxor(x86::ymm0, x86::ymm1, x86::yword_ptr(x86::rax, code_off));
|
||||
}
|
||||
@ -409,7 +425,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
|
||||
for (u32 i = j; i < j + 32; i += 4)
|
||||
{
|
||||
words.push_back(i >= m_pos && i < end ? func[(i - m_pos) / 4 + 1] : 0);
|
||||
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0);
|
||||
}
|
||||
|
||||
code_off += 32;
|
||||
@ -424,7 +440,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
// Mainstream AVX
|
||||
words_align = 32;
|
||||
|
||||
const u32 starta = m_pos & -32;
|
||||
const u32 starta = start & -32;
|
||||
const u32 enda = ::align(end, 32);
|
||||
const u32 sizea = (enda - starta) / 32;
|
||||
verify(HERE), sizea;
|
||||
@ -449,10 +465,10 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
|
||||
for (u32 i = starta; i < enda; i += 4)
|
||||
{
|
||||
words.push_back(i >= m_pos && i < end ? func[(i - m_pos) / 4 + 1] : 0);
|
||||
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0);
|
||||
}
|
||||
}
|
||||
else if (sizea == 2 && (end - m_pos) <= 32)
|
||||
else if (sizea == 2 && (end - start) <= 32)
|
||||
{
|
||||
const u32 cmask0 = get_code_mask(starta, starta + 32);
|
||||
const u32 cmask1 = get_code_mask(starta + 32, enda);
|
||||
@ -466,7 +482,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
|
||||
for (u32 i = starta; i < starta + 32; i += 4)
|
||||
{
|
||||
words.push_back(i >= m_pos ? func[(i - m_pos) / 4 + 1] : i + 32 < end ? func[(i + 32 - m_pos) / 4 + 1] : 0);
|
||||
words.push_back(i >= start ? func[(i - start) / 4 + 1] : i + 32 < end ? func[(i + 32 - start) / 4 + 1] : 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -541,7 +557,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
|
||||
for (u32 i = j; i < j + 32; i += 4)
|
||||
{
|
||||
words.push_back(i >= m_pos && i < end ? func[(i - m_pos) / 4 + 1] : 0);
|
||||
words.push_back(i >= start && i < end ? func[(i - start) / 4 + 1] : 0);
|
||||
}
|
||||
|
||||
code_off += 32;
|
||||
@ -568,7 +584,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
// Compatible SSE2
|
||||
words_align = 16;
|
||||
|
||||
const u32 starta = m_pos & -16;
|
||||
const u32 starta = start & -16;
|
||||
const u32 enda = ::align(end, 16);
|
||||
const u32 sizea = (enda - starta) / 16;
|
||||
verify(HERE), sizea;
|
||||
@ -614,10 +630,10 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
}
|
||||
|
||||
// Determine which value will be duplicated at hole positions
|
||||
const u32 w3 = func.at((j - m_pos + ~::cntlz32(cmask, true) % 4 * 4) / 4 + 1);
|
||||
words.push_back(cmask & 1 ? func[(j - m_pos + 0) / 4 + 1] : w3);
|
||||
words.push_back(cmask & 2 ? func[(j - m_pos + 4) / 4 + 1] : w3);
|
||||
words.push_back(cmask & 4 ? func[(j - m_pos + 8) / 4 + 1] : w3);
|
||||
const u32 w3 = func.at((j - start + ~::cntlz32(cmask, true) % 4 * 4) / 4 + 1);
|
||||
words.push_back(cmask & 1 ? func[(j - start + 0) / 4 + 1] : w3);
|
||||
words.push_back(cmask & 2 ? func[(j - start + 4) / 4 + 1] : w3);
|
||||
words.push_back(cmask & 4 ? func[(j - start + 8) / 4 + 1] : w3);
|
||||
words.push_back(w3);
|
||||
|
||||
// PSHUFD immediate table for all possible hole mask values, holes repeat highest valid word
|
||||
@ -641,7 +657,9 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
0b11100100, // full
|
||||
};
|
||||
|
||||
const auto& dest = !order++ ? reg0 : reg1;
|
||||
const bool first = !order++;
|
||||
|
||||
const auto& dest = first ? reg0 : reg1;
|
||||
|
||||
// Load aligned code block from LS
|
||||
if (cmask != 0xf)
|
||||
@ -656,7 +674,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
// Perform bitwise comparison and accumulate
|
||||
c->xorps(dest, x86::dqword_ptr(x86::rax, code_off));
|
||||
|
||||
if (j != starta && j != starta + 16)
|
||||
if (first)
|
||||
{
|
||||
c->orps(reg0, dest);
|
||||
}
|
||||
@ -690,24 +708,38 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
c->vzeroupper();
|
||||
}
|
||||
|
||||
c->inc(SPU_OFF_64(block_counter));
|
||||
// Acknowledge success and add statistics
|
||||
c->add(SPU_OFF_64(block_counter), ::size32(words) / (words_align / 4));
|
||||
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::giga && m_pos != start)
|
||||
{
|
||||
// Jump to the entry point if necessary
|
||||
c->jmp(instr_labels[m_pos]);
|
||||
m_pos = -1;
|
||||
}
|
||||
|
||||
for (u32 i = 1; i < func.size(); i++)
|
||||
{
|
||||
const u32 pos = start + (i - 1) * 4;
|
||||
const u32 op = se_storage<u32>::swap(func[i]);
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
{
|
||||
// Disasm
|
||||
dis_asm.dump_pc = pos;
|
||||
dis_asm.disasm(pos);
|
||||
compiler.comment(dis_asm.last_opcode.c_str());
|
||||
|
||||
if (op)
|
||||
{
|
||||
log += '>';
|
||||
log += dis_asm.last_opcode;
|
||||
log += '\n';
|
||||
}
|
||||
|
||||
// Get opcode
|
||||
const u32 op = se_storage<u32>::swap(func[i]);
|
||||
else
|
||||
{
|
||||
fmt::append(log, ">[%08x] xx xx xx xx: <hole>\n", pos);
|
||||
}
|
||||
}
|
||||
|
||||
if (!op)
|
||||
{
|
||||
@ -738,6 +770,12 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
c->bind(found->second);
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
{
|
||||
// Disasm inside the ASMJIT log
|
||||
compiler.comment(dis_asm.last_opcode.c_str());
|
||||
}
|
||||
|
||||
// Execute recompiler function
|
||||
(this->*s_spu_decoder.decode(op))({op});
|
||||
|
||||
@ -751,6 +789,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
if (g_cfg.core.spu_debug)
|
||||
{
|
||||
log += '\n';
|
||||
this->dump(log);
|
||||
}
|
||||
|
||||
// Make fallthrough if necessary
|
||||
@ -784,6 +823,10 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
c->align(kAlignData, 8);
|
||||
c->bind(instr_table);
|
||||
|
||||
// Get actual instruction table bounds
|
||||
const u32 start = instr_labels.begin()->first;
|
||||
const u32 end = instr_labels.rbegin()->first + 4;
|
||||
|
||||
for (u32 addr = start; addr < end; addr += 4)
|
||||
{
|
||||
const auto found = instr_labels.find(addr);
|
||||
@ -825,6 +868,22 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
// Register function
|
||||
fn_location = fn;
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
{
|
||||
// Add ASMJIT logs
|
||||
fmt::append(log, "Address: %p\n\n", fn);
|
||||
log += logger.getString();
|
||||
log += "\n\n\n";
|
||||
|
||||
// Append log file
|
||||
fs::file(m_spurt->m_cache_path + "spu.log", fs::write + fs::append).write(log);
|
||||
}
|
||||
|
||||
if (m_cache && g_cfg.core.spu_cache)
|
||||
{
|
||||
m_cache->add(func);
|
||||
}
|
||||
|
||||
// Generate a dispatcher (übertrampoline)
|
||||
std::vector<u32> addrv{func[0]};
|
||||
const auto beg = m_spurt->m_map.lower_bound(addrv);
|
||||
@ -886,6 +945,12 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
it = it2;
|
||||
size1 = w.size - size2;
|
||||
|
||||
if (w.level >= w.beg->first.size())
|
||||
{
|
||||
// Cannot split: smallest function is a prefix of bigger ones (TODO)
|
||||
break;
|
||||
}
|
||||
|
||||
const u32 x1 = w.beg->first.at(w.level);
|
||||
|
||||
if (!x1)
|
||||
@ -914,6 +979,20 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
}
|
||||
}
|
||||
|
||||
if (w.label.isValid())
|
||||
{
|
||||
c->align(kAlignCode, 16);
|
||||
c->bind(w.label);
|
||||
}
|
||||
|
||||
if (w.level >= w.beg->first.size())
|
||||
{
|
||||
// If functions cannot be compared, assume smallest function
|
||||
LOG_ERROR(SPU, "Trampoline simplified at 0x%x (level=%u)", func[0], w.level);
|
||||
c->jmp(imm_ptr(w.beg->second ? w.beg->second : &dispatch));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Value for comparison
|
||||
const u32 x = it->first.at(w.level);
|
||||
|
||||
@ -933,13 +1012,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
size2++;
|
||||
}
|
||||
|
||||
if (w.label.isValid())
|
||||
{
|
||||
c->align(kAlignCode, 16);
|
||||
c->bind(w.label);
|
||||
}
|
||||
|
||||
c->cmp(x86::dword_ptr(*ls, func[0] + (w.level - 1) * 4), x);
|
||||
c->cmp(x86::dword_ptr(*ls, start + (w.level - 1) * 4), x);
|
||||
|
||||
// Low subrange target label
|
||||
Label label_below;
|
||||
@ -1044,22 +1117,6 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
|
||||
m_spurt->m_dispatcher[func[0] / 4] = tr;
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
{
|
||||
// Add ASMJIT logs
|
||||
fmt::append(log, "Address: %p (%p)\n\n", fn, +m_spurt->m_dispatcher[func[0] / 4]);
|
||||
log += logger.getString();
|
||||
log += "\n\n\n";
|
||||
|
||||
// Append log file
|
||||
fs::file(Emu.GetCachePath() + "SPUJIT.log", fs::write + fs::append).write(log);
|
||||
}
|
||||
|
||||
if (m_cache && g_cfg.core.spu_cache)
|
||||
{
|
||||
m_cache->add(func);
|
||||
}
|
||||
|
||||
return fn;
|
||||
}
|
||||
|
||||
@ -1131,17 +1188,6 @@ static void check_state(SPUThread* _spu, spu_function_t _ret)
|
||||
_ret = &check_state_ret;
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
||||
{
|
||||
// Get stack pointer, try to use native return address (check SPU return address)
|
||||
const auto x = _spu->stack_mirror[(_spu->gpr[1]._u32[3] & 0x3fff0) >> 4];
|
||||
|
||||
if (x._u32[2] == _spu->pc)
|
||||
{
|
||||
_ret = reinterpret_cast<spu_function_t>(x._u64[0]);
|
||||
}
|
||||
}
|
||||
|
||||
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
|
||||
}
|
||||
|
||||
@ -1195,36 +1241,12 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::giga && !jt)
|
||||
{
|
||||
// Simply external call (return or indirect call)
|
||||
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
|
||||
// Initialize third arg to zero
|
||||
c->xor_(qw0->r32(), qw0->r32());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!instr_table.isValid())
|
||||
{
|
||||
// Request instruction table
|
||||
instr_table = c->newLabel();
|
||||
}
|
||||
|
||||
const u32 start = instr_labels.begin()->first;
|
||||
const u32 end = instr_labels.rbegin()->first + 4;
|
||||
|
||||
// Load indirect jump address, choose between local and external
|
||||
c->lea(x86::r10, x86::qword_ptr(instr_table));
|
||||
c->lea(*qw1, x86::qword_ptr(*addr, 0 - start));
|
||||
c->xor_(qw0->r32(), qw0->r32());
|
||||
c->cmp(qw1->r32(), end - start);
|
||||
c->cmovae(qw1->r32(), qw0->r32());
|
||||
c->cmovb(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
|
||||
c->cmovae(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
|
||||
}
|
||||
|
||||
if (op.d)
|
||||
{
|
||||
c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
|
||||
c->mov(SPU_OFF_8(interrupts_enabled), 0);
|
||||
}
|
||||
else if (op.e)
|
||||
{
|
||||
@ -1232,7 +1254,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
|
||||
Label intr = c->newLabel();
|
||||
Label fail = c->newLabel();
|
||||
|
||||
c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);
|
||||
c->mov(SPU_OFF_8(interrupts_enabled), 1);
|
||||
c->mov(qw1->r32(), SPU_OFF_32(ch_event_mask));
|
||||
c->test(qw1->r32(), ~SPU_EVENT_INTR_IMPLEMENTED);
|
||||
c->jnz(fail);
|
||||
@ -1244,19 +1266,50 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->mov(addr->r64(), reinterpret_cast<u64>(vm::base(0xffdead00)));
|
||||
c->mov(asmjit::x86::dword_ptr(addr->r64()), "INTR"_u32);
|
||||
|
||||
// Save addr in srr0 and disable interrupts
|
||||
c->bind(intr);
|
||||
c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
|
||||
c->mov(SPU_OFF_8(interrupts_enabled), 0);
|
||||
c->mov(SPU_OFF_32(srr0), *addr);
|
||||
c->mov(*addr, qw0->r32());
|
||||
c->mov(x86::r10, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher)));
|
||||
|
||||
// Test for BR/BRA instructions (they are equivalent at zero pc)
|
||||
c->mov(*addr, x86::dword_ptr(*ls));
|
||||
c->and_(*addr, 0xfffffffd);
|
||||
c->xor_(*addr, 0x30);
|
||||
c->bswap(*addr);
|
||||
c->test(*addr, 0xff80007f);
|
||||
c->cmovnz(*addr, qw0->r32());
|
||||
c->shr(*addr, 5);
|
||||
c->align(kAlignCode, 16);
|
||||
c->bind(no_intr);
|
||||
}
|
||||
|
||||
Label label_check = c->newLabel();
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->cmp(SPU_OFF_32(state), 0);
|
||||
c->jnz(label_check);
|
||||
if (!jt && g_cfg.core.spu_block_size != spu_block_size_type::giga)
|
||||
{
|
||||
// Simply external call (return or indirect call)
|
||||
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!instr_table.isValid())
|
||||
{
|
||||
// Request instruction table
|
||||
instr_table = c->newLabel();
|
||||
}
|
||||
|
||||
// Get actual instruction table bounds
|
||||
const u32 start = instr_labels.begin()->first;
|
||||
const u32 end = instr_labels.rbegin()->first + 4;
|
||||
|
||||
// Load indirect jump address, choose between local and external
|
||||
c->lea(*qw1, x86::qword_ptr(addr->r64(), 0 - start));
|
||||
c->lea(x86::r10, x86::qword_ptr(instr_table));
|
||||
c->cmp(qw1->r32(), end - start);
|
||||
c->lea(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
|
||||
c->lea(*qw1, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
|
||||
c->cmovae(x86::r10, *qw1);
|
||||
c->mov(x86::r10, x86::qword_ptr(x86::r10));
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::safe && ret)
|
||||
{
|
||||
@ -1268,6 +1321,10 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
|
||||
c->cmove(x86::r10, x86::qword_ptr(*qw1));
|
||||
}
|
||||
|
||||
Label label_check = c->newLabel();
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->cmp(SPU_OFF_32(state), 0);
|
||||
c->jnz(label_check);
|
||||
c->jmp(x86::r10);
|
||||
c->bind(label_check);
|
||||
c->mov(*ls, x86::r10);
|
||||
@ -2856,9 +2913,9 @@ void spu_recompiler::STQX(spu_opcode_t op)
|
||||
void spu_recompiler::BI(spu_opcode_t op)
|
||||
{
|
||||
const auto found = m_targets.find(m_pos);
|
||||
const auto is_jt = found == m_targets.end() || found->second.size() != 1 || found->second.front() != -1;
|
||||
const auto is_jt = found == m_targets.end() || found->second.size() > 1;
|
||||
|
||||
if (found == m_targets.end() || found->second.empty())
|
||||
if (found == m_targets.end())
|
||||
{
|
||||
LOG_ERROR(SPU, "[0x%x] BI: no targets", m_pos);
|
||||
}
|
||||
|
@ -19,6 +19,9 @@ class spu_runtime
|
||||
// All dispatchers
|
||||
std::array<atomic_t<spu_function_t>, 0x10000> m_dispatcher;
|
||||
|
||||
// Debug module output location
|
||||
std::string m_cache_path;
|
||||
|
||||
friend class spu_recompiler;
|
||||
|
||||
public:
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -42,12 +42,18 @@ protected:
|
||||
// GPR modified by the instruction (-1 = not set)
|
||||
std::array<u8, 0x10000> m_regmod;
|
||||
|
||||
// List of possible targets for the instruction ({} = next instruction, {-1} = no targets)
|
||||
// List of possible targets for the instruction (entry shouldn't exist for simple instructions)
|
||||
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_targets;
|
||||
|
||||
// List of block predecessors (incomplete, doesn't include all fallthrough predecessors)
|
||||
// List of block predecessors
|
||||
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_preds;
|
||||
|
||||
// List of function entry points and return points (set after BRSL, BRASL, BISL, BISLED)
|
||||
std::bitset<0x10000> m_entry_info;
|
||||
|
||||
// Compressed address of unique entry point for each instruction
|
||||
std::array<u16, 0x10000> m_entry_map{};
|
||||
|
||||
std::shared_ptr<spu_cache> m_cache;
|
||||
|
||||
private:
|
||||
@ -77,9 +83,15 @@ public:
|
||||
// Get the block at specified address
|
||||
std::vector<u32> block(const be_t<u32>* ls, u32 lsa);
|
||||
|
||||
// Print analyser internal state
|
||||
void dump(std::string& out);
|
||||
|
||||
// Create recompiler instance (ASMJIT)
|
||||
static std::unique_ptr<spu_recompiler_base> make_asmjit_recompiler();
|
||||
|
||||
// Create recompiler instance (LLVM)
|
||||
static std::unique_ptr<spu_recompiler_base> make_llvm_recompiler();
|
||||
|
||||
// Max number of registers (for m_regmod)
|
||||
static constexpr u8 s_reg_max = 128;
|
||||
};
|
||||
|
@ -527,6 +527,8 @@ void SPUThread::cpu_task()
|
||||
jit_dispatcher[pc / 4](*this, vm::_ptr<u8>(offset), nullptr);
|
||||
}
|
||||
|
||||
// Print some stats
|
||||
LOG_NOTICE(SPU, "Stats: block %u (fails: %u);", block_counter, block_failure);
|
||||
return;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user