mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-25 20:22:30 +01:00
SPU Re: more precise jt generation
Improve analyser, set v1
Fix branch indirect conditional
This commit is contained in:
parent
737db90058
commit
be5c18cc85
@ -1156,10 +1156,18 @@ void spu_recompiler::branch_fixed(u32 target)
|
||||
c->jmp(x86::rax);
|
||||
}
|
||||
|
||||
void spu_recompiler::branch_indirect(spu_opcode_t op)
|
||||
void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !jt)
|
||||
{
|
||||
// Simply external call (return or indirect call)
|
||||
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
|
||||
c->xor_(qw0->r32(), qw0->r32());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!instr_table.isValid())
|
||||
{
|
||||
// Request instruction table
|
||||
@ -1177,6 +1185,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op)
|
||||
c->cmovae(qw1->r32(), qw0->r32());
|
||||
c->cmovb(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
|
||||
c->cmovae(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
|
||||
}
|
||||
|
||||
if (op.d)
|
||||
{
|
||||
@ -2741,7 +2750,7 @@ void spu_recompiler::BI(spu_opcode_t op)
|
||||
{
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
branch_indirect(op);
|
||||
branch_indirect(op, verify(HERE, m_targets[m_pos].size()) > 2);
|
||||
m_pos = -1;
|
||||
}
|
||||
|
||||
|
@ -107,7 +107,7 @@ private:
|
||||
asmjit::X86Mem XmmConst(__m128i data);
|
||||
|
||||
void branch_fixed(u32 target);
|
||||
void branch_indirect(spu_opcode_t op);
|
||||
void branch_indirect(spu_opcode_t op, bool jt = false);
|
||||
void fall(spu_opcode_t op);
|
||||
void save_rcx();
|
||||
void load_rcx();
|
||||
|
@ -89,7 +89,7 @@ void spu_cache::initialize()
|
||||
}
|
||||
|
||||
// SPU cache file (version + block size type)
|
||||
const std::string loc = _main->cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v0.dat";
|
||||
const std::string loc = _main->cache + u8"spu-§" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v1.dat";
|
||||
|
||||
auto cache = std::make_shared<spu_cache>(loc);
|
||||
|
||||
@ -272,14 +272,16 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
result.push_back(lsa);
|
||||
|
||||
// Initialize block entries
|
||||
std::bitset<0x10000>& blocks = m_block_info;
|
||||
blocks.reset();
|
||||
blocks.set(lsa / 4);
|
||||
m_block_info.reset();
|
||||
m_block_info.set(lsa / 4);
|
||||
|
||||
// Simple block entry workload list
|
||||
std::vector<u32> wl;
|
||||
wl.push_back(lsa);
|
||||
|
||||
m_regmod.fill(0xff);
|
||||
m_targets.clear();
|
||||
|
||||
// Value flags (TODO)
|
||||
enum class vf : u32
|
||||
{
|
||||
@ -310,9 +312,9 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
if (target > lsa)
|
||||
{
|
||||
// Check for redundancy
|
||||
if (!blocks[target / 4])
|
||||
if (!m_block_info[target / 4])
|
||||
{
|
||||
blocks[target / 4] = true;
|
||||
m_block_info[target / 4] = true;
|
||||
wl.push_back(target);
|
||||
return;
|
||||
}
|
||||
@ -325,6 +327,8 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
|
||||
wl[wi] += 4;
|
||||
|
||||
m_targets.erase(pos);
|
||||
|
||||
// Analyse instruction
|
||||
switch (const auto type = s_spu_itype.decode(data))
|
||||
{
|
||||
@ -336,7 +340,8 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
case spu_itype::DFTSV:
|
||||
{
|
||||
// Stop before invalid instructions (TODO)
|
||||
blocks[pos / 4] = true;
|
||||
m_targets[pos].push_back(-1);
|
||||
m_block_info[pos / 4] = true;
|
||||
next_block();
|
||||
continue;
|
||||
}
|
||||
@ -349,7 +354,8 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
if (data == 0 || data == 3)
|
||||
{
|
||||
// Stop before null data
|
||||
blocks[pos / 4] = true;
|
||||
m_targets[pos].push_back(-1);
|
||||
m_block_info[pos / 4] = true;
|
||||
next_block();
|
||||
continue;
|
||||
}
|
||||
@ -357,6 +363,7 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
|
||||
{
|
||||
// Stop on special instructions (TODO)
|
||||
m_targets[pos].push_back(-1);
|
||||
next_block();
|
||||
break;
|
||||
}
|
||||
@ -366,6 +373,7 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
|
||||
case spu_itype::IRET:
|
||||
{
|
||||
m_targets[pos].push_back(-1);
|
||||
next_block();
|
||||
break;
|
||||
}
|
||||
@ -382,6 +390,7 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
|
||||
if (type == spu_itype::BISL)
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = +vf::is_const;
|
||||
values[op.rt] = pos + 4;
|
||||
}
|
||||
@ -389,23 +398,24 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
if (test(af, vf::is_const))
|
||||
{
|
||||
const u32 target = spu_branch_target(av);
|
||||
LOG_WARNING(SPU, "[0x%x] At 0x%x: indirect branch to 0x%x", lsa, pos, target);
|
||||
|
||||
if (target == pos + 4)
|
||||
{
|
||||
// Nop (unless BISL)
|
||||
break;
|
||||
LOG_WARNING(SPU, "[0x%x] At 0x%x: indirect branch to next!", lsa, pos);
|
||||
}
|
||||
|
||||
m_targets[pos].push_back(target);
|
||||
|
||||
if (type != spu_itype::BISL || g_cfg.core.spu_block_size == spu_block_size_type::giga)
|
||||
{
|
||||
LOG_WARNING(SPU, "[0x%x] At 0x%x: indirect branch to 0x%x", lsa, pos, target);
|
||||
add_block(target);
|
||||
}
|
||||
|
||||
if (type == spu_itype::BISL && target < lsa)
|
||||
if (type == spu_itype::BISL && target >= lsa && g_cfg.core.spu_block_size == spu_block_size_type::giga)
|
||||
{
|
||||
next_block();
|
||||
break;
|
||||
add_block(pos + 4);
|
||||
}
|
||||
}
|
||||
else if (type == spu_itype::BI && !op.d && !op.e)
|
||||
@ -488,6 +498,8 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
add_block(jt_abs[i]);
|
||||
result[(start - lsa) / 4 + 1 + i] = se_storage<u32>::swap(jt_abs[i]);
|
||||
}
|
||||
|
||||
m_targets.emplace(pos, std::move(jt_abs));
|
||||
}
|
||||
|
||||
if (jt_rel.size() >= jt_abs.size())
|
||||
@ -504,19 +516,33 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
add_block(jt_rel[i]);
|
||||
result[(start - lsa) / 4 + 1 + i] = se_storage<u32>::swap(jt_rel[i] - start);
|
||||
}
|
||||
|
||||
m_targets.emplace(pos, std::move(jt_rel));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (type == spu_itype::BI || type == spu_itype::BISL || g_cfg.core.spu_block_size == spu_block_size_type::safe)
|
||||
if (type == spu_itype::BI || type == spu_itype::BISL)
|
||||
{
|
||||
if (type == spu_itype::BI || g_cfg.core.spu_block_size != spu_block_size_type::giga)
|
||||
{
|
||||
next_block();
|
||||
break;
|
||||
if (m_targets[pos].empty())
|
||||
{
|
||||
m_targets[pos].push_back(-1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
add_block(pos + 4);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_targets[pos].push_back(pos + 4);
|
||||
add_block(pos + 4);
|
||||
}
|
||||
|
||||
next_block();
|
||||
break;
|
||||
}
|
||||
|
||||
@ -525,6 +551,7 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
{
|
||||
const u32 target = spu_branch_target(type == spu_itype::BRASL ? 0 : pos, op.i16);
|
||||
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = +vf::is_const;
|
||||
values[op.rt] = pos + 4;
|
||||
|
||||
@ -534,11 +561,11 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
break;
|
||||
}
|
||||
|
||||
if (target < lsa || g_cfg.core.spu_block_size != spu_block_size_type::giga)
|
||||
m_targets[pos].push_back(target);
|
||||
|
||||
if (target >= lsa && g_cfg.core.spu_block_size == spu_block_size_type::giga)
|
||||
{
|
||||
// Stop on direct calls
|
||||
next_block();
|
||||
break;
|
||||
add_block(pos + 4);
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
|
||||
@ -546,6 +573,7 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
add_block(target);
|
||||
}
|
||||
|
||||
next_block();
|
||||
break;
|
||||
}
|
||||
|
||||
@ -564,15 +592,16 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
break;
|
||||
}
|
||||
|
||||
m_targets[pos].push_back(target);
|
||||
add_block(target);
|
||||
|
||||
if (type == spu_itype::BR || type == spu_itype::BRA)
|
||||
if (type != spu_itype::BR && type != spu_itype::BRA)
|
||||
{
|
||||
// Stop on direct branches
|
||||
next_block();
|
||||
break;
|
||||
m_targets[pos].push_back(pos + 4);
|
||||
add_block(pos + 4);
|
||||
}
|
||||
|
||||
next_block();
|
||||
break;
|
||||
}
|
||||
|
||||
@ -601,61 +630,131 @@ std::vector<u32> spu_recompiler_base::block(const be_t<u32>* ls, u32 lsa)
|
||||
|
||||
case spu_itype::IL:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = +vf::is_const;
|
||||
values[op.rt] = op.si16;
|
||||
break;
|
||||
}
|
||||
case spu_itype::ILA:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = +vf::is_const;
|
||||
values[op.rt] = op.i18;
|
||||
break;
|
||||
}
|
||||
case spu_itype::ILH:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = +vf::is_const;
|
||||
values[op.rt] = op.i16 << 16 | op.i16;
|
||||
break;
|
||||
}
|
||||
case spu_itype::ILHU:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = +vf::is_const;
|
||||
values[op.rt] = op.i16 << 16;
|
||||
break;
|
||||
}
|
||||
case spu_itype::IOHL:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
values[op.rt] = values[op.rt] | op.i16;
|
||||
break;
|
||||
}
|
||||
case spu_itype::ORI:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = vflags[op.ra] & vf::is_const;
|
||||
values[op.rt] = values[op.ra] | op.si10;
|
||||
break;
|
||||
}
|
||||
case spu_itype::OR:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = vflags[op.ra] & vflags[op.rb] & vf::is_const;
|
||||
values[op.rt] = values[op.ra] | values[op.rb];
|
||||
break;
|
||||
}
|
||||
case spu_itype::ANDI:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = vflags[op.ra] & vf::is_const;
|
||||
values[op.rt] = values[op.ra] & op.si10;
|
||||
break;
|
||||
}
|
||||
case spu_itype::AND:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = vflags[op.ra] & vflags[op.rb] & vf::is_const;
|
||||
values[op.rt] = values[op.ra] & values[op.rb];
|
||||
break;
|
||||
}
|
||||
case spu_itype::AI:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = vflags[op.ra] & vf::is_const;
|
||||
values[op.rt] = values[op.ra] + op.si10;
|
||||
break;
|
||||
}
|
||||
case spu_itype::A:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = vflags[op.ra] & vflags[op.rb] & vf::is_const;
|
||||
values[op.rt] = values[op.ra] + values[op.rb];
|
||||
break;
|
||||
}
|
||||
case spu_itype::SFI:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = vflags[op.ra] & vf::is_const;
|
||||
values[op.rt] = op.si10 - values[op.ra];
|
||||
break;
|
||||
}
|
||||
case spu_itype::SF:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
vflags[op.rt] = vflags[op.ra] & vflags[op.rb] & vf::is_const;
|
||||
values[op.rt] = values[op.rb] - values[op.ra];
|
||||
break;
|
||||
}
|
||||
case spu_itype::ROTMI:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
|
||||
if (-op.i7 & 0x20)
|
||||
{
|
||||
vflags[op.rt] = +vf::is_const;
|
||||
values[op.rt] = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
vflags[op.rt] = vflags[op.ra] & vf::is_const;
|
||||
values[op.rt] = values[op.ra] >> (-op.i7 & 0x1f);
|
||||
break;
|
||||
}
|
||||
case spu_itype::SHLI:
|
||||
{
|
||||
m_regmod[pos / 4] = op.rt;
|
||||
|
||||
if (op.i7 & 0x20)
|
||||
{
|
||||
vflags[op.rt] = +vf::is_const;
|
||||
values[op.rt] = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
vflags[op.rt] = vflags[op.ra] & vf::is_const;
|
||||
values[op.rt] = values[op.ra] << (op.i7 & 0x1f);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
// Unconst
|
||||
vflags[type & spu_itype::_quadrop ? +op.rt4 : +op.rt] = {};
|
||||
const u32 op_rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;
|
||||
m_regmod[pos / 4] = op_rt;
|
||||
vflags[op_rt] = {};
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -783,7 +882,6 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||
llvm::Value* m_lsptr;
|
||||
|
||||
llvm::BasicBlock* m_stop;
|
||||
llvm::GlobalVariable* m_jt;
|
||||
|
||||
std::array<std::pair<llvm::Value*, llvm::Value*>, 128> m_gpr;
|
||||
std::array<llvm::Instruction*, 128> m_flush_gpr;
|
||||
@ -1047,27 +1145,15 @@ public:
|
||||
|
||||
m_stop = BasicBlock::Create(m_context, "", m_function);
|
||||
|
||||
const auto jtt = ArrayType::get(GetType<u8*>(), m_size / 4);
|
||||
std::vector<llvm::Constant*> jt;
|
||||
jt.reserve(m_size / 4);
|
||||
|
||||
// Create instruction blocks
|
||||
for (u32 i = 1, pos = start; i < func.size(); i++, pos += 4)
|
||||
{
|
||||
if (func[i] && m_block_info[pos / 4])
|
||||
{
|
||||
const auto b = BasicBlock::Create(m_context, "", m_function);
|
||||
jt.push_back(llvm::BlockAddress::get(b));
|
||||
m_instr_map.emplace(pos, b);
|
||||
}
|
||||
else
|
||||
{
|
||||
jt.push_back(llvm::BlockAddress::get(m_stop));
|
||||
m_instr_map.emplace(pos, BasicBlock::Create(m_context, "", m_function));
|
||||
}
|
||||
}
|
||||
|
||||
m_jt = new GlobalVariable(*module, jtt, true, GlobalValue::PrivateLinkage, llvm::ConstantArray::get(jtt, jt), "jt");
|
||||
|
||||
update_pc();
|
||||
|
||||
const auto label_test = BasicBlock::Create(m_context, "", m_function);
|
||||
@ -2764,24 +2850,43 @@ public:
|
||||
addr.value = call(&exec_check_interrupts, m_thread, addr.value);
|
||||
}
|
||||
|
||||
if (llvm::isa<llvm::ConstantInt>(addr.value))
|
||||
if (const auto _int = llvm::dyn_cast<llvm::ConstantInt>(addr.value))
|
||||
{
|
||||
return branch_fixed(llvm::cast<llvm::ConstantInt>(addr.value)->getZExtValue());
|
||||
LOG_WARNING(SPU, "[0x%x] Fixed branch to 0x%x", m_pos, _int->getZExtValue());
|
||||
return branch_fixed(_int->getZExtValue());
|
||||
}
|
||||
|
||||
m_ir->CreateStore(addr.value, spu_ptr<u32>(&SPUThread::pc));
|
||||
|
||||
const auto tfound = m_targets.find(m_pos);
|
||||
|
||||
if (tfound != m_targets.end() && tfound->second.size() >= 3)
|
||||
{
|
||||
const u32 start = m_instr_map.begin()->first;
|
||||
const auto local = llvm::BasicBlock::Create(m_context, "", m_function);
|
||||
|
||||
const std::set<u32> targets(tfound->second.begin(), tfound->second.end());
|
||||
|
||||
const auto exter = llvm::BasicBlock::Create(m_context, "", m_function);
|
||||
const auto off = m_ir->CreateSub(addr.value, m_ir->getInt32(start));
|
||||
m_ir->CreateCondBr(m_ir->CreateICmpULT(off, m_ir->getInt32(m_size)), local, exter);
|
||||
m_ir->SetInsertPoint(local);
|
||||
const auto table = m_ir->CreateIndirectBr(m_ir->CreateLoad(m_ir->CreateGEP(m_jt, {(llvm::Value*)m_ir->getInt32(0), m_ir->CreateLShr(off, 2)})), m_instr_map.size() + 1);
|
||||
for (const auto& pair : m_instr_map)
|
||||
table->addDestination(pair.second);
|
||||
table->addDestination(m_stop);
|
||||
|
||||
const auto sw = m_ir->CreateSwitch(m_ir->CreateLShr(addr.value, 2, "", true), exter, m_size / 4);
|
||||
|
||||
for (u32 pos = start; pos < start + m_size; pos += 4)
|
||||
{
|
||||
const auto found = m_instr_map.find(pos);
|
||||
|
||||
if (found != m_instr_map.end() && targets.count(pos))
|
||||
{
|
||||
sw->addCase(m_ir->getInt32(pos / 4), found->second);
|
||||
}
|
||||
else
|
||||
{
|
||||
sw->addCase(m_ir->getInt32(pos / 4), m_stop);
|
||||
}
|
||||
}
|
||||
|
||||
m_ir->SetInsertPoint(exter);
|
||||
}
|
||||
|
||||
const auto disp = m_ir->CreateAdd(m_thread, m_ir->getInt64(::offset32(&SPUThread::jit_dispatcher)));
|
||||
const auto type = llvm::FunctionType::get(get_type<void>(), {get_type<u64>(), get_type<u64>(), get_type<u32>()}, false)->getPointerTo()->getPointerTo();
|
||||
tail(m_ir->CreateLoad(m_ir->CreateIntToPtr(m_ir->CreateAdd(disp, zext<u64>(addr << 1).value), type)));
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <vector>
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
// Helper class
|
||||
class spu_cache
|
||||
@ -35,8 +36,15 @@ protected:
|
||||
u32 m_pos;
|
||||
u32 m_size;
|
||||
|
||||
// Bit indicating start of the block
|
||||
std::bitset<0x10000> m_block_info;
|
||||
|
||||
// GPR modified by the instruction (-1 = not set)
|
||||
std::array<u8, 0x10000> m_regmod;
|
||||
|
||||
// List of possible targets for the instruction ({} = next instruction, {-1} = no targets)
|
||||
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_targets;
|
||||
|
||||
std::shared_ptr<spu_cache> m_cache;
|
||||
|
||||
public:
|
||||
|
Loading…
Reference in New Issue
Block a user