mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-23 11:13:19 +01:00
vp: Improve vertex program analyser
- Add dead code elimination - Fix absolute branch target addresses to take the base address into account - Patch branch targets relative to the base address to improve hash matching - Bump the shader cache version - Enable the shader logging option to write out the vertex program binary, which is helpful when debugging problems.
This commit is contained in:
parent
bd915bfebd
commit
2ca935a26b
@ -1,5 +1,8 @@
|
||||
#include "stdafx.h"
|
||||
#include "ProgramStateCache.h"
|
||||
#include "Emu/System.h"
|
||||
|
||||
#include <stack>
|
||||
|
||||
using namespace program_hash_util;
|
||||
|
||||
@ -12,54 +15,222 @@ size_t vertex_program_utils::get_vertex_program_ucode_hash(const RSXVertexProgra
|
||||
bool end = false;
|
||||
for (unsigned i = 0; i < program.data.size() / 4; i++)
|
||||
{
|
||||
const qword inst = instbuffer[instIndex];
|
||||
hash ^= inst.dword[0];
|
||||
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
|
||||
hash ^= inst.dword[1];
|
||||
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
|
||||
if (program.instruction_mask[i])
|
||||
{
|
||||
const qword inst = instbuffer[instIndex];
|
||||
hash ^= inst.dword[0];
|
||||
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
|
||||
hash ^= inst.dword[1];
|
||||
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
|
||||
}
|
||||
|
||||
instIndex++;
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vertex_program(const std::vector<u32>& data)
|
||||
vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vertex_program(const u32* data, u32 entry, RSXVertexProgram& dst_prog)
|
||||
{
|
||||
u32 ucode_size = 0;
|
||||
u32 current_instrution = 0;
|
||||
vertex_program_utils::vertex_program_metadata result;
|
||||
u32 last_instruction_address = 0;
|
||||
u32 first_instruction_address = entry;
|
||||
|
||||
std::stack<u32> call_stack;
|
||||
std::pair<u32, u32> instruction_range = { UINT32_MAX, 0 };
|
||||
std::bitset<512> instructions_to_patch;
|
||||
bool has_branch_instruction = false;
|
||||
|
||||
D3 d3;
|
||||
D2 d2;
|
||||
D1 d1;
|
||||
D0 d0;
|
||||
|
||||
for (; ucode_size < data.size(); ucode_size += 4)
|
||||
std::function<void(u32, bool)> walk_function = [&](u32 start, bool fast_exit)
|
||||
{
|
||||
d1.HEX = data[ucode_size + 1];
|
||||
d3.HEX = data[ucode_size + 3];
|
||||
u32 current_instrution = start;
|
||||
std::set<u32> conditional_targets;
|
||||
|
||||
switch (d1.sca_opcode)
|
||||
while (true)
|
||||
{
|
||||
case RSX_SCA_OPCODE_BRI:
|
||||
case RSX_SCA_OPCODE_BRB:
|
||||
case RSX_SCA_OPCODE_CAL:
|
||||
case RSX_SCA_OPCODE_CLI:
|
||||
case RSX_SCA_OPCODE_CLB:
|
||||
{
|
||||
d2.HEX = data[ucode_size + 2];
|
||||
verify(HERE), current_instrution < 512;
|
||||
|
||||
u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl) * 4;
|
||||
last_instruction_address = std::max(last_instruction_address, jump_address);
|
||||
break;
|
||||
}
|
||||
if (result.instruction_mask[current_instrution])
|
||||
{
|
||||
if (!fast_exit)
|
||||
{
|
||||
// This can be harmless if a dangling RET was encountered before
|
||||
LOG_ERROR(RSX, "vp_analyser: Possible infinite loop detected");
|
||||
current_instrution++;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Block walk, looking for earliest exit
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const qword* instruction = (const qword*)&data[current_instrution * 4];
|
||||
d1.HEX = instruction->word[1];
|
||||
d3.HEX = instruction->word[3];
|
||||
|
||||
// Touch current instruction
|
||||
result.instruction_mask[current_instrution] = 1;
|
||||
instruction_range.first = std::min(current_instrution, instruction_range.first);
|
||||
instruction_range.second = std::max(current_instrution, instruction_range.second);
|
||||
|
||||
bool static_jump = false;
|
||||
bool function_call = true;
|
||||
|
||||
switch (d1.sca_opcode)
|
||||
{
|
||||
case RSX_SCA_OPCODE_BRI:
|
||||
{
|
||||
d0.HEX = instruction->word[0];
|
||||
static_jump = (d0.cond == 0x7);
|
||||
// Fall through
|
||||
}
|
||||
case RSX_SCA_OPCODE_BRB:
|
||||
{
|
||||
function_call = false;
|
||||
// Fall through
|
||||
}
|
||||
case RSX_SCA_OPCODE_CAL:
|
||||
case RSX_SCA_OPCODE_CLI:
|
||||
case RSX_SCA_OPCODE_CLB:
|
||||
{
|
||||
// Need to patch the jump address to be consistent wherever the program is located
|
||||
instructions_to_patch[current_instrution] = true;
|
||||
has_branch_instruction = true;
|
||||
|
||||
d2.HEX = instruction->word[2];
|
||||
const u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl);
|
||||
|
||||
if (function_call)
|
||||
{
|
||||
call_stack.push(current_instrution + 1);
|
||||
current_instrution = jump_address;
|
||||
continue;
|
||||
}
|
||||
else if (static_jump)
|
||||
{
|
||||
// NOTE: This will skip potential jump target blocks between current->target
|
||||
current_instrution = jump_address;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Set possible end address and proceed as usual
|
||||
conditional_targets.emplace(jump_address);
|
||||
instruction_range.second = std::max(jump_address, instruction_range.second);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case RSX_SCA_OPCODE_RET:
|
||||
{
|
||||
if (call_stack.empty())
|
||||
{
|
||||
LOG_ERROR(RSX, "vp_analyser: RET found outside subroutine call");
|
||||
}
|
||||
else
|
||||
{
|
||||
current_instrution = call_stack.top();
|
||||
call_stack.pop();
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (d3.end && (fast_exit || current_instrution >= instruction_range.second) ||
|
||||
(current_instrution + 1) == 512)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
current_instrution++;
|
||||
}
|
||||
|
||||
if (d3.end && (ucode_size >= last_instruction_address))
|
||||
for (const u32 target : conditional_targets)
|
||||
{
|
||||
//Jumping over an end label is legal (verified)
|
||||
break;
|
||||
if (!result.instruction_mask[target])
|
||||
{
|
||||
walk_function(target, true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (g_cfg.video.log_programs)
|
||||
{
|
||||
fs::file dump(fs::get_config_dir() + "shaderlog/vp_analyser.bin", fs::rewrite);
|
||||
dump.write(&entry, 4);
|
||||
dump.write(data, 512 * 16);
|
||||
dump.close();
|
||||
}
|
||||
|
||||
walk_function(entry, false);
|
||||
|
||||
const u32 instruction_count = (instruction_range.second - instruction_range.first + 1);
|
||||
result.ucode_length = instruction_count * 16;
|
||||
|
||||
dst_prog.base_address = instruction_range.first;
|
||||
dst_prog.entry = entry;
|
||||
dst_prog.data.resize(instruction_count * 4);
|
||||
dst_prog.instruction_mask = (result.instruction_mask >> instruction_range.first);
|
||||
|
||||
if (!has_branch_instruction)
|
||||
{
|
||||
verify(HERE), instruction_range.first == entry;
|
||||
std::memcpy(dst_prog.data.data(), data + (instruction_range.first * 4), result.ucode_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (u32 i = instruction_range.first, count = 0; i <= instruction_range.second; ++i, ++count)
|
||||
{
|
||||
const qword* instruction = (const qword*)&data[i * 4];
|
||||
qword* dst = (qword*)&dst_prog.data[count * 4];
|
||||
|
||||
if (result.instruction_mask[i])
|
||||
{
|
||||
dst->dword[0] = instruction->dword[0];
|
||||
dst->dword[1] = instruction->dword[1];
|
||||
|
||||
if (instructions_to_patch[i])
|
||||
{
|
||||
d2.HEX = dst->word[2];
|
||||
d3.HEX = dst->word[3];
|
||||
|
||||
u32 address = ((d2.iaddrh << 3) | d3.iaddrl);
|
||||
address -= instruction_range.first;
|
||||
|
||||
d2.iaddrh = (address >> 3);
|
||||
d3.iaddrl = (address & 0x7);
|
||||
dst->word[2] = d2.HEX;
|
||||
dst->word[3] = d3.HEX;
|
||||
|
||||
dst_prog.jump_table.emplace(address);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
dst->dword[0] = 0ull;
|
||||
dst->dword[1] = 0ull;
|
||||
}
|
||||
}
|
||||
|
||||
// Verification
|
||||
for (const u32 target : dst_prog.jump_table)
|
||||
{
|
||||
if (!result.instruction_mask[target])
|
||||
{
|
||||
LOG_ERROR(RSX, "vp_analyser: Failed, branch target 0x%x was not resolved", target);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return{ ucode_size + 4 };
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t vertex_program_storage_hash::operator()(const RSXVertexProgram &program) const
|
||||
@ -75,6 +246,8 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R
|
||||
return false;
|
||||
if (binary1.data.size() != binary2.data.size())
|
||||
return false;
|
||||
if (binary1.jump_table != binary2.jump_table)
|
||||
return false;
|
||||
if (!binary1.skip_vertex_input_check && !binary2.skip_vertex_input_check && binary1.rsx_vertex_inputs != binary2.rsx_vertex_inputs)
|
||||
return false;
|
||||
|
||||
@ -83,10 +256,22 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R
|
||||
size_t instIndex = 0;
|
||||
for (unsigned i = 0; i < binary1.data.size() / 4; i++)
|
||||
{
|
||||
const qword& inst1 = instBuffer1[instIndex];
|
||||
const qword& inst2 = instBuffer2[instIndex];
|
||||
if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1])
|
||||
const auto active = binary1.instruction_mask[instIndex];
|
||||
if (active != binary2.instruction_mask[instIndex])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (active)
|
||||
{
|
||||
const qword& inst1 = instBuffer1[instIndex];
|
||||
const qword& inst2 = instBuffer2[instIndex];
|
||||
if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
instIndex++;
|
||||
}
|
||||
|
||||
|
@ -29,12 +29,13 @@ namespace program_hash_util
|
||||
{
|
||||
struct vertex_program_metadata
|
||||
{
|
||||
u32 ucode_size;
|
||||
std::bitset<512> instruction_mask;
|
||||
u32 ucode_length;
|
||||
};
|
||||
|
||||
static size_t get_vertex_program_ucode_hash(const RSXVertexProgram &program);
|
||||
|
||||
static vertex_program_metadata analyse_vertex_program(const std::vector<u32>& data);
|
||||
static vertex_program_metadata analyse_vertex_program(const u32* data, u32 entry, RSXVertexProgram& dst_prog);
|
||||
};
|
||||
|
||||
struct vertex_program_storage_hash
|
||||
|
@ -409,106 +409,54 @@ std::string VertexProgramDecompiler::BuildCode()
|
||||
}
|
||||
|
||||
VertexProgramDecompiler::VertexProgramDecompiler(const RSXVertexProgram& prog) :
|
||||
m_data(prog.data)
|
||||
m_prog(prog)
|
||||
{
|
||||
}
|
||||
|
||||
std::string VertexProgramDecompiler::Decompile()
|
||||
{
|
||||
for (unsigned i = 0; i < PF_PARAM_COUNT; i++)
|
||||
m_parr.params[i].clear();
|
||||
|
||||
m_instr_count = m_data.size() / 4;
|
||||
|
||||
for (int i = 0; i < m_max_instr_count; ++i)
|
||||
{
|
||||
m_instructions[i].reset();
|
||||
}
|
||||
const auto& data = m_prog.data;
|
||||
m_instr_count = data.size() / 4;
|
||||
|
||||
bool is_has_BRA = false;
|
||||
bool program_end = false;
|
||||
u32 i = 1;
|
||||
u32 last_label_addr = 0;
|
||||
|
||||
while (i < m_data.size())
|
||||
for (unsigned i = 0; i < PF_PARAM_COUNT; i++)
|
||||
{
|
||||
if (is_has_BRA)
|
||||
{
|
||||
d3.HEX = m_data[i];
|
||||
i += 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
d1.HEX = m_data[i++];
|
||||
|
||||
switch (d1.sca_opcode)
|
||||
{
|
||||
case RSX_SCA_OPCODE_BRA:
|
||||
{
|
||||
LOG_ERROR(RSX, "Unimplemented VP opcode BRA");
|
||||
is_has_BRA = true;
|
||||
m_jump_lvls.clear();
|
||||
d3.HEX = m_data[++i];
|
||||
i += 4;
|
||||
break;
|
||||
}
|
||||
case RSX_SCA_OPCODE_BRB:
|
||||
case RSX_SCA_OPCODE_BRI:
|
||||
case RSX_SCA_OPCODE_CAL:
|
||||
case RSX_SCA_OPCODE_CLI:
|
||||
case RSX_SCA_OPCODE_CLB:
|
||||
{
|
||||
d2.HEX = m_data[i++];
|
||||
d3.HEX = m_data[i];
|
||||
i += 2;
|
||||
|
||||
const u32 label_addr = GetAddr();
|
||||
last_label_addr = std::max(last_label_addr, label_addr);
|
||||
m_jump_lvls.emplace(label_addr);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
d3.HEX = m_data[++i];
|
||||
i += 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
m_parr.params[i].clear();
|
||||
}
|
||||
|
||||
uint jump_position = 0;
|
||||
if (is_has_BRA || !m_jump_lvls.empty())
|
||||
for (int i = 0; i < m_max_instr_count; ++i)
|
||||
{
|
||||
m_cur_instr = &m_instructions[0];
|
||||
AddCode("int jump_position = 0;");
|
||||
AddCode("while (true)");
|
||||
AddCode("{");
|
||||
m_cur_instr->open_scopes++;
|
||||
m_instructions[i].reset();
|
||||
}
|
||||
|
||||
AddCode(fmt::format("if (jump_position <= %u)", jump_position++));
|
||||
AddCode("{");
|
||||
m_cur_instr->open_scopes++;
|
||||
if (m_prog.jump_table.size())
|
||||
{
|
||||
last_label_addr = *m_prog.jump_table.rbegin();
|
||||
}
|
||||
|
||||
auto find_jump_lvl = [this](u32 address)
|
||||
{
|
||||
u32 jump = 1;
|
||||
|
||||
for (auto pos : m_jump_lvls)
|
||||
for (auto pos : m_prog.jump_table)
|
||||
{
|
||||
if (address == pos)
|
||||
break;
|
||||
return jump;
|
||||
|
||||
++jump;
|
||||
}
|
||||
|
||||
return jump;
|
||||
return UINT32_MAX;
|
||||
};
|
||||
|
||||
auto do_function_call = [this, &i](const std::string& condition)
|
||||
{
|
||||
//call function
|
||||
// Call function
|
||||
// NOTE: Addresses are assumed to have been patched
|
||||
m_call_stack.push(i+1);
|
||||
AddCode(condition);
|
||||
AddCode("{");
|
||||
@ -552,17 +500,41 @@ std::string VertexProgramDecompiler::Decompile()
|
||||
}
|
||||
};
|
||||
|
||||
for (i = 0; i < m_instr_count; ++i)
|
||||
if (is_has_BRA || !m_prog.jump_table.empty())
|
||||
{
|
||||
if (m_call_stack.empty())
|
||||
m_cur_instr = &m_instructions[0];
|
||||
|
||||
u32 jump_position = 0;
|
||||
if (m_prog.entry != m_prog.base_address)
|
||||
{
|
||||
m_cur_instr = &m_instructions[i];
|
||||
jump_position = find_jump_lvl(m_prog.entry - m_prog.base_address);
|
||||
verify(HERE), jump_position != UINT32_MAX;
|
||||
}
|
||||
|
||||
d0.HEX = m_data[i * 4 + 0];
|
||||
d1.HEX = m_data[i * 4 + 1];
|
||||
d2.HEX = m_data[i * 4 + 2];
|
||||
d3.HEX = m_data[i * 4 + 3];
|
||||
AddCode(fmt::format("int jump_position = %u;", jump_position));
|
||||
AddCode("while (true)");
|
||||
AddCode("{");
|
||||
m_cur_instr->open_scopes++;
|
||||
|
||||
AddCode("if (jump_position <= 0)");
|
||||
AddCode("{");
|
||||
m_cur_instr->open_scopes++;
|
||||
}
|
||||
|
||||
for (i = 0; i < m_instr_count; ++i)
|
||||
{
|
||||
if (!m_prog.instruction_mask[i])
|
||||
{
|
||||
// Dead code, skip
|
||||
continue;
|
||||
}
|
||||
|
||||
m_cur_instr = &m_instructions[i];
|
||||
|
||||
d0.HEX = data[i * 4 + 0];
|
||||
d1.HEX = data[i * 4 + 1];
|
||||
d2.HEX = data[i * 4 + 2];
|
||||
d3.HEX = data[i * 4 + 3];
|
||||
|
||||
src[0].src0l = d2.src0l;
|
||||
src[0].src0h = d1.src0h;
|
||||
@ -570,27 +542,29 @@ std::string VertexProgramDecompiler::Decompile()
|
||||
src[2].src2l = d3.src2l;
|
||||
src[2].src2h = d2.src2h;
|
||||
|
||||
if (!src[0].reg_type || !src[1].reg_type || !src[2].reg_type)
|
||||
{
|
||||
AddCode("//Src check failed. Aborting");
|
||||
program_end = true;
|
||||
}
|
||||
|
||||
if (m_call_stack.empty())
|
||||
if (m_call_stack.empty() && i)
|
||||
{
|
||||
//TODO: Subroutines can also have arbitrary jumps!
|
||||
if (i && (is_has_BRA || std::find(m_jump_lvls.begin(), m_jump_lvls.end(), i) != m_jump_lvls.end()))
|
||||
u32 jump_position = find_jump_lvl(i);
|
||||
if (is_has_BRA || jump_position != UINT32_MAX)
|
||||
{
|
||||
m_cur_instr->close_scopes++;
|
||||
AddCode("}");
|
||||
AddCode("");
|
||||
|
||||
AddCode(fmt::format("if (jump_position <= %u)", jump_position++));
|
||||
AddCode(fmt::format("if (jump_position <= %u)", jump_position));
|
||||
AddCode("{");
|
||||
m_cur_instr->open_scopes++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!src[0].reg_type || !src[1].reg_type || !src[2].reg_type)
|
||||
{
|
||||
AddCode("//Src check failed. Aborting");
|
||||
program_end = true;
|
||||
d1.vec_opcode = d1.sca_opcode = 0;
|
||||
}
|
||||
|
||||
switch (d1.vec_opcode)
|
||||
{
|
||||
case RSX_VEC_OPCODE_NOP: break;
|
||||
@ -754,7 +728,7 @@ std::string VertexProgramDecompiler::Decompile()
|
||||
if ((i + 1) < m_instr_count)
|
||||
{
|
||||
// In rare cases, this might be harmless (large coalesced program blocks controlled via branches aka ubershaders)
|
||||
LOG_ERROR(RSX, "Vertex program aborted prematurely. Expect glitches");
|
||||
LOG_ERROR(RSX, "Vertex program block aborts prematurely. Expect glitches");
|
||||
}
|
||||
|
||||
break;
|
||||
@ -762,7 +736,7 @@ std::string VertexProgramDecompiler::Decompile()
|
||||
}
|
||||
}
|
||||
|
||||
if (is_has_BRA || !m_jump_lvls.empty())
|
||||
if (is_has_BRA || !m_prog.jump_table.empty())
|
||||
{
|
||||
m_cur_instr = &m_instructions[m_instr_count - 1];
|
||||
m_cur_instr->close_scopes++;
|
||||
@ -774,8 +748,6 @@ std::string VertexProgramDecompiler::Decompile()
|
||||
|
||||
std::string result = BuildCode();
|
||||
|
||||
m_jump_lvls.clear();
|
||||
m_body.clear();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -53,11 +53,10 @@ struct VertexProgramDecompiler
|
||||
Instruction* m_cur_instr;
|
||||
size_t m_instr_count;
|
||||
|
||||
std::set<int> m_jump_lvls;
|
||||
std::vector<std::string> m_body;
|
||||
std::stack<u32> m_call_stack;
|
||||
|
||||
const std::vector<u32>& m_data;
|
||||
const RSXVertexProgram& m_prog;
|
||||
ParamArray m_parr;
|
||||
|
||||
std::string NotZeroPositive(const std::string& code);
|
||||
|
@ -53,7 +53,7 @@ void D3D12GSRender::load_program()
|
||||
return std::make_tuple(true, native_pitch);
|
||||
};
|
||||
|
||||
get_current_vertex_program();
|
||||
get_current_vertex_program(false);
|
||||
get_current_fragment_program_legacy(rtt_lookup_func);
|
||||
|
||||
if (!current_fragment_program.valid)
|
||||
|
@ -24,7 +24,7 @@ namespace
|
||||
|
||||
GLGSRender::GLGSRender() : GSRender()
|
||||
{
|
||||
m_shaders_cache.reset(new gl::shader_cache(m_prog_buffer, "opengl", "v1.3"));
|
||||
m_shaders_cache.reset(new gl::shader_cache(m_prog_buffer, "opengl", "v1.5"));
|
||||
|
||||
if (g_cfg.video.disable_vertex_cache)
|
||||
m_vertex_cache.reset(new gl::null_vertex_cache());
|
||||
|
@ -1393,7 +1393,7 @@ namespace rsx
|
||||
return rsx::get_address(offset_zeta, m_context_dma_z);
|
||||
}
|
||||
|
||||
void thread::get_current_vertex_program()
|
||||
void thread::get_current_vertex_program(bool skip_vertex_inputs)
|
||||
{
|
||||
if (!(m_graphics_state & rsx::pipeline_state::vertex_program_dirty))
|
||||
return;
|
||||
@ -1401,57 +1401,60 @@ namespace rsx
|
||||
m_graphics_state &= ~(rsx::pipeline_state::vertex_program_dirty);
|
||||
const u32 transform_program_start = rsx::method_registers.transform_program_start();
|
||||
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
|
||||
current_vertex_program.skip_vertex_input_check = false;
|
||||
current_vertex_program.skip_vertex_input_check = skip_vertex_inputs;
|
||||
|
||||
current_vertex_program.rsx_vertex_inputs.resize(0);
|
||||
current_vertex_program.data.resize((512 - transform_program_start) * 4);
|
||||
current_vertex_program.data.reserve(512 * 4);
|
||||
current_vertex_program.jump_table.clear();
|
||||
|
||||
u32* ucode_src = rsx::method_registers.transform_program.data() + (transform_program_start * 4);
|
||||
u32* ucode_dst = current_vertex_program.data.data();
|
||||
current_vp_metadata = program_hash_util::vertex_program_utils::analyse_vertex_program
|
||||
(
|
||||
method_registers.transform_program.data(), // Input raw block
|
||||
transform_program_start, // Address of entry point
|
||||
current_vertex_program // [out] Program object
|
||||
);
|
||||
|
||||
memcpy(ucode_dst, ucode_src, current_vertex_program.data.size() * sizeof(u32));
|
||||
|
||||
current_vp_metadata = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data);
|
||||
current_vertex_program.data.resize(current_vp_metadata.ucode_size);
|
||||
|
||||
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
|
||||
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
|
||||
|
||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
if (!skip_vertex_inputs)
|
||||
{
|
||||
bool enabled = !!(input_mask & (1 << index));
|
||||
if (!enabled)
|
||||
continue;
|
||||
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
|
||||
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
|
||||
|
||||
if (rsx::method_registers.vertex_arrays_info[index].size() > 0)
|
||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
{
|
||||
current_vertex_program.rsx_vertex_inputs.push_back(
|
||||
{index,
|
||||
rsx::method_registers.vertex_arrays_info[index].size(),
|
||||
rsx::method_registers.vertex_arrays_info[index].frequency(),
|
||||
!!((modulo_mask >> index) & 0x1),
|
||||
true,
|
||||
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0});
|
||||
}
|
||||
else if (vertex_push_buffers[index].vertex_count > 1)
|
||||
{
|
||||
current_vertex_program.rsx_vertex_inputs.push_back(
|
||||
{ index,
|
||||
rsx::method_registers.register_vertex_info[index].size,
|
||||
1,
|
||||
false,
|
||||
true,
|
||||
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 });
|
||||
}
|
||||
else if (rsx::method_registers.register_vertex_info[index].size > 0)
|
||||
{
|
||||
current_vertex_program.rsx_vertex_inputs.push_back(
|
||||
{index,
|
||||
rsx::method_registers.register_vertex_info[index].size,
|
||||
rsx::method_registers.register_vertex_info[index].frequency,
|
||||
!!((modulo_mask >> index) & 0x1),
|
||||
false,
|
||||
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0});
|
||||
bool enabled = !!(input_mask & (1 << index));
|
||||
if (!enabled)
|
||||
continue;
|
||||
|
||||
if (rsx::method_registers.vertex_arrays_info[index].size() > 0)
|
||||
{
|
||||
current_vertex_program.rsx_vertex_inputs.push_back(
|
||||
{ index,
|
||||
rsx::method_registers.vertex_arrays_info[index].size(),
|
||||
rsx::method_registers.vertex_arrays_info[index].frequency(),
|
||||
!!((modulo_mask >> index) & 0x1),
|
||||
true,
|
||||
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 });
|
||||
}
|
||||
else if (vertex_push_buffers[index].vertex_count > 1)
|
||||
{
|
||||
current_vertex_program.rsx_vertex_inputs.push_back(
|
||||
{ index,
|
||||
rsx::method_registers.register_vertex_info[index].size,
|
||||
1,
|
||||
false,
|
||||
true,
|
||||
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 });
|
||||
}
|
||||
else if (rsx::method_registers.register_vertex_info[index].size > 0)
|
||||
{
|
||||
current_vertex_program.rsx_vertex_inputs.push_back(
|
||||
{ index,
|
||||
rsx::method_registers.register_vertex_info[index].size,
|
||||
rsx::method_registers.register_vertex_info[index].frequency,
|
||||
!!((modulo_mask >> index) & 0x1),
|
||||
false,
|
||||
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -377,7 +377,7 @@ namespace rsx
|
||||
program_hash_util::fragment_program_utils::fragment_program_metadata current_fp_metadata = {};
|
||||
program_hash_util::vertex_program_utils::vertex_program_metadata current_vp_metadata = {};
|
||||
|
||||
void get_current_vertex_program();
|
||||
void get_current_vertex_program(bool skip_vertex_inputs = true);
|
||||
|
||||
/**
|
||||
* Gets current fragment program and associated fragment state
|
||||
|
@ -1,5 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <set>
|
||||
|
||||
enum vp_reg_type
|
||||
{
|
||||
RSX_VP_REGISTER_TYPE_TEMP = 1,
|
||||
@ -229,4 +232,9 @@ struct RSXVertexProgram
|
||||
std::vector<rsx_vertex_input> rsx_vertex_inputs;
|
||||
u32 output_mask;
|
||||
bool skip_vertex_input_check;
|
||||
};
|
||||
|
||||
u32 base_address;
|
||||
u32 entry;
|
||||
std::bitset<512> instruction_mask;
|
||||
std::set<u32> jump_table;
|
||||
};
|
@ -626,7 +626,7 @@ VKGSRender::VKGSRender() : GSRender()
|
||||
else
|
||||
m_vertex_cache.reset(new vk::weak_vertex_cache());
|
||||
|
||||
m_shaders_cache.reset(new vk::shader_cache(*m_prog_buffer.get(), "vulkan", "v1.3"));
|
||||
m_shaders_cache.reset(new vk::shader_cache(*m_prog_buffer.get(), "vulkan", "v1.5"));
|
||||
|
||||
open_command_buffer();
|
||||
|
||||
|
@ -378,6 +378,11 @@ namespace rsx
|
||||
u64 pipeline_storage_hash;
|
||||
|
||||
u32 vp_ctrl;
|
||||
u64 vp_instruction_mask[8];
|
||||
|
||||
u32 vp_base_address;
|
||||
u32 vp_entry;
|
||||
u16 vp_jump_table[32];
|
||||
|
||||
u32 fp_ctrl;
|
||||
u32 fp_texture_dimensions;
|
||||
@ -653,6 +658,12 @@ namespace rsx
|
||||
return;
|
||||
}
|
||||
|
||||
if (vp.jump_table.size() > 32)
|
||||
{
|
||||
LOG_ERROR(RSX, "shaders_cache: vertex program has more than 32 jump addresses. Entry not saved to cache");
|
||||
return;
|
||||
}
|
||||
|
||||
pipeline_data data = pack(pipeline, vp, fp);
|
||||
std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash);
|
||||
std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash);
|
||||
@ -723,6 +734,22 @@ namespace rsx
|
||||
pipeline_storage_type pipeline = data.pipeline_properties;
|
||||
|
||||
vp.output_mask = data.vp_ctrl;
|
||||
vp.base_address = data.vp_base_address;
|
||||
vp.entry = data.vp_entry;
|
||||
|
||||
pack_bitset<512>(vp.instruction_mask, data.vp_instruction_mask);
|
||||
|
||||
for (u8 index = 0; index < 32; ++index)
|
||||
{
|
||||
const auto address = data.vp_jump_table[index];
|
||||
if (address == UINT16_MAX)
|
||||
{
|
||||
// End of list marker
|
||||
break;
|
||||
}
|
||||
|
||||
vp.jump_table.emplace(address);
|
||||
}
|
||||
|
||||
fp.ctrl = data.fp_ctrl;
|
||||
fp.texture_dimensions = data.fp_texture_dimensions;
|
||||
@ -753,6 +780,28 @@ namespace rsx
|
||||
data_block.pipeline_storage_hash = m_storage.get_hash(pipeline);
|
||||
|
||||
data_block.vp_ctrl = vp.output_mask;
|
||||
data_block.vp_base_address = vp.base_address;
|
||||
data_block.vp_entry = vp.entry;
|
||||
|
||||
unpack_bitset<512>(vp.instruction_mask, data_block.vp_instruction_mask);
|
||||
|
||||
u8 index = 0;
|
||||
while (index < 32)
|
||||
{
|
||||
if (!index && !vp.jump_table.empty())
|
||||
{
|
||||
for (auto &address : vp.jump_table)
|
||||
{
|
||||
data_block.vp_jump_table[index++] = (u16)address;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// End of list marker
|
||||
data_block.vp_jump_table[index] = UINT16_MAX;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
data_block.fp_ctrl = fp.ctrl;
|
||||
data_block.fp_texture_dimensions = fp.texture_dimensions;
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "gcm_enums.h"
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <bitset>
|
||||
|
||||
// TODO: replace the code below by #include <optional> when C++17 or newer will be used
|
||||
#include <optional.hpp>
|
||||
@ -726,4 +727,41 @@ namespace rsx
|
||||
{
|
||||
return g_current_renderer;
|
||||
}
|
||||
|
||||
template <int N>
|
||||
void unpack_bitset(std::bitset<N>& block, u64* values)
|
||||
{
|
||||
constexpr int count = N / 64;
|
||||
for (int n = 0; n < count; ++n)
|
||||
{
|
||||
int i = (n << 6);
|
||||
values[n] = 0;
|
||||
|
||||
for (int bit = 0; bit < 64; ++bit, ++i)
|
||||
{
|
||||
if (block[i])
|
||||
{
|
||||
values[n] |= (1 << bit);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <int N>
|
||||
void pack_bitset(std::bitset<N>& block, u64* values)
|
||||
{
|
||||
constexpr int count = N / 64;
|
||||
for (int n = (count - 1); n >= 0; --n)
|
||||
{
|
||||
if ((n + 1) < count)
|
||||
{
|
||||
block <<= 64;
|
||||
}
|
||||
|
||||
if (values[n])
|
||||
{
|
||||
block |= values[n];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user