1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-23 11:13:19 +01:00

vp: Improve vertex program analyser

- Add dead code elimination
- Fix absolute branch target addresses to take the base address into account
- Patch branch targets to be relative to the base address to improve hash matching
- Bump the shader cache version
- Enable the shader logging option to write out the vertex program binary,
  which is helpful when debugging problems.
This commit is contained in:
kd-11 2018-07-01 20:37:05 +03:00 committed by kd-11
parent bd915bfebd
commit 2ca935a26b
12 changed files with 427 additions and 172 deletions

View File

@ -1,5 +1,8 @@
#include "stdafx.h"
#include "ProgramStateCache.h"
#include "Emu/System.h"
#include <stack>
using namespace program_hash_util;
@ -12,54 +15,222 @@ size_t vertex_program_utils::get_vertex_program_ucode_hash(const RSXVertexProgra
bool end = false;
for (unsigned i = 0; i < program.data.size() / 4; i++)
{
const qword inst = instbuffer[instIndex];
hash ^= inst.dword[0];
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
hash ^= inst.dword[1];
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
if (program.instruction_mask[i])
{
const qword inst = instbuffer[instIndex];
hash ^= inst.dword[0];
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
hash ^= inst.dword[1];
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
}
instIndex++;
}
return hash;
}
vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vertex_program(const std::vector<u32>& data)
vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vertex_program(const u32* data, u32 entry, RSXVertexProgram& dst_prog)
{
u32 ucode_size = 0;
u32 current_instrution = 0;
vertex_program_utils::vertex_program_metadata result;
u32 last_instruction_address = 0;
u32 first_instruction_address = entry;
std::stack<u32> call_stack;
std::pair<u32, u32> instruction_range = { UINT32_MAX, 0 };
std::bitset<512> instructions_to_patch;
bool has_branch_instruction = false;
D3 d3;
D2 d2;
D1 d1;
D0 d0;
for (; ucode_size < data.size(); ucode_size += 4)
std::function<void(u32, bool)> walk_function = [&](u32 start, bool fast_exit)
{
d1.HEX = data[ucode_size + 1];
d3.HEX = data[ucode_size + 3];
u32 current_instrution = start;
std::set<u32> conditional_targets;
switch (d1.sca_opcode)
while (true)
{
case RSX_SCA_OPCODE_BRI:
case RSX_SCA_OPCODE_BRB:
case RSX_SCA_OPCODE_CAL:
case RSX_SCA_OPCODE_CLI:
case RSX_SCA_OPCODE_CLB:
{
d2.HEX = data[ucode_size + 2];
verify(HERE), current_instrution < 512;
u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl) * 4;
last_instruction_address = std::max(last_instruction_address, jump_address);
break;
}
if (result.instruction_mask[current_instrution])
{
if (!fast_exit)
{
// This can be harmless if a dangling RET was encountered before
LOG_ERROR(RSX, "vp_analyser: Possible infinite loop detected");
current_instrution++;
continue;
}
else
{
// Block walk, looking for earliest exit
break;
}
}
const qword* instruction = (const qword*)&data[current_instrution * 4];
d1.HEX = instruction->word[1];
d3.HEX = instruction->word[3];
// Touch current instruction
result.instruction_mask[current_instrution] = 1;
instruction_range.first = std::min(current_instrution, instruction_range.first);
instruction_range.second = std::max(current_instrution, instruction_range.second);
bool static_jump = false;
bool function_call = true;
switch (d1.sca_opcode)
{
case RSX_SCA_OPCODE_BRI:
{
d0.HEX = instruction->word[0];
static_jump = (d0.cond == 0x7);
// Fall through
}
case RSX_SCA_OPCODE_BRB:
{
function_call = false;
// Fall through
}
case RSX_SCA_OPCODE_CAL:
case RSX_SCA_OPCODE_CLI:
case RSX_SCA_OPCODE_CLB:
{
// Need to patch the jump address to be consistent wherever the program is located
instructions_to_patch[current_instrution] = true;
has_branch_instruction = true;
d2.HEX = instruction->word[2];
const u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl);
if (function_call)
{
call_stack.push(current_instrution + 1);
current_instrution = jump_address;
continue;
}
else if (static_jump)
{
// NOTE: This will skip potential jump target blocks between current->target
current_instrution = jump_address;
continue;
}
else
{
// Set possible end address and proceed as usual
conditional_targets.emplace(jump_address);
instruction_range.second = std::max(jump_address, instruction_range.second);
}
break;
}
case RSX_SCA_OPCODE_RET:
{
if (call_stack.empty())
{
LOG_ERROR(RSX, "vp_analyser: RET found outside subroutine call");
}
else
{
current_instrution = call_stack.top();
call_stack.pop();
continue;
}
break;
}
}
if (d3.end && (fast_exit || current_instrution >= instruction_range.second) ||
(current_instrution + 1) == 512)
{
break;
}
current_instrution++;
}
if (d3.end && (ucode_size >= last_instruction_address))
for (const u32 target : conditional_targets)
{
//Jumping over an end label is legal (verified)
break;
if (!result.instruction_mask[target])
{
walk_function(target, true);
}
}
};
if (g_cfg.video.log_programs)
{
fs::file dump(fs::get_config_dir() + "shaderlog/vp_analyser.bin", fs::rewrite);
dump.write(&entry, 4);
dump.write(data, 512 * 16);
dump.close();
}
walk_function(entry, false);
const u32 instruction_count = (instruction_range.second - instruction_range.first + 1);
result.ucode_length = instruction_count * 16;
dst_prog.base_address = instruction_range.first;
dst_prog.entry = entry;
dst_prog.data.resize(instruction_count * 4);
dst_prog.instruction_mask = (result.instruction_mask >> instruction_range.first);
if (!has_branch_instruction)
{
verify(HERE), instruction_range.first == entry;
std::memcpy(dst_prog.data.data(), data + (instruction_range.first * 4), result.ucode_length);
}
else
{
for (u32 i = instruction_range.first, count = 0; i <= instruction_range.second; ++i, ++count)
{
const qword* instruction = (const qword*)&data[i * 4];
qword* dst = (qword*)&dst_prog.data[count * 4];
if (result.instruction_mask[i])
{
dst->dword[0] = instruction->dword[0];
dst->dword[1] = instruction->dword[1];
if (instructions_to_patch[i])
{
d2.HEX = dst->word[2];
d3.HEX = dst->word[3];
u32 address = ((d2.iaddrh << 3) | d3.iaddrl);
address -= instruction_range.first;
d2.iaddrh = (address >> 3);
d3.iaddrl = (address & 0x7);
dst->word[2] = d2.HEX;
dst->word[3] = d3.HEX;
dst_prog.jump_table.emplace(address);
}
}
else
{
dst->dword[0] = 0ull;
dst->dword[1] = 0ull;
}
}
// Verification
for (const u32 target : dst_prog.jump_table)
{
if (!result.instruction_mask[target])
{
LOG_ERROR(RSX, "vp_analyser: Failed, branch target 0x%x was not resolved", target);
}
}
}
return{ ucode_size + 4 };
return result;
}
size_t vertex_program_storage_hash::operator()(const RSXVertexProgram &program) const
@ -75,6 +246,8 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R
return false;
if (binary1.data.size() != binary2.data.size())
return false;
if (binary1.jump_table != binary2.jump_table)
return false;
if (!binary1.skip_vertex_input_check && !binary2.skip_vertex_input_check && binary1.rsx_vertex_inputs != binary2.rsx_vertex_inputs)
return false;
@ -83,10 +256,22 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R
size_t instIndex = 0;
for (unsigned i = 0; i < binary1.data.size() / 4; i++)
{
const qword& inst1 = instBuffer1[instIndex];
const qword& inst2 = instBuffer2[instIndex];
if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1])
const auto active = binary1.instruction_mask[instIndex];
if (active != binary2.instruction_mask[instIndex])
{
return false;
}
if (active)
{
const qword& inst1 = instBuffer1[instIndex];
const qword& inst2 = instBuffer2[instIndex];
if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1])
{
return false;
}
}
instIndex++;
}

View File

@ -29,12 +29,13 @@ namespace program_hash_util
{
struct vertex_program_metadata
{
u32 ucode_size;
std::bitset<512> instruction_mask;
u32 ucode_length;
};
static size_t get_vertex_program_ucode_hash(const RSXVertexProgram &program);
static vertex_program_metadata analyse_vertex_program(const std::vector<u32>& data);
static vertex_program_metadata analyse_vertex_program(const u32* data, u32 entry, RSXVertexProgram& dst_prog);
};
struct vertex_program_storage_hash

View File

@ -409,106 +409,54 @@ std::string VertexProgramDecompiler::BuildCode()
}
VertexProgramDecompiler::VertexProgramDecompiler(const RSXVertexProgram& prog) :
m_data(prog.data)
m_prog(prog)
{
}
std::string VertexProgramDecompiler::Decompile()
{
for (unsigned i = 0; i < PF_PARAM_COUNT; i++)
m_parr.params[i].clear();
m_instr_count = m_data.size() / 4;
for (int i = 0; i < m_max_instr_count; ++i)
{
m_instructions[i].reset();
}
const auto& data = m_prog.data;
m_instr_count = data.size() / 4;
bool is_has_BRA = false;
bool program_end = false;
u32 i = 1;
u32 last_label_addr = 0;
while (i < m_data.size())
for (unsigned i = 0; i < PF_PARAM_COUNT; i++)
{
if (is_has_BRA)
{
d3.HEX = m_data[i];
i += 4;
}
else
{
d1.HEX = m_data[i++];
switch (d1.sca_opcode)
{
case RSX_SCA_OPCODE_BRA:
{
LOG_ERROR(RSX, "Unimplemented VP opcode BRA");
is_has_BRA = true;
m_jump_lvls.clear();
d3.HEX = m_data[++i];
i += 4;
break;
}
case RSX_SCA_OPCODE_BRB:
case RSX_SCA_OPCODE_BRI:
case RSX_SCA_OPCODE_CAL:
case RSX_SCA_OPCODE_CLI:
case RSX_SCA_OPCODE_CLB:
{
d2.HEX = m_data[i++];
d3.HEX = m_data[i];
i += 2;
const u32 label_addr = GetAddr();
last_label_addr = std::max(last_label_addr, label_addr);
m_jump_lvls.emplace(label_addr);
break;
}
default:
{
d3.HEX = m_data[++i];
i += 2;
break;
}
}
}
m_parr.params[i].clear();
}
uint jump_position = 0;
if (is_has_BRA || !m_jump_lvls.empty())
for (int i = 0; i < m_max_instr_count; ++i)
{
m_cur_instr = &m_instructions[0];
AddCode("int jump_position = 0;");
AddCode("while (true)");
AddCode("{");
m_cur_instr->open_scopes++;
m_instructions[i].reset();
}
AddCode(fmt::format("if (jump_position <= %u)", jump_position++));
AddCode("{");
m_cur_instr->open_scopes++;
if (m_prog.jump_table.size())
{
last_label_addr = *m_prog.jump_table.rbegin();
}
auto find_jump_lvl = [this](u32 address)
{
u32 jump = 1;
for (auto pos : m_jump_lvls)
for (auto pos : m_prog.jump_table)
{
if (address == pos)
break;
return jump;
++jump;
}
return jump;
return UINT32_MAX;
};
auto do_function_call = [this, &i](const std::string& condition)
{
//call function
// Call function
// NOTE: Addresses are assumed to have been patched
m_call_stack.push(i+1);
AddCode(condition);
AddCode("{");
@ -552,17 +500,41 @@ std::string VertexProgramDecompiler::Decompile()
}
};
for (i = 0; i < m_instr_count; ++i)
if (is_has_BRA || !m_prog.jump_table.empty())
{
if (m_call_stack.empty())
m_cur_instr = &m_instructions[0];
u32 jump_position = 0;
if (m_prog.entry != m_prog.base_address)
{
m_cur_instr = &m_instructions[i];
jump_position = find_jump_lvl(m_prog.entry - m_prog.base_address);
verify(HERE), jump_position != UINT32_MAX;
}
d0.HEX = m_data[i * 4 + 0];
d1.HEX = m_data[i * 4 + 1];
d2.HEX = m_data[i * 4 + 2];
d3.HEX = m_data[i * 4 + 3];
AddCode(fmt::format("int jump_position = %u;", jump_position));
AddCode("while (true)");
AddCode("{");
m_cur_instr->open_scopes++;
AddCode("if (jump_position <= 0)");
AddCode("{");
m_cur_instr->open_scopes++;
}
for (i = 0; i < m_instr_count; ++i)
{
if (!m_prog.instruction_mask[i])
{
// Dead code, skip
continue;
}
m_cur_instr = &m_instructions[i];
d0.HEX = data[i * 4 + 0];
d1.HEX = data[i * 4 + 1];
d2.HEX = data[i * 4 + 2];
d3.HEX = data[i * 4 + 3];
src[0].src0l = d2.src0l;
src[0].src0h = d1.src0h;
@ -570,27 +542,29 @@ std::string VertexProgramDecompiler::Decompile()
src[2].src2l = d3.src2l;
src[2].src2h = d2.src2h;
if (!src[0].reg_type || !src[1].reg_type || !src[2].reg_type)
{
AddCode("//Src check failed. Aborting");
program_end = true;
}
if (m_call_stack.empty())
if (m_call_stack.empty() && i)
{
//TODO: Subroutines can also have arbitrary jumps!
if (i && (is_has_BRA || std::find(m_jump_lvls.begin(), m_jump_lvls.end(), i) != m_jump_lvls.end()))
u32 jump_position = find_jump_lvl(i);
if (is_has_BRA || jump_position != UINT32_MAX)
{
m_cur_instr->close_scopes++;
AddCode("}");
AddCode("");
AddCode(fmt::format("if (jump_position <= %u)", jump_position++));
AddCode(fmt::format("if (jump_position <= %u)", jump_position));
AddCode("{");
m_cur_instr->open_scopes++;
}
}
if (!src[0].reg_type || !src[1].reg_type || !src[2].reg_type)
{
AddCode("//Src check failed. Aborting");
program_end = true;
d1.vec_opcode = d1.sca_opcode = 0;
}
switch (d1.vec_opcode)
{
case RSX_VEC_OPCODE_NOP: break;
@ -754,7 +728,7 @@ std::string VertexProgramDecompiler::Decompile()
if ((i + 1) < m_instr_count)
{
// In rare cases, this might be harmless (large coalesced program blocks controlled via branches aka ubershaders)
LOG_ERROR(RSX, "Vertex program aborted prematurely. Expect glitches");
LOG_ERROR(RSX, "Vertex program block aborts prematurely. Expect glitches");
}
break;
@ -762,7 +736,7 @@ std::string VertexProgramDecompiler::Decompile()
}
}
if (is_has_BRA || !m_jump_lvls.empty())
if (is_has_BRA || !m_prog.jump_table.empty())
{
m_cur_instr = &m_instructions[m_instr_count - 1];
m_cur_instr->close_scopes++;
@ -774,8 +748,6 @@ std::string VertexProgramDecompiler::Decompile()
std::string result = BuildCode();
m_jump_lvls.clear();
m_body.clear();
return result;
}

View File

@ -53,11 +53,10 @@ struct VertexProgramDecompiler
Instruction* m_cur_instr;
size_t m_instr_count;
std::set<int> m_jump_lvls;
std::vector<std::string> m_body;
std::stack<u32> m_call_stack;
const std::vector<u32>& m_data;
const RSXVertexProgram& m_prog;
ParamArray m_parr;
std::string NotZeroPositive(const std::string& code);

View File

@ -53,7 +53,7 @@ void D3D12GSRender::load_program()
return std::make_tuple(true, native_pitch);
};
get_current_vertex_program();
get_current_vertex_program(false);
get_current_fragment_program_legacy(rtt_lookup_func);
if (!current_fragment_program.valid)

View File

@ -24,7 +24,7 @@ namespace
GLGSRender::GLGSRender() : GSRender()
{
m_shaders_cache.reset(new gl::shader_cache(m_prog_buffer, "opengl", "v1.3"));
m_shaders_cache.reset(new gl::shader_cache(m_prog_buffer, "opengl", "v1.5"));
if (g_cfg.video.disable_vertex_cache)
m_vertex_cache.reset(new gl::null_vertex_cache());

View File

@ -1393,7 +1393,7 @@ namespace rsx
return rsx::get_address(offset_zeta, m_context_dma_z);
}
void thread::get_current_vertex_program()
void thread::get_current_vertex_program(bool skip_vertex_inputs)
{
if (!(m_graphics_state & rsx::pipeline_state::vertex_program_dirty))
return;
@ -1401,57 +1401,60 @@ namespace rsx
m_graphics_state &= ~(rsx::pipeline_state::vertex_program_dirty);
const u32 transform_program_start = rsx::method_registers.transform_program_start();
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
current_vertex_program.skip_vertex_input_check = false;
current_vertex_program.skip_vertex_input_check = skip_vertex_inputs;
current_vertex_program.rsx_vertex_inputs.resize(0);
current_vertex_program.data.resize((512 - transform_program_start) * 4);
current_vertex_program.data.reserve(512 * 4);
current_vertex_program.jump_table.clear();
u32* ucode_src = rsx::method_registers.transform_program.data() + (transform_program_start * 4);
u32* ucode_dst = current_vertex_program.data.data();
current_vp_metadata = program_hash_util::vertex_program_utils::analyse_vertex_program
(
method_registers.transform_program.data(), // Input raw block
transform_program_start, // Address of entry point
current_vertex_program // [out] Program object
);
memcpy(ucode_dst, ucode_src, current_vertex_program.data.size() * sizeof(u32));
current_vp_metadata = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data);
current_vertex_program.data.resize(current_vp_metadata.ucode_size);
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
if (!skip_vertex_inputs)
{
bool enabled = !!(input_mask & (1 << index));
if (!enabled)
continue;
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
if (rsx::method_registers.vertex_arrays_info[index].size() > 0)
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
{
current_vertex_program.rsx_vertex_inputs.push_back(
{index,
rsx::method_registers.vertex_arrays_info[index].size(),
rsx::method_registers.vertex_arrays_info[index].frequency(),
!!((modulo_mask >> index) & 0x1),
true,
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0});
}
else if (vertex_push_buffers[index].vertex_count > 1)
{
current_vertex_program.rsx_vertex_inputs.push_back(
{ index,
rsx::method_registers.register_vertex_info[index].size,
1,
false,
true,
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 });
}
else if (rsx::method_registers.register_vertex_info[index].size > 0)
{
current_vertex_program.rsx_vertex_inputs.push_back(
{index,
rsx::method_registers.register_vertex_info[index].size,
rsx::method_registers.register_vertex_info[index].frequency,
!!((modulo_mask >> index) & 0x1),
false,
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0});
bool enabled = !!(input_mask & (1 << index));
if (!enabled)
continue;
if (rsx::method_registers.vertex_arrays_info[index].size() > 0)
{
current_vertex_program.rsx_vertex_inputs.push_back(
{ index,
rsx::method_registers.vertex_arrays_info[index].size(),
rsx::method_registers.vertex_arrays_info[index].frequency(),
!!((modulo_mask >> index) & 0x1),
true,
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 });
}
else if (vertex_push_buffers[index].vertex_count > 1)
{
current_vertex_program.rsx_vertex_inputs.push_back(
{ index,
rsx::method_registers.register_vertex_info[index].size,
1,
false,
true,
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 });
}
else if (rsx::method_registers.register_vertex_info[index].size > 0)
{
current_vertex_program.rsx_vertex_inputs.push_back(
{ index,
rsx::method_registers.register_vertex_info[index].size,
rsx::method_registers.register_vertex_info[index].frequency,
!!((modulo_mask >> index) & 0x1),
false,
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 });
}
}
}
}

View File

@ -377,7 +377,7 @@ namespace rsx
program_hash_util::fragment_program_utils::fragment_program_metadata current_fp_metadata = {};
program_hash_util::vertex_program_utils::vertex_program_metadata current_vp_metadata = {};
void get_current_vertex_program();
void get_current_vertex_program(bool skip_vertex_inputs = true);
/**
* Gets current fragment program and associated fragment state

View File

@ -1,5 +1,8 @@
#pragma once
#include <bitset>
#include <set>
enum vp_reg_type
{
RSX_VP_REGISTER_TYPE_TEMP = 1,
@ -229,4 +232,9 @@ struct RSXVertexProgram
std::vector<rsx_vertex_input> rsx_vertex_inputs;
u32 output_mask;
bool skip_vertex_input_check;
};
u32 base_address;
u32 entry;
std::bitset<512> instruction_mask;
std::set<u32> jump_table;
};

View File

@ -626,7 +626,7 @@ VKGSRender::VKGSRender() : GSRender()
else
m_vertex_cache.reset(new vk::weak_vertex_cache());
m_shaders_cache.reset(new vk::shader_cache(*m_prog_buffer.get(), "vulkan", "v1.3"));
m_shaders_cache.reset(new vk::shader_cache(*m_prog_buffer.get(), "vulkan", "v1.5"));
open_command_buffer();

View File

@ -378,6 +378,11 @@ namespace rsx
u64 pipeline_storage_hash;
u32 vp_ctrl;
u64 vp_instruction_mask[8];
u32 vp_base_address;
u32 vp_entry;
u16 vp_jump_table[32];
u32 fp_ctrl;
u32 fp_texture_dimensions;
@ -653,6 +658,12 @@ namespace rsx
return;
}
if (vp.jump_table.size() > 32)
{
LOG_ERROR(RSX, "shaders_cache: vertex program has more than 32 jump addresses. Entry not saved to cache");
return;
}
pipeline_data data = pack(pipeline, vp, fp);
std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash);
std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash);
@ -723,6 +734,22 @@ namespace rsx
pipeline_storage_type pipeline = data.pipeline_properties;
vp.output_mask = data.vp_ctrl;
vp.base_address = data.vp_base_address;
vp.entry = data.vp_entry;
pack_bitset<512>(vp.instruction_mask, data.vp_instruction_mask);
for (u8 index = 0; index < 32; ++index)
{
const auto address = data.vp_jump_table[index];
if (address == UINT16_MAX)
{
// End of list marker
break;
}
vp.jump_table.emplace(address);
}
fp.ctrl = data.fp_ctrl;
fp.texture_dimensions = data.fp_texture_dimensions;
@ -753,6 +780,28 @@ namespace rsx
data_block.pipeline_storage_hash = m_storage.get_hash(pipeline);
data_block.vp_ctrl = vp.output_mask;
data_block.vp_base_address = vp.base_address;
data_block.vp_entry = vp.entry;
unpack_bitset<512>(vp.instruction_mask, data_block.vp_instruction_mask);
u8 index = 0;
while (index < 32)
{
if (!index && !vp.jump_table.empty())
{
for (auto &address : vp.jump_table)
{
data_block.vp_jump_table[index++] = (u16)address;
}
}
else
{
// End of list marker
data_block.vp_jump_table[index] = UINT16_MAX;
break;
}
}
data_block.fp_ctrl = fp.ctrl;
data_block.fp_texture_dimensions = fp.texture_dimensions;

View File

@ -5,6 +5,7 @@
#include "gcm_enums.h"
#include <atomic>
#include <memory>
#include <bitset>
// TODO: replace the code below by #include <optional> when C++17 or newer will be used
#include <optional.hpp>
@ -726,4 +727,41 @@ namespace rsx
{
return g_current_renderer;
}
/**
 * Copies the contents of a bitset into an array of 64-bit words.
 *
 * Bit `i` of `block` is stored in bit `(i % 64)` of `values[i / 64]`.
 * Only whole 64-bit words are written: `N / 64` entries of `values` are
 * overwritten, so any trailing `N % 64` bits of `block` are not exported.
 *
 * @tparam N      Number of bits in the bitset.
 * @param  block  Bitset to read from.
 * @param  values Destination array; must hold at least `N / 64` elements.
 */
template <int N>
void unpack_bitset(std::bitset<N>& block, u64* values)
{
	constexpr int count = N / 64;
	for (int n = 0; n < count; ++n)
	{
		int i = (n << 6);
		values[n] = 0;
		for (int bit = 0; bit < 64; ++bit, ++i)
		{
			if (block[i])
			{
				// The shifted literal must be 64 bits wide. A plain `1` is an int:
				// shifting it by 32..63 is undefined behaviour, and shifting by 31
				// produces a negative value that sign-extends into the upper word half.
				values[n] |= (1ull << bit);
			}
		}
	}
}
/**
 * Loads an array of 64-bit words into a bitset.
 *
 * Words are consumed from the highest index down to index 0; before each
 * word other than the first one processed, the bitset is shifted left by 64
 * so that `values[k]` ends up occupying bits `[64 * k, 64 * k + 63]`.
 * The words are OR-ed into `block`, which is not cleared beforehand.
 *
 * @tparam N      Number of bits in the bitset.
 * @param  block  Bitset that receives the bits.
 * @param  values Source array; `N / 64` elements are read.
 */
template <int N>
void pack_bitset(std::bitset<N>& block, u64* values)
{
	constexpr int word_count = N / 64;
	constexpr int top_word = word_count - 1;

	for (int word = top_word; word >= 0; --word)
	{
		// Make room for the next (lower) word; the top word needs no shift
		if (word != top_word)
		{
			block <<= 64;
		}

		// OR-ing in a zero word is a no-op, so it is simply skipped
		const u64 bits = values[word];
		if (bits != 0)
		{
			block |= std::bitset<N>(bits);
		}
	}
}
}