1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-26 04:32:35 +01:00

rsx: Implement support for extended vertex programs

- Some games push the limits of the RSX register space and spill vertex program (VP) data into the adjacent unused space.
This commit is contained in:
kd-11 2021-06-27 15:36:18 +03:00 committed by kd-11
parent 3cbdc3a198
commit 2c7c1c501d
12 changed files with 52 additions and 47 deletions

View File

@ -15,7 +15,7 @@ u64 GLGSRender::get_cycles()
GLGSRender::GLGSRender() : GSRender()
{
m_shaders_cache = std::make_unique<gl::shader_cache>(m_prog_buffer, "opengl", "v1.91");
m_shaders_cache = std::make_unique<gl::shader_cache>(m_prog_buffer, "opengl", "v1.92");
if (g_cfg.video.disable_vertex_cache || g_cfg.video.multithreaded_rsx)
m_vertex_cache = std::make_unique<gl::null_vertex_cache>();

View File

@ -36,10 +36,10 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
//u32 last_instruction_address = 0;
//u32 first_instruction_address = entry;
std::stack<u32> call_stack;
std::pair<u32, u32> instruction_range{umax, 0};
std::bitset<512> instructions_to_patch;
std::bitset<rsx::max_vertex_program_instructions> instructions_to_patch;
std::pair<u32, u32> instruction_range{ umax, 0 };
bool has_branch_instruction = false;
std::stack<u32> call_stack;
D3 d3;
D2 d2;
@ -54,7 +54,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
while (true)
{
ensure(current_instruction < 512);
ensure(current_instruction < rsx::max_vertex_program_instructions);
if (result.instruction_mask[current_instruction])
{
@ -120,8 +120,9 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
instructions_to_patch[current_instruction] = true;
has_branch_instruction = true;
d0.HEX = instruction._u32[0];
d2.HEX = instruction._u32[2];
const u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl);
const u32 jump_address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
if (function_call)
{
@ -162,7 +163,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
}
if ((d3.end && (fast_exit || current_instruction >= instruction_range.second)) ||
(current_instruction + 1) == 512)
(current_instruction + 1) == rsx::max_vertex_program_instructions)
{
break;
}
@ -183,7 +184,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
{
fs::file dump(fs::get_cache_dir() + "shaderlog/vp_analyser.bin", fs::rewrite);
dump.write(&entry, 4);
dump.write(data, 512 * 16);
dump.write(data, rsx::max_vertex_program_instructions * 16);
dump.close();
}
@ -215,14 +216,17 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
if (instructions_to_patch[i])
{
d0.HEX = dst[0];
d2.HEX = dst[2];
d3.HEX = dst[3];
u32 address = ((d2.iaddrh << 3) | d3.iaddrl);
u32 address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
address -= instruction_range.first;
d2.iaddrh = (address >> 3);
d0.iaddrh2 = (address >> 9) & 0x1;
d2.iaddrh = (address >> 3) & 0x3F;
d3.iaddrl = (address & 0x7);
dst[0] = d0.HEX;
dst[2] = d2.HEX;
dst[3] = d3.HEX;

View File

@ -22,7 +22,7 @@ namespace program_hash_util
{
struct vertex_program_metadata
{
std::bitset<512> instruction_mask;
std::bitset<rsx::max_vertex_program_instructions> instruction_mask;
u32 ucode_length;
u32 referenced_textures_mask;
};

View File

@ -93,6 +93,13 @@ union D0
u32 vec_result : 1;
u32 : 1;
};
// Overlay that exposes one extra bit of the branch target address.
// Code in this change assembles the full address as
// (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl.
struct
{
u32 : 23; // unnamed padding declared ahead of the field
u32 iaddrh2 : 1; // single-bit field, used as bit 9 of the jump address
u32 : 8; // unnamed padding; the three widths sum to 32 bits
};
};
union D1
@ -239,7 +246,7 @@ struct RSXVertexProgram
u32 base_address;
u32 entry;
std::bitset<512> instruction_mask;
std::bitset<rsx::max_vertex_program_instructions> instruction_mask;
std::set<u32> jump_table;
rsx::texture_dimension_extended get_texture_dimension(u8 id) const

View File

@ -330,7 +330,7 @@ std::string VertexProgramDecompiler::AddCondReg()
u32 VertexProgramDecompiler::GetAddr() const
{
return (d2.iaddrh << 3) | d3.iaddrl;
return (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
}
void VertexProgramDecompiler::AddCode(const std::string& code)

View File

@ -53,8 +53,7 @@ struct VertexProgramDecompiler
}
};
static const usz m_max_instr_count = 512;
Instruction m_instructions[m_max_instr_count];
Instruction m_instructions[rsx::max_vertex_program_instructions];
Instruction* m_cur_instr;
usz m_instr_count;

View File

@ -5,6 +5,11 @@
namespace rsx
{
// Size limits shared by the RSX program handling code.
enum program_limits
{
// Number of vertex program instruction slots. Used to size the transform
// program storage, the per-instruction bitsets and the decompiler's
// instruction array; a serialized instruction mask needs 9 u64 words (544 / 64 rounded up).
// NOTE(review): 544 = 512 + 32 - presumably the 32 extra slots cover the area
// beyond the nominal 512-instruction buffer that some games write into; confirm.
max_vertex_program_instructions = 544
};
#pragma pack(push, 1)
// NOTE: This structure must be packed to match GPU layout.
struct fragment_program_texture_config

View File

@ -501,7 +501,7 @@ VKGSRender::VKGSRender() : GSRender()
else
m_vertex_cache = std::make_unique<vk::weak_vertex_cache>();
m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.91");
m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.92");
open_command_buffer();

View File

@ -30,7 +30,7 @@ namespace rsx
u32 vp_ctrl;
u32 vp_texture_dimensions;
u64 vp_instruction_mask[8];
u64 vp_instruction_mask[9];
u32 vp_base_address;
u32 vp_entry;
@ -365,7 +365,7 @@ namespace rsx
vp.base_address = data.vp_base_address;
vp.entry = data.vp_entry;
pack_bitset<512>(vp.instruction_mask, data.vp_instruction_mask);
pack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data.vp_instruction_mask);
for (u8 index = 0; index < 32; ++index)
{
@ -403,7 +403,7 @@ namespace rsx
data_block.vp_base_address = vp.base_address;
data_block.vp_entry = vp.entry;
unpack_bitset<512>(vp.instruction_mask, data_block.vp_instruction_mask);
unpack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data_block.vp_instruction_mask);
u8 index = 0;
while (index < 32)

View File

@ -467,13 +467,10 @@ namespace rsx
u32 rcount = count;
if (const u32 max = load_pos * 4 + rcount + (index % 4);
max > 512 * 4)
max > max_vertex_program_instructions * 4)
{
// PS3 seems to allow exceeding the program buffer by upto 32 instructions before crashing
// Discard the "excess" instructions to not overflow our transform program buffer
// TODO: Check if the instructions in the overflow area are executed by PS3
rsx_log.warning("Program buffer overflow!");
rcount -= max - (512 * 4);
rsx_log.warning("Program buffer overflow! Attempted to write %u VP instructions.", max / 4);
rcount -= max - (max_vertex_program_instructions * 4);
}
stream_data_to_memory_swapped_u32<true>(&rsx::method_registers.transform_program[load_pos * 4 + index % 4]

View File

@ -8,6 +8,7 @@
#include "rsx_vertex_data.h"
#include "rsx_utils.h"
#include "Emu/Cell/timers.hpp"
#include "Program/program_util.h"
namespace rsx
{
@ -492,7 +493,7 @@ namespace rsx
std::array<vertex_texture, 4> vertex_textures;
std::array<u32, 512 * 4> transform_program{};
std::array<u32, max_vertex_program_instructions * 4> transform_program{};
std::array<u32[4], 512> transform_constants{};
draw_clause current_draw_clause{};

View File

@ -855,18 +855,17 @@ namespace rsx
template <int N>
void unpack_bitset(const std::bitset<N>& block, u64* values)
{
constexpr int count = N / 64;
for (int n = 0; n < count; ++n)
for (int bit = 0, n = -1, shift = 0; bit < N; ++bit, ++shift)
{
int i = (n << 6);
values[n] = 0;
for (int bit = 0; bit < 64; ++bit, ++i)
if ((bit % 64) == 0)
{
if (block[i])
{
values[n] |= (1ull << bit);
}
values[++n] = 0;
shift = 0;
}
if (block[bit])
{
values[n] |= (1ull << shift);
}
}
}
@ -874,18 +873,11 @@ namespace rsx
template <int N>
void pack_bitset(std::bitset<N>& block, u64* values)
{
constexpr int count = N / 64;
for (int n = (count - 1); n >= 0; --n)
for (int n = 0, shift = 0; shift < N; ++n, shift += 64)
{
if ((n + 1) < count)
{
block <<= 64;
}
if (values[n])
{
block |= values[n];
}
std::bitset<N> tmp = values[n];
tmp <<= shift;
block |= tmp;
}
}