1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 18:53:28 +01:00

rsx: Implement support for extended vertex programs

- Some games overrun the 512-instruction vertex program (VP) area of the RSX register space and spill VP data into the adjacent unused space; the limit is raised to 544 instructions to accommodate them.
This commit is contained in:
kd-11 2021-06-27 15:36:18 +03:00 committed by kd-11
parent 3cbdc3a198
commit 2c7c1c501d
12 changed files with 52 additions and 47 deletions

View File

@ -15,7 +15,7 @@ u64 GLGSRender::get_cycles()
GLGSRender::GLGSRender() : GSRender() GLGSRender::GLGSRender() : GSRender()
{ {
m_shaders_cache = std::make_unique<gl::shader_cache>(m_prog_buffer, "opengl", "v1.91"); m_shaders_cache = std::make_unique<gl::shader_cache>(m_prog_buffer, "opengl", "v1.92");
if (g_cfg.video.disable_vertex_cache || g_cfg.video.multithreaded_rsx) if (g_cfg.video.disable_vertex_cache || g_cfg.video.multithreaded_rsx)
m_vertex_cache = std::make_unique<gl::null_vertex_cache>(); m_vertex_cache = std::make_unique<gl::null_vertex_cache>();

View File

@ -36,10 +36,10 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
//u32 last_instruction_address = 0; //u32 last_instruction_address = 0;
//u32 first_instruction_address = entry; //u32 first_instruction_address = entry;
std::stack<u32> call_stack; std::bitset<rsx::max_vertex_program_instructions> instructions_to_patch;
std::pair<u32, u32> instruction_range{umax, 0}; std::pair<u32, u32> instruction_range{ umax, 0 };
std::bitset<512> instructions_to_patch;
bool has_branch_instruction = false; bool has_branch_instruction = false;
std::stack<u32> call_stack;
D3 d3; D3 d3;
D2 d2; D2 d2;
@ -54,7 +54,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
while (true) while (true)
{ {
ensure(current_instruction < 512); ensure(current_instruction < rsx::max_vertex_program_instructions);
if (result.instruction_mask[current_instruction]) if (result.instruction_mask[current_instruction])
{ {
@ -120,8 +120,9 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
instructions_to_patch[current_instruction] = true; instructions_to_patch[current_instruction] = true;
has_branch_instruction = true; has_branch_instruction = true;
d0.HEX = instruction._u32[0];
d2.HEX = instruction._u32[2]; d2.HEX = instruction._u32[2];
const u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl); const u32 jump_address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
if (function_call) if (function_call)
{ {
@ -162,7 +163,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
} }
if ((d3.end && (fast_exit || current_instruction >= instruction_range.second)) || if ((d3.end && (fast_exit || current_instruction >= instruction_range.second)) ||
(current_instruction + 1) == 512) (current_instruction + 1) == rsx::max_vertex_program_instructions)
{ {
break; break;
} }
@ -183,7 +184,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
{ {
fs::file dump(fs::get_cache_dir() + "shaderlog/vp_analyser.bin", fs::rewrite); fs::file dump(fs::get_cache_dir() + "shaderlog/vp_analyser.bin", fs::rewrite);
dump.write(&entry, 4); dump.write(&entry, 4);
dump.write(data, 512 * 16); dump.write(data, rsx::max_vertex_program_instructions * 16);
dump.close(); dump.close();
} }
@ -215,14 +216,17 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
if (instructions_to_patch[i]) if (instructions_to_patch[i])
{ {
d0.HEX = dst[0];
d2.HEX = dst[2]; d2.HEX = dst[2];
d3.HEX = dst[3]; d3.HEX = dst[3];
u32 address = ((d2.iaddrh << 3) | d3.iaddrl); u32 address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
address -= instruction_range.first; address -= instruction_range.first;
d2.iaddrh = (address >> 3); d0.iaddrh2 = (address >> 9) & 0x1;
d2.iaddrh = (address >> 3) & 0x3F;
d3.iaddrl = (address & 0x7); d3.iaddrl = (address & 0x7);
dst[0] = d0.HEX;
dst[2] = d2.HEX; dst[2] = d2.HEX;
dst[3] = d3.HEX; dst[3] = d3.HEX;

View File

@ -22,7 +22,7 @@ namespace program_hash_util
{ {
struct vertex_program_metadata struct vertex_program_metadata
{ {
std::bitset<512> instruction_mask; std::bitset<rsx::max_vertex_program_instructions> instruction_mask;
u32 ucode_length; u32 ucode_length;
u32 referenced_textures_mask; u32 referenced_textures_mask;
}; };

View File

@ -93,6 +93,13 @@ union D0
u32 vec_result : 1; u32 vec_result : 1;
u32 : 1; u32 : 1;
}; };
struct
{
u32 : 23;
u32 iaddrh2 : 1;
u32 : 8;
};
}; };
union D1 union D1
@ -239,7 +246,7 @@ struct RSXVertexProgram
u32 base_address; u32 base_address;
u32 entry; u32 entry;
std::bitset<512> instruction_mask; std::bitset<rsx::max_vertex_program_instructions> instruction_mask;
std::set<u32> jump_table; std::set<u32> jump_table;
rsx::texture_dimension_extended get_texture_dimension(u8 id) const rsx::texture_dimension_extended get_texture_dimension(u8 id) const

View File

@ -330,7 +330,7 @@ std::string VertexProgramDecompiler::AddCondReg()
u32 VertexProgramDecompiler::GetAddr() const u32 VertexProgramDecompiler::GetAddr() const
{ {
return (d2.iaddrh << 3) | d3.iaddrl; return (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
} }
void VertexProgramDecompiler::AddCode(const std::string& code) void VertexProgramDecompiler::AddCode(const std::string& code)

View File

@ -53,8 +53,7 @@ struct VertexProgramDecompiler
} }
}; };
static const usz m_max_instr_count = 512; Instruction m_instructions[rsx::max_vertex_program_instructions];
Instruction m_instructions[m_max_instr_count];
Instruction* m_cur_instr; Instruction* m_cur_instr;
usz m_instr_count; usz m_instr_count;

View File

@ -5,6 +5,11 @@
namespace rsx namespace rsx
{ {
enum program_limits
{
max_vertex_program_instructions = 544
};
#pragma pack(push, 1) #pragma pack(push, 1)
// NOTE: This structure must be packed to match GPU layout. // NOTE: This structure must be packed to match GPU layout.
struct fragment_program_texture_config struct fragment_program_texture_config

View File

@ -501,7 +501,7 @@ VKGSRender::VKGSRender() : GSRender()
else else
m_vertex_cache = std::make_unique<vk::weak_vertex_cache>(); m_vertex_cache = std::make_unique<vk::weak_vertex_cache>();
m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.91"); m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.92");
open_command_buffer(); open_command_buffer();

View File

@ -30,7 +30,7 @@ namespace rsx
u32 vp_ctrl; u32 vp_ctrl;
u32 vp_texture_dimensions; u32 vp_texture_dimensions;
u64 vp_instruction_mask[8]; u64 vp_instruction_mask[9];
u32 vp_base_address; u32 vp_base_address;
u32 vp_entry; u32 vp_entry;
@ -365,7 +365,7 @@ namespace rsx
vp.base_address = data.vp_base_address; vp.base_address = data.vp_base_address;
vp.entry = data.vp_entry; vp.entry = data.vp_entry;
pack_bitset<512>(vp.instruction_mask, data.vp_instruction_mask); pack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data.vp_instruction_mask);
for (u8 index = 0; index < 32; ++index) for (u8 index = 0; index < 32; ++index)
{ {
@ -403,7 +403,7 @@ namespace rsx
data_block.vp_base_address = vp.base_address; data_block.vp_base_address = vp.base_address;
data_block.vp_entry = vp.entry; data_block.vp_entry = vp.entry;
unpack_bitset<512>(vp.instruction_mask, data_block.vp_instruction_mask); unpack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data_block.vp_instruction_mask);
u8 index = 0; u8 index = 0;
while (index < 32) while (index < 32)

View File

@ -467,13 +467,10 @@ namespace rsx
u32 rcount = count; u32 rcount = count;
if (const u32 max = load_pos * 4 + rcount + (index % 4); if (const u32 max = load_pos * 4 + rcount + (index % 4);
max > 512 * 4) max > max_vertex_program_instructions * 4)
{ {
// PS3 seems to allow exceeding the program buffer by upto 32 instructions before crashing rsx_log.warning("Program buffer overflow! Attempted to write %u VP instructions.", max / 4);
// Discard the "excess" instructions to not overflow our transform program buffer rcount -= max - (max_vertex_program_instructions * 4);
// TODO: Check if the instructions in the overflow area are executed by PS3
rsx_log.warning("Program buffer overflow!");
rcount -= max - (512 * 4);
} }
stream_data_to_memory_swapped_u32<true>(&rsx::method_registers.transform_program[load_pos * 4 + index % 4] stream_data_to_memory_swapped_u32<true>(&rsx::method_registers.transform_program[load_pos * 4 + index % 4]

View File

@ -8,6 +8,7 @@
#include "rsx_vertex_data.h" #include "rsx_vertex_data.h"
#include "rsx_utils.h" #include "rsx_utils.h"
#include "Emu/Cell/timers.hpp" #include "Emu/Cell/timers.hpp"
#include "Program/program_util.h"
namespace rsx namespace rsx
{ {
@ -492,7 +493,7 @@ namespace rsx
std::array<vertex_texture, 4> vertex_textures; std::array<vertex_texture, 4> vertex_textures;
std::array<u32, 512 * 4> transform_program{}; std::array<u32, max_vertex_program_instructions * 4> transform_program{};
std::array<u32[4], 512> transform_constants{}; std::array<u32[4], 512> transform_constants{};
draw_clause current_draw_clause{}; draw_clause current_draw_clause{};

View File

@ -855,18 +855,17 @@ namespace rsx
template <int N> template <int N>
void unpack_bitset(const std::bitset<N>& block, u64* values) void unpack_bitset(const std::bitset<N>& block, u64* values)
{ {
constexpr int count = N / 64; for (int bit = 0, n = -1, shift = 0; bit < N; ++bit, ++shift)
for (int n = 0; n < count; ++n)
{ {
int i = (n << 6); if ((bit % 64) == 0)
values[n] = 0;
for (int bit = 0; bit < 64; ++bit, ++i)
{ {
if (block[i]) values[++n] = 0;
{ shift = 0;
values[n] |= (1ull << bit); }
}
if (block[bit])
{
values[n] |= (1ull << shift);
} }
} }
} }
@ -874,18 +873,11 @@ namespace rsx
template <int N> template <int N>
void pack_bitset(std::bitset<N>& block, u64* values) void pack_bitset(std::bitset<N>& block, u64* values)
{ {
constexpr int count = N / 64; for (int n = 0, shift = 0; shift < N; ++n, shift += 64)
for (int n = (count - 1); n >= 0; --n)
{ {
if ((n + 1) < count) std::bitset<N> tmp = values[n];
{ tmp <<= shift;
block <<= 64; block |= tmp;
}
if (values[n])
{
block |= values[n];
}
} }
} }