mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 10:42:36 +01:00
rsx: Implement support for extended vertex programs
- Some games push the limits of the RSX register space and spill vertex program (VP) data into the adjacent unused space.
This commit is contained in:
parent
3cbdc3a198
commit
2c7c1c501d
@ -15,7 +15,7 @@ u64 GLGSRender::get_cycles()
|
||||
|
||||
GLGSRender::GLGSRender() : GSRender()
|
||||
{
|
||||
m_shaders_cache = std::make_unique<gl::shader_cache>(m_prog_buffer, "opengl", "v1.91");
|
||||
m_shaders_cache = std::make_unique<gl::shader_cache>(m_prog_buffer, "opengl", "v1.92");
|
||||
|
||||
if (g_cfg.video.disable_vertex_cache || g_cfg.video.multithreaded_rsx)
|
||||
m_vertex_cache = std::make_unique<gl::null_vertex_cache>();
|
||||
|
@ -36,10 +36,10 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
||||
//u32 last_instruction_address = 0;
|
||||
//u32 first_instruction_address = entry;
|
||||
|
||||
std::stack<u32> call_stack;
|
||||
std::pair<u32, u32> instruction_range{umax, 0};
|
||||
std::bitset<512> instructions_to_patch;
|
||||
std::bitset<rsx::max_vertex_program_instructions> instructions_to_patch;
|
||||
std::pair<u32, u32> instruction_range{ umax, 0 };
|
||||
bool has_branch_instruction = false;
|
||||
std::stack<u32> call_stack;
|
||||
|
||||
D3 d3;
|
||||
D2 d2;
|
||||
@ -54,7 +54,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
||||
|
||||
while (true)
|
||||
{
|
||||
ensure(current_instruction < 512);
|
||||
ensure(current_instruction < rsx::max_vertex_program_instructions);
|
||||
|
||||
if (result.instruction_mask[current_instruction])
|
||||
{
|
||||
@ -120,8 +120,9 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
||||
instructions_to_patch[current_instruction] = true;
|
||||
has_branch_instruction = true;
|
||||
|
||||
d0.HEX = instruction._u32[0];
|
||||
d2.HEX = instruction._u32[2];
|
||||
const u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl);
|
||||
const u32 jump_address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
|
||||
|
||||
if (function_call)
|
||||
{
|
||||
@ -162,7 +163,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
||||
}
|
||||
|
||||
if ((d3.end && (fast_exit || current_instruction >= instruction_range.second)) ||
|
||||
(current_instruction + 1) == 512)
|
||||
(current_instruction + 1) == rsx::max_vertex_program_instructions)
|
||||
{
|
||||
break;
|
||||
}
|
||||
@ -183,7 +184,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
||||
{
|
||||
fs::file dump(fs::get_cache_dir() + "shaderlog/vp_analyser.bin", fs::rewrite);
|
||||
dump.write(&entry, 4);
|
||||
dump.write(data, 512 * 16);
|
||||
dump.write(data, rsx::max_vertex_program_instructions * 16);
|
||||
dump.close();
|
||||
}
|
||||
|
||||
@ -215,14 +216,17 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
||||
|
||||
if (instructions_to_patch[i])
|
||||
{
|
||||
d0.HEX = dst[0];
|
||||
d2.HEX = dst[2];
|
||||
d3.HEX = dst[3];
|
||||
|
||||
u32 address = ((d2.iaddrh << 3) | d3.iaddrl);
|
||||
u32 address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
|
||||
address -= instruction_range.first;
|
||||
|
||||
d2.iaddrh = (address >> 3);
|
||||
d0.iaddrh2 = (address >> 9) & 0x1;
|
||||
d2.iaddrh = (address >> 3) & 0x3F;
|
||||
d3.iaddrl = (address & 0x7);
|
||||
dst[0] = d0.HEX;
|
||||
dst[2] = d2.HEX;
|
||||
dst[3] = d3.HEX;
|
||||
|
||||
|
@ -22,7 +22,7 @@ namespace program_hash_util
|
||||
{
|
||||
struct vertex_program_metadata
|
||||
{
|
||||
std::bitset<512> instruction_mask;
|
||||
std::bitset<rsx::max_vertex_program_instructions> instruction_mask;
|
||||
u32 ucode_length;
|
||||
u32 referenced_textures_mask;
|
||||
};
|
||||
|
@ -93,6 +93,13 @@ union D0
|
||||
u32 vec_result : 1;
|
||||
u32 : 1;
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
u32 : 23;
|
||||
u32 iaddrh2 : 1;
|
||||
u32 : 8;
|
||||
};
|
||||
};
|
||||
|
||||
union D1
|
||||
@ -239,7 +246,7 @@ struct RSXVertexProgram
|
||||
|
||||
u32 base_address;
|
||||
u32 entry;
|
||||
std::bitset<512> instruction_mask;
|
||||
std::bitset<rsx::max_vertex_program_instructions> instruction_mask;
|
||||
std::set<u32> jump_table;
|
||||
|
||||
rsx::texture_dimension_extended get_texture_dimension(u8 id) const
|
||||
|
@ -330,7 +330,7 @@ std::string VertexProgramDecompiler::AddCondReg()
|
||||
|
||||
u32 VertexProgramDecompiler::GetAddr() const
|
||||
{
|
||||
return (d2.iaddrh << 3) | d3.iaddrl;
|
||||
return (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
|
||||
}
|
||||
|
||||
void VertexProgramDecompiler::AddCode(const std::string& code)
|
||||
|
@ -53,8 +53,7 @@ struct VertexProgramDecompiler
|
||||
}
|
||||
};
|
||||
|
||||
static const usz m_max_instr_count = 512;
|
||||
Instruction m_instructions[m_max_instr_count];
|
||||
Instruction m_instructions[rsx::max_vertex_program_instructions];
|
||||
Instruction* m_cur_instr;
|
||||
usz m_instr_count;
|
||||
|
||||
|
@ -5,6 +5,11 @@
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
enum program_limits
|
||||
{
|
||||
max_vertex_program_instructions = 544
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
// NOTE: This structure must be packed to match GPU layout.
|
||||
struct fragment_program_texture_config
|
||||
|
@ -501,7 +501,7 @@ VKGSRender::VKGSRender() : GSRender()
|
||||
else
|
||||
m_vertex_cache = std::make_unique<vk::weak_vertex_cache>();
|
||||
|
||||
m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.91");
|
||||
m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.92");
|
||||
|
||||
open_command_buffer();
|
||||
|
||||
|
@ -30,7 +30,7 @@ namespace rsx
|
||||
|
||||
u32 vp_ctrl;
|
||||
u32 vp_texture_dimensions;
|
||||
u64 vp_instruction_mask[8];
|
||||
u64 vp_instruction_mask[9];
|
||||
|
||||
u32 vp_base_address;
|
||||
u32 vp_entry;
|
||||
@ -365,7 +365,7 @@ namespace rsx
|
||||
vp.base_address = data.vp_base_address;
|
||||
vp.entry = data.vp_entry;
|
||||
|
||||
pack_bitset<512>(vp.instruction_mask, data.vp_instruction_mask);
|
||||
pack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data.vp_instruction_mask);
|
||||
|
||||
for (u8 index = 0; index < 32; ++index)
|
||||
{
|
||||
@ -403,7 +403,7 @@ namespace rsx
|
||||
data_block.vp_base_address = vp.base_address;
|
||||
data_block.vp_entry = vp.entry;
|
||||
|
||||
unpack_bitset<512>(vp.instruction_mask, data_block.vp_instruction_mask);
|
||||
unpack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data_block.vp_instruction_mask);
|
||||
|
||||
u8 index = 0;
|
||||
while (index < 32)
|
||||
|
@ -467,13 +467,10 @@ namespace rsx
|
||||
u32 rcount = count;
|
||||
|
||||
if (const u32 max = load_pos * 4 + rcount + (index % 4);
|
||||
max > 512 * 4)
|
||||
max > max_vertex_program_instructions * 4)
|
||||
{
|
||||
// PS3 seems to allow exceeding the program buffer by up to 32 instructions before crashing
|
||||
// Discard the "excess" instructions to not overflow our transform program buffer
|
||||
// TODO: Check if the instructions in the overflow area are executed by PS3
|
||||
rsx_log.warning("Program buffer overflow!");
|
||||
rcount -= max - (512 * 4);
|
||||
rsx_log.warning("Program buffer overflow! Attempted to write %u VP instructions.", max / 4);
|
||||
rcount -= max - (max_vertex_program_instructions * 4);
|
||||
}
|
||||
|
||||
stream_data_to_memory_swapped_u32<true>(&rsx::method_registers.transform_program[load_pos * 4 + index % 4]
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "rsx_vertex_data.h"
|
||||
#include "rsx_utils.h"
|
||||
#include "Emu/Cell/timers.hpp"
|
||||
#include "Program/program_util.h"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
@ -492,7 +493,7 @@ namespace rsx
|
||||
std::array<vertex_texture, 4> vertex_textures;
|
||||
|
||||
|
||||
std::array<u32, 512 * 4> transform_program{};
|
||||
std::array<u32, max_vertex_program_instructions * 4> transform_program{};
|
||||
std::array<u32[4], 512> transform_constants{};
|
||||
|
||||
draw_clause current_draw_clause{};
|
||||
|
@ -855,18 +855,17 @@ namespace rsx
|
||||
template <int N>
|
||||
void unpack_bitset(const std::bitset<N>& block, u64* values)
|
||||
{
|
||||
constexpr int count = N / 64;
|
||||
for (int n = 0; n < count; ++n)
|
||||
for (int bit = 0, n = -1, shift = 0; bit < N; ++bit, ++shift)
|
||||
{
|
||||
int i = (n << 6);
|
||||
values[n] = 0;
|
||||
|
||||
for (int bit = 0; bit < 64; ++bit, ++i)
|
||||
if ((bit % 64) == 0)
|
||||
{
|
||||
if (block[i])
|
||||
{
|
||||
values[n] |= (1ull << bit);
|
||||
}
|
||||
values[++n] = 0;
|
||||
shift = 0;
|
||||
}
|
||||
|
||||
if (block[bit])
|
||||
{
|
||||
values[n] |= (1ull << shift);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -874,18 +873,11 @@ namespace rsx
|
||||
template <int N>
|
||||
void pack_bitset(std::bitset<N>& block, u64* values)
|
||||
{
|
||||
constexpr int count = N / 64;
|
||||
for (int n = (count - 1); n >= 0; --n)
|
||||
for (int n = 0, shift = 0; shift < N; ++n, shift += 64)
|
||||
{
|
||||
if ((n + 1) < count)
|
||||
{
|
||||
block <<= 64;
|
||||
}
|
||||
|
||||
if (values[n])
|
||||
{
|
||||
block |= values[n];
|
||||
}
|
||||
std::bitset<N> tmp = values[n];
|
||||
tmp <<= shift;
|
||||
block |= tmp;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user