mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 18:53:28 +01:00
rsx: Implement support for extended vertex programs
- Some games are kinda pushing it with RSX register space and spilling VP data into adjacent unused space.
This commit is contained in:
parent
3cbdc3a198
commit
2c7c1c501d
@ -15,7 +15,7 @@ u64 GLGSRender::get_cycles()
|
|||||||
|
|
||||||
GLGSRender::GLGSRender() : GSRender()
|
GLGSRender::GLGSRender() : GSRender()
|
||||||
{
|
{
|
||||||
m_shaders_cache = std::make_unique<gl::shader_cache>(m_prog_buffer, "opengl", "v1.91");
|
m_shaders_cache = std::make_unique<gl::shader_cache>(m_prog_buffer, "opengl", "v1.92");
|
||||||
|
|
||||||
if (g_cfg.video.disable_vertex_cache || g_cfg.video.multithreaded_rsx)
|
if (g_cfg.video.disable_vertex_cache || g_cfg.video.multithreaded_rsx)
|
||||||
m_vertex_cache = std::make_unique<gl::null_vertex_cache>();
|
m_vertex_cache = std::make_unique<gl::null_vertex_cache>();
|
||||||
|
@ -36,10 +36,10 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
|||||||
//u32 last_instruction_address = 0;
|
//u32 last_instruction_address = 0;
|
||||||
//u32 first_instruction_address = entry;
|
//u32 first_instruction_address = entry;
|
||||||
|
|
||||||
std::stack<u32> call_stack;
|
std::bitset<rsx::max_vertex_program_instructions> instructions_to_patch;
|
||||||
std::pair<u32, u32> instruction_range{umax, 0};
|
std::pair<u32, u32> instruction_range{ umax, 0 };
|
||||||
std::bitset<512> instructions_to_patch;
|
|
||||||
bool has_branch_instruction = false;
|
bool has_branch_instruction = false;
|
||||||
|
std::stack<u32> call_stack;
|
||||||
|
|
||||||
D3 d3;
|
D3 d3;
|
||||||
D2 d2;
|
D2 d2;
|
||||||
@ -54,7 +54,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
|||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
ensure(current_instruction < 512);
|
ensure(current_instruction < rsx::max_vertex_program_instructions);
|
||||||
|
|
||||||
if (result.instruction_mask[current_instruction])
|
if (result.instruction_mask[current_instruction])
|
||||||
{
|
{
|
||||||
@ -120,8 +120,9 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
|||||||
instructions_to_patch[current_instruction] = true;
|
instructions_to_patch[current_instruction] = true;
|
||||||
has_branch_instruction = true;
|
has_branch_instruction = true;
|
||||||
|
|
||||||
|
d0.HEX = instruction._u32[0];
|
||||||
d2.HEX = instruction._u32[2];
|
d2.HEX = instruction._u32[2];
|
||||||
const u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl);
|
const u32 jump_address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
|
||||||
|
|
||||||
if (function_call)
|
if (function_call)
|
||||||
{
|
{
|
||||||
@ -162,7 +163,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ((d3.end && (fast_exit || current_instruction >= instruction_range.second)) ||
|
if ((d3.end && (fast_exit || current_instruction >= instruction_range.second)) ||
|
||||||
(current_instruction + 1) == 512)
|
(current_instruction + 1) == rsx::max_vertex_program_instructions)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -183,7 +184,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
|||||||
{
|
{
|
||||||
fs::file dump(fs::get_cache_dir() + "shaderlog/vp_analyser.bin", fs::rewrite);
|
fs::file dump(fs::get_cache_dir() + "shaderlog/vp_analyser.bin", fs::rewrite);
|
||||||
dump.write(&entry, 4);
|
dump.write(&entry, 4);
|
||||||
dump.write(data, 512 * 16);
|
dump.write(data, rsx::max_vertex_program_instructions * 16);
|
||||||
dump.close();
|
dump.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -215,14 +216,17 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
|||||||
|
|
||||||
if (instructions_to_patch[i])
|
if (instructions_to_patch[i])
|
||||||
{
|
{
|
||||||
|
d0.HEX = dst[0];
|
||||||
d2.HEX = dst[2];
|
d2.HEX = dst[2];
|
||||||
d3.HEX = dst[3];
|
d3.HEX = dst[3];
|
||||||
|
|
||||||
u32 address = ((d2.iaddrh << 3) | d3.iaddrl);
|
u32 address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
|
||||||
address -= instruction_range.first;
|
address -= instruction_range.first;
|
||||||
|
|
||||||
d2.iaddrh = (address >> 3);
|
d0.iaddrh2 = (address >> 9) & 0x1;
|
||||||
|
d2.iaddrh = (address >> 3) & 0x3F;
|
||||||
d3.iaddrl = (address & 0x7);
|
d3.iaddrl = (address & 0x7);
|
||||||
|
dst[0] = d0.HEX;
|
||||||
dst[2] = d2.HEX;
|
dst[2] = d2.HEX;
|
||||||
dst[3] = d3.HEX;
|
dst[3] = d3.HEX;
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ namespace program_hash_util
|
|||||||
{
|
{
|
||||||
struct vertex_program_metadata
|
struct vertex_program_metadata
|
||||||
{
|
{
|
||||||
std::bitset<512> instruction_mask;
|
std::bitset<rsx::max_vertex_program_instructions> instruction_mask;
|
||||||
u32 ucode_length;
|
u32 ucode_length;
|
||||||
u32 referenced_textures_mask;
|
u32 referenced_textures_mask;
|
||||||
};
|
};
|
||||||
|
@ -93,6 +93,13 @@ union D0
|
|||||||
u32 vec_result : 1;
|
u32 vec_result : 1;
|
||||||
u32 : 1;
|
u32 : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
u32 : 23;
|
||||||
|
u32 iaddrh2 : 1;
|
||||||
|
u32 : 8;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
union D1
|
union D1
|
||||||
@ -239,7 +246,7 @@ struct RSXVertexProgram
|
|||||||
|
|
||||||
u32 base_address;
|
u32 base_address;
|
||||||
u32 entry;
|
u32 entry;
|
||||||
std::bitset<512> instruction_mask;
|
std::bitset<rsx::max_vertex_program_instructions> instruction_mask;
|
||||||
std::set<u32> jump_table;
|
std::set<u32> jump_table;
|
||||||
|
|
||||||
rsx::texture_dimension_extended get_texture_dimension(u8 id) const
|
rsx::texture_dimension_extended get_texture_dimension(u8 id) const
|
||||||
|
@ -330,7 +330,7 @@ std::string VertexProgramDecompiler::AddCondReg()
|
|||||||
|
|
||||||
u32 VertexProgramDecompiler::GetAddr() const
|
u32 VertexProgramDecompiler::GetAddr() const
|
||||||
{
|
{
|
||||||
return (d2.iaddrh << 3) | d3.iaddrl;
|
return (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VertexProgramDecompiler::AddCode(const std::string& code)
|
void VertexProgramDecompiler::AddCode(const std::string& code)
|
||||||
|
@ -53,8 +53,7 @@ struct VertexProgramDecompiler
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static const usz m_max_instr_count = 512;
|
Instruction m_instructions[rsx::max_vertex_program_instructions];
|
||||||
Instruction m_instructions[m_max_instr_count];
|
|
||||||
Instruction* m_cur_instr;
|
Instruction* m_cur_instr;
|
||||||
usz m_instr_count;
|
usz m_instr_count;
|
||||||
|
|
||||||
|
@ -5,6 +5,11 @@
|
|||||||
|
|
||||||
namespace rsx
|
namespace rsx
|
||||||
{
|
{
|
||||||
|
enum program_limits
|
||||||
|
{
|
||||||
|
max_vertex_program_instructions = 544
|
||||||
|
};
|
||||||
|
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
// NOTE: This structure must be packed to match GPU layout.
|
// NOTE: This structure must be packed to match GPU layout.
|
||||||
struct fragment_program_texture_config
|
struct fragment_program_texture_config
|
||||||
|
@ -501,7 +501,7 @@ VKGSRender::VKGSRender() : GSRender()
|
|||||||
else
|
else
|
||||||
m_vertex_cache = std::make_unique<vk::weak_vertex_cache>();
|
m_vertex_cache = std::make_unique<vk::weak_vertex_cache>();
|
||||||
|
|
||||||
m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.91");
|
m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.92");
|
||||||
|
|
||||||
open_command_buffer();
|
open_command_buffer();
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ namespace rsx
|
|||||||
|
|
||||||
u32 vp_ctrl;
|
u32 vp_ctrl;
|
||||||
u32 vp_texture_dimensions;
|
u32 vp_texture_dimensions;
|
||||||
u64 vp_instruction_mask[8];
|
u64 vp_instruction_mask[9];
|
||||||
|
|
||||||
u32 vp_base_address;
|
u32 vp_base_address;
|
||||||
u32 vp_entry;
|
u32 vp_entry;
|
||||||
@ -365,7 +365,7 @@ namespace rsx
|
|||||||
vp.base_address = data.vp_base_address;
|
vp.base_address = data.vp_base_address;
|
||||||
vp.entry = data.vp_entry;
|
vp.entry = data.vp_entry;
|
||||||
|
|
||||||
pack_bitset<512>(vp.instruction_mask, data.vp_instruction_mask);
|
pack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data.vp_instruction_mask);
|
||||||
|
|
||||||
for (u8 index = 0; index < 32; ++index)
|
for (u8 index = 0; index < 32; ++index)
|
||||||
{
|
{
|
||||||
@ -403,7 +403,7 @@ namespace rsx
|
|||||||
data_block.vp_base_address = vp.base_address;
|
data_block.vp_base_address = vp.base_address;
|
||||||
data_block.vp_entry = vp.entry;
|
data_block.vp_entry = vp.entry;
|
||||||
|
|
||||||
unpack_bitset<512>(vp.instruction_mask, data_block.vp_instruction_mask);
|
unpack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data_block.vp_instruction_mask);
|
||||||
|
|
||||||
u8 index = 0;
|
u8 index = 0;
|
||||||
while (index < 32)
|
while (index < 32)
|
||||||
|
@ -467,13 +467,10 @@ namespace rsx
|
|||||||
u32 rcount = count;
|
u32 rcount = count;
|
||||||
|
|
||||||
if (const u32 max = load_pos * 4 + rcount + (index % 4);
|
if (const u32 max = load_pos * 4 + rcount + (index % 4);
|
||||||
max > 512 * 4)
|
max > max_vertex_program_instructions * 4)
|
||||||
{
|
{
|
||||||
// PS3 seems to allow exceeding the program buffer by upto 32 instructions before crashing
|
rsx_log.warning("Program buffer overflow! Attempted to write %u VP instructions.", max / 4);
|
||||||
// Discard the "excess" instructions to not overflow our transform program buffer
|
rcount -= max - (max_vertex_program_instructions * 4);
|
||||||
// TODO: Check if the instructions in the overflow area are executed by PS3
|
|
||||||
rsx_log.warning("Program buffer overflow!");
|
|
||||||
rcount -= max - (512 * 4);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
stream_data_to_memory_swapped_u32<true>(&rsx::method_registers.transform_program[load_pos * 4 + index % 4]
|
stream_data_to_memory_swapped_u32<true>(&rsx::method_registers.transform_program[load_pos * 4 + index % 4]
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include "rsx_vertex_data.h"
|
#include "rsx_vertex_data.h"
|
||||||
#include "rsx_utils.h"
|
#include "rsx_utils.h"
|
||||||
#include "Emu/Cell/timers.hpp"
|
#include "Emu/Cell/timers.hpp"
|
||||||
|
#include "Program/program_util.h"
|
||||||
|
|
||||||
namespace rsx
|
namespace rsx
|
||||||
{
|
{
|
||||||
@ -492,7 +493,7 @@ namespace rsx
|
|||||||
std::array<vertex_texture, 4> vertex_textures;
|
std::array<vertex_texture, 4> vertex_textures;
|
||||||
|
|
||||||
|
|
||||||
std::array<u32, 512 * 4> transform_program{};
|
std::array<u32, max_vertex_program_instructions * 4> transform_program{};
|
||||||
std::array<u32[4], 512> transform_constants{};
|
std::array<u32[4], 512> transform_constants{};
|
||||||
|
|
||||||
draw_clause current_draw_clause{};
|
draw_clause current_draw_clause{};
|
||||||
|
@ -855,18 +855,17 @@ namespace rsx
|
|||||||
template <int N>
|
template <int N>
|
||||||
void unpack_bitset(const std::bitset<N>& block, u64* values)
|
void unpack_bitset(const std::bitset<N>& block, u64* values)
|
||||||
{
|
{
|
||||||
constexpr int count = N / 64;
|
for (int bit = 0, n = -1, shift = 0; bit < N; ++bit, ++shift)
|
||||||
for (int n = 0; n < count; ++n)
|
|
||||||
{
|
{
|
||||||
int i = (n << 6);
|
if ((bit % 64) == 0)
|
||||||
values[n] = 0;
|
|
||||||
|
|
||||||
for (int bit = 0; bit < 64; ++bit, ++i)
|
|
||||||
{
|
{
|
||||||
if (block[i])
|
values[++n] = 0;
|
||||||
{
|
shift = 0;
|
||||||
values[n] |= (1ull << bit);
|
}
|
||||||
}
|
|
||||||
|
if (block[bit])
|
||||||
|
{
|
||||||
|
values[n] |= (1ull << shift);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -874,18 +873,11 @@ namespace rsx
|
|||||||
template <int N>
|
template <int N>
|
||||||
void pack_bitset(std::bitset<N>& block, u64* values)
|
void pack_bitset(std::bitset<N>& block, u64* values)
|
||||||
{
|
{
|
||||||
constexpr int count = N / 64;
|
for (int n = 0, shift = 0; shift < N; ++n, shift += 64)
|
||||||
for (int n = (count - 1); n >= 0; --n)
|
|
||||||
{
|
{
|
||||||
if ((n + 1) < count)
|
std::bitset<N> tmp = values[n];
|
||||||
{
|
tmp <<= shift;
|
||||||
block <<= 64;
|
block |= tmp;
|
||||||
}
|
|
||||||
|
|
||||||
if (values[n])
|
|
||||||
{
|
|
||||||
block |= values[n];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user