1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 02:32:36 +01:00

rsx: Relocatable transform constants

This commit is contained in:
kd-11 2022-03-23 22:53:18 +03:00 committed by kd-11
parent e650d11b30
commit 9a2d4fe46b
23 changed files with 222 additions and 67 deletions

View File

@ -87,6 +87,33 @@ namespace fmt
return src;
}
static inline
std::string replace_all(std::string src, const std::vector<std::pair<std::string, std::string>>& list)
{
for (usz pos = 0; pos < src.length(); ++pos)
{
for (usz i = 0; i < list.size(); ++i)
{
const usz comp_length = list[i].first.length();
if (src.length() - pos < comp_length)
{
continue;
}
if (src.substr(pos, comp_length) == list[i].first)
{
src.erase(pos, comp_length);
src.insert(pos, list[i].second);
pos += list[i].second.length() - 1;
break;
}
}
}
return src;
}
std::vector<std::string> split(std::string_view source, std::initializer_list<std::string_view> separators, bool is_skip_empty = true);
std::string trim(const std::string& source, const std::string& values = " \t");

View File

@ -869,19 +869,3 @@ std::tuple<u32, u32, u32> write_index_array_data_to_buffer(std::span<std::byte>
fmt::throw_exception("Unreachable");
}
}
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w)
{
const __m128i vector = _mm_set_epi32(w, z, y, x);
_mm_stream_si128(reinterpret_cast<__m128i*>(dst), vector);
}
void stream_vector(void *dst, f32 x, f32 y, f32 z, f32 w)
{
stream_vector(dst, std::bit_cast<u32>(x), std::bit_cast<u32>(y), std::bit_cast<u32>(z), std::bit_cast<u32>(w));
}
void stream_vector_from_memory(void *dst, void *src)
{
const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(src));
_mm_stream_si128(reinterpret_cast<__m128i*>(dst), vector);
}

View File

@ -38,17 +38,6 @@ std::tuple<u32, u32, u32> write_index_array_data_to_buffer(std::span<std::byte>
*/
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, rsx::primitive_type draw_mode, unsigned count);
/**
* Stream a 128 bits vector to dst.
*/
void stream_vector(void *dst, f32 x, f32 y, f32 z, f32 w);
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w);
/**
* Stream a 128 bits vector from src to dst.
*/
void stream_vector_from_memory(void *dst, void *src);
// Copy and swap data in 32-bit units
extern void(*const copy_data_swap_u32)(u32*, const u32*, u32);

View File

@ -0,0 +1,46 @@
#pragma once
#include "util/types.hpp"
#include "util/asm.hpp"
#if defined(ARCH_X64)
#include "emmintrin.h"
#include "immintrin.h"
#endif
#ifdef ARCH_ARM64
#if !defined(_MSC_VER)
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#endif
#undef FORCE_INLINE
#include "Emu/CPU/sse2neon.h"
#endif
namespace utils
{
/**
* Stream a 128 bits vector to dst.
*/
static inline
void stream_vector(void* dst, u32 x, u32 y, u32 z, u32 w)
{
const __m128i vector = _mm_set_epi32(w, z, y, x);
_mm_stream_si128(reinterpret_cast<__m128i*>(dst), vector);
}
static inline
void stream_vector(void* dst, f32 x, f32 y, f32 z, f32 w)
{
stream_vector(dst, std::bit_cast<u32>(x), std::bit_cast<u32>(y), std::bit_cast<u32>(z), std::bit_cast<u32>(w));
}
/**
* Stream a 128 bits vector from src to dst.
*/
static inline
void stream_vector_from_memory(void* dst, void* src)
{
const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(src));
_mm_stream_si128(reinterpret_cast<__m128i*>(dst), vector);
}
}

View File

@ -619,7 +619,18 @@ void GLGSRender::end()
return;
}
analyse_current_rsx_pipeline();
if (m_graphics_state & (rsx::pipeline_state::fragment_program_ucode_dirty | rsx::pipeline_state::vertex_program_ucode_dirty))
{
// TODO: Move to shared code
if ((m_graphics_state & rsx::pipeline_state::vertex_program_ucode_dirty) &&
m_vertex_prog && !m_vertex_prog->has_indexed_constants)
{
m_graphics_state |= rsx::pipeline_state::transform_constants_dirty;
}
analyse_current_rsx_pipeline();
}
m_frame_stats.setup_time += m_profiler.duration();
// Active texture environment is used to decode shaders

View File

@ -693,10 +693,13 @@ bool GLGSRender::load_program()
}
const bool was_interpreter = m_shader_interpreter.is_interpreter(m_program);
m_vertex_prog = nullptr;
m_fragment_prog = nullptr;
if (shadermode != shader_mode::interpreter_only) [[likely]]
{
void* pipeline_properties = nullptr;
m_program = m_prog_buffer.get_graphics_pipeline(current_vertex_program, current_fragment_program, pipeline_properties,
std::tie(m_program, m_vertex_prog, m_fragment_prog) = m_prog_buffer.get_graphics_pipeline(current_vertex_program, current_fragment_program, pipeline_properties,
shadermode != shader_mode::recompiler, true);
if (m_prog_buffer.check_cache_missed())
@ -799,7 +802,7 @@ void GLGSRender::load_program_env()
// Vertex constants
auto mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align);
auto buf = static_cast<u8*>(mapping.first);
fill_vertex_program_constants_data(buf);
fill_vertex_program_constants_data(buf, m_vertex_prog ? m_vertex_prog->constant_ids : std::vector<u16>{});
m_transform_constants_buffer->bind_range(GL_VERTEX_CONSTANT_BUFFERS_BIND_SLOT, mapping.second, 8192);
}
@ -811,7 +814,7 @@ void GLGSRender::load_program_env()
auto buf = static_cast<u8*>(mapping.first);
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), fragment_constants_size },
current_fragment_program, true);
*ensure(m_fragment_prog), current_fragment_program, true);
m_fragment_constants_buffer->bind_range(GL_FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, mapping.second, fragment_constants_size);
}

View File

@ -69,14 +69,14 @@ namespace gl
class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
private:
GLFragmentProgram m_fragment_prog;
GLVertexProgram m_vertex_prog;
gl::sampler_state m_fs_sampler_states[rsx::limits::fragment_textures_count]; // Fragment textures
gl::sampler_state m_fs_sampler_mirror_states[rsx::limits::fragment_textures_count]; // Alternate views of fragment textures with different format (e.g Depth vs Stencil for D24S8)
gl::sampler_state m_vs_sampler_states[rsx::limits::vertex_textures_count]; // Vertex textures
gl::glsl::program *m_program = nullptr;
const GLFragmentProgram *m_fragment_prog = nullptr;
const GLVertexProgram *m_vertex_prog = nullptr;
u32 m_interpreter_state = 0;
gl::shader_interpreter m_shader_interpreter;

View File

@ -274,6 +274,8 @@ void GLVertexProgram::Decompile(const RSXVertexProgram& prog)
shader.create(::glsl::program_domain::glsl_vertex_program, source);
id = shader.id();
has_indexed_constants = decompiler.properties.has_indexed_constants;
constant_ids = std::move(decompiler.m_constant_ids);
}
void GLVertexProgram::Delete()

View File

@ -58,6 +58,8 @@ public:
ParamArray parr;
u32 id;
gl::glsl::shader shader;
std::vector<u16> constant_ids;
bool has_indexed_constants;
void Decompile(const RSXVertexProgram& prog);

View File

@ -109,6 +109,8 @@ class program_state_cache
using binary_to_vertex_program = std::unordered_map<RSXVertexProgram, vertex_program_type, program_hash_util::vertex_program_storage_hash, program_hash_util::vertex_program_compare> ;
using binary_to_fragment_program = std::unordered_map<RSXFragmentProgram, fragment_program_type, program_hash_util::fragment_program_storage_hash, program_hash_util::fragment_program_compare>;
using pipeline_data_type = std::tuple<pipeline_type*, const vertex_program_type*, const fragment_program_type*>;
struct pipeline_key
{
u32 vertex_program_id;
@ -305,7 +307,7 @@ public:
{}
template<typename... Args>
pipeline_type* get_graphics_pipeline(
pipeline_data_type get_graphics_pipeline(
const RSXVertexProgram& vertexShader,
const RSXFragmentProgram& fragmentShader,
pipeline_properties& pipelineProperties,
@ -334,7 +336,7 @@ public:
if (const auto I = m_storage.find(key); I != m_storage.end())
{
m_cache_miss_flag = (I->second == __null_pipeline_handle);
return I->second.get();
return { I->second.get(), &vertex_program, &fragment_program };
}
}
@ -345,7 +347,7 @@ public:
if (const auto I = m_storage.find(key); I != m_storage.end())
{
m_cache_miss_flag = (I->second == __null_pipeline_handle);
return I->second.get();
return { I->second.get(), &vertex_program, &fragment_program };
}
// Insert a placeholder if the key still doesn't exist to avoid re-linking of the same pipeline
@ -391,16 +393,18 @@ public:
};
}
return backend_traits::build_pipeline(
auto result = backend_traits::build_pipeline(
vertex_program, // VS, must already be decompiled and recompiled above
fragment_program, // FS, must already be decompiled and recompiled above
pipelineProperties, // Pipeline state
compile_async, // Allow asynchronous compilation
callback, // Insertion and notification callback
std::forward<Args>(args)...); // Other arguments
return { result, &vertex_program, &fragment_program };
}
void fill_fragment_constants_buffer(std::span<f32> dst_buffer, const RSXFragmentProgram& fragment_program, bool sanitize = false) const;
void fill_fragment_constants_buffer(std::span<f32> dst_buffer, const fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize = false) const;
void clear()
{

View File

@ -130,13 +130,37 @@ struct ParamType
{
}
bool SearchName(const std::string& name) const
bool HasItem(const std::string& name) const
{
return std::any_of(items.cbegin(), items.cend(), [&name](const auto& item)
{
return item.name == name;
});
}
bool ReplaceOrInsert(const std::string& name, const ParamItem& item)
{
if (HasItem(name))
{
std::vector<ParamItem> new_list;
for (const auto& it : items)
{
if (it.name != item.name)
{
new_list.emplace_back(it.name, it.location, it.value);
}
else
{
new_list.emplace_back(item.name, item.location, item.value);
}
}
std::swap(items, new_list);
}
items.push_back(item);
return false;
}
};
struct ParamArray
@ -159,14 +183,14 @@ struct ParamArray
const auto& p = params[flag];
return std::any_of(p.cbegin(), p.cend(), [&name](const auto& param)
{
return param.SearchName(name);
return param.HasItem(name);
});
}
bool HasParam(const ParamFlag flag, const std::string& type, const std::string& name)
{
ParamType* t = SearchParam(flag, type);
return t && t->SearchName(name);
return t && t->HasItem(name);
}
std::string AddParam(const ParamFlag flag, const std::string& type, const std::string& name, const std::string& value)
@ -175,7 +199,7 @@ struct ParamArray
if (t)
{
if (!t->SearchName(name)) t->items.emplace_back(name, -1, value);
if (!t->HasItem(name)) t->items.emplace_back(name, -1, value);
}
else
{
@ -192,7 +216,7 @@ struct ParamArray
if (t)
{
if (!t->SearchName(name)) t->items.emplace_back(name, location);
if (!t->HasItem(name)) t->items.emplace_back(name, location);
}
else
{

View File

@ -126,6 +126,8 @@ std::string VertexProgramDecompiler::GetSRC(const u32 n)
break;
case RSX_VP_REGISTER_TYPE_CONSTANT:
m_parr.AddParam(PF_PARAM_UNIFORM, getFloatTypeName(4), std::string("vc[468]"));
properties.has_indexed_constants |= !!d3.index_const;
m_constant_ids.push_back(d1.const_src);
ret += std::string("vc[") + std::to_string(d1.const_src) + (d3.index_const ? " + " + AddAddrReg() : "") + "]";
break;
@ -391,6 +393,30 @@ std::string VertexProgramDecompiler::BuildCode()
m_parr.AddParam(PF_PARAM_OUT, float4_type, "dst_reg0", float4_type + "(0., 0., 0., 1.)");
}
if (!properties.has_indexed_constants && !m_constant_ids.empty())
{
// Relocate transform constants
std::vector<std::pair<std::string, std::string>> reloc_table;
reloc_table.reserve(m_constant_ids.size());
// First sort the data in ascending order
std::sort(m_constant_ids.begin(), m_constant_ids.end());
// Build the string lookup table
for (const auto& index : m_constant_ids)
{
reloc_table.emplace_back(fmt::format("vc[%d]", index), fmt::format("vc[%llu]", reloc_table.size()));
}
// One-time patch
main_body = fmt::replace_all(main_body, reloc_table);
// Rename the array type
auto type_list = ensure(m_parr.SearchParam(PF_PARAM_CONST, getFloatTypeName(4)));
const auto item = ParamItem(fmt::format("vc[%llu]", m_constant_ids.size()), -1);
type_list->ReplaceOrInsert("vc[468]", item);
}
std::stringstream OS;
insertHeader(OS);

View File

@ -63,6 +63,8 @@ struct VertexProgramDecompiler
const RSXVertexProgram& m_prog;
ParamArray m_parr;
std::vector<u16> m_constant_ids;
static std::string NotZeroPositive(const std::string& code);
std::string GetMask(bool is_sca) const;
std::string GetVecMask();
@ -131,6 +133,7 @@ public:
struct
{
bool has_lit_op = false;
bool has_indexed_constants = false;
}
properties;

View File

@ -9,19 +9,15 @@
#endif
template <typename Traits>
void program_state_cache<Traits>::fill_fragment_constants_buffer(std::span<f32> dst_buffer, const RSXFragmentProgram &fragment_program, bool sanitize) const
void program_state_cache<Traits>::fill_fragment_constants_buffer(std::span<f32> dst_buffer, const typename Traits::fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize) const
{
const auto I = m_fragment_shader_cache.find(fragment_program);
if (I == m_fragment_shader_cache.end())
return;
ensure((dst_buffer.size_bytes() >= ::narrow<int>(I->second.FragmentConstantOffsetCache.size()) * 16u));
ensure((dst_buffer.size_bytes() >= ::narrow<int>(fragment_program.FragmentConstantOffsetCache.size()) * 16u));
f32* dst = dst_buffer.data();
alignas(16) f32 tmp[4];
for (usz offset_in_fragment_program : I->second.FragmentConstantOffsetCache)
for (usz offset_in_fragment_program : fragment_program.FragmentConstantOffsetCache)
{
char* data = static_cast<char*>(fragment_program.get_data()) + offset_in_fragment_program;
char* data = static_cast<char*>(rsx_prog.get_data()) + offset_in_fragment_program;
#if defined(ARCH_X64)
const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data));

View File

@ -5,6 +5,7 @@
#include "Emu/Cell/timers.hpp"
#include "Common/BufferUtils.h"
#include "Common/buffer_stream.hpp"
#include "Common/texture_cache.h"
#include "Common/surface_store.h"
#include "Common/time.hpp"
@ -805,10 +806,10 @@ namespace rsx
float offset_z = rsx::method_registers.viewport_offset_z();
float one = 1.f;
stream_vector(buffer, std::bit_cast<u32>(scale_x), 0, 0, std::bit_cast<u32>(offset_x));
stream_vector(static_cast<char*>(buffer) + 16, 0, std::bit_cast<u32>(scale_y), 0, std::bit_cast<u32>(offset_y));
stream_vector(static_cast<char*>(buffer) + 32, 0, 0, std::bit_cast<u32>(scale_z), std::bit_cast<u32>(offset_z));
stream_vector(static_cast<char*>(buffer) + 48, 0, 0, 0, std::bit_cast<u32>(one));
utils::stream_vector(buffer, std::bit_cast<u32>(scale_x), 0, 0, std::bit_cast<u32>(offset_x));
utils::stream_vector(static_cast<char*>(buffer) + 16, 0, std::bit_cast<u32>(scale_y), 0, std::bit_cast<u32>(offset_y));
utils::stream_vector(static_cast<char*>(buffer) + 32, 0, 0, std::bit_cast<u32>(scale_z), std::bit_cast<u32>(offset_z));
utils::stream_vector(static_cast<char*>(buffer) + 48, 0, 0, 0, std::bit_cast<u32>(one));
}
void thread::fill_user_clip_data(void *buffer) const
@ -859,9 +860,21 @@ namespace rsx
* Fill buffer with vertex program constants.
* Buffer must be at least 512 float4 wide.
*/
void thread::fill_vertex_program_constants_data(void* buffer)
void thread::fill_vertex_program_constants_data(void* buffer, const std::vector<u16>& reloc_table)
{
memcpy(buffer, rsx::method_registers.transform_constants.data(), 468 * 4 * sizeof(float));
if (!reloc_table.empty()) [[ likely ]]
{
memcpy(buffer, rsx::method_registers.transform_constants.data(), 468 * 4 * sizeof(float));
}
else
{
char* dst = reinterpret_cast<char*>(buffer);
for (const auto& index : reloc_table)
{
utils::stream_vector_from_memory(dst, &rsx::method_registers.transform_constants[index]);
dst += 16;
}
}
}
void thread::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/)
@ -936,8 +949,8 @@ namespace rsx
const f32 alpha_ref = rsx::method_registers.alpha_ref();
u32 *dst = static_cast<u32*>(buffer);
stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control, std::bit_cast<u32>(alpha_ref));
stream_vector(dst + 4, 0u, fog_mode, std::bit_cast<u32>(wpos_scale), std::bit_cast<u32>(wpos_bias));
utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control, std::bit_cast<u32>(alpha_ref));
utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast<u32>(wpos_scale), std::bit_cast<u32>(wpos_bias));
}
u64 thread::timestamp()

View File

@ -921,9 +921,9 @@ namespace rsx
/**
* Fill buffer with vertex program constants.
* Buffer must be at least 512 float4 wide.
* Relocation table allows to do a partial fill with only selected registers.
*/
void fill_vertex_program_constants_data(void* buffer);
void fill_vertex_program_constants_data(void* buffer, const std::vector<u16>& reloc_table);
/**
* Fill buffer with fragment rasterization state.

View File

@ -1028,7 +1028,18 @@ void VKGSRender::end()
m_current_frame->flags &= ~frame_context_state::dirty;
}
analyse_current_rsx_pipeline();
if (m_graphics_state & (rsx::pipeline_state::fragment_program_ucode_dirty | rsx::pipeline_state::vertex_program_ucode_dirty))
{
// TODO: Move to shared code
if ((m_graphics_state & rsx::pipeline_state::vertex_program_ucode_dirty) &&
m_vertex_prog && !m_vertex_prog->has_indexed_constants)
{
m_graphics_state |= rsx::pipeline_state::transform_constants_dirty;
}
analyse_current_rsx_pipeline();
}
m_frame_stats.setup_time += m_profiler.duration();
load_texture_env();

View File

@ -1861,13 +1861,15 @@ bool VKGSRender::load_program()
}
const auto shadermode = g_cfg.video.shadermode.get();
m_vertex_prog = nullptr;
m_fragment_prog = nullptr;
if (shadermode != shader_mode::interpreter_only) [[likely]]
{
vk::enter_uninterruptible();
// Load current program from cache
m_program = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties,
std::tie(m_program, m_vertex_prog, m_fragment_prog) = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties,
shadermode != shader_mode::recompiler, true, pipeline_layout);
vk::leave_uninterruptible();
@ -1956,7 +1958,7 @@ void VKGSRender::load_program_env()
auto mem = m_transform_constants_ring_info.alloc<256>(8192);
auto buf = m_transform_constants_ring_info.map(mem, 8192);
fill_vertex_program_constants_data(buf);
fill_vertex_program_constants_data(buf, m_vertex_prog ? m_vertex_prog->constant_ids : std::vector<u16>{});
m_transform_constants_ring_info.unmap();
m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, mem, 8192 };
}
@ -1972,7 +1974,7 @@ void VKGSRender::load_program_env()
auto buf = m_fragment_constants_ring_info.map(mem, fragment_constants_size);
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), fragment_constants_size },
current_fragment_program, true);
*ensure(m_fragment_prog), current_fragment_program, true);
m_fragment_constants_ring_info.unmap();
m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, mem, fragment_constants_size };

View File

@ -60,8 +60,8 @@ private:
};
private:
VKFragmentProgram m_fragment_prog;
VKVertexProgram m_vertex_prog;
const VKFragmentProgram *m_fragment_prog = nullptr;
const VKVertexProgram *m_vertex_prog = nullptr;
vk::glsl::program *m_program = nullptr;
vk::pipeline_props m_pipeline_properties;

View File

@ -350,6 +350,8 @@ void VKVertexProgram::Decompile(const RSXVertexProgram& prog)
decompiler.Task();
shader.create(::glsl::program_domain::glsl_vertex_program, source);
has_indexed_constants = decompiler.properties.has_indexed_constants;
constant_ids = std::move(decompiler.m_constant_ids);
}
void VKVertexProgram::Compile()

View File

@ -63,6 +63,8 @@ public:
u32 id;
vk::glsl::shader shader;
std::vector<vk::glsl::program_input> uniforms;
std::vector<u16> constant_ids;
bool has_indexed_constants;
void Decompile(const RSXVertexProgram& prog);
void Compile();

View File

@ -450,6 +450,7 @@
<ClInclude Include="Emu\Cell\Modules\cellStorage.h" />
<ClInclude Include="Emu\Cell\Modules\libfs_utility_init.h" />
<ClInclude Include="Emu\Cell\Modules\sys_crashdump.h" />
<ClInclude Include="Emu\CPU\sse2neon.h" />
<ClInclude Include="Emu\Io\camera_config.h" />
<ClInclude Include="Emu\Io\camera_handler_base.h" />
<ClInclude Include="Emu\Io\music_handler_base.h" />
@ -479,6 +480,7 @@
<ClInclude Include="Emu\NP\rpcn_config.h" />
<ClInclude Include="Emu\perf_monitor.hpp" />
<ClInclude Include="Emu\RSX\Common\bitfield.hpp" />
<ClInclude Include="Emu\RSX\Common\buffer_stream.hpp" />
<ClInclude Include="Emu\RSX\Common\profiling_timer.hpp" />
<ClInclude Include="Emu\RSX\Common\ranged_map.hpp" />
<ClInclude Include="Emu\RSX\Common\simple_array.hpp" />

View File

@ -2065,6 +2065,12 @@
<ClInclude Include="Emu\RSX\Common\ranged_map.hpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
<ClInclude Include="Emu\CPU\sse2neon.h">
<Filter>Emu\CPU</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\buffer_stream.hpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Common\Interpreter\FragmentInterpreter.glsl">