From 9a2d4fe46b358af369bf58b23d6cda472dea754c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 23 Mar 2022 22:53:18 +0300 Subject: [PATCH] rsx: Relocatable transform constants --- Utilities/StrUtil.h | 27 +++++++++++ rpcs3/Emu/RSX/Common/BufferUtils.cpp | 16 ------- rpcs3/Emu/RSX/Common/BufferUtils.h | 11 ----- rpcs3/Emu/RSX/Common/buffer_stream.hpp | 46 +++++++++++++++++++ rpcs3/Emu/RSX/GL/GLDraw.cpp | 13 +++++- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 9 ++-- rpcs3/Emu/RSX/GL/GLGSRender.h | 4 +- rpcs3/Emu/RSX/GL/GLVertexProgram.cpp | 2 + rpcs3/Emu/RSX/GL/GLVertexProgram.h | 2 + rpcs3/Emu/RSX/Program/ProgramStateCache.h | 14 ++++-- rpcs3/Emu/RSX/Program/ShaderParam.h | 34 ++++++++++++-- .../RSX/Program/VertexProgramDecompiler.cpp | 26 +++++++++++ .../Emu/RSX/Program/VertexProgramDecompiler.h | 3 ++ .../Emu/RSX/Program/program_state_cache2.hpp | 12 ++--- rpcs3/Emu/RSX/RSXThread.cpp | 29 ++++++++---- rpcs3/Emu/RSX/RSXThread.h | 4 +- rpcs3/Emu/RSX/VK/VKDraw.cpp | 13 +++++- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 8 ++-- rpcs3/Emu/RSX/VK/VKGSRender.h | 4 +- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 2 + rpcs3/Emu/RSX/VK/VKVertexProgram.h | 2 + rpcs3/emucore.vcxproj | 2 + rpcs3/emucore.vcxproj.filters | 6 +++ 23 files changed, 222 insertions(+), 67 deletions(-) create mode 100644 rpcs3/Emu/RSX/Common/buffer_stream.hpp diff --git a/Utilities/StrUtil.h b/Utilities/StrUtil.h index 87384bdc87..5d63ebce2c 100644 --- a/Utilities/StrUtil.h +++ b/Utilities/StrUtil.h @@ -87,6 +87,33 @@ namespace fmt return src; } + static inline + std::string replace_all(std::string src, const std::vector>& list) + { + for (usz pos = 0; pos < src.length(); ++pos) + { + for (usz i = 0; i < list.size(); ++i) + { + const usz comp_length = list[i].first.length(); + + if (src.length() - pos < comp_length) + { + continue; + } + + if (src.substr(pos, comp_length) == list[i].first) + { + src.erase(pos, comp_length); + src.insert(pos, list[i].second); + pos += list[i].second.length() - 1; + break; + } + } + } + + return src; + } + std::vector split(std::string_view source, std::initializer_list separators, bool is_skip_empty = true); std::string trim(const std::string& source, const std::string& values = " \t"); diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp index a219530560..094e86a060 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp +++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp @@ -869,19 +869,3 @@ std::tuple write_index_array_data_to_buffer(std::span fmt::throw_exception("Unreachable"); } } - -void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) -{ - const __m128i vector = _mm_set_epi32(w, z, y, x); - _mm_stream_si128(reinterpret_cast<__m128i*>(dst), vector); -} - -void stream_vector(void *dst, f32 x, f32 y, f32 z, f32 w) -{ - stream_vector(dst, std::bit_cast(x), std::bit_cast(y), std::bit_cast(z), std::bit_cast(w)); -} -void stream_vector_from_memory(void *dst, void *src) -{ - const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(src)); - _mm_stream_si128(reinterpret_cast<__m128i*>(dst), vector); -} diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.h b/rpcs3/Emu/RSX/Common/BufferUtils.h index 4ab11cc363..f2e87445db 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.h +++ b/rpcs3/Emu/RSX/Common/BufferUtils.h @@ -38,17 +38,6 @@ std::tuple write_index_array_data_to_buffer(std::span */ void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, rsx::primitive_type draw_mode, unsigned count); -/** - * Stream a 128 bits vector to dst. - */ -void stream_vector(void *dst, f32 x, f32 y, f32 z, f32 w); -void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w); - -/** - * Stream a 128 bits vector from src to dst. - */ -void stream_vector_from_memory(void *dst, void *src); - // Copy and swap data in 32-bit units extern void(*const copy_data_swap_u32)(u32*, const u32*, u32); diff --git a/rpcs3/Emu/RSX/Common/buffer_stream.hpp b/rpcs3/Emu/RSX/Common/buffer_stream.hpp new file mode 100644 index 0000000000..ea72432196 --- /dev/null +++ b/rpcs3/Emu/RSX/Common/buffer_stream.hpp @@ -0,0 +1,46 @@ +#pragma once + +#include "util/types.hpp" +#include "util/asm.hpp" + +#if defined(ARCH_X64) +#include "emmintrin.h" +#include "immintrin.h" +#endif + +#ifdef ARCH_ARM64 +#if !defined(_MSC_VER) +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif +#undef FORCE_INLINE +#include "Emu/CPU/sse2neon.h" +#endif + +namespace utils +{ + /** + * Stream a 128 bits vector to dst. + */ + static inline + void stream_vector(void* dst, u32 x, u32 y, u32 z, u32 w) + { + const __m128i vector = _mm_set_epi32(w, z, y, x); + _mm_stream_si128(reinterpret_cast<__m128i*>(dst), vector); + } + + static inline + void stream_vector(void* dst, f32 x, f32 y, f32 z, f32 w) + { + stream_vector(dst, std::bit_cast(x), std::bit_cast(y), std::bit_cast(z), std::bit_cast(w)); + } + + /** + * Stream a 128 bits vector from src to dst. + */ + static inline + void stream_vector_from_memory(void* dst, void* src) + { + const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(src)); + _mm_stream_si128(reinterpret_cast<__m128i*>(dst), vector); + } +} diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index 0712813a5e..c3fb54c352 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -619,7 +619,18 @@ void GLGSRender::end() return; } - analyse_current_rsx_pipeline(); + if (m_graphics_state & (rsx::pipeline_state::fragment_program_ucode_dirty | rsx::pipeline_state::vertex_program_ucode_dirty)) + { + // TODO: Move to shared code + if ((m_graphics_state & rsx::pipeline_state::vertex_program_ucode_dirty) && + m_vertex_prog && !m_vertex_prog->has_indexed_constants) + { + m_graphics_state |= rsx::pipeline_state::transform_constants_dirty; + } + + analyse_current_rsx_pipeline(); + } + m_frame_stats.setup_time += m_profiler.duration(); // Active texture environment is used to decode shaders diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index ec9dae8197..e7d4660e55 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -693,10 +693,13 @@ bool GLGSRender::load_program() } const bool was_interpreter = m_shader_interpreter.is_interpreter(m_program); + m_vertex_prog = nullptr; + m_fragment_prog = nullptr; + if (shadermode != shader_mode::interpreter_only) [[likely]] { void* pipeline_properties = nullptr; - m_program = m_prog_buffer.get_graphics_pipeline(current_vertex_program, current_fragment_program, pipeline_properties, + std::tie(m_program, m_vertex_prog, m_fragment_prog) = m_prog_buffer.get_graphics_pipeline(current_vertex_program, current_fragment_program, pipeline_properties, shadermode != shader_mode::recompiler, true); if (m_prog_buffer.check_cache_missed()) @@ -799,7 +802,7 @@ void GLGSRender::load_program_env() // Vertex constants auto mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align); auto buf = static_cast(mapping.first); - fill_vertex_program_constants_data(buf); + fill_vertex_program_constants_data(buf, m_vertex_prog ? m_vertex_prog->constant_ids : std::vector{}); m_transform_constants_buffer->bind_range(GL_VERTEX_CONSTANT_BUFFERS_BIND_SLOT, mapping.second, 8192); } @@ -811,7 +814,7 @@ void GLGSRender::load_program_env() auto buf = static_cast(mapping.first); m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), fragment_constants_size }, - current_fragment_program, true); + *ensure(m_fragment_prog), current_fragment_program, true); m_fragment_constants_buffer->bind_range(GL_FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, mapping.second, fragment_constants_size); } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index c3d3b5565c..10cd2bb396 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -69,14 +69,14 @@ namespace gl class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control { private: - GLFragmentProgram m_fragment_prog; - GLVertexProgram m_vertex_prog; gl::sampler_state m_fs_sampler_states[rsx::limits::fragment_textures_count]; // Fragment textures gl::sampler_state m_fs_sampler_mirror_states[rsx::limits::fragment_textures_count]; // Alternate views of fragment textures with different format (e.g Depth vs Stencil for D24S8) gl::sampler_state m_vs_sampler_states[rsx::limits::vertex_textures_count]; // Vertex textures gl::glsl::program *m_program = nullptr; + const GLFragmentProgram *m_fragment_prog = nullptr; + const GLVertexProgram *m_vertex_prog = nullptr; u32 m_interpreter_state = 0; gl::shader_interpreter m_shader_interpreter; diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index ee6a4e6660..df895a25e6 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -274,6 +274,8 @@ void GLVertexProgram::Decompile(const RSXVertexProgram& prog) shader.create(::glsl::program_domain::glsl_vertex_program, source); id = shader.id(); + has_indexed_constants = decompiler.properties.has_indexed_constants; + constant_ids = std::move(decompiler.m_constant_ids); } void GLVertexProgram::Delete() diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.h b/rpcs3/Emu/RSX/GL/GLVertexProgram.h index 4f61930e46..1bc0b6262b 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.h +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.h @@ -58,6 +58,8 @@ public: ParamArray parr; u32 id; gl::glsl::shader shader; + std::vector constant_ids; + bool has_indexed_constants; void Decompile(const RSXVertexProgram& prog); diff --git a/rpcs3/Emu/RSX/Program/ProgramStateCache.h b/rpcs3/Emu/RSX/Program/ProgramStateCache.h index eacc85fa0b..664c37b000 100644 --- a/rpcs3/Emu/RSX/Program/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Program/ProgramStateCache.h @@ -109,6 +109,8 @@ class program_state_cache using binary_to_vertex_program = std::unordered_map ; using binary_to_fragment_program = std::unordered_map; + using pipeline_data_type = std::tuple; + struct pipeline_key { u32 vertex_program_id; @@ -305,7 +307,7 @@ public: {} template - pipeline_type* get_graphics_pipeline( + pipeline_data_type get_graphics_pipeline( const RSXVertexProgram& vertexShader, const RSXFragmentProgram& fragmentShader, pipeline_properties& pipelineProperties, @@ -334,7 +336,7 @@ public: if (const auto I = m_storage.find(key); I != m_storage.end()) { m_cache_miss_flag = (I->second == __null_pipeline_handle); - return I->second.get(); + return { I->second.get(), &vertex_program, &fragment_program }; } } @@ -345,7 +347,7 @@ public: if (const auto I = m_storage.find(key); I != m_storage.end()) { m_cache_miss_flag = (I->second == __null_pipeline_handle); - return I->second.get(); + return { I->second.get(), &vertex_program, &fragment_program }; } // Insert a placeholder if the key still doesn't exist to avoid re-linking of the same pipeline @@ -391,16 +393,18 @@ public: }; } - return backend_traits::build_pipeline( + auto result = backend_traits::build_pipeline( vertex_program, // VS, must already be decompiled and recompiled above fragment_program, // FS, must already be decompiled and recompiled above pipelineProperties, // Pipeline state compile_async, // Allow asynchronous compilation callback, // Insertion and notification callback std::forward(args)...); // Other arguments + + return { result, &vertex_program, &fragment_program }; } - void fill_fragment_constants_buffer(std::span dst_buffer, const RSXFragmentProgram& fragment_program, bool sanitize = false) const; + void fill_fragment_constants_buffer(std::span dst_buffer, const fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize = false) const; void clear() { diff --git a/rpcs3/Emu/RSX/Program/ShaderParam.h b/rpcs3/Emu/RSX/Program/ShaderParam.h index 1b870037bf..8eb51611ca 100644 --- a/rpcs3/Emu/RSX/Program/ShaderParam.h +++ b/rpcs3/Emu/RSX/Program/ShaderParam.h @@ -130,13 +130,37 @@ struct ParamType { } - bool SearchName(const std::string& name) const + bool HasItem(const std::string& name) const { return std::any_of(items.cbegin(), items.cend(), [&name](const auto& item) { return item.name == name; }); } + + bool ReplaceOrInsert(const std::string& name, const ParamItem& item) + { + if (HasItem(name)) + { + std::vector new_list; + for (const auto& it : items) + { + if (it.name != item.name) + { + new_list.emplace_back(it.name, it.location, it.value); + } + else + { + new_list.emplace_back(item.name, item.location, item.value); + } + } + + std::swap(items, new_list); + } + + items.push_back(item); + return false; + } }; struct ParamArray @@ -159,14 +183,14 @@ struct ParamArray const auto& p = params[flag]; return std::any_of(p.cbegin(), p.cend(), [&name](const auto& param) { - return param.SearchName(name); + return param.HasItem(name); }); } bool HasParam(const ParamFlag flag, const std::string& type, const std::string& name) { ParamType* t = SearchParam(flag, type); - return t && t->SearchName(name); + return t && t->HasItem(name); } std::string AddParam(const ParamFlag flag, const std::string& type, const std::string& name, const std::string& value) @@ -175,7 +199,7 @@ struct ParamArray if (t) { - if (!t->SearchName(name)) t->items.emplace_back(name, -1, value); + if (!t->HasItem(name)) t->items.emplace_back(name, -1, value); } else { @@ -192,7 +216,7 @@ struct ParamArray if (t) { - if (!t->SearchName(name)) t->items.emplace_back(name, location); + if (!t->HasItem(name)) t->items.emplace_back(name, location); } else { diff --git a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp index 6085f48137..459aafe132 100644 --- a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.cpp @@ -126,6 +126,8 @@ std::string VertexProgramDecompiler::GetSRC(const u32 n) break; case RSX_VP_REGISTER_TYPE_CONSTANT: m_parr.AddParam(PF_PARAM_UNIFORM, getFloatTypeName(4), std::string("vc[468]")); + properties.has_indexed_constants |= !!d3.index_const; + m_constant_ids.push_back(d1.const_src); ret += std::string("vc[") + std::to_string(d1.const_src) + (d3.index_const ? " + " + AddAddrReg() : "") + "]"; break; @@ -391,6 +393,30 @@ std::string VertexProgramDecompiler::BuildCode() m_parr.AddParam(PF_PARAM_OUT, float4_type, "dst_reg0", float4_type + "(0., 0., 0., 1.)"); } + if (!properties.has_indexed_constants && !m_constant_ids.empty()) + { + // Relocate transform constants + std::vector> reloc_table; + reloc_table.reserve(m_constant_ids.size()); + + // First sort the data in ascending order + std::sort(m_constant_ids.begin(), m_constant_ids.end()); + + // Build the string lookup table + for (const auto& index : m_constant_ids) + { + reloc_table.emplace_back(fmt::format("vc[%d]", index), fmt::format("vc[%llu]", reloc_table.size())); + } + + // One-time patch + main_body = fmt::replace_all(main_body, reloc_table); + + // Rename the array type + auto type_list = ensure(m_parr.SearchParam(PF_PARAM_CONST, getFloatTypeName(4))); + const auto item = ParamItem(fmt::format("vc[%llu]", m_constant_ids.size()), -1); + type_list->ReplaceOrInsert("vc[468]", item); + } + std::stringstream OS; insertHeader(OS); diff --git a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h index 35a3c69f24..7d74d58973 100644 --- a/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Program/VertexProgramDecompiler.h @@ -63,6 +63,8 @@ struct VertexProgramDecompiler const RSXVertexProgram& m_prog; ParamArray m_parr; + std::vector m_constant_ids; + static std::string NotZeroPositive(const std::string& code); std::string GetMask(bool is_sca) const; std::string GetVecMask(); @@ -131,6 +133,7 @@ public: struct { bool has_lit_op = false; + bool has_indexed_constants = false; } properties; diff --git a/rpcs3/Emu/RSX/Program/program_state_cache2.hpp b/rpcs3/Emu/RSX/Program/program_state_cache2.hpp index ada78f4931..4d30cfa897 100644 --- a/rpcs3/Emu/RSX/Program/program_state_cache2.hpp +++ b/rpcs3/Emu/RSX/Program/program_state_cache2.hpp @@ -9,19 +9,15 @@ #endif template -void program_state_cache::fill_fragment_constants_buffer(std::span dst_buffer, const RSXFragmentProgram &fragment_program, bool sanitize) const +void program_state_cache::fill_fragment_constants_buffer(std::span dst_buffer, const typename Traits::fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize) const { - const auto I = m_fragment_shader_cache.find(fragment_program); - if (I == m_fragment_shader_cache.end()) - return; - - ensure((dst_buffer.size_bytes() >= ::narrow(I->second.FragmentConstantOffsetCache.size()) * 16u)); + ensure((dst_buffer.size_bytes() >= ::narrow(fragment_program.FragmentConstantOffsetCache.size()) * 16u)); f32* dst = dst_buffer.data(); alignas(16) f32 tmp[4]; - for (usz offset_in_fragment_program : I->second.FragmentConstantOffsetCache) + for (usz offset_in_fragment_program : fragment_program.FragmentConstantOffsetCache) { - char* data = static_cast(fragment_program.get_data()) + offset_in_fragment_program; + char* data = static_cast(rsx_prog.get_data()) + offset_in_fragment_program; #if defined(ARCH_X64) const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data)); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 0fe5a691c1..835cf35c2a 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -5,6 +5,7 @@ #include "Emu/Cell/timers.hpp" #include "Common/BufferUtils.h" +#include "Common/buffer_stream.hpp" #include "Common/texture_cache.h" #include "Common/surface_store.h" #include "Common/time.hpp" @@ -805,10 +806,10 @@ namespace rsx float offset_z = rsx::method_registers.viewport_offset_z(); float one = 1.f; - stream_vector(buffer, std::bit_cast(scale_x), 0, 0, std::bit_cast(offset_x)); - stream_vector(static_cast(buffer) + 16, 0, std::bit_cast(scale_y), 0, std::bit_cast(offset_y)); - stream_vector(static_cast(buffer) + 32, 0, 0, std::bit_cast(scale_z), std::bit_cast(offset_z)); - stream_vector(static_cast(buffer) + 48, 0, 0, 0, std::bit_cast(one)); + utils::stream_vector(buffer, std::bit_cast(scale_x), 0, 0, std::bit_cast(offset_x)); + utils::stream_vector(static_cast(buffer) + 16, 0, std::bit_cast(scale_y), 0, std::bit_cast(offset_y)); + utils::stream_vector(static_cast(buffer) + 32, 0, 0, std::bit_cast(scale_z), std::bit_cast(offset_z)); + utils::stream_vector(static_cast(buffer) + 48, 0, 0, 0, std::bit_cast(one)); } void thread::fill_user_clip_data(void *buffer) const @@ -859,9 +860,21 @@ namespace rsx * Fill buffer with vertex program constants. * Buffer must be at least 512 float4 wide. */ - void thread::fill_vertex_program_constants_data(void* buffer) + void thread::fill_vertex_program_constants_data(void* buffer, const std::vector& reloc_table) { - memcpy(buffer, rsx::method_registers.transform_constants.data(), 468 * 4 * sizeof(float)); + if (!reloc_table.empty()) [[ likely ]] + { + memcpy(buffer, rsx::method_registers.transform_constants.data(), 468 * 4 * sizeof(float)); + } + else + { + char* dst = reinterpret_cast(buffer); + for (const auto& index : reloc_table) + { + utils::stream_vector_from_memory(dst, &rsx::method_registers.transform_constants[index]); + dst += 16; + } + } } void thread::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/) @@ -936,8 +949,8 @@ namespace rsx const f32 alpha_ref = rsx::method_registers.alpha_ref(); u32 *dst = static_cast(buffer); - stream_vector(dst, std::bit_cast(fog0), std::bit_cast(fog1), rop_control, std::bit_cast(alpha_ref)); - stream_vector(dst + 4, 0u, fog_mode, std::bit_cast(wpos_scale), std::bit_cast(wpos_bias)); + utils::stream_vector(dst, std::bit_cast(fog0), std::bit_cast(fog1), rop_control, std::bit_cast(alpha_ref)); + utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast(wpos_scale), std::bit_cast(wpos_bias)); } u64 thread::timestamp() diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 2e72650bea..0af73ee251 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -921,9 +921,9 @@ namespace rsx /** * Fill buffer with vertex program constants. - * Buffer must be at least 512 float4 wide. + * Relocation table allows to do a partial fill with only selected registers. */ - void fill_vertex_program_constants_data(void* buffer); + void fill_vertex_program_constants_data(void* buffer, const std::vector& reloc_table); /** * Fill buffer with fragment rasterization state. diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 99a546b0e5..8a40241063 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -1028,7 +1028,18 @@ void VKGSRender::end() m_current_frame->flags &= ~frame_context_state::dirty; } - analyse_current_rsx_pipeline(); + if (m_graphics_state & (rsx::pipeline_state::fragment_program_ucode_dirty | rsx::pipeline_state::vertex_program_ucode_dirty)) + { + // TODO: Move to shared code + if ((m_graphics_state & rsx::pipeline_state::vertex_program_ucode_dirty) && + m_vertex_prog && !m_vertex_prog->has_indexed_constants) + { + m_graphics_state |= rsx::pipeline_state::transform_constants_dirty; + } + + analyse_current_rsx_pipeline(); + } + m_frame_stats.setup_time += m_profiler.duration(); load_texture_env(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index a358951448..b810ec1555 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1861,13 +1861,15 @@ bool VKGSRender::load_program() } const auto shadermode = g_cfg.video.shadermode.get(); + m_vertex_prog = nullptr; + m_fragment_prog = nullptr; if (shadermode != shader_mode::interpreter_only) [[likely]] { vk::enter_uninterruptible(); // Load current program from cache - m_program = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties, + std::tie(m_program, m_vertex_prog, m_fragment_prog) = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties, shadermode != shader_mode::recompiler, true, pipeline_layout); vk::leave_uninterruptible(); @@ -1956,7 +1958,7 @@ void VKGSRender::load_program_env() auto mem = m_transform_constants_ring_info.alloc<256>(8192); auto buf = m_transform_constants_ring_info.map(mem, 8192); - fill_vertex_program_constants_data(buf); + fill_vertex_program_constants_data(buf, m_vertex_prog ? m_vertex_prog->constant_ids : std::vector{}); m_transform_constants_ring_info.unmap(); m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, mem, 8192 }; } @@ -1972,7 +1974,7 @@ void VKGSRender::load_program_env() auto buf = m_fragment_constants_ring_info.map(mem, fragment_constants_size); m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast(buf), fragment_constants_size }, - current_fragment_program, true); + *ensure(m_fragment_prog), current_fragment_program, true); m_fragment_constants_ring_info.unmap(); m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, mem, fragment_constants_size }; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index d56fe9c098..117516dd23 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -60,8 +60,8 @@ private: }; private: - VKFragmentProgram m_fragment_prog; - VKVertexProgram m_vertex_prog; + const VKFragmentProgram *m_fragment_prog = nullptr; + const VKVertexProgram *m_vertex_prog = nullptr; vk::glsl::program *m_program = nullptr; vk::pipeline_props m_pipeline_properties; diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index b49655ad31..d031528762 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -350,6 +350,8 @@ void VKVertexProgram::Decompile(const RSXVertexProgram& prog) decompiler.Task(); shader.create(::glsl::program_domain::glsl_vertex_program, source); + has_indexed_constants = decompiler.properties.has_indexed_constants; + constant_ids = std::move(decompiler.m_constant_ids); } void VKVertexProgram::Compile() diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.h b/rpcs3/Emu/RSX/VK/VKVertexProgram.h index 5170f7abc7..eeef80421d 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.h +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.h @@ -63,6 +63,8 @@ public: u32 id; vk::glsl::shader shader; std::vector uniforms; + std::vector constant_ids; + bool has_indexed_constants; void Decompile(const RSXVertexProgram& prog); void Compile(); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index d81306c4f1..e66e1b7849 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -450,6 +450,7 @@ + @@ -479,6 +480,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index e7c753b424..5d3320c644 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -2065,6 +2065,12 @@ Emu\GPU\RSX\Common + + Emu\CPU + + + Emu\GPU\RSX\Common +