From 2a62fa892bfb2976dccc17231ad3af6c4b292768 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 29 Dec 2018 16:28:12 +0300 Subject: [PATCH] rsx: Texture cache refactor - gl: Include an execution state wrapper to ensure state changes are consistent. Also removes a lot of required 'cleanup' for helper methods - texture_cache: Make execution context a mandatory field as it is required for all operations. Also removes a lot of situations where duplicate argument is added in for both fixed and vararg fields - Explicit read/write barrier for framebuffer resources depending on usage. Allows for operations like optional memory initialization before reading --- rpcs3/Emu/RSX/Common/texture_cache.h | 62 +++---- rpcs3/Emu/RSX/GL/GLExecutionState.h | 246 +++++++++++++++++++++++++++ rpcs3/Emu/RSX/GL/GLGSRender.cpp | 31 ++-- rpcs3/Emu/RSX/GL/GLGSRender.h | 2 +- rpcs3/Emu/RSX/GL/GLHelpers.cpp | 62 +++++-- rpcs3/Emu/RSX/GL/GLHelpers.h | 22 ++- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 56 +++--- rpcs3/Emu/RSX/GL/GLRenderTargets.h | 4 +- rpcs3/Emu/RSX/GL/GLTextureCache.h | 40 ++--- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 28 +-- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 4 +- rpcs3/Emu/RSX/VK/VKHelpers.h | 13 +- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 33 +++- rpcs3/Emu/RSX/VK/VKTexture.cpp | 28 +-- rpcs3/Emu/RSX/VK/VKTextureCache.h | 12 +- rpcs3/GLGSRender.vcxproj | 1 + rpcs3/GLGSRender.vcxproj.filters | 1 + 17 files changed, 494 insertions(+), 151 deletions(-) create mode 100644 rpcs3/Emu/RSX/GL/GLExecutionState.h diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 8151a8f408..dda02ebfa1 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -389,7 +389,7 @@ namespace rsx */ private: template - void flush_set(thrashed_set& data, Args&&... 
extras) { AUDIT(!data.flushed); @@ -411,11 +411,11 @@ namespace rsx const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp; if (surface->is_synchronized() && ROP_timestamp > surface->get_sync_timestamp()) { - surface->copy_texture(true, std::forward(extras)...); + surface->copy_texture(cmd, true, std::forward(extras)...); } } - surface->flush(std::forward(extras)...); + surface->flush(cmd, std::forward(extras)...); // Exclude this region when flushing other sections that should not trample it // If we overlap an excluded RO, set it as dirty @@ -676,7 +676,7 @@ namespace rsx //Invalidate range base implementation template - thrashed_set invalidate_range_impl_base(const address_range &fault_range_in, invalidation_cause cause, Args&&... extras) + thrashed_set invalidate_range_impl_base(commandbuffer_type& cmd, const address_range &fault_range_in, invalidation_cause cause, Args&&... extras) { #ifdef TEXTURE_CACHE_DEBUG // Check that the cache has the correct protections @@ -840,7 +840,7 @@ namespace rsx // or there is nothing to flush but we have something to unprotect if (has_flushables && !cause.skip_flush()) { - flush_set(result, std::forward(extras)...); + flush_set(cmd, result, std::forward(extras)...); } unprotect_set(result); @@ -1113,7 +1113,7 @@ namespace rsx } template - void lock_memory_region(image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, std::tuple&& flush_extras, Args&&... extras) + void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, std::tuple&& flush_extras, Args&&... 
extras) { AUDIT(g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer); // this method is only called when either WCB or WDB are enabled @@ -1134,7 +1134,7 @@ namespace rsx { // Invalidate sections from surface cache occupying same address range std::apply(&texture_cache::invalidate_range_impl_base, std::tuple_cat( - std::make_tuple(this, rsx_range, invalidation_cause::superseded_by_fbo), + std::forward_as_tuple(this, cmd, rsx_range, invalidation_cause::superseded_by_fbo), std::forward >(flush_extras) )); } @@ -1261,7 +1261,7 @@ namespace rsx } template - thrashed_set invalidate_address(u32 address, invalidation_cause cause, Args&&... extras) + thrashed_set invalidate_address(commandbuffer_type& cmd, u32 address, invalidation_cause cause, Args&&... extras) { //Test before trying to acquire the lock const auto range = page_for(address); @@ -1269,22 +1269,22 @@ namespace rsx return{}; std::lock_guard lock(m_cache_mutex); - return invalidate_range_impl_base(range, cause, std::forward(extras)...); + return invalidate_range_impl_base(cmd, range, cause, std::forward(extras)...); } template - thrashed_set invalidate_range(const address_range &range, invalidation_cause cause, Args&&... extras) + thrashed_set invalidate_range(commandbuffer_type& cmd, const address_range &range, invalidation_cause cause, Args&&... extras) { //Test before trying to acquire the lock if (!region_intersects_cache(range, !cause.is_read())) return {}; std::lock_guard lock(m_cache_mutex); - return invalidate_range_impl_base(range, cause, std::forward(extras)...); + return invalidate_range_impl_base(cmd, range, cause, std::forward(extras)...); } template - bool flush_all(thrashed_set& data, Args&&... extras) + bool flush_all(commandbuffer_type& cmd, thrashed_set& data, Args&&... extras) { std::lock_guard lock(m_cache_mutex); @@ -1294,7 +1294,7 @@ namespace rsx if (m_cache_update_tag.load(std::memory_order_consume) == data.cache_tag) { //1. 
Write memory to cpu side - flush_set(data, std::forward(extras)...); + flush_set(cmd, data, std::forward(extras)...); //2. Release all obsolete sections unprotect_set(data); @@ -1302,14 +1302,14 @@ namespace rsx else { // The cache contents have changed between the two readings. This means the data held is useless - invalidate_range_impl_base(data.fault_range, data.cause.undefer(), std::forward(extras)...); + invalidate_range_impl_base(cmd, data.fault_range, data.cause.undefer(), std::forward(extras)...); } return true; } template - bool flush_if_cache_miss_likely(const address_range &range, Args&&... extras) + bool flush_if_cache_miss_likely(commandbuffer_type& cmd, const address_range &range, Args&&... extras) { u32 cur_flushes_this_frame = (m_flushes_this_frame + m_speculations_this_frame); @@ -1340,7 +1340,7 @@ namespace rsx lock.upgrade(); - region.copy_texture(false, std::forward(extras)...); + region.copy_texture(cmd, false, std::forward(extras)...); result = true; cur_flushes_this_frame++; @@ -1466,7 +1466,7 @@ namespace rsx { for (auto §ion : overlapping) { - section.surface->memory_barrier(cmd); + section.surface->read_barrier(cmd); surfaces.push_back ({ @@ -1504,7 +1504,7 @@ namespace rsx u32 internal_height = tex_height; get_native_dimensions(internal_width, internal_height, texptr); - texptr->memory_barrier(cmd); + texptr->read_barrier(cmd); if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d && extended_dimension != rsx::texture_dimension_extended::texture_dimension_1d) @@ -1597,7 +1597,7 @@ namespace rsx for (auto §ion : overlapping) { - section.surface->memory_barrier(cmd); + section.surface->read_barrier(cmd); result.external_subresource_desc.sections_to_copy.push_back ({ @@ -1713,7 +1713,7 @@ namespace rsx else { m_rtts.invalidate_surface_address(texaddr, false); - invalidate_address(texaddr, invalidation_cause::read, std::forward(extras)...); + invalidate_address(cmd, texaddr, invalidation_cause::read, 
std::forward(extras)...); } } @@ -1729,7 +1729,7 @@ namespace rsx else { m_rtts.invalidate_surface_address(texaddr, true); - invalidate_address(texaddr, invalidation_cause::read, std::forward(extras)...); + invalidate_address(cmd, texaddr, invalidation_cause::read, std::forward(extras)...); } } } @@ -1751,7 +1751,7 @@ namespace rsx if (!rsc.surface->test() && !m_rtts.address_is_bound(rsc.base_address, rsc.is_depth_surface)) { m_rtts.invalidate_surface_address(rsc.base_address, rsc.is_depth_surface); - invalidate_address(rsc.base_address, invalidation_cause::read, std::forward(extras)...); + invalidate_address(cmd, rsc.base_address, invalidation_cause::read, std::forward(extras)...); } else if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d && extended_dimension != rsx::texture_dimension_extended::texture_dimension_1d) @@ -1868,7 +1868,7 @@ namespace rsx lock.upgrade(); //Invalidate - invalidate_range_impl_base(tex_range, invalidation_cause::read, std::forward(extras)...); + invalidate_range_impl_base(cmd, tex_range, invalidation_cause::read, std::forward(extras)...); //NOTE: SRGB correction is to be handled in the fragment shader; upload as linear RGB return{ upload_image_from_cpu(cmd, tex_range, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format, @@ -1932,14 +1932,14 @@ namespace rsx if (src_is_render_target && !src_subres.surface->test() && !m_rtts.address_is_bound(src_subres.base_address, src_subres.is_depth_surface)) { m_rtts.invalidate_surface_address(src_subres.base_address, src_subres.is_depth_surface); - invalidate_address(src_subres.base_address, invalidation_cause::read, std::forward(extras)...); + invalidate_address(cmd, src_subres.base_address, invalidation_cause::read, std::forward(extras)...); src_is_render_target = false; } if (dst_is_render_target && !dst_subres.surface->test() && !m_rtts.address_is_bound(dst_subres.base_address, dst_subres.is_depth_surface)) { 
m_rtts.invalidate_surface_address(dst_subres.base_address, dst_subres.is_depth_surface); - invalidate_address(dst_subres.base_address, invalidation_cause::read, std::forward(extras)...); + invalidate_address(cmd, dst_subres.base_address, invalidation_cause::read, std::forward(extras)...); dst_is_render_target = false; } @@ -2005,8 +2005,8 @@ namespace rsx const u32 memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height; lock.upgrade(); - invalidate_range_impl_base(address_range::start_length(src_address, memcpy_bytes_length), invalidation_cause::read, std::forward(extras)...); - invalidate_range_impl_base(address_range::start_length(dst_address, memcpy_bytes_length), invalidation_cause::write, std::forward(extras)...); + invalidate_range_impl_base(cmd, address_range::start_length(src_address, memcpy_bytes_length), invalidation_cause::read, std::forward(extras)...); + invalidate_range_impl_base(cmd, address_range::start_length(dst_address, memcpy_bytes_length), invalidation_cause::write, std::forward(extras)...); memcpy(dst.pixels, src.pixels, memcpy_bytes_length); return true; } @@ -2140,7 +2140,7 @@ namespace rsx lock.upgrade(); const auto rsx_range = address_range::start_length(src_address, src.pitch * src.slice_h); - invalidate_range_impl_base(rsx_range, invalidation_cause::read, std::forward(extras)...); + invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::read, std::forward(extras)...); const u16 pitch_in_block = src_is_argb8 ? 
src.pitch >> 2 : src.pitch >> 1; std::vector subresource_layout; @@ -2228,7 +2228,7 @@ namespace rsx lock.upgrade(); // Invalidate as the memory is not reusable now - invalidate_range_impl_base(cached_dest->get_section_range(), invalidation_cause::write, std::forward(extras)...); + invalidate_range_impl_base(cmd, cached_dest->get_section_range(), invalidation_cause::write, std::forward(extras)...); AUDIT(!cached_dest->is_locked()); dest_texture = 0; @@ -2282,7 +2282,7 @@ namespace rsx lock.upgrade(); const auto rsx_range = address_range::start_length(dst.rsx_address, section_length); - invalidate_range_impl_base(rsx_range, invalidation_cause::write, std::forward(extras)...); + invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward(extras)...); const u16 pitch_in_block = dst_is_argb8 ? dst.pitch >> 2 : dst.pitch >> 1; std::vector subresource_layout; @@ -2372,7 +2372,7 @@ namespace rsx } typeless_info.analyse(); - blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit, typeless_info); + blitter.scale_image(cmd, vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit, typeless_info); notify_surface_changed(dst.rsx_address); blit_op_result result = true; diff --git a/rpcs3/Emu/RSX/GL/GLExecutionState.h b/rpcs3/Emu/RSX/GL/GLExecutionState.h new file mode 100644 index 0000000000..03249d67d0 --- /dev/null +++ b/rpcs3/Emu/RSX/GL/GLExecutionState.h @@ -0,0 +1,246 @@ +#pragma once +#include "Utilities/types.h" +#include "Utilities/geometry.h" +#include "OpenGL.h" + +#include + +namespace gl +{ + struct driver_state + { + const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001; + const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002; + const u32 DEPTH_RANGE_MIN = 0xFFFF0003; + const u32 DEPTH_RANGE_MAX = 0xFFFF0004; + + std::unordered_map properties = {}; + std::unordered_map> indexed_properties = {}; + + bool enable(u32 test, GLenum cap) + { + auto found = properties.find(cap); + if (found != properties.end() && 
found->second == test) + return !!test; + + properties[cap] = test; + + if (test) + glEnable(cap); + else + glDisable(cap); + + return !!test; + } + + bool enablei(u32 test, GLenum cap, u32 index) + { + auto found = indexed_properties.find(cap); + const bool exists = found != indexed_properties.end(); + + if (!exists) + { + indexed_properties[cap] = {}; + indexed_properties[cap][index] = test; + } + else + { + if (found->second[index] == test) + return !!test; + + found->second[index] = test; + } + + if (test) + glEnablei(cap, index); + else + glDisablei(cap, index); + + return !!test; + } + + inline bool test_property(GLenum property, u32 test) const + { + auto found = properties.find(property); + if (found == properties.end()) + return false; + + return (found->second == test); + } + + void depth_func(GLenum func) + { + if (!test_property(GL_DEPTH_FUNC, func)) + { + glDepthFunc(func); + properties[GL_DEPTH_FUNC] = func; + } + } + + void depth_mask(GLboolean mask) + { + if (!test_property(GL_DEPTH_WRITEMASK, mask)) + { + glDepthMask(mask); + properties[GL_DEPTH_WRITEMASK] = mask; + } + } + + void clear_depth(GLfloat depth) + { + u32 value = (u32&)depth; + if (!test_property(GL_DEPTH_CLEAR_VALUE, value)) + { + glClearDepth(depth); + properties[GL_DEPTH_CLEAR_VALUE] = value; + } + } + + void stencil_mask(GLuint mask) + { + if (!test_property(GL_STENCIL_WRITEMASK, mask)) + { + glStencilMask(mask); + properties[GL_STENCIL_WRITEMASK] = mask; + } + } + + void clear_stencil(GLint stencil) + { + u32 value = (u32&)stencil; + if (!test_property(GL_STENCIL_CLEAR_VALUE, value)) + { + glClearStencil(stencil); + properties[GL_STENCIL_CLEAR_VALUE] = value; + } + } + + void color_mask(u32 mask) + { + if (!test_property(GL_COLOR_WRITEMASK, mask)) + { + glColorMask(((mask & 0x10) ? 1 : 0), ((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 
1 : 0)); + properties[GL_COLOR_WRITEMASK] = mask; + } + } + + void color_mask(bool r, bool g, bool b, bool a) + { + u32 mask = 0; + if (r) mask |= 0x10; + if (g) mask |= 0x20; + if (b) mask |= 0x40; + if (a) mask |= 0x80; + + color_mask(mask); + } + + void clear_color(u8 r, u8 g, u8 b, u8 a) + { + u32 value = (u32)r | (u32)g << 8 | (u32)b << 16 | (u32)a << 24; + if (!test_property(GL_COLOR_CLEAR_VALUE, value)) + { + glClearColor(r / 255.f, g / 255.f, b / 255.f, a / 255.f); + properties[GL_COLOR_CLEAR_VALUE] = value; + } + } + + void clear_color(const color4f& color) + { + clear_color(u8(color.r * 255), u8(color.g * 255), u8(color.b * 255), u8(color.a * 255)); + } + + void depth_bounds(float min, float max) + { + u32 depth_min = (u32&)min; + u32 depth_max = (u32&)max; + + if (!test_property(DEPTH_BOUNDS_MIN, depth_min) || !test_property(DEPTH_BOUNDS_MAX, depth_max)) + { + glDepthBoundsEXT(min, max); + + properties[DEPTH_BOUNDS_MIN] = depth_min; + properties[DEPTH_BOUNDS_MAX] = depth_max; + } + } + + void depth_range(float min, float max) + { + u32 depth_min = (u32&)min; + u32 depth_max = (u32&)max; + + if (!test_property(DEPTH_RANGE_MIN, depth_min) || !test_property(DEPTH_RANGE_MAX, depth_max)) + { + glDepthRange(min, max); + + properties[DEPTH_RANGE_MIN] = depth_min; + properties[DEPTH_RANGE_MAX] = depth_max; + } + } + + void logic_op(GLenum op) + { + if (!test_property(GL_COLOR_LOGIC_OP, op)) + { + glLogicOp(op); + properties[GL_COLOR_LOGIC_OP] = op; + } + } + + void line_width(GLfloat width) + { + u32 value = (u32&)width; + + if (!test_property(GL_LINE_WIDTH, value)) + { + glLineWidth(width); + properties[GL_LINE_WIDTH] = value; + } + } + + void front_face(GLenum face) + { + if (!test_property(GL_FRONT_FACE, face)) + { + glFrontFace(face); + properties[GL_FRONT_FACE] = face; + } + } + + void cull_face(GLenum mode) + { + if (!test_property(GL_CULL_FACE_MODE, mode)) + { + glCullFace(mode); + properties[GL_CULL_FACE_MODE] = mode; + } + } + + void 
polygon_offset(float factor, float units) + { + u32 _units = (u32&)units; + u32 _factor = (u32&)factor; + + if (!test_property(GL_POLYGON_OFFSET_UNITS, _units) || !test_property(GL_POLYGON_OFFSET_FACTOR, _factor)) + { + glPolygonOffset(factor, units); + + properties[GL_POLYGON_OFFSET_UNITS] = _units; + properties[GL_POLYGON_OFFSET_FACTOR] = _factor; + } + } + }; + + struct command_context + { + driver_state* drv; + + command_context() + : drv(nullptr) + {} + + command_context(driver_state& drv_) + : drv(&drv_) + {} + }; +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index c7565d4c48..2b2c4e8718 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -210,6 +210,7 @@ void GLGSRender::end() } }; + gl::command_context cmd{ gl_state }; gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); // Handle special memory barrier for ARGB8->D24S8 in an active DSV @@ -227,7 +228,6 @@ void GLGSRender::end() std::chrono::time_point textures_start = steady_clock::now(); std::lock_guard lock(m_sampler_mutex); - void* unused = nullptr; bool update_framebuffer_sourced = false; if (surface_store_tag != m_rtts.cache_tag) @@ -248,7 +248,7 @@ void GLGSRender::end() if (rsx::method_registers.fragment_textures[i].enabled()) { - *sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.fragment_textures[i], m_rtts); + *sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.fragment_textures[i], m_rtts); if (m_textures_dirty[i]) m_fs_sampler_states[i].apply(rsx::method_registers.fragment_textures[i], fs_sampler_state[i].get()); @@ -274,7 +274,7 @@ void GLGSRender::end() if (rsx::method_registers.vertex_textures[i].enabled()) { - *sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.vertex_textures[i], m_rtts); + *sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.vertex_textures[i], m_rtts); 
if (m_vertex_textures_dirty[i]) m_vs_sampler_states[i].apply(rsx::method_registers.vertex_textures[i], vs_sampler_state[i].get()); @@ -313,7 +313,6 @@ void GLGSRender::end() //Bind textures and resolve external copy operations std::chrono::time_point textures_start = steady_clock::now(); - void *unused = nullptr; for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) { @@ -329,7 +328,7 @@ void GLGSRender::end() if (!view && sampler_state->external_subresource_desc.external_handle) { - view = m_gl_texture_cache.create_temporary_subresource(unused, sampler_state->external_subresource_desc); + view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc); } } @@ -373,8 +372,7 @@ void GLGSRender::end() } else if (sampler_state->external_subresource_desc.external_handle) { - void *unused = nullptr; - m_gl_texture_cache.create_temporary_subresource(unused, sampler_state->external_subresource_desc)->bind(); + m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind(); } else { @@ -393,13 +391,13 @@ void GLGSRender::end() { gl_state.enable(GL_FALSE, GL_SCISSOR_TEST); - if (ds) ds->memory_barrier(); + if (ds) ds->write_barrier(cmd); for (auto &rtt : m_rtts.m_bound_render_targets) { if (auto surface = std::get<1>(rtt)) { - surface->memory_barrier(); + surface->write_barrier(cmd); } } } @@ -1140,7 +1138,7 @@ void GLGSRender::clear_surface(u32 arg) { u8 clear_stencil = rsx::method_registers.stencil_clear_value(); - gl_state.stencil_mask(rsx::method_registers.stencil_mask()); + gl_state.stencil_mask(0xFF); gl_state.clear_stencil(clear_stencil); mask |= GLenum(gl::buffers::stencil); @@ -1807,7 +1805,9 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing) const rsx::invalidation_cause cause = is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write) : (can_flush ? 
rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read); - auto result = m_gl_texture_cache.invalidate_address(address, cause); + + gl::command_context null_cmd; + auto result = m_gl_texture_cache.invalidate_address(null_cmd, address, cause); if (!result.violation_handled) return false; @@ -1831,7 +1831,8 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing) void GLGSRender::on_invalidate_memory_range(const utils::address_range &range) { //Discard all memory in that range without bothering with writeback (Force it for strict?) - auto data = std::move(m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap)); + gl::command_context cmd{ gl_state }; + auto data = std::move(m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::unmap)); AUDIT(data.empty()); if (data.violation_handled) @@ -1856,7 +1857,8 @@ void GLGSRender::do_local_task(rsx::FIFO_state state) { if (q.processed) continue; - q.result = m_gl_texture_cache.flush_all(q.section_data); + gl::command_context cmd{ gl_state }; + q.result = m_gl_texture_cache.flush_all(cmd, q.section_data); q.processed = true; } } @@ -1902,7 +1904,8 @@ work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrash bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate) { - if (m_gl_texture_cache.blit(src, dst, interpolate, m_rtts)) + gl::command_context cmd{ gl_state }; + if (m_gl_texture_cache.blit(cmd, src, dst, interpolate, m_rtts)) { m_samplers_dirty.store(true); return true; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index eeb307f10c..80ce700aed 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -354,7 +354,7 @@ public: private: - driver_state gl_state; + gl::driver_state gl_state; // Return element to draw and in case of indexed draw index type and offset in index buffer gl::vertex_upload_info set_vertex_buffer(); diff --git 
a/rpcs3/Emu/RSX/GL/GLHelpers.cpp b/rpcs3/Emu/RSX/GL/GLHelpers.cpp index 279074a297..67121efcc3 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.cpp +++ b/rpcs3/Emu/RSX/GL/GLHelpers.cpp @@ -362,7 +362,7 @@ namespace gl return attrib_t(index); } - void blitter::scale_image(const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation, + void blitter::scale_image(gl::command_context& cmd, const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation, bool is_depth_copy, const rsx::typeless_xfer& xfer_info) { std::unique_ptr typeless_src; @@ -400,9 +400,6 @@ namespace gl dst_rect.x2 = (u16)(dst_rect.x2 * xfer_info.dst_scaling_hint); } - s32 old_fbo = 0; - glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo); - filter interp = (linear_interpolation && !is_depth_copy) ? filter::linear : filter::nearest; GLenum attachment; gl::buffers target; @@ -427,6 +424,10 @@ namespace gl target = gl::buffers::color; } + save_binding_state saved; + + cmd.drv->enable(GL_FALSE, GL_STENCIL_TEST); + blit_src.bind(); glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src_id, 0); blit_src.check(); @@ -435,10 +436,6 @@ namespace gl glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst_id, 0); blit_dst.check(); - GLboolean scissor_test_enabled = glIsEnabled(GL_SCISSOR_TEST); - if (scissor_test_enabled) - glDisable(GL_SCISSOR_TEST); - blit_src.blit(blit_dst, src_rect, dst_rect, target, interp); if (xfer_info.dst_is_typeless) @@ -452,10 +449,53 @@ namespace gl blit_dst.bind(); glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, GL_NONE, 0); + } - if (scissor_test_enabled) - glEnable(GL_SCISSOR_TEST); + void blitter::fast_clear_image(gl::command_context& cmd, const texture* dst, const color4f& color) + { + save_binding_state saved; - glBindFramebuffer(GL_FRAMEBUFFER, old_fbo); + blit_dst.bind(); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst->id(), 0); + blit_dst.check(); 
+ + cmd.drv->clear_color(color); + cmd.drv->color_mask(true, true, true, true); + + glClear(GL_COLOR_BUFFER_BIT); + } + + void blitter::fast_clear_image(gl::command_context& cmd, const texture* dst, float depth, u8 stencil) + { + GLenum attachment; + GLbitfield clear_mask; + + switch (const auto fmt = dst->get_internal_format()) + { + case texture::internal_format::depth: + case texture::internal_format::depth16: + clear_mask = GL_DEPTH_BUFFER_BIT; + attachment = GL_DEPTH_ATTACHMENT; + break; + case texture::internal_format::depth_stencil: + case texture::internal_format::depth24_stencil8: + case texture::internal_format::depth32f_stencil8: + clear_mask = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + attachment = GL_DEPTH_STENCIL_ATTACHMENT; + break; + default: + fmt::throw_exception("Invalid texture passed to clear depth function, format=0x%x", (u32)fmt); + } + + save_binding_state saved; + + blit_dst.bind(); + glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst->id(), 0); + blit_dst.check(); + + cmd.drv->depth_mask(GL_TRUE); + cmd.drv->stencil_mask(0xFF); + + glClear(clear_mask); } } diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 65d2c384c9..2ae764c770 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -8,7 +8,7 @@ #include #include -#include "OpenGL.h" +#include "GLExecutionState.h" #include "../GCM.h" #include "../Common/TextureUtils.h" @@ -2796,6 +2796,21 @@ public: class blitter { + struct save_binding_state + { + GLuint old_fbo; + + save_binding_state() + { + glGetIntegerv(GL_FRAMEBUFFER_BINDING, (GLint*)&old_fbo); + } + + ~save_binding_state() + { + glBindFramebuffer(GL_FRAMEBUFFER, old_fbo); + } + }; + fbo blit_src; fbo blit_dst; @@ -2813,7 +2828,10 @@ public: blit_src.remove(); } - void scale_image(const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation, + void scale_image(gl::command_context& cmd, const texture* src, texture* dst, areai 
src_rect, areai dst_rect, bool linear_interpolation, bool is_depth_copy, const rsx::typeless_xfer& xfer_info); + + void fast_clear_image(gl::command_context& cmd, const texture* dst, const color4f& color); + void fast_clear_image(gl::command_context& cmd, const texture* dst, float depth, u8 stencil); }; } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 30b5050a5c..092da9fdc2 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -227,6 +227,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk const auto color_offsets = get_offsets(); const auto color_locations = get_locations(); + gl::command_context cmd{ gl_state }; + for (int i = 0; i < rsx::limits::color_buffers_count; ++i) { if (m_surface_info[i].pitch && g_cfg.video.write_color_buffers) @@ -239,7 +241,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk const utils::address_range surface_range = m_surface_info[i].get_memory_range(); m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once); - m_gl_texture_cache.flush_if_cache_miss_likely(surface_range); + m_gl_texture_cache.flush_if_cache_miss_likely(cmd, surface_range); } if (std::get<0>(m_rtts.m_bound_render_targets[i])) @@ -270,7 +272,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk const utils::address_range surface_range = m_depth_surface_info.get_memory_range(); m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once); - m_gl_texture_cache.flush_if_cache_miss_likely(surface_range); + m_gl_texture_cache.flush_if_cache_miss_likely(cmd, surface_range); } auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); @@ -383,7 +385,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; const utils::address_range 
surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]); - m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch, + m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch, std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes); } } @@ -394,7 +396,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk { const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format); const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); - m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, + m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true); } } @@ -464,8 +466,6 @@ void GLGSRender::read_buffers() } else { - m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::read); - std::unique_ptr buffer(new u8[pitch * height]); color_buffer.read(buffer.get(), width, height, pitch); @@ -554,21 +554,8 @@ void GLGSRender::read_buffers() } } -void gl::render_target::memory_barrier(void*) +void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init) { - if (!old_contents) - { - // No memory to inherit - return; - } - - auto src_texture = static_cast(old_contents); - if (src_texture->get_rsx_pitch() != get_rsx_pitch()) - { - LOG_TODO(RSX, "Pitch mismatch, could not transfer inherited memory"); - return; - } - auto is_depth = 
[](gl::texture::internal_format format) { // TODO: Change this to image aspect semantics @@ -583,6 +570,33 @@ void gl::render_target::memory_barrier(void*) } }; + if (!old_contents) + { + // No memory to inherit + if (dirty && force_init) + { + // Initialize memory contents if we did not find anything usable + // TODO: Properly sync with Cell + if (is_depth(get_internal_format())) + { + gl::g_hw_blitter->fast_clear_image(cmd, this, 1.f, 255); + } + else + { + gl::g_hw_blitter->fast_clear_image(cmd, this, {}); + } + } + + return; + } + + auto src_texture = static_cast(old_contents); + if (src_texture->get_rsx_pitch() != get_rsx_pitch()) + { + LOG_TODO(RSX, "Pitch mismatch, could not transfer inherited memory"); + return; + } + auto src_bpp = src_texture->get_native_pitch() / src_texture->width(); auto dst_bpp = get_native_pitch() / width(); rsx::typeless_xfer typeless_info{}; @@ -609,7 +623,7 @@ void gl::render_target::memory_barrier(void*) } } - gl::g_hw_blitter->scale_image(old_contents, this, + gl::g_hw_blitter->scale_image(cmd, old_contents, this, { 0, 0, std::get<0>(region), std::get<1>(region) }, { 0, 0, std::get<2>(region) , std::get<3>(region) }, !dst_is_depth, dst_is_depth, typeless_info); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index b40b812285..18ef60eb8c 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -131,7 +131,9 @@ namespace gl return (rsx::apply_resolution_scale(_width, true) == internal_width) && (rsx::apply_resolution_scale(_height, true) == internal_height); } - void memory_barrier(void* = nullptr); + void memory_barrier(gl::command_context& cmd, bool force_init = false); + void read_barrier(gl::command_context& cmd) { memory_barrier(cmd, true); } + void write_barrier(gl::command_context& cmd) { memory_barrier(cmd, false); } }; struct framebuffer_holder : public gl::fbo, public rsx::ref_counted diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h 
b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 9d5bfa0962..20a6b45d7a 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -35,7 +35,7 @@ namespace gl struct texture_cache_traits { - using commandbuffer_type = void*; + using commandbuffer_type = gl::command_context; using section_storage_type = gl::cached_texture_section; using texture_cache_type = gl::texture_cache; using texture_cache_base_type = rsx::texture_cache; @@ -252,7 +252,7 @@ namespace gl } } - void copy_texture(bool manage_lifetime) + void copy_texture(gl::command_context& cmd, bool manage_lifetime) { ASSERT(exists()); @@ -314,7 +314,7 @@ namespace gl const bool is_depth = is_depth_texture(); const bool linear_interp = is_depth? false : true; - g_hw_blitter->scale_image(vram_texture, scaled_texture.get(), src_area, dst_area, linear_interp, is_depth, {}); + g_hw_blitter->scale_image(cmd, vram_texture, scaled_texture.get(), src_area, dst_area, linear_interp, is_depth, {}); target_texture = scaled_texture.get(); } } @@ -376,12 +376,12 @@ namespace gl /** * Flush */ - void synchronize(bool blocking) + void synchronize(bool blocking, gl::command_context& cmd) { if (synchronized) return; - copy_texture(blocking); + copy_texture(cmd, blocking); if (blocking) { @@ -476,8 +476,6 @@ namespace gl } } - - /** * Misc */ @@ -727,20 +725,20 @@ namespace gl protected: - gl::texture_view* create_temporary_subresource_view(void*&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, + gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector) override { return create_temporary_subresource_impl(*src, GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true); } - gl::texture_view* create_temporary_subresource_view(void*&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, + gl::texture_view* 
create_temporary_subresource_view(gl::command_context&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector) override { return create_temporary_subresource_impl(src, (GLenum)src->get_internal_format(), GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true); } - gl::texture_view* generate_cubemap_from_images(void*&, u32 gcm_format, u16 size, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override + gl::texture_view* generate_cubemap_from_images(gl::command_context&, u32 gcm_format, u16 size, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override { const GLenum ifmt = gl::get_sized_internal_format(gcm_format); std::unique_ptr dst_image = std::make_unique(GL_TEXTURE_CUBE_MAP, size, size, 1, 1, ifmt); @@ -769,7 +767,7 @@ namespace gl return result; } - gl::texture_view* generate_3d_from_2d_images(void*&, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override + gl::texture_view* generate_3d_from_2d_images(gl::command_context&, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector& sources, const texture_channel_remap_t& /*remap_vector*/) override { const GLenum ifmt = gl::get_sized_internal_format(gcm_format); std::unique_ptr dst_image = std::make_unique(GL_TEXTURE_3D, width, height, depth, 1, ifmt); @@ -798,7 +796,7 @@ namespace gl return result; } - gl::texture_view* generate_atlas_from_images(void*&, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy, + gl::texture_view* generate_atlas_from_images(gl::command_context&, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy, const texture_channel_remap_t& remap_vector) override { auto result = create_temporary_subresource_impl(nullptr, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false); @@ -812,13 +810,13 @@ namespace gl return result; } 
- void update_image_contents(void*&, gl::texture_view* dst, gl::texture* src, u16 width, u16 height) override + void update_image_contents(gl::command_context&, gl::texture_view* dst, gl::texture* src, u16 width, u16 height) override { glCopyImageSubData(src->id(), GL_TEXTURE_2D, 0, 0, 0, 0, dst->image()->id(), GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1); } - cached_texture_section* create_new_texture(void*&, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, + cached_texture_section* create_new_texture(gl::command_context&, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags) override { auto image = gl::create_texture(gcm_format, width, height, depth, mipmaps, type); @@ -886,11 +884,10 @@ namespace gl return &cached; } - cached_texture_section* upload_image_from_cpu(void*&, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, + cached_texture_section* upload_image_from_cpu(gl::command_context &cmd, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, rsx::texture_upload_context context, const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool input_swizzled) override { - void* unused = nullptr; - auto section = create_new_texture(unused, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, + auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, rsx::texture_create_flags::default_component_order); gl::upload_texture(section->get_raw_texture()->id(), gcm_format, width, height, depth, mipmaps, @@ -913,7 +910,7 @@ namespace gl section.set_view_flags(flags); } - void insert_texture_barrier(void*&, gl::texture*) override + void 
insert_texture_barrier(gl::command_context&, gl::texture*) override { auto &caps = gl::get_driver_caps(); @@ -1013,10 +1010,9 @@ namespace gl baseclass::on_frame_end(); } - bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts) + bool blit(gl::command_context &cmd, rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts) { - void* unused = nullptr; - auto result = upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter); + auto result = upload_scaled_image(src, dst, linear_interpolate, cmd, m_rtts, m_hw_blitter); if (result.succeeded) { @@ -1034,7 +1030,7 @@ namespace gl gl::texture::format::depth_stencil : gl::texture::format::depth; } - flush_if_cache_miss_likely(result.to_address_range()); + flush_if_cache_miss_likely(cmd, result.to_address_range()); } return true; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 1d81a3d2d5..4662d5ce6b 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -855,7 +855,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) std::lock_guard lock(m_secondary_cb_guard); const rsx::invalidation_cause cause = is_writing ? 
rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read; - result = std::move(m_texture_cache.invalidate_address(address, cause, m_secondary_command_buffer, m_swapchain->get_graphics_queue())); + result = std::move(m_texture_cache.invalidate_address(m_secondary_command_buffer, address, cause, m_swapchain->get_graphics_queue())); } if (!result.violation_handled) @@ -928,7 +928,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) m_flush_requests.producer_wait(); } - m_texture_cache.flush_all(result, m_secondary_command_buffer, m_swapchain->get_graphics_queue()); + m_texture_cache.flush_all(m_secondary_command_buffer, result, m_swapchain->get_graphics_queue()); if (has_queue_ref) { @@ -944,7 +944,7 @@ void VKGSRender::on_invalidate_memory_range(const utils::address_range &range) { std::lock_guard lock(m_secondary_cb_guard); - auto data = std::move(m_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap, m_secondary_command_buffer, m_swapchain->get_graphics_queue())); + auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap, m_swapchain->get_graphics_queue())); AUDIT(data.empty()); if (data.violation_handled) @@ -1650,13 +1650,13 @@ void VKGSRender::end() // Apply write memory barriers if (g_cfg.video.strict_rendering_mode) { - if (ds) ds->memory_barrier(*m_current_command_buffer); + if (ds) ds->write_barrier(*m_current_command_buffer); for (auto &rtt : m_rtts.m_bound_render_targets) { if (auto surface = std::get<1>(rtt)) { - surface->memory_barrier(*m_current_command_buffer); + surface->write_barrier(*m_current_command_buffer); } } } @@ -1694,7 +1694,7 @@ void VKGSRender::end() if (UNLIKELY(!buffers_to_clear.empty())) { VkClearRect rect = { {{0, 0}, {m_draw_fbo->width(), m_draw_fbo->height()}}, 0, 1 }; - vkCmdClearAttachments(*m_current_command_buffer, (u32)buffers_to_clear.size(), + vkCmdClearAttachments(*m_current_command_buffer, 
buffers_to_clear.size(), buffers_to_clear.data(), 1, &rect); } @@ -2860,7 +2860,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) const utils::address_range rsx_range = m_surface_info[i].get_memory_range(); m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once); - m_texture_cache.flush_if_cache_miss_likely(rsx_range, *m_current_command_buffer, m_swapchain->get_graphics_queue()); + m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, rsx_range, m_swapchain->get_graphics_queue()); } m_surface_info[i].address = m_surface_info[i].pitch = 0; @@ -2876,7 +2876,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format); const utils::address_range surface_range = m_depth_surface_info.get_memory_range(); m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once); - m_texture_cache.flush_if_cache_miss_likely(surface_range, *m_current_command_buffer, m_swapchain->get_graphics_queue()); + m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue()); } m_depth_surface_info.address = m_depth_surface_info.pitch = 0; @@ -2929,8 +2929,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]); - m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range, - m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple{ *m_current_command_buffer, m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second); + m_texture_cache.lock_memory_region(*m_current_command_buffer, 
std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range, + m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple{ m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second); } } @@ -2940,8 +2940,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) { const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8; const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); - m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, - m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple{ *m_current_command_buffer, m_swapchain->get_graphics_queue() }, gcm_format, false); + m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, + m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple{ m_swapchain->get_graphics_queue() }, gcm_format, false); } } @@ -3301,7 +3301,7 @@ void VKGSRender::flip(int buffer) { if (section->get_protection() == utils::protection::no) { - section->copy_texture(false, *m_current_command_buffer, m_swapchain->get_graphics_queue()); + section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue()); flush_queue = true; } } @@ -3312,7 +3312,7 @@ void VKGSRender::flip(int buffer) flush_command_queue(); } - m_texture_cache.invalidate_range(range, rsx::invalidation_cause::read, *m_current_command_buffer, m_swapchain->get_graphics_queue()); + m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read, m_swapchain->get_graphics_queue()); image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height); } } diff --git 
a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 1ff42dbd72..599b527d32 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -409,7 +409,7 @@ namespace vk vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 1, &barrier, 0, nullptr); } - void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range) + void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range) { //Prepare an image to match the new layout.. VkImageMemoryBarrier barrier = {}; @@ -479,7 +479,7 @@ namespace vk vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &barrier); } - void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, VkImageSubresourceRange range) + void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, const VkImageSubresourceRange& range) { if (image->current_layout == new_layout) return; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 4a8f6f779b..f11c1f0ae5 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -139,8 +139,8 @@ namespace vk VkImageAspectFlags flags, vk::data_heap &upload_heap); //Other texture management helpers - void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range); - void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, VkImageSubresourceRange range); + void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range); + void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, const VkImageSubresourceRange& range); void 
change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout); void copy_image_typeless(const command_buffer &cmd, const image *src, const image *dst, const areai& src_rect, const areai& dst_rect, @@ -2981,13 +2981,8 @@ public: } }; - class blitter + struct blitter { - vk::command_buffer* commands; - - public: - blitter(vk::command_buffer *c) : commands(c) {} - - void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info); + void scale_image(vk::command_buffer& cmd, vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info); }; } diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 0562961c39..9d6d81d35b 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -57,10 +57,34 @@ namespace vk return (rsx::apply_resolution_scale(_width, true) == width()) && (rsx::apply_resolution_scale(_height, true) == height()); } - void memory_barrier(vk::command_buffer& cmd) + void memory_barrier(vk::command_buffer& cmd, bool force_init = false) { if (!old_contents) { + if (dirty && force_init) + { + // Initialize memory contents if we did not find anything usable + // TODO: Properly sync with Cell + VkImageSubresourceRange range{ attachment_aspect_flag, 0, 1, 0, 1 }; + const auto old_layout = current_layout; + + change_image_layout(cmd, this, VK_IMAGE_LAYOUT_GENERAL, range); + + if (attachment_aspect_flag & VK_IMAGE_ASPECT_COLOR_BIT) + { + VkClearColorValue color{}; + vkCmdClearColorImage(cmd, value, VK_IMAGE_LAYOUT_GENERAL, &color, 1, &range); + } + else + { + VkClearDepthStencilValue clear{ 1.f, 255 }; + vkCmdClearDepthStencilImage(cmd, value, VK_IMAGE_LAYOUT_GENERAL, &clear, 1, &range); + } + + change_image_layout(cmd, this, old_layout, range); + on_write(); + } + return; } @@ -96,14 +120,17 @@ namespace vk } } - 
vk::blitter hw_blitter(&cmd); - hw_blitter.scale_image(old_contents, this, + vk::blitter hw_blitter; + hw_blitter.scale_image(cmd, old_contents, this, { 0, 0, std::get<0>(region), std::get<1>(region) }, { 0, 0, std::get<2>(region) , std::get<3>(region) }, /*linear?*/false, /*depth?(unused)*/false, typeless_info); on_write(); } + + void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, true); } + void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, false); } }; struct framebuffer_holder: public vk::framebuffer, public rsx::ref_counted diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 17138f5df6..22fdb51a24 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -532,7 +532,7 @@ namespace vk return{ final_mapping[1], final_mapping[2], final_mapping[3], final_mapping[0] }; } - void blitter::scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info) + void blitter::scale_image(vk::command_buffer& cmd, vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info) { const auto src_aspect = vk::get_aspect_flags(src->info.format); const auto dst_aspect = vk::get_aspect_flags(dst->info.format); @@ -552,7 +552,7 @@ namespace vk src_area.x1 = (u16)(src_area.x1 * xfer_info.src_scaling_hint); src_area.x2 = (u16)(src_area.x2 * xfer_info.src_scaling_hint); - vk::copy_image_typeless(*commands, src, real_src, { 0, 0, (s32)src->width(), (s32)src->height() }, { 0, 0, (s32)internal_width, (s32)src->height() }, 1, + vk::copy_image_typeless(cmd, src, real_src, { 0, 0, (s32)src->width(), (s32)src->height() }, { 0, 0, (s32)internal_width, (s32)src->height() }, 1, vk::get_aspect_flags(src->info.format), vk::get_aspect_flags(format)); } @@ -568,7 +568,7 @@ namespace vk dst_area.x1 = (u16)(dst_area.x1 * xfer_info.dst_scaling_hint); dst_area.x2 = 
(u16)(dst_area.x2 * xfer_info.dst_scaling_hint); - vk::copy_image_typeless(*commands, dst, real_dst, { 0, 0, (s32)dst->width(), (s32)dst->height() }, { 0, 0, (s32)internal_width, (s32)dst->height() }, 1, + vk::copy_image_typeless(cmd, dst, real_dst, { 0, 0, (s32)dst->width(), (s32)dst->height() }, { 0, 0, (s32)internal_width, (s32)dst->height() }, 1, vk::get_aspect_flags(dst->info.format), vk::get_aspect_flags(format)); } else if (xfer_info.dst_context == rsx::texture_upload_context::framebuffer_storage) @@ -591,24 +591,24 @@ namespace vk const auto data_length = src->info.extent.width * src->info.extent.height * 4; const auto current_layout = src->current_layout; - vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); - vkCmdCopyImageToBuffer(*commands, src->value, src->current_layout, scratch_buf->value, 1, &copy); - vk::change_image_layout(*commands, real_src, current_layout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + vk::change_image_layout(cmd, real_src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, scratch_buf->value, 1, &copy); + vk::change_image_layout(cmd, real_src, current_layout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); - vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length, + vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, data_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vk::get_compute_task()->run(*commands, scratch_buf, data_length); + vk::get_compute_task()->run(cmd, scratch_buf, data_length); - vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length, + vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, data_length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT,
VK_ACCESS_TRANSFER_READ_BIT); real_src = vk::get_typeless_helper(src->info.format, src->width(), src->height()); - vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + vk::change_image_layout(cmd, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); - vkCmdCopyBufferToImage(*commands, scratch_buf->value, real_src->value, real_src->current_layout, 1, &copy); + vkCmdCopyBufferToImage(cmd, scratch_buf->value, real_src->value, real_src->current_layout, 1, &copy); } } } @@ -637,17 +637,17 @@ namespace vk const auto dst_width = dst_area.x2 - dst_area.x1; const auto dst_height = dst_area.y2 - dst_area.y1; - copy_scaled_image(*commands, real_src->value, real_dst->value, real_src->current_layout, real_dst->current_layout, src_area.x1, src_area.y1, src_width, src_height, + copy_scaled_image(cmd, real_src->value, real_dst->value, real_src->current_layout, real_dst->current_layout, src_area.x1, src_area.y1, src_width, src_height, dst_area.x1, dst_area.y1, dst_width, dst_height, 1, dst_aspect, real_src->info.format == real_dst->info.format, interpolate ?
VK_FILTER_LINEAR : VK_FILTER_NEAREST, real_src->info.format, real_dst->info.format); if (real_dst != dst) { auto internal_width = dst->width() * xfer_info.dst_scaling_hint; - vk::copy_image_typeless(*commands, real_dst, dst, { 0, 0, (s32)internal_width, (s32)dst->height() }, { 0, 0, (s32)dst->width(), (s32)dst->height() }, 1, + vk::copy_image_typeless(cmd, real_dst, dst, { 0, 0, (s32)internal_width, (s32)dst->height() }, { 0, 0, (s32)dst->width(), (s32)dst->height() }, 1, vk::get_aspect_flags(real_dst->info.format), vk::get_aspect_flags(dst->info.format)); } - change_image_layout(*commands, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, { (VkImageAspectFlags)dst_aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers }); + change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, { (VkImageAspectFlags)dst_aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers }); } } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 9b98894e5d..749be7fc8d 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -148,7 +148,7 @@ namespace vk return flushed; } - void copy_texture(bool manage_cb_lifetime, vk::command_buffer& cmd, VkQueue submit_queue) + void copy_texture(vk::command_buffer& cmd, bool manage_cb_lifetime, VkQueue submit_queue) { ASSERT(exists()); @@ -320,7 +320,7 @@ namespace vk m_device = &cmd.get_command_pool().get_owner(); } - copy_texture(blocking, cmd, submit_queue); + copy_texture(cmd, blocking, submit_queue); } void* map_synchronized(u32 offset, u32 size) @@ -1036,7 +1036,7 @@ namespace vk template sampled_image_descriptor _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts) { - return upload_texture(cmd, tex, m_rtts, cmd, const_cast(m_submit_queue)); + return upload_texture(cmd, tex, m_rtts, const_cast(m_submit_queue)); } vk::image *upload_image_simple(vk::command_buffer& cmd, u32 address, u32 width, u32 height) @@ -1094,14 +1094,14 
@@ namespace vk bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd) { - blitter helper(&cmd); - auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, const_cast(m_submit_queue)); + blitter helper; + auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, const_cast(m_submit_queue)); if (reply.succeeded) { if (reply.real_dst_size) { - flush_if_cache_miss_likely(reply.to_address_range(), cmd, m_submit_queue); + flush_if_cache_miss_likely(cmd, reply.to_address_range(), m_submit_queue); } return true; diff --git a/rpcs3/GLGSRender.vcxproj b/rpcs3/GLGSRender.vcxproj index 72a22875f3..29f6d29492 100644 --- a/rpcs3/GLGSRender.vcxproj +++ b/rpcs3/GLGSRender.vcxproj @@ -71,6 +71,7 @@ + diff --git a/rpcs3/GLGSRender.vcxproj.filters b/rpcs3/GLGSRender.vcxproj.filters index 742287d10a..edaa456e83 100644 --- a/rpcs3/GLGSRender.vcxproj.filters +++ b/rpcs3/GLGSRender.vcxproj.filters @@ -25,5 +25,6 @@ + \ No newline at end of file