mirror of https://github.com/RPCS3/rpcs3.git

commit 5260f4b47d (parent 385485204b)

rsx: Improvements to memory flush mechanism

- Batch DMA transfers whenever possible and do them in one go
- vk: Always ensure that queued DMA transfers are visible to the GPU before they are needed by the host.
  Requires a little refactoring to allow proper communication of the command buffer state
- vk: Code cleanup; the simplified mechanism makes it unnecessary to pass tons of args to methods
- vk: Fixup - do not forcefully do DMA transfers on sections in an invalidation zone! They may have been speculated correctly already
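
The first bullet is the heart of the change and is easiest to see in the rsx::texture_cache hunks below: instead of each faulting section driving its own submit-and-wait, the cache first gathers every section that still needs a GPU-to-host transfer, brackets the whole set with a single prepare_for_dma_transfers()/cleanup_after_dma_transfers() pair, and only then writes results back to guest memory. The following is a minimal sketch of that pattern, using hypothetical stand-ins (section, flush_set) for the real templated types and omitting the ROP timestamp check:

    #include <vector>

    // Hypothetical stand-in for a cached memory section; only the members
    // relevant to the batching pattern are modelled.
    struct section
    {
        bool synchronized = false; // has the GPU copy into the dma buffer happened?
        bool flush_always = false; // mirrors rsx::memory_read_flags::flush_always

        bool is_synchronized() const { return synchronized; }
        void copy_texture(bool /*miss*/) { synchronized = true; } // queue the GPU->host copy
        void flush() { /* write the now-valid dma buffer back to guest memory */ }
    };

    // One submit for N faulting sections instead of N submits.
    void flush_set(std::vector<section*>& sections_to_flush)
    {
        std::vector<section*> sections_to_transfer;
        for (auto* s : sections_to_flush)
        {
            if (!s->is_synchronized() || s->flush_always) // hard fault or forced resync
                sections_to_transfer.push_back(s);
        }

        if (!sections_to_transfer.empty())
        {
            // prepare_for_dma_transfers(cmd): make sure the command buffer is recording
            for (auto* s : sections_to_transfer)
                s->copy_texture(true);
            // cleanup_after_dma_transfers(cmd): submit once, wait once
        }

        for (auto* s : sections_to_flush)
            s->flush(); // every transfer result is now host-visible
    }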
@@ -364,6 +366,8 @@ namespace rsx
 		virtual image_view_type generate_atlas_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy, const texture_channel_remap_t& remap_vector) = 0;
 		virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0;
 		virtual bool render_target_format_is_compatible(image_storage_type* tex, u32 gcm_format) = 0;
+		virtual void prepare_for_dma_transfers(commandbuffer_type&) = 0;
+		virtual void cleanup_after_dma_transfers(commandbuffer_type&) = 0;
 
 	public:
 		virtual void destroy() = 0;
@@ -397,13 +399,13 @@ namespace rsx
 		template <typename... Args>
 		void err_once(const char* fmt, const Args&... params)
 		{
-			logs::RSX.error(fmt, params...);
+			emit_once(true, fmt, params...);
 		}
 
 		template <typename... Args>
 		void warn_once(const char* fmt, const Args&... params)
 		{
-			logs::RSX.warning(fmt, params...);
+			emit_once(false, fmt, params...);
 		}
 
 		/**
@@ -458,19 +460,40 @@ namespace rsx
 			});
 		}
 
+			rsx::simple_array<section_storage_type*> sections_to_transfer;
 			for (auto &surface : data.sections_to_flush)
 			{
-				if (surface->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
+				if (!surface->is_synchronized())
+				{
+					sections_to_transfer.push_back(surface);
+				}
+				else if (surface->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
 				{
 					// This region is set to always read from itself (unavoidable hard sync)
 					const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
-					if (surface->is_synchronized() && ROP_timestamp > surface->get_sync_timestamp())
+					if (ROP_timestamp > surface->get_sync_timestamp())
 					{
-						surface->copy_texture(cmd, true, std::forward<Args>(extras)...);
+						sections_to_transfer.push_back(surface);
 					}
 				}
+			}
 
-				surface->flush(cmd, std::forward<Args>(extras)...);
+			if (!sections_to_transfer.empty())
+			{
+				// Batch all hard faults together
+				prepare_for_dma_transfers(cmd);
+
+				for (auto &surface : sections_to_transfer)
+				{
+					surface->copy_texture(cmd, true, std::forward<Args>(extras)...);
+				}
+
+				cleanup_after_dma_transfers(cmd);
+			}
+
+			for (auto &surface : data.sections_to_flush)
+			{
+				surface->flush();
 
 				// Exclude this region when flushing other sections that should not trample it
 				// If we overlap an excluded RO, set it as dirty
@@ -1224,7 +1247,7 @@ namespace rsx
 		}
 
 		template <typename ...FlushArgs, typename ...Args>
-		void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, std::tuple<FlushArgs...>&& flush_extras, Args&&... extras)
+		void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, Args&&... extras)
 		{
 			AUDIT(g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer); // this method is only called when either WCB or WDB are enabled
 
@@ -1244,10 +1267,7 @@ namespace rsx
 			if (!region.is_locked() || region.get_context() != texture_upload_context::framebuffer_storage)
 			{
 				// Invalidate sections from surface cache occupying same address range
-				std::apply(&texture_cache::invalidate_range_impl_base<FlushArgs...>, std::tuple_cat(
-					std::forward_as_tuple(this, cmd, rsx_range, invalidation_cause::superseded_by_fbo),
-					std::forward<std::tuple<FlushArgs...> >(flush_extras)
-				));
+				invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::superseded_by_fbo);
 			}
 
 			if (!region.is_locked() || region.can_be_reused())
@@ -1303,14 +1303,10 @@ namespace rsx
 			return get_context() != texture_upload_context::shader_read && get_memory_read_flags() != memory_read_flags::flush_always;
 		}
 
-		void on_flush(bool miss)
+		void on_flush()
 		{
 			speculatively_flushed = false;
 
-			if (miss)
-			{
-				m_tex_cache->on_miss(*derived());
-			}
 			m_tex_cache->on_flush();
 
 			if (tracked_by_predictor())
@@ -1328,6 +1324,12 @@ namespace rsx
 				m_tex_cache->on_speculative_flush();
 		}
 
+		void on_miss()
+		{
+			LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
+			m_tex_cache->on_miss(*derived());
+		}
+
 		void touch(u64 tag)
 		{
 			last_write_tag = tag;
@@ -1454,11 +1456,9 @@ namespace rsx
 
 	public:
 		// Returns false if there was a cache miss
-		template <typename ...Args>
-		bool flush(Args&&... extras)
+		void flush()
 		{
-			if (flushed) return true;
-			bool miss = false;
+			if (flushed) return;
 
 			// Sanity checks
 			ASSERT(exists());
@@ -1469,19 +1469,12 @@ namespace rsx
 			{
 				flushed = true;
 				flush_exclusions.clear();
-				on_flush(miss);
-				return !miss;
+				on_flush();
+				return;
 			}
 
-			// If we are not synchronized, we must synchronize before proceeding (hard fault)
-			if (!synchronized)
-			{
-				LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
-				derived()->synchronize(true, std::forward<Args>(extras)...);
-				miss = true;
-
-				ASSERT(synchronized); // TODO ruipin: This might be possible in OGL. Revisit
-			}
+			// NOTE: Hard faults should have been pre-processed beforehand
+			ASSERT(synchronized);
 
 			// Copy flush result to guest memory
 			imp_flush();
@@ -1491,9 +1484,7 @@ namespace rsx
 			flushed = true;
 			derived()->finish_flush();
 			flush_exclusions.clear();
-			on_flush(miss);
-
-			return !miss;
+			on_flush();
 		}
 
 		void add_flush_exclusion(const address_range& rng)
@@ -392,7 +392,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 		{
 			// Mark buffer regions as NO_ACCESS on Cell-visible side
 			m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
-				std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes);
+				color_format.format, color_format.type, color_format.swap_bytes);
 		}
 		else
 		{
@@ -407,7 +407,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 		{
 			const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
 			m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
-				std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true);
+				depth_format_gl.format, depth_format_gl.type, true);
 		}
 		else
 		{
@@ -217,14 +217,18 @@ namespace gl
 			}
 		}
 
-		void copy_texture(gl::command_context& cmd, bool manage_lifetime)
+		void copy_texture(gl::command_context& cmd, bool miss)
 		{
 			ASSERT(exists());
 
-			if (!manage_lifetime)
+			if (LIKELY(!miss))
 			{
 				baseclass::on_speculative_flush();
 			}
+			else
+			{
+				baseclass::on_miss();
+			}
 
 			if (context == rsx::texture_upload_context::framebuffer_storage)
 			{
@@ -347,15 +351,6 @@ namespace gl
 		/**
 		 * Flush
 		 */
-		void synchronize(bool blocking, gl::command_context& cmd)
-		{
-			if (synchronized)
-				return;
-
-			verify(HERE), cmd.drv;
-			copy_texture(cmd, blocking);
-		}
-
 		void* map_synchronized(u32 offset, u32 size)
 		{
 			AUDIT(synchronized && !m_fence.is_empty());
@@ -642,7 +637,7 @@ namespace gl
 			if (src)
 			{
 				//Format mismatch
-				err_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt);
+				warn_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt);
 			}
 
 			//Apply base component map onto the new texture if a data cast has been done
@@ -992,6 +987,12 @@ namespace gl
 			}
 		}
 
+		void prepare_for_dma_transfers(gl::command_context&) override
+		{}
+
+		void cleanup_after_dma_transfers(gl::command_context&) override
+		{}
+
 	public:
 
 		using baseclass::texture_cache;
@@ -855,7 +855,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 		std::lock_guard lock(m_secondary_cb_guard);
 
 		const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
-		result = std::move(m_texture_cache.invalidate_address(m_secondary_command_buffer, address, cause, m_swapchain->get_graphics_queue()));
+		result = std::move(m_texture_cache.invalidate_address(m_secondary_command_buffer, address, cause));
 	}
 
 	if (!result.violation_handled)
@@ -897,7 +897,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 			m_flush_requests.producer_wait();
 		}
 
-		m_texture_cache.flush_all(m_secondary_command_buffer, result, m_swapchain->get_graphics_queue());
+		m_texture_cache.flush_all(m_secondary_command_buffer, result);
 
 		if (has_queue_ref)
 		{
@@ -913,7 +913,7 @@ void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
 	{
 		std::lock_guard lock(m_secondary_cb_guard);
 
-		auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap, m_swapchain->get_graphics_queue()));
+		auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap));
 		AUDIT(data.empty());
 
 		if (data.violation_handled)
@@ -1454,7 +1454,7 @@ void VKGSRender::end()
 
 			if (rsx::method_registers.fragment_textures[i].enabled())
 			{
-				*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
+				*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
 
 				const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
 				const VkBool32 compare_enabled = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8 ||
@@ -1526,7 +1526,7 @@ void VKGSRender::end()
 
 			if (rsx::method_registers.vertex_textures[i].enabled())
 			{
-				*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
+				*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
 
 				bool replace = !vs_sampler_handles[i];
 				const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN);
@@ -1725,7 +1725,7 @@ void VKGSRender::end()
 		m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
 		m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;
 
-		m_current_command_buffer->flags |= cb_has_occlusion_task;
+		m_current_command_buffer->flags |= vk::command_buffer::cb_has_occlusion_task;
 	}
 
 	// Apply write memory barriers
@@ -1796,7 +1796,6 @@ void VKGSRender::end()
 		m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
 	}
 
-	m_current_command_buffer->num_draws++;
 	m_rtts.on_write();
 
 	rsx::thread::end();
@@ -2187,7 +2186,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint)
 {
 	if (hint == rsx::FIFO_hint::hint_conditional_render_eval)
 	{
-		if (m_current_command_buffer->flags & cb_has_occlusion_task)
+		if (m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task)
 		{
 			// Occlusion test result evaluation is coming up, avoid a hard sync
 			if (!m_flush_requests.pending())
@@ -2881,7 +2880,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 
 				const utils::address_range rsx_range = m_surface_info[i].get_memory_range();
 				m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once);
-				m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, rsx_range, m_swapchain->get_graphics_queue());
+				m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, rsx_range);
 			}
 
 			m_surface_info[i].address = m_surface_info[i].pitch = 0;
@@ -2898,7 +2897,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 			auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format);
 			const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
 			m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
-			m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
+			m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, surface_range);
 		}
 
 		m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
@@ -2944,6 +2943,12 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 		m_texture_cache.notify_surface_changed(layout.zeta_address);
 	}
 
+	// Before messing with memory properties, flush command queue if there are dma transfers queued up
+	if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
+	{
+		flush_command_queue();
+	}
+
 	const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format);
 	for (u8 index : m_draw_buffers)
 	{
@@ -2953,11 +2958,11 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 		if (g_cfg.video.write_color_buffers)
 		{
 			m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
-				m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second);
+				m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second);
 		}
 		else
 		{
-			m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
+			m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range);
 		}
 	}
 
@@ -2968,11 +2973,11 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 		{
 			const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
 			m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
-				m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, gcm_format, false);
+				m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false);
 		}
 		else
 		{
-			m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
+			m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range);
 		}
 	}
 
@@ -3323,21 +3328,22 @@ void VKGSRender::flip(int buffer)
 			const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
 			const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage;
 			const auto overlap = m_texture_cache.find_texture_from_range<true>(range, 0, lookup_mask);
-			bool flush_queue = false;
 
 			for (const auto & section : overlap)
 			{
-				section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue());
-				flush_queue = true;
+				if (!section->is_synchronized())
+				{
+					section->copy_texture(*m_current_command_buffer, true);
+				}
 			}
 
-			if (flush_queue)
+			if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
 			{
 				// Submit for processing to lower hard fault penalty
 				flush_command_queue();
 			}
 
-			m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read, m_swapchain->get_graphics_queue());
+			m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read);
 			image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);
 		}
 	}
@@ -3487,16 +3493,15 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
 	//Verify enough memory exists before attempting to handle data transfer
 	check_heap_status();
 
-	const auto old_speculations_count = m_texture_cache.get_num_cache_speculative_writes();
 	if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer))
 	{
 		m_samplers_dirty.store(true);
-		m_current_command_buffer->flags |= cb_has_blit_transfer;
+		m_current_command_buffer->set_flag(vk::command_buffer::cb_has_blit_transfer);
 
-		if (m_texture_cache.get_num_cache_speculative_writes() > old_speculations_count)
+		if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
 		{
-			// A speculative write happened, flush while the dma resource is valid
-			// TODO: Deeper investigation as to why this can trigger problems
+			// A dma transfer has been queued onto this cb
+			// This likely means that we're done with the transfers to the target (writes_likely_completed=1)
 			flush_command_queue();
 		}
 		return true;
@@ -48,20 +48,11 @@ namespace vk
 
 extern u64 get_system_time();
 
-enum command_buffer_data_flag
-{
-	cb_has_occlusion_task = 1,
-	cb_has_blit_transfer = 2
-};
-
 struct command_buffer_chunk: public vk::command_buffer
 {
 	VkFence submit_fence = VK_NULL_HANDLE;
 	VkDevice m_device = VK_NULL_HANDLE;
 
-	u32 num_draws = 0;
-	u32 flags = 0;
-
 	std::atomic_bool pending = { false };
 	std::atomic<u64> last_sync = { 0 };
 	shared_mutex guard_mutex;
@@ -100,8 +91,6 @@ struct command_buffer_chunk: public vk::command_buffer
 			wait(FRAME_PRESENT_TIMEOUT);
 
 		CHECK_RESULT(vkResetCommandBuffer(commands, 0));
-		num_draws = 0;
-		flags = 0;
 	}
 
 	bool poke()
@@ -1168,6 +1168,14 @@ namespace vk
 		}
 		access_hint = flush_only;
 
+		enum command_buffer_data_flag : u32
+		{
+			cb_has_occlusion_task = 1,
+			cb_has_blit_transfer = 2,
+			cb_has_dma_transfer = 4
+		};
+		u32 flags = 0;
+
 	public:
 		command_buffer() {}
 		~command_buffer() {}
@@ -1206,6 +1214,16 @@ namespace vk
 			return *pool;
 		}
 
+		void clear_flags()
+		{
+			flags = 0;
+		}
+
+		void set_flag(command_buffer_data_flag flag)
+		{
+			flags |= flag;
+		}
+
 		operator VkCommandBuffer() const
 		{
 			return commands;
@@ -1278,6 +1296,8 @@ namespace vk
 			acquire_global_submit_lock();
 			CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence));
 			release_global_submit_lock();
+
+			clear_flags();
 		}
 	};
 
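These command buffer changes carry the "communication of the command buffer state" mentioned in the commit message: recorded work tags the buffer via set_flag(), the flags are cleared on submit, and other subsystems test them instead of receiving extra arguments. A trimmed-down sketch of the lifecycle (the submit internals are elided):

    #include <cstdint>

    // Trimmed-down stand-in for the flag plumbing added to vk::command_buffer.
    struct command_buffer_sketch
    {
        enum command_buffer_data_flag : uint32_t
        {
            cb_has_occlusion_task = 1,
            cb_has_blit_transfer  = 2,
            cb_has_dma_transfer   = 4
        };

        uint32_t flags = 0;

        void set_flag(command_buffer_data_flag flag) { flags |= flag; } // work marks itself while recording
        void clear_flags() { flags = 0; }

        void submit()
        {
            // ... vkQueueSubmit happens here in the real implementation ...
            clear_flags(); // after a submit, nothing recorded is still pending
        }
    };

    // Callers can then decide cheaply whether a submit is required before the
    // host touches memory that queued DMA work will produce, e.g.:
    //   if (cmd.flags & command_buffer_sketch::cb_has_dma_transfer)
    //       flush_command_queue();
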
@@ -66,11 +66,21 @@ namespace vk
 			managed_texture.reset(vram_texture);
 		}
 
-		//Even if we are managing the same vram section, we cannot guarantee contents are static
-		//The create method is only invoked when a new managed session is required
-		synchronized = false;
-		flushed = false;
-		sync_timestamp = 0ull;
+		if (synchronized)
+		{
+			// Even if we are managing the same vram section, we cannot guarantee contents are static
+			// The create method is only invoked when a new managed session is required
+			if (!flushed)
+			{
+				// Reset fence
+				verify(HERE), m_device, dma_buffer, dma_fence != VK_NULL_HANDLE;
+				vkResetEvent(*m_device, dma_fence);
+			}
+
+			synchronized = false;
+			flushed = false;
+			sync_timestamp = 0ull;
+		}
 
 		// Notify baseclass
 		baseclass::on_section_resources_created();
@@ -148,14 +158,18 @@ namespace vk
 			return flushed;
 		}
 
-		void copy_texture(vk::command_buffer& cmd, bool manage_cb_lifetime, VkQueue submit_queue)
+		void copy_texture(vk::command_buffer& cmd, bool miss)
 		{
 			ASSERT(exists());
 
-			if (!manage_cb_lifetime)
+			if (LIKELY(!miss))
 			{
 				baseclass::on_speculative_flush();
 			}
+			else
+			{
+				baseclass::on_miss();
+			}
 
 			if (m_device == nullptr)
 			{
@@ -175,11 +189,6 @@ namespace vk
 				dma_buffer.reset(new vk::buffer(*m_device, align(get_section_size(), 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
 			}
 
-			if (manage_cb_lifetime)
-			{
-				cmd.begin();
-			}
-
 			if (context == rsx::texture_upload_context::framebuffer_storage)
 			{
 				auto as_rtt = static_cast<vk::render_target*>(vram_texture);
@@ -295,36 +304,20 @@ namespace vk
 				vkCmdCopyBuffer(cmd, mem_target->value, dma_buffer->value, 1, &copy);
 			}
 
-			if (manage_cb_lifetime)
+			if (LIKELY(!miss))
 			{
-				VkFence submit_fence;
-				VkFenceCreateInfo create_info{};
-				create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-				vkCreateFence(*m_device, &create_info, nullptr, &submit_fence);
-
-				cmd.end();
-				cmd.submit(submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
-				// Now we need to restart the command-buffer to restore it to the way it was before...
-				vk::wait_for_fence(submit_fence);
-				CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
-
-				// Cleanup
-				vkDestroyFence(*m_device, submit_fence, nullptr);
-				vkSetEvent(*m_device, dma_fence);
-				if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
-				{
-					// If this is a primary CB, restart it
-					cmd.begin();
-				}
+				// If this is speculated, it should only occur once
+				verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
 			}
 			else
 			{
-				// Only used when doing speculation
-				verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
-				vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
+				// This is the only acceptable situation where a sync can occur twice, due to flush_always being set
+				vkResetEvent(*m_device, dma_fence);
 			}
 
+			cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);
+			vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
+
 			synchronized = true;
 			sync_timestamp = get_system_time();
 		}
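
The hunk above is where the second bullet of the commit message lands: rather than submitting a private command buffer and blocking, copy_texture() now records the copy into the shared command buffer, tags it with cb_has_dma_transfer, and signals a VkEvent from the device so the host can tell when the dma buffer is actually valid. A condensed sketch of that handshake (the copy itself is elided; dma_fence is the section's VkEvent from the code above, and the host-side polling is an assumption based on the surrounding map_synchronized path):

    #include <vulkan/vulkan.h>

    // Device side: recorded into the shared command buffer, no private submit.
    // 'cmd' is assumed to be the document's vk::command_buffer wrapper.
    void record_dma_readback(vk::command_buffer& cmd, VkEvent dma_fence)
    {
        // ... vkCmdCopyBuffer(cmd, src->value, dma_buffer->value, 1, &copy) ...

        // Tell the renderer this command buffer must be submitted before the
        // host may read the dma buffer.
        cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);

        // Signaled on the GPU once prior transfer/compute work has executed.
        vkCmdSetEvent(cmd, dma_fence,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
    }

    // Host side, conceptually, before mapping the dma buffer:
    //   if (cmd.flags & vk::command_buffer::cb_has_dma_transfer)
    //       flush_command_queue(); // actually submit the queued work
    //   while (vkGetEventStatus(device, dma_fence) != VK_EVENT_SET)
    //       ; // spin/yield until the GPU reaches the event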
@@ -332,19 +325,6 @@ namespace vk
 		/**
 		 * Flush
 		 */
-		void synchronize(bool blocking, vk::command_buffer& cmd, VkQueue submit_queue)
-		{
-			if (synchronized)
-				return;
-
-			if (m_device == nullptr)
-			{
-				m_device = &cmd.get_command_pool().get_owner();
-			}
-
-			copy_texture(cmd, blocking, submit_queue);
-		}
-
 		void* map_synchronized(u32 offset, u32 size)
 		{
 			AUDIT(synchronized);
@@ -1104,6 +1084,44 @@ namespace vk
 			}
 		}
 
+		void prepare_for_dma_transfers(vk::command_buffer& cmd) override
+		{
+			if (!cmd.is_recording())
+			{
+				cmd.begin();
+			}
+		}
+
+		void cleanup_after_dma_transfers(vk::command_buffer& cmd) override
+		{
+			// End recording
+			cmd.end();
+
+			if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
+			{
+				// Primary access command queue, must restart it after
+				VkFence submit_fence;
+				VkFenceCreateInfo info{};
+				info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+				vkCreateFence(*m_device, &info, nullptr, &submit_fence);
+
+				cmd.submit(m_submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+
+				vk::wait_for_fence(submit_fence, GENERAL_WAIT_TIMEOUT);
+				vkDestroyFence(*m_device, submit_fence, nullptr);
+
+				CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
+				cmd.begin();
+			}
+			else
+			{
+				// Auxiliary command queue with auto-restart capability
+				cmd.submit(m_submit_queue, {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+			}
+
+			verify(HERE), cmd.flags == 0;
+		}
+
 	public:
 		using baseclass::texture_cache;
 
@@ -1181,12 +1199,6 @@ namespace vk
 			baseclass::on_frame_end();
 		}
 
-		template<typename RsxTextureType>
-		sampled_image_descriptor _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts)
-		{
-			return upload_texture(cmd, tex, m_rtts, const_cast<const VkQueue>(m_submit_queue));
-		}
-
 		vk::image *upload_image_simple(vk::command_buffer& cmd, u32 address, u32 width, u32 height)
 		{
 			if (!m_formats_support.bgra8_linear)
@@ -1243,13 +1255,13 @@ namespace vk
 		bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
 		{
 			blitter helper;
-			auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, const_cast<const VkQueue>(m_submit_queue));
+			auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper);
 
 			if (reply.succeeded)
 			{
 				if (reply.real_dst_size)
 				{
-					flush_if_cache_miss_likely(cmd, reply.to_address_range(), m_submit_queue);
+					flush_if_cache_miss_likely(cmd, reply.to_address_range());
 				}
 
 				return true;