
rsx: Improvements to memory flush mechanism

- Batch dma transfers whenever possible and do them in one go
- vk: Always ensure that queued dma transfers are visible to the GPU before they are needed by the host
  Requires a little refactoring to allow proper communication of the commandbuffer state
- vk: Code cleanup; the simplified mechanism makes it unnecessary to pass tons of args to methods
- vk: Fixup - do not forcefully do dma transfers on sections in an invalidation zone! They may have been speculated correctly already
kd-11 2019-03-16 12:14:11 +03:00 committed by kd-11
parent 385485204b
commit 5260f4b47d
8 changed files with 178 additions and 140 deletions
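
In outline, the new flush path gathers every section that still needs a hard sync, records all of their DMA copies under a single prepare/cleanup pair, and only then copies the results out to guest memory. Below is a minimal sketch of that batching pattern; Section, Backend and the member names are illustrative stand-ins rather than the actual rpcs3 types, and the flush_always/ROP-timestamp check from the real code is simplified away.

    #include <vector>

    struct Section
    {
        bool synchronized = false;   // GPU->host copy already recorded?
        bool flush_always = false;   // region reads back from itself every frame
        void record_dma_copy() {}    // e.g. vkCmdCopyBuffer / glGetTexImage into a DMA buffer
        void flush_to_guest_memory() {} // memcpy from the DMA buffer into guest RAM
    };

    struct Backend
    {
        void prepare_for_dma_transfers() {}  // begin the command buffer if it is not recording
        void cleanup_after_dma_transfers() {} // submit and make the transfers host-visible
    };

    void flush_set(Backend& backend, std::vector<Section*>& sections_to_flush)
    {
        // 1. Collect every section that still needs a hard sync.
        std::vector<Section*> pending;
        for (auto* s : sections_to_flush)
        {
            if (!s->synchronized || s->flush_always)
                pending.push_back(s);
        }

        // 2. Record all DMA copies in one go, bracketed by a single prepare/cleanup pair.
        if (!pending.empty())
        {
            backend.prepare_for_dma_transfers();
            for (auto* s : pending)
                s->record_dma_copy();
            backend.cleanup_after_dma_transfers();
        }

        // 3. With the data guaranteed available, copy it out to guest memory.
        for (auto* s : sections_to_flush)
            s->flush_to_guest_memory();
    }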

View File

@@ -364,6 +364,8 @@ namespace rsx
virtual image_view_type generate_atlas_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy, const texture_channel_remap_t& remap_vector) = 0;
virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0;
virtual bool render_target_format_is_compatible(image_storage_type* tex, u32 gcm_format) = 0;
+virtual void prepare_for_dma_transfers(commandbuffer_type&) = 0;
+virtual void cleanup_after_dma_transfers(commandbuffer_type&) = 0;

public:
virtual void destroy() = 0;
@@ -397,13 +399,13 @@ namespace rsx
template <typename... Args>
void err_once(const char* fmt, const Args&... params)
{
-logs::RSX.error(fmt, params...);
+emit_once(true, fmt, params...);
}

template <typename... Args>
void warn_once(const char* fmt, const Args&... params)
{
-logs::RSX.warning(fmt, params...);
+emit_once(false, fmt, params...);
}

/**
@@ -458,19 +460,40 @@ namespace rsx
});
}

+rsx::simple_array<section_storage_type*> sections_to_transfer;
for (auto &surface : data.sections_to_flush)
{
-if (surface->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
+if (!surface->is_synchronized())
+{
+sections_to_transfer.push_back(surface);
+}
+else if (surface->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
{
// This region is set to always read from itself (unavoidable hard sync)
const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
-if (surface->is_synchronized() && ROP_timestamp > surface->get_sync_timestamp())
+if (ROP_timestamp > surface->get_sync_timestamp())
{
-surface->copy_texture(cmd, true, std::forward<Args>(extras)...);
+sections_to_transfer.push_back(surface);
}
}
+}
+
+if (!sections_to_transfer.empty())
+{
+// Batch all hard faults together
+prepare_for_dma_transfers(cmd);
+
+for (auto &surface : sections_to_transfer)
+{
+surface->copy_texture(cmd, true, std::forward<Args>(extras)...);
+}
+
+cleanup_after_dma_transfers(cmd);
+}
+
+for (auto &surface : data.sections_to_flush)
+{
-surface->flush(cmd, std::forward<Args>(extras)...);
+surface->flush();

// Exclude this region when flushing other sections that should not trample it
// If we overlap an excluded RO, set it as dirty
@@ -1224,7 +1247,7 @@ namespace rsx
}

template <typename ...FlushArgs, typename ...Args>
-void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, std::tuple<FlushArgs...>&& flush_extras, Args&&... extras)
+void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, Args&&... extras)
{
AUDIT(g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer); // this method is only called when either WCB or WDB are enabled
@@ -1244,10 +1267,7 @@ namespace rsx
if (!region.is_locked() || region.get_context() != texture_upload_context::framebuffer_storage)
{
// Invalidate sections from surface cache occupying same address range
-std::apply(&texture_cache::invalidate_range_impl_base<FlushArgs...>, std::tuple_cat(
-std::forward_as_tuple(this, cmd, rsx_range, invalidation_cause::superseded_by_fbo),
-std::forward<std::tuple<FlushArgs...> >(flush_extras)
-));
+invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::superseded_by_fbo);
}

if (!region.is_locked() || region.can_be_reused())

View File

@@ -1303,14 +1303,10 @@ namespace rsx
return get_context() != texture_upload_context::shader_read && get_memory_read_flags() != memory_read_flags::flush_always;
}

-void on_flush(bool miss)
+void on_flush()
{
speculatively_flushed = false;
-if (miss)
-{
-m_tex_cache->on_miss(*derived());
-}

m_tex_cache->on_flush();

if (tracked_by_predictor())
@@ -1328,6 +1324,12 @@ namespace rsx
m_tex_cache->on_speculative_flush();
}

+void on_miss()
+{
+LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
+m_tex_cache->on_miss(*derived());
+}
+
void touch(u64 tag)
{
last_write_tag = tag;
@@ -1454,11 +1456,9 @@ namespace rsx
public:

// Returns false if there was a cache miss
-template <typename ...Args>
-bool flush(Args&&... extras)
+void flush()
{
-if (flushed) return true;
-bool miss = false;
+if (flushed) return;

// Sanity checks
ASSERT(exists());
@@ -1469,19 +1469,12 @@ namespace rsx
{
flushed = true;
flush_exclusions.clear();
-on_flush(miss);
-return !miss;
+on_flush();
+return;
}

-// If we are not synchronized, we must synchronize before proceeding (hard fault)
-if (!synchronized)
-{
-LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
-derived()->synchronize(true, std::forward<Args>(extras)...);
-miss = true;
-ASSERT(synchronized); // TODO ruipin: This might be possible in OGL. Revisit
-}
+// NOTE: Hard faults should have been pre-processed beforehand
+ASSERT(synchronized);

// Copy flush result to guest memory
imp_flush();
@@ -1491,9 +1484,7 @@ namespace rsx
flushed = true;
derived()->finish_flush();
flush_exclusions.clear();
-on_flush(miss);
-return !miss;
+on_flush();
}

void add_flush_exclusion(const address_range& rng)

View File

@@ -392,7 +392,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
// Mark buffer regions as NO_ACCESS on Cell-visible side
m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
-std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes);
+color_format.format, color_format.type, color_format.swap_bytes);
}
else
{
@@ -407,7 +407,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
-std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true);
+depth_format_gl.format, depth_format_gl.type, true);
}
else
{

View File

@@ -217,14 +217,18 @@ namespace gl
}
}

-void copy_texture(gl::command_context& cmd, bool manage_lifetime)
+void copy_texture(gl::command_context& cmd, bool miss)
{
ASSERT(exists());

-if (!manage_lifetime)
+if (LIKELY(!miss))
{
baseclass::on_speculative_flush();
}
+else
+{
+baseclass::on_miss();
+}

if (context == rsx::texture_upload_context::framebuffer_storage)
{
@@ -347,15 +351,6 @@ namespace gl
/**
* Flush
*/
-void synchronize(bool blocking, gl::command_context& cmd)
-{
-if (synchronized)
-return;
-
-verify(HERE), cmd.drv;
-copy_texture(cmd, blocking);
-}
-
void* map_synchronized(u32 offset, u32 size)
{
AUDIT(synchronized && !m_fence.is_empty());
@@ -642,7 +637,7 @@ namespace gl
if (src)
{
//Format mismatch
-err_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt);
+warn_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt);
}

//Apply base component map onto the new texture if a data cast has been done
@@ -992,6 +987,12 @@ namespace gl
}
}

+void prepare_for_dma_transfers(gl::command_context&) override
+{}
+
+void cleanup_after_dma_transfers(gl::command_context&) override
+{}
+
public:
using baseclass::texture_cache;

View File

@@ -855,7 +855,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
std::lock_guard lock(m_secondary_cb_guard);
const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
-result = std::move(m_texture_cache.invalidate_address(m_secondary_command_buffer, address, cause, m_swapchain->get_graphics_queue()));
+result = std::move(m_texture_cache.invalidate_address(m_secondary_command_buffer, address, cause));
}

if (!result.violation_handled)
@@ -897,7 +897,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
m_flush_requests.producer_wait();
}

-m_texture_cache.flush_all(m_secondary_command_buffer, result, m_swapchain->get_graphics_queue());
+m_texture_cache.flush_all(m_secondary_command_buffer, result);

if (has_queue_ref)
{
@@ -913,7 +913,7 @@ void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
{
std::lock_guard lock(m_secondary_cb_guard);
-auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap, m_swapchain->get_graphics_queue()));
+auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap));
AUDIT(data.empty());

if (data.violation_handled)
@@ -1454,7 +1454,7 @@ void VKGSRender::end()
if (rsx::method_registers.fragment_textures[i].enabled())
{
-*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
+*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);

const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
const VkBool32 compare_enabled = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8 ||
@@ -1526,7 +1526,7 @@ void VKGSRender::end()
if (rsx::method_registers.vertex_textures[i].enabled())
{
-*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
+*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);

bool replace = !vs_sampler_handles[i];
const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN);
@@ -1725,7 +1725,7 @@ void VKGSRender::end()
m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;

-m_current_command_buffer->flags |= cb_has_occlusion_task;
+m_current_command_buffer->flags |= vk::command_buffer::cb_has_occlusion_task;
}

// Apply write memory barriers
@@ -1796,7 +1796,6 @@ void VKGSRender::end()
m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
}

-m_current_command_buffer->num_draws++;
m_rtts.on_write();

rsx::thread::end();
@@ -2187,7 +2186,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint)
{
if (hint == rsx::FIFO_hint::hint_conditional_render_eval)
{
-if (m_current_command_buffer->flags & cb_has_occlusion_task)
+if (m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task)
{
// Occlusion test result evaluation is coming up, avoid a hard sync
if (!m_flush_requests.pending())
@@ -2881,7 +2880,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
const utils::address_range rsx_range = m_surface_info[i].get_memory_range();
m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once);
-m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, rsx_range, m_swapchain->get_graphics_queue());
+m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, rsx_range);
}

m_surface_info[i].address = m_surface_info[i].pitch = 0;
@@ -2898,7 +2897,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format);
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
-m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
+m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, surface_range);
}

m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
@@ -2944,6 +2943,12 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
m_texture_cache.notify_surface_changed(layout.zeta_address);
}

+// Before messing with memory properties, flush command queue if there are dma transfers queued up
+if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
+{
+flush_command_queue();
+}
+
const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format);
for (u8 index : m_draw_buffers)
{
@@ -2953,11 +2958,11 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (g_cfg.video.write_color_buffers)
{
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
-m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second);
+m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second);
}
else
{
-m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
+m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range);
}
}
@@ -2968,11 +2973,11 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
-m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, gcm_format, false);
+m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false);
}
else
{
-m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
+m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range);
}
}
@@ -3323,21 +3328,22 @@ void VKGSRender::flip(int buffer)
const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage;
const auto overlap = m_texture_cache.find_texture_from_range<true>(range, 0, lookup_mask);
-bool flush_queue = false;

for (const auto & section : overlap)
{
-section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue());
-flush_queue = true;
+if (!section->is_synchronized())
+{
+section->copy_texture(*m_current_command_buffer, true);
+}
}

-if (flush_queue)
+if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
{
// Submit for processing to lower hard fault penalty
flush_command_queue();
}

-m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read, m_swapchain->get_graphics_queue());
+m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read);
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);
}
}
@@ -3487,16 +3493,15 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
//Verify enough memory exists before attempting to handle data transfer
check_heap_status();

-const auto old_speculations_count = m_texture_cache.get_num_cache_speculative_writes();
if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer))
{
m_samplers_dirty.store(true);
-m_current_command_buffer->flags |= cb_has_blit_transfer;
+m_current_command_buffer->set_flag(vk::command_buffer::cb_has_blit_transfer);

-if (m_texture_cache.get_num_cache_speculative_writes() > old_speculations_count)
+if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
{
-// A speculative write happened, flush while the dma resource is valid
-// TODO: Deeper investigation as to why this can trigger problems
+// A dma transfer has been queued onto this cb
+// This likely means that we're done with the tranfers to the target (writes_likely_completed=1)
flush_command_queue();
}

return true;

View File

@@ -48,20 +48,11 @@ namespace vk
extern u64 get_system_time();

-enum command_buffer_data_flag
-{
-cb_has_occlusion_task = 1,
-cb_has_blit_transfer = 2
-};
-
struct command_buffer_chunk: public vk::command_buffer
{
VkFence submit_fence = VK_NULL_HANDLE;
VkDevice m_device = VK_NULL_HANDLE;

-u32 num_draws = 0;
-u32 flags = 0;
-
std::atomic_bool pending = { false };
std::atomic<u64> last_sync = { 0 };
shared_mutex guard_mutex;
@@ -100,8 +91,6 @@ struct command_buffer_chunk: public vk::command_buffer
wait(FRAME_PRESENT_TIMEOUT);
CHECK_RESULT(vkResetCommandBuffer(commands, 0));

-num_draws = 0;
-flags = 0;
}

bool poke()

View File

@@ -1168,6 +1168,14 @@ namespace vk
}

access_hint = flush_only;

+enum command_buffer_data_flag : u32
+{
+cb_has_occlusion_task = 1,
+cb_has_blit_transfer = 2,
+cb_has_dma_transfer = 4
+};
+
+u32 flags = 0;
+
public:
command_buffer() {}
~command_buffer() {}
@@ -1206,6 +1214,16 @@ namespace vk
return *pool;
}

+void clear_flags()
+{
+flags = 0;
+}
+
+void set_flag(command_buffer_data_flag flag)
+{
+flags |= flag;
+}
+
operator VkCommandBuffer() const
{
return commands;
@@ -1278,6 +1296,8 @@ namespace vk
acquire_global_submit_lock();
CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence));
release_global_submit_lock();
+
+clear_flags();
}
};

View File

@@ -66,11 +66,21 @@ namespace vk
managed_texture.reset(vram_texture);
}

-//Even if we are managing the same vram section, we cannot guarantee contents are static
-//The create method is only invoked when a new managed session is required
-synchronized = false;
-flushed = false;
-sync_timestamp = 0ull;
+if (synchronized)
+{
+// Even if we are managing the same vram section, we cannot guarantee contents are static
+// The create method is only invoked when a new managed session is required
+if (!flushed)
+{
+// Reset fence
+verify(HERE), m_device, dma_buffer, dma_fence != VK_NULL_HANDLE;
+vkResetEvent(*m_device, dma_fence);
+}
+
+synchronized = false;
+flushed = false;
+sync_timestamp = 0ull;
+}

// Notify baseclass
baseclass::on_section_resources_created();
@@ -148,14 +158,18 @@ namespace vk
return flushed;
}

-void copy_texture(vk::command_buffer& cmd, bool manage_cb_lifetime, VkQueue submit_queue)
+void copy_texture(vk::command_buffer& cmd, bool miss)
{
ASSERT(exists());

-if (!manage_cb_lifetime)
+if (LIKELY(!miss))
{
baseclass::on_speculative_flush();
}
+else
+{
+baseclass::on_miss();
+}

if (m_device == nullptr)
{
@@ -175,11 +189,6 @@ namespace vk
dma_buffer.reset(new vk::buffer(*m_device, align(get_section_size(), 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
}

-if (manage_cb_lifetime)
-{
-cmd.begin();
-}
-
if (context == rsx::texture_upload_context::framebuffer_storage)
{
auto as_rtt = static_cast<vk::render_target*>(vram_texture);
@@ -295,36 +304,20 @@ namespace vk
vkCmdCopyBuffer(cmd, mem_target->value, dma_buffer->value, 1, &copy);
}

-if (manage_cb_lifetime)
+if (LIKELY(!miss))
{
-VkFence submit_fence;
-VkFenceCreateInfo create_info{};
-create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-vkCreateFence(*m_device, &create_info, nullptr, &submit_fence);
-
-cmd.end();
-cmd.submit(submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
-// Now we need to restart the command-buffer to restore it to the way it was before...
-vk::wait_for_fence(submit_fence);
-
-CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
-
-// Cleanup
-vkDestroyFence(*m_device, submit_fence, nullptr);
-vkSetEvent(*m_device, dma_fence);
-
-if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
-{
-// If this is a primary CB, restart it
-cmd.begin();
-}
+// If this is speculated, it should only occur once
+verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
}
else
{
-// Only used when doing speculation
-verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
-vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
+// This is the only acceptable situation where a sync can occur twice, due to flush_always being set
+vkResetEvent(*m_device, dma_fence);
}

+cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);
+vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
+
synchronized = true;
sync_timestamp = get_system_time();
}
@@ -332,19 +325,6 @@ namespace vk
/**
* Flush
*/
-void synchronize(bool blocking, vk::command_buffer& cmd, VkQueue submit_queue)
-{
-if (synchronized)
-return;
-
-if (m_device == nullptr)
-{
-m_device = &cmd.get_command_pool().get_owner();
-}
-
-copy_texture(cmd, blocking, submit_queue);
-}
-
void* map_synchronized(u32 offset, u32 size)
{
AUDIT(synchronized);
@@ -1104,6 +1084,44 @@ namespace vk
}
}

+void prepare_for_dma_transfers(vk::command_buffer& cmd) override
+{
+if (!cmd.is_recording())
+{
+cmd.begin();
+}
+}
+
+void cleanup_after_dma_transfers(vk::command_buffer& cmd) override
+{
+// End recording
+cmd.end();
+
+if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
+{
+// Primary access command queue, must restart it after
+VkFence submit_fence;
+VkFenceCreateInfo info{};
+info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+vkCreateFence(*m_device, &info, nullptr, &submit_fence);
+
+cmd.submit(m_submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+vk::wait_for_fence(submit_fence, GENERAL_WAIT_TIMEOUT);
+
+vkDestroyFence(*m_device, submit_fence, nullptr);
+CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
+
+cmd.begin();
+}
+else
+{
+// Auxilliary command queue with auto-restart capability
+cmd.submit(m_submit_queue, {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+}
+
+verify(HERE), cmd.flags == 0;
+}
+
public:
using baseclass::texture_cache;
@@ -1181,12 +1199,6 @@ namespace vk
baseclass::on_frame_end();
}

-template<typename RsxTextureType>
-sampled_image_descriptor _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts)
-{
-return upload_texture(cmd, tex, m_rtts, const_cast<const VkQueue>(m_submit_queue));
-}
-
vk::image *upload_image_simple(vk::command_buffer& cmd, u32 address, u32 width, u32 height)
{
if (!m_formats_support.bgra8_linear)
@@ -1243,13 +1255,13 @@ namespace vk
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
{
blitter helper;
-auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, const_cast<const VkQueue>(m_submit_queue));
+auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper);

if (reply.succeeded)
{
if (reply.real_dst_size)
{
-flush_if_cache_miss_likely(cmd, reply.to_address_range(), m_submit_queue);
+flush_if_cache_miss_likely(cmd, reply.to_address_range());
}

return true;