
rsx: Improvements to memory flush mechanism

- Batch dma transfers whenever possible and do them in one go (see the sketch after this list)
- vk: Always ensure that queued dma transfers are visible to the GPU before they are needed by the host
  Requires a little refactoring to allow proper communication of the command buffer state
- vk: Code cleanup; the simplified mechanism makes it unnecessary to pass tons of args to methods
- vk: Fixup - do not forcefully do dma transfers on sections in an invalidation zone! They may have been speculated correctly already
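The batching idea can be sketched roughly as follows. This is a minimal illustration using simplified stand-in types (command_buffer, cached_section, flush_all are placeholders); the real classes in the diff below are templated and take extra arguments.

#include <vector>

struct command_buffer { /* backend command buffer state */ };

struct cached_section
{
    bool synchronized = false;

    // Record a DMA copy of this section into a host-visible buffer (placeholder)
    void copy_texture(command_buffer& /*cmd*/, bool /*hard_fault*/) { synchronized = true; }

    // Write the now host-visible data back to guest memory (placeholder)
    void flush() {}
};

// Old behaviour: every unsynchronized section triggered its own submit + wait.
// New behaviour: gather all hard faults, record their transfers into one command
// buffer, submit and wait once, then flush everything from host-visible memory.
void flush_all(command_buffer& cmd, std::vector<cached_section*>& sections_to_flush)
{
    std::vector<cached_section*> sections_to_transfer;
    for (auto* section : sections_to_flush)
    {
        if (!section->synchronized)
        {
            sections_to_transfer.push_back(section); // hard fault, needs a DMA transfer
        }
    }

    if (!sections_to_transfer.empty())
    {
        // prepare_for_dma_transfers(cmd):  e.g. begin recording (vk backend)
        for (auto* section : sections_to_transfer)
        {
            section->copy_texture(cmd, true); // batch all transfers into one submission
        }
        // cleanup_after_dma_transfers(cmd): e.g. submit once, wait, restart the cb
    }

    for (auto* section : sections_to_flush)
    {
        section->flush(); // data is guaranteed host-visible at this point
    }
}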
kd-11 2019-03-16 12:14:11 +03:00 committed by kd-11
parent 385485204b
commit 5260f4b47d
8 changed files with 178 additions and 140 deletions


@ -364,6 +364,8 @@ namespace rsx
virtual image_view_type generate_atlas_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy, const texture_channel_remap_t& remap_vector) = 0;
virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0;
virtual bool render_target_format_is_compatible(image_storage_type* tex, u32 gcm_format) = 0;
virtual void prepare_for_dma_transfers(commandbuffer_type&) = 0;
virtual void cleanup_after_dma_transfers(commandbuffer_type&) = 0;
public:
virtual void destroy() = 0;
@ -397,13 +399,13 @@ namespace rsx
template <typename... Args>
void err_once(const char* fmt, const Args&... params)
{
logs::RSX.error(fmt, params...);
emit_once(true, fmt, params...);
}
template <typename... Args>
void warn_once(const char* fmt, const Args&... params)
{
logs::RSX.warning(fmt, params...);
emit_once(false, fmt, params...);
}
/**
@ -458,19 +460,40 @@ namespace rsx
});
}
rsx::simple_array<section_storage_type*> sections_to_transfer;
for (auto &surface : data.sections_to_flush)
{
if (surface->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
if (!surface->is_synchronized())
{
sections_to_transfer.push_back(surface);
}
else if (surface->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
{
// This region is set to always read from itself (unavoidable hard sync)
const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
if (surface->is_synchronized() && ROP_timestamp > surface->get_sync_timestamp())
if (ROP_timestamp > surface->get_sync_timestamp())
{
surface->copy_texture(cmd, true, std::forward<Args>(extras)...);
sections_to_transfer.push_back(surface);
}
}
}
surface->flush(cmd, std::forward<Args>(extras)...);
if (!sections_to_transfer.empty())
{
// Batch all hard faults together
prepare_for_dma_transfers(cmd);
for (auto &surface : sections_to_transfer)
{
surface->copy_texture(cmd, true, std::forward<Args>(extras)...);
}
cleanup_after_dma_transfers(cmd);
}
for (auto &surface : data.sections_to_flush)
{
surface->flush();
// Exclude this region when flushing other sections that should not trample it
// If we overlap an excluded RO, set it as dirty
@ -1224,7 +1247,7 @@ namespace rsx
}
template <typename ...FlushArgs, typename ...Args>
void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, std::tuple<FlushArgs...>&& flush_extras, Args&&... extras)
void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, Args&&... extras)
{
AUDIT(g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer); // this method is only called when either WCB or WDB are enabled
@ -1244,10 +1267,7 @@ namespace rsx
if (!region.is_locked() || region.get_context() != texture_upload_context::framebuffer_storage)
{
// Invalidate sections from surface cache occupying same address range
std::apply(&texture_cache::invalidate_range_impl_base<FlushArgs...>, std::tuple_cat(
std::forward_as_tuple(this, cmd, rsx_range, invalidation_cause::superseded_by_fbo),
std::forward<std::tuple<FlushArgs...> >(flush_extras)
));
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::superseded_by_fbo);
}
if (!region.is_locked() || region.can_be_reused())


@ -1303,14 +1303,10 @@ namespace rsx
return get_context() != texture_upload_context::shader_read && get_memory_read_flags() != memory_read_flags::flush_always;
}
void on_flush(bool miss)
void on_flush()
{
speculatively_flushed = false;
if (miss)
{
m_tex_cache->on_miss(*derived());
}
m_tex_cache->on_flush();
if (tracked_by_predictor())
@ -1328,6 +1324,12 @@ namespace rsx
m_tex_cache->on_speculative_flush();
}
void on_miss()
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
m_tex_cache->on_miss(*derived());
}
void touch(u64 tag)
{
last_write_tag = tag;
@ -1454,11 +1456,9 @@ namespace rsx
public:
// Returns false if there was a cache miss
template <typename ...Args>
bool flush(Args&&... extras)
void flush()
{
if (flushed) return true;
bool miss = false;
if (flushed) return;
// Sanity checks
ASSERT(exists());
@ -1469,19 +1469,12 @@ namespace rsx
{
flushed = true;
flush_exclusions.clear();
on_flush(miss);
return !miss;
on_flush();
return;
}
// If we are not synchronized, we must synchronize before proceeding (hard fault)
if (!synchronized)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
derived()->synchronize(true, std::forward<Args>(extras)...);
miss = true;
ASSERT(synchronized); // TODO ruipin: This might be possible in OGL. Revisit
}
// NOTE: Hard faults should have been pre-processed beforehand
ASSERT(synchronized);
// Copy flush result to guest memory
imp_flush();
@ -1491,9 +1484,7 @@ namespace rsx
flushed = true;
derived()->finish_flush();
flush_exclusions.clear();
on_flush(miss);
return !miss;
on_flush();
}
void add_flush_exclusion(const address_range& rng)


@ -392,7 +392,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
// Mark buffer regions as NO_ACCESS on Cell-visible side
m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes);
color_format.format, color_format.type, color_format.swap_bytes);
}
else
{
@ -407,7 +407,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true);
depth_format_gl.format, depth_format_gl.type, true);
}
else
{


@ -217,14 +217,18 @@ namespace gl
}
}
void copy_texture(gl::command_context& cmd, bool manage_lifetime)
void copy_texture(gl::command_context& cmd, bool miss)
{
ASSERT(exists());
if (!manage_lifetime)
if (LIKELY(!miss))
{
baseclass::on_speculative_flush();
}
else
{
baseclass::on_miss();
}
if (context == rsx::texture_upload_context::framebuffer_storage)
{
@ -347,15 +351,6 @@ namespace gl
/**
* Flush
*/
void synchronize(bool blocking, gl::command_context& cmd)
{
if (synchronized)
return;
verify(HERE), cmd.drv;
copy_texture(cmd, blocking);
}
void* map_synchronized(u32 offset, u32 size)
{
AUDIT(synchronized && !m_fence.is_empty());
@ -642,7 +637,7 @@ namespace gl
if (src)
{
//Format mismatch
err_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt);
warn_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt);
}
//Apply base component map onto the new texture if a data cast has been done
@ -992,6 +987,12 @@ namespace gl
}
}
void prepare_for_dma_transfers(gl::command_context&) override
{}
void cleanup_after_dma_transfers(gl::command_context&) override
{}
public:
using baseclass::texture_cache;


@ -855,7 +855,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
std::lock_guard lock(m_secondary_cb_guard);
const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
result = std::move(m_texture_cache.invalidate_address(m_secondary_command_buffer, address, cause, m_swapchain->get_graphics_queue()));
result = std::move(m_texture_cache.invalidate_address(m_secondary_command_buffer, address, cause));
}
if (!result.violation_handled)
@ -897,7 +897,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
m_flush_requests.producer_wait();
}
m_texture_cache.flush_all(m_secondary_command_buffer, result, m_swapchain->get_graphics_queue());
m_texture_cache.flush_all(m_secondary_command_buffer, result);
if (has_queue_ref)
{
@ -913,7 +913,7 @@ void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
{
std::lock_guard lock(m_secondary_cb_guard);
auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap, m_swapchain->get_graphics_queue()));
auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap));
AUDIT(data.empty());
if (data.violation_handled)
@ -1454,7 +1454,7 @@ void VKGSRender::end()
if (rsx::method_registers.fragment_textures[i].enabled())
{
*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
const VkBool32 compare_enabled = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8 ||
@ -1526,7 +1526,7 @@ void VKGSRender::end()
if (rsx::method_registers.vertex_textures[i].enabled())
{
*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
bool replace = !vs_sampler_handles[i];
const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN);
@ -1725,7 +1725,7 @@ void VKGSRender::end()
m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;
m_current_command_buffer->flags |= cb_has_occlusion_task;
m_current_command_buffer->flags |= vk::command_buffer::cb_has_occlusion_task;
}
// Apply write memory barriers
@ -1796,7 +1796,6 @@ void VKGSRender::end()
m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
}
m_current_command_buffer->num_draws++;
m_rtts.on_write();
rsx::thread::end();
@ -2187,7 +2186,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint)
{
if (hint == rsx::FIFO_hint::hint_conditional_render_eval)
{
if (m_current_command_buffer->flags & cb_has_occlusion_task)
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task)
{
// Occlusion test result evaluation is coming up, avoid a hard sync
if (!m_flush_requests.pending())
@ -2881,7 +2880,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
const utils::address_range rsx_range = m_surface_info[i].get_memory_range();
m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, rsx_range, m_swapchain->get_graphics_queue());
m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, rsx_range);
}
m_surface_info[i].address = m_surface_info[i].pitch = 0;
@ -2898,7 +2897,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format);
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, surface_range);
}
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
@ -2944,6 +2943,12 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
m_texture_cache.notify_surface_changed(layout.zeta_address);
}
// Before messing with memory properties, flush command queue if there are dma transfers queued up
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
{
flush_command_queue();
}
const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format);
for (u8 index : m_draw_buffers)
{
@ -2953,11 +2958,11 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (g_cfg.video.write_color_buffers)
{
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second);
m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second);
}
else
{
m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range);
}
}
@ -2968,11 +2973,11 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, gcm_format, false);
m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false);
}
else
{
m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range);
}
}
@ -3323,21 +3328,22 @@ void VKGSRender::flip(int buffer)
const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage;
const auto overlap = m_texture_cache.find_texture_from_range<true>(range, 0, lookup_mask);
bool flush_queue = false;
for (const auto & section : overlap)
{
section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue());
flush_queue = true;
if (!section->is_synchronized())
{
section->copy_texture(*m_current_command_buffer, true);
}
}
if (flush_queue)
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
{
// Submit for processing to lower hard fault penalty
flush_command_queue();
}
m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read, m_swapchain->get_graphics_queue());
m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read);
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);
}
}
@ -3487,16 +3493,15 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
//Verify enough memory exists before attempting to handle data transfer
check_heap_status();
const auto old_speculations_count = m_texture_cache.get_num_cache_speculative_writes();
if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer))
{
m_samplers_dirty.store(true);
m_current_command_buffer->flags |= cb_has_blit_transfer;
m_current_command_buffer->set_flag(vk::command_buffer::cb_has_blit_transfer);
if (m_texture_cache.get_num_cache_speculative_writes() > old_speculations_count)
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
{
// A speculative write happened, flush while the dma resource is valid
// TODO: Deeper investigation as to why this can trigger problems
// A dma transfer has been queued onto this cb
// This likely means that we're done with the transfers to the target (writes_likely_completed=1)
flush_command_queue();
}
return true;


@ -48,20 +48,11 @@ namespace vk
extern u64 get_system_time();
enum command_buffer_data_flag
{
cb_has_occlusion_task = 1,
cb_has_blit_transfer = 2
};
struct command_buffer_chunk: public vk::command_buffer
{
VkFence submit_fence = VK_NULL_HANDLE;
VkDevice m_device = VK_NULL_HANDLE;
u32 num_draws = 0;
u32 flags = 0;
std::atomic_bool pending = { false };
std::atomic<u64> last_sync = { 0 };
shared_mutex guard_mutex;
@ -100,8 +91,6 @@ struct command_buffer_chunk: public vk::command_buffer
wait(FRAME_PRESENT_TIMEOUT);
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
num_draws = 0;
flags = 0;
}
bool poke()


@ -1168,6 +1168,14 @@ namespace vk
}
access_hint = flush_only;
enum command_buffer_data_flag : u32
{
cb_has_occlusion_task = 1,
cb_has_blit_transfer = 2,
cb_has_dma_transfer = 4
};
u32 flags = 0;
public:
command_buffer() {}
~command_buffer() {}
@ -1206,6 +1214,16 @@ namespace vk
return *pool;
}
void clear_flags()
{
flags = 0;
}
void set_flag(command_buffer_data_flag flag)
{
flags |= flag;
}
operator VkCommandBuffer() const
{
return commands;
@ -1278,6 +1296,8 @@ namespace vk
acquire_global_submit_lock();
CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence));
release_global_submit_lock();
clear_flags();
}
};


@ -66,11 +66,21 @@ namespace vk
managed_texture.reset(vram_texture);
}
//Even if we are managing the same vram section, we cannot guarantee contents are static
//The create method is only invoked when a new managed session is required
synchronized = false;
flushed = false;
sync_timestamp = 0ull;
if (synchronized)
{
// Even if we are managing the same vram section, we cannot guarantee contents are static
// The create method is only invoked when a new managed session is required
if (!flushed)
{
// Reset fence
verify(HERE), m_device, dma_buffer, dma_fence != VK_NULL_HANDLE;
vkResetEvent(*m_device, dma_fence);
}
synchronized = false;
flushed = false;
sync_timestamp = 0ull;
}
// Notify baseclass
baseclass::on_section_resources_created();
@ -148,14 +158,18 @@ namespace vk
return flushed;
}
void copy_texture(vk::command_buffer& cmd, bool manage_cb_lifetime, VkQueue submit_queue)
void copy_texture(vk::command_buffer& cmd, bool miss)
{
ASSERT(exists());
if (!manage_cb_lifetime)
if (LIKELY(!miss))
{
baseclass::on_speculative_flush();
}
else
{
baseclass::on_miss();
}
if (m_device == nullptr)
{
@ -175,11 +189,6 @@ namespace vk
dma_buffer.reset(new vk::buffer(*m_device, align(get_section_size(), 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
}
if (manage_cb_lifetime)
{
cmd.begin();
}
if (context == rsx::texture_upload_context::framebuffer_storage)
{
auto as_rtt = static_cast<vk::render_target*>(vram_texture);
@ -295,36 +304,20 @@ namespace vk
vkCmdCopyBuffer(cmd, mem_target->value, dma_buffer->value, 1, &copy);
}
if (manage_cb_lifetime)
if (LIKELY(!miss))
{
VkFence submit_fence;
VkFenceCreateInfo create_info{};
create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
vkCreateFence(*m_device, &create_info, nullptr, &submit_fence);
cmd.end();
cmd.submit(submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
// Now we need to restart the command-buffer to restore it to the way it was before...
vk::wait_for_fence(submit_fence);
CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
// Cleanup
vkDestroyFence(*m_device, submit_fence, nullptr);
vkSetEvent(*m_device, dma_fence);
if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
{
// If this is a primary CB, restart it
cmd.begin();
}
// If this is speculated, it should only occur once
verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
}
else
{
// Only used when doing speculation
verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
// This is the only acceptable situation where a sync can occur twice, due to flush_always being set
vkResetEvent(*m_device, dma_fence);
}
cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);
vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
synchronized = true;
sync_timestamp = get_system_time();
}
@ -332,19 +325,6 @@ namespace vk
/**
* Flush
*/
void synchronize(bool blocking, vk::command_buffer& cmd, VkQueue submit_queue)
{
if (synchronized)
return;
if (m_device == nullptr)
{
m_device = &cmd.get_command_pool().get_owner();
}
copy_texture(cmd, blocking, submit_queue);
}
void* map_synchronized(u32 offset, u32 size)
{
AUDIT(synchronized);
@ -1104,6 +1084,44 @@ namespace vk
}
}
void prepare_for_dma_transfers(vk::command_buffer& cmd) override
{
if (!cmd.is_recording())
{
cmd.begin();
}
}
void cleanup_after_dma_transfers(vk::command_buffer& cmd) override
{
// End recording
cmd.end();
if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
{
// Primary access command queue, must restart it after
VkFence submit_fence;
VkFenceCreateInfo info{};
info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
vkCreateFence(*m_device, &info, nullptr, &submit_fence);
cmd.submit(m_submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
vk::wait_for_fence(submit_fence, GENERAL_WAIT_TIMEOUT);
vkDestroyFence(*m_device, submit_fence, nullptr);
CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
cmd.begin();
}
else
{
// Auxiliary command queue with auto-restart capability
cmd.submit(m_submit_queue, {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
}
verify(HERE), cmd.flags == 0;
}
public:
using baseclass::texture_cache;
@ -1181,12 +1199,6 @@ namespace vk
baseclass::on_frame_end();
}
template<typename RsxTextureType>
sampled_image_descriptor _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts)
{
return upload_texture(cmd, tex, m_rtts, const_cast<const VkQueue>(m_submit_queue));
}
vk::image *upload_image_simple(vk::command_buffer& cmd, u32 address, u32 width, u32 height)
{
if (!m_formats_support.bgra8_linear)
@ -1243,13 +1255,13 @@ namespace vk
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
{
blitter helper;
auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, const_cast<const VkQueue>(m_submit_queue));
auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper);
if (reply.succeeded)
{
if (reply.real_dst_size)
{
flush_if_cache_miss_likely(cmd, reply.to_address_range(), m_submit_queue);
flush_if_cache_miss_likely(cmd, reply.to_address_range());
}
return true;