
vk/gl: Synchronization improvements

- Properly wait for the buffer transfer operation to finish before map/readback!
- Change VkFence to VkEvent, which behaves more like a GL fence, which is what is needed here.
- Implement supporting methods and functions
- Do not destroy the fence by waiting immediately after copying to the DMA buffer
kd-11 2019-03-14 15:27:50 +03:00 committed by kd-11
parent 85cb703633
commit a49a0f2a86
7 changed files with 99 additions and 87 deletions
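
For context, the DMA readback flow this commit moves to can be summarized as: record the image-to-buffer copy, signal a VkEvent once the transfer has executed, and only map the buffer after the event has been observed as set. Unlike a VkFence, a VkEvent can be signalled from the host (vkSetEvent) or from inside a command buffer (vkCmdSetEvent) and polled with vkGetEventStatus without touching the queue, which is what makes it behave like a GL fence. A minimal sketch of that pattern, using placeholder handles (device, cmd, dma_buffer, ...) rather than the identifiers from the patch:

#include <vulkan/vulkan.h>

// Hypothetical helper illustrating the event-based readback flow.
// The command buffer is assumed to be submitted by the caller between
// recording and polling.
void* readback_after_copy(VkDevice device, VkCommandBuffer cmd,
                          VkImage src_image, VkBuffer dma_buffer,
                          VkDeviceMemory dma_memory,
                          const VkBufferImageCopy& region, VkEvent dma_event)
{
    // Producer: record the copy, then set the event on the GPU timeline
    vkCmdCopyImageToBuffer(cmd, src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                           dma_buffer, 1, &region);
    vkCmdSetEvent(cmd, dma_event, VK_PIPELINE_STAGE_TRANSFER_BIT);

    // ... caller submits cmd to the transfer/graphics queue here ...

    // Consumer: poll until the GPU has executed the set-event command
    while (vkGetEventStatus(device, dma_event) != VK_EVENT_SET)
        ; // a real implementation pauses/yields and enforces a timeout

    vkResetEvent(device, dma_event);

    void* mapped = nullptr;
    vkMapMemory(device, dma_memory, 0, VK_WHOLE_SIZE, 0, &mapped);
    return mapped;
}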

View File

@ -2670,6 +2670,7 @@ namespace rsx
else
{
verify(HERE), dst_is_render_target;
dst_subres.surface->on_write();
}
if (rsx::get_resolution_scale_percent() != 100)

View File

@ -61,7 +61,6 @@ namespace gl
texture::format format = texture::format::rgba;
texture::type type = texture::type::ubyte;
rsx::surface_antialiasing aa_mode = rsx::surface_antialiasing::center_1_sample;
u8 get_pixel_size(texture::format fmt_, texture::type type_)
{
@ -157,7 +156,7 @@ namespace gl
using baseclass::cached_texture_section;
void create(u16 w, u16 h, u16 depth, u16 mipmaps, gl::texture* image, u32 rsx_pitch, bool read_only,
gl::texture::format gl_format, gl::texture::type gl_type, bool swap_bytes)
gl::texture::format gl_format = gl::texture::format::rgba, gl::texture::type gl_type = gl::texture::type::ubyte, bool swap_bytes = false)
{
auto new_texture = static_cast<gl::viewable_image*>(image);
ASSERT(!exists() || !is_managed() || vram_texture == new_texture);
@ -166,11 +165,9 @@ namespace gl
if (read_only)
{
managed_texture.reset(vram_texture);
aa_mode = rsx::surface_antialiasing::center_1_sample;
}
else
{
aa_mode = static_cast<gl::render_target*>(image)->read_aa_mode;
ASSERT(managed_texture.get() == nullptr);
}
@ -193,28 +190,6 @@ namespace gl
baseclass::on_section_resources_created();
}
void create_read_only(gl::viewable_image* image, u32 width, u32 height, u32 depth, u32 mipmaps, u16 pitch)
{
ASSERT(!exists() || !is_managed() || vram_texture == image);
verify(HERE), pitch;
//Only to be used for ro memory, we dont care about most members, just dimensions and the vram texture handle
this->width = width;
this->height = height;
this->depth = depth;
this->mipmaps = mipmaps;
managed_texture.reset(image);
vram_texture = image;
rsx_pitch = pitch;
real_pitch = 0;
// Notify baseclass
baseclass::on_section_resources_created();
}
void set_dimensions(u32 width, u32 height, u32 /*depth*/, u32 pitch)
{
this->width = width;
@ -264,7 +239,9 @@ namespace gl
u32 real_width = width;
u32 real_height = height;
switch (aa_mode)
if (context == rsx::texture_upload_context::framebuffer_storage)
{
switch (static_cast<gl::render_target*>(vram_texture)->read_aa_mode)
{
case rsx::surface_antialiasing::center_1_sample:
break;
@ -276,6 +253,7 @@ namespace gl
real_height *= 2;
break;
}
}
areai src_area = { 0, 0, 0, 0 };
const areai dst_area = { 0, 0, (s32)real_width, (s32)real_height };
@ -376,16 +354,13 @@ namespace gl
verify(HERE), cmd.drv;
copy_texture(cmd, blocking);
if (blocking)
{
m_fence.wait_for_signal();
}
}
void* map_synchronized(u32 offset, u32 size)
{
AUDIT(synchronized);
AUDIT(synchronized && !m_fence.is_empty());
m_fence.wait_for_signal();
verify(HERE), (offset + size) <= pbo_size;
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
@ -894,7 +869,7 @@ namespace gl
cached.set_image_type(type);
cached.set_gcm_format(gcm_format);
cached.create_read_only(image, width, height, depth, mipmaps, pitch);
cached.create(width, height, depth, mipmaps, image, pitch, true);
cached.set_dirty(false);
if (context != rsx::texture_upload_context::blit_engine_dst)
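
The m_fence wait that now guards map_synchronized above is rpcs3's wrapper around a GL sync object. In raw OpenGL the same blocking-readback pattern looks roughly like the sketch below; pbo_id and pbo_size stand in for the cached section's pixel pack buffer:

// After recording the transfer into the pixel pack buffer:
GLsync transfer_done = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
glFlush(); // ensure the fence command actually reaches the GPU

// Before mapping the PBO for CPU readback, wait for the transfer to finish:
glClientWaitSync(transfer_done, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
glDeleteSync(transfer_done);

glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
void* data = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, pbo_size, GL_MAP_READ_BIT);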

View File

@ -871,10 +871,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
const bool is_rsxthr = std::this_thread::get_id() == m_rsx_thread;
bool has_queue_ref = false;
u64 sync_timestamp = 0ull;
for (const auto& tex : result.sections_to_flush)
sync_timestamp = std::max(sync_timestamp, tex->get_sync_timestamp());
if (!is_rsxthr)
{
//Always submit primary cb to ensure state consistency (flush pending changes such as image transitions)
@ -882,7 +878,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
std::lock_guard lock(m_flush_queue_mutex);
m_flush_requests.post(sync_timestamp == 0ull);
m_flush_requests.post(false);
has_queue_ref = true;
}
else if (!vk::is_uninterruptible())
@ -895,33 +891,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
//LOG_ERROR(RSX, "Fault in uninterruptible code!");
}
if (sync_timestamp > 0)
{
// Wait for earliest cb submitted after the sync timestamp to finish
command_buffer_chunk *target_cb = nullptr;
for (auto &cb : m_primary_cb_list)
{
if (cb.last_sync >= sync_timestamp)
{
if (!cb.pending)
{
target_cb = nullptr;
break;
}
if (target_cb == nullptr || target_cb->last_sync > cb.last_sync)
{
target_cb = &cb;
}
}
}
if (target_cb)
{
target_cb->wait(GENERAL_WAIT_TIMEOUT);
}
}
if (has_queue_ref)
{
//Wait for the RSX thread to process request if it hasn't already
@ -3520,9 +3489,18 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
//Verify enough memory exists before attempting to handle data transfer
check_heap_status();
const auto old_speculations_count = m_texture_cache.get_num_cache_speculative_writes();
if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer))
{
m_samplers_dirty.store(true);
m_current_command_buffer->flags |= cb_has_blit_transfer;
if (m_texture_cache.get_num_cache_speculative_writes() > old_speculations_count)
{
// A speculative write happened, flush while the dma resource is valid
// TODO: Deeper investigation as to why this can trigger problems
flush_command_queue();
}
return true;
}

View File

@ -50,7 +50,8 @@ extern u64 get_system_time();
enum command_buffer_data_flag
{
cb_has_occlusion_task = 1
cb_has_occlusion_task = 1,
cb_has_blit_transfer = 2
};
struct command_buffer_chunk: public vk::command_buffer

View File

@ -654,6 +654,42 @@ namespace vk
}
}
VkResult wait_for_event(VkEvent event, u64 timeout)
{
u64 t = 0;
while (true)
{
switch (const auto status = vkGetEventStatus(*g_current_renderer, event))
{
case VK_EVENT_SET:
return VK_SUCCESS;
case VK_EVENT_RESET:
break;
default:
die_with_error(HERE, status);
return status;
}
if (timeout)
{
if (!t)
{
t = get_system_time();
continue;
}
if ((get_system_time() - t) > timeout)
{
LOG_ERROR(RSX, "[vulkan] vk::wait_for_event has timed out!");
return VK_TIMEOUT;
}
}
//std::this_thread::yield();
_mm_pause();
}
}
void die_with_error(const char* faulting_addr, VkResult error_code)
{
std::string error_message;

View File

@ -181,6 +181,7 @@ namespace vk
// Fence reset with driver workarounds in place
void reset_fence(VkFence *pFence);
VkResult wait_for_fence(VkFence pFence, u64 timeout = 0ull);
VkResult wait_for_event(VkEvent pEvent, u64 timeout = 0ull);
void die_with_error(const char* faulting_addr, VkResult error_code);

View File

@ -36,7 +36,7 @@ namespace vk
std::unique_ptr<vk::viewable_image> managed_texture = nullptr;
//DMA relevant data
VkFence dma_fence = VK_NULL_HANDLE;
VkEvent dma_fence = VK_NULL_HANDLE;
vk::render_device* m_device = nullptr;
vk::viewable_image *vram_texture = nullptr;
std::unique_ptr<vk::buffer> dma_buffer;
@ -82,9 +82,9 @@ namespace vk
{
dma_buffer.reset();
if (dma_fence != nullptr)
if (dma_fence != VK_NULL_HANDLE)
{
vkDestroyFence(*m_device, dma_fence, nullptr);
vkDestroyEvent(*m_device, dma_fence, nullptr);
dma_fence = VK_NULL_HANDLE;
}
}
@ -164,9 +164,9 @@ namespace vk
if (dma_fence == VK_NULL_HANDLE)
{
VkFenceCreateInfo createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
vkCreateFence(*m_device, &createInfo, nullptr, &dma_fence);
VkEventCreateInfo createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
vkCreateEvent(*m_device, &createInfo, nullptr, &dma_fence);
}
if (dma_buffer.get() == nullptr)
@ -297,17 +297,33 @@ namespace vk
if (manage_cb_lifetime)
{
VkFence submit_fence;
VkFenceCreateInfo create_info{};
create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
vkCreateFence(*m_device, &create_info, nullptr, &submit_fence);
cmd.end();
cmd.submit(submit_queue, {}, dma_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
cmd.submit(submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
// Now we need to restart the command-buffer to restore it to the way it was before...
vk::wait_for_fence(dma_fence);
vk::reset_fence(&dma_fence);
vk::wait_for_fence(submit_fence);
CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
// Cleanup
vkDestroyFence(*m_device, submit_fence, nullptr);
vkSetEvent(*m_device, dma_fence);
if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
{
// If this is a primary CB, restart it
cmd.begin();
}
}
else
{
// Only used when doing speculation
verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
}
synchronized = true;
sync_timestamp = get_system_time();
@ -333,6 +349,10 @@ namespace vk
{
AUDIT(synchronized);
// Synchronize, reset dma_fence after waiting
vk::wait_for_event(dma_fence, GENERAL_WAIT_TIMEOUT);
vkResetEvent(*m_device, dma_fence);
return dma_buffer->map(offset, size);
}