mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-31 12:31:45 +01:00
vk/gl: Synchronization improvements
- Properly wait for the buffer transfer operation to finish before map/readback.
- Change VkFence to VkEvent, which works more like a GL fence, which is what is needed here.
- Implement supporting methods and functions.
- Do not destroy the fence by immediately waiting on it after copying to the DMA buffer.
This commit is contained in:
parent
85cb703633
commit
a49a0f2a86
@ -2670,6 +2670,7 @@ namespace rsx
|
||||
else
|
||||
{
|
||||
verify(HERE), dst_is_render_target;
|
||||
dst_subres.surface->on_write();
|
||||
}
|
||||
|
||||
if (rsx::get_resolution_scale_percent() != 100)
|
||||
|
@ -61,7 +61,6 @@ namespace gl
|
||||
|
||||
texture::format format = texture::format::rgba;
|
||||
texture::type type = texture::type::ubyte;
|
||||
rsx::surface_antialiasing aa_mode = rsx::surface_antialiasing::center_1_sample;
|
||||
|
||||
u8 get_pixel_size(texture::format fmt_, texture::type type_)
|
||||
{
|
||||
@ -157,7 +156,7 @@ namespace gl
|
||||
using baseclass::cached_texture_section;
|
||||
|
||||
void create(u16 w, u16 h, u16 depth, u16 mipmaps, gl::texture* image, u32 rsx_pitch, bool read_only,
|
||||
gl::texture::format gl_format, gl::texture::type gl_type, bool swap_bytes)
|
||||
gl::texture::format gl_format = gl::texture::format::rgba, gl::texture::type gl_type = gl::texture::type::ubyte, bool swap_bytes = false)
|
||||
{
|
||||
auto new_texture = static_cast<gl::viewable_image*>(image);
|
||||
ASSERT(!exists() || !is_managed() || vram_texture == new_texture);
|
||||
@ -166,11 +165,9 @@ namespace gl
|
||||
if (read_only)
|
||||
{
|
||||
managed_texture.reset(vram_texture);
|
||||
aa_mode = rsx::surface_antialiasing::center_1_sample;
|
||||
}
|
||||
else
|
||||
{
|
||||
aa_mode = static_cast<gl::render_target*>(image)->read_aa_mode;
|
||||
ASSERT(managed_texture.get() == nullptr);
|
||||
}
|
||||
|
||||
@ -193,28 +190,6 @@ namespace gl
|
||||
baseclass::on_section_resources_created();
|
||||
}
|
||||
|
||||
// Sets this section up around an existing image for read-only use: records the
// dimensions and mip count, stores `image` both in managed_texture and in
// vram_texture, records the RSX pitch, zeroes real_pitch and then notifies the
// base class that the section resources exist.
// NOTE(review): `pitch` is u16 here, whereas create() in this same section class
// takes a u32 rsx_pitch - confirm callers never pass a pitch above 65535.
void create_read_only(gl::viewable_image* image, u32 width, u32 height, u32 depth, u32 mipmaps, u16 pitch)
|
||||
{
|
||||
// Precondition: either the section does not exist yet, it is not managed, or it already refers to this image
ASSERT(!exists() || !is_managed() || vram_texture == image);
|
||||
|
||||
// Project verify idiom on `pitch`; presumably fails when pitch is zero - confirm against the verify macro
verify(HERE), pitch;
|
||||
|
||||
// Only to be used for read-only memory; we don't care about most members, just the dimensions and the VRAM texture handle
|
||||
this->width = width;
|
||||
this->height = height;
|
||||
this->depth = depth;
|
||||
this->mipmaps = mipmaps;
|
||||
|
||||
// The same pointer is handed to managed_texture via reset() and kept as the raw vram_texture handle
// (managed_texture is presumably an owning smart pointer - confirm against its declaration)
managed_texture.reset(image);
|
||||
vram_texture = image;
|
||||
|
||||
rsx_pitch = pitch;
|
||||
// real_pitch is left at 0 on this path
real_pitch = 0;
|
||||
|
||||
// Notify baseclass
|
||||
baseclass::on_section_resources_created();
|
||||
}
|
||||
|
||||
void set_dimensions(u32 width, u32 height, u32 /*depth*/, u32 pitch)
|
||||
{
|
||||
this->width = width;
|
||||
@ -264,17 +239,20 @@ namespace gl
|
||||
u32 real_width = width;
|
||||
u32 real_height = height;
|
||||
|
||||
switch (aa_mode)
|
||||
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||
{
|
||||
case rsx::surface_antialiasing::center_1_sample:
|
||||
break;
|
||||
case rsx::surface_antialiasing::diagonal_centered_2_samples:
|
||||
real_width *= 2;
|
||||
break;
|
||||
default:
|
||||
real_width *= 2;
|
||||
real_height *= 2;
|
||||
break;
|
||||
switch (static_cast<gl::render_target*>(vram_texture)->read_aa_mode)
|
||||
{
|
||||
case rsx::surface_antialiasing::center_1_sample:
|
||||
break;
|
||||
case rsx::surface_antialiasing::diagonal_centered_2_samples:
|
||||
real_width *= 2;
|
||||
break;
|
||||
default:
|
||||
real_width *= 2;
|
||||
real_height *= 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
areai src_area = { 0, 0, 0, 0 };
|
||||
@ -376,16 +354,13 @@ namespace gl
|
||||
|
||||
verify(HERE), cmd.drv;
|
||||
copy_texture(cmd, blocking);
|
||||
|
||||
if (blocking)
|
||||
{
|
||||
m_fence.wait_for_signal();
|
||||
}
|
||||
}
|
||||
|
||||
void* map_synchronized(u32 offset, u32 size)
|
||||
{
|
||||
AUDIT(synchronized);
|
||||
AUDIT(synchronized && !m_fence.is_empty());
|
||||
|
||||
m_fence.wait_for_signal();
|
||||
|
||||
verify(HERE), (offset + size) <= pbo_size;
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
|
||||
@ -894,7 +869,7 @@ namespace gl
|
||||
cached.set_image_type(type);
|
||||
cached.set_gcm_format(gcm_format);
|
||||
|
||||
cached.create_read_only(image, width, height, depth, mipmaps, pitch);
|
||||
cached.create(width, height, depth, mipmaps, image, pitch, true);
|
||||
cached.set_dirty(false);
|
||||
|
||||
if (context != rsx::texture_upload_context::blit_engine_dst)
|
||||
|
@ -871,10 +871,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
||||
const bool is_rsxthr = std::this_thread::get_id() == m_rsx_thread;
|
||||
bool has_queue_ref = false;
|
||||
|
||||
u64 sync_timestamp = 0ull;
|
||||
for (const auto& tex : result.sections_to_flush)
|
||||
sync_timestamp = std::max(sync_timestamp, tex->get_sync_timestamp());
|
||||
|
||||
if (!is_rsxthr)
|
||||
{
|
||||
//Always submit primary cb to ensure state consistency (flush pending changes such as image transitions)
|
||||
@ -882,7 +878,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
||||
|
||||
std::lock_guard lock(m_flush_queue_mutex);
|
||||
|
||||
m_flush_requests.post(sync_timestamp == 0ull);
|
||||
m_flush_requests.post(false);
|
||||
has_queue_ref = true;
|
||||
}
|
||||
else if (!vk::is_uninterruptible())
|
||||
@ -895,33 +891,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
||||
//LOG_ERROR(RSX, "Fault in uninterruptible code!");
|
||||
}
|
||||
|
||||
if (sync_timestamp > 0)
|
||||
{
|
||||
// Wait for earliest cb submitted after the sync timestamp to finish
|
||||
command_buffer_chunk *target_cb = nullptr;
|
||||
for (auto &cb : m_primary_cb_list)
|
||||
{
|
||||
if (cb.last_sync >= sync_timestamp)
|
||||
{
|
||||
if (!cb.pending)
|
||||
{
|
||||
target_cb = nullptr;
|
||||
break;
|
||||
}
|
||||
|
||||
if (target_cb == nullptr || target_cb->last_sync > cb.last_sync)
|
||||
{
|
||||
target_cb = &cb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (target_cb)
|
||||
{
|
||||
target_cb->wait(GENERAL_WAIT_TIMEOUT);
|
||||
}
|
||||
}
|
||||
|
||||
if (has_queue_ref)
|
||||
{
|
||||
//Wait for the RSX thread to process request if it hasn't already
|
||||
@ -3520,9 +3489,18 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
|
||||
//Verify enough memory exists before attempting to handle data transfer
|
||||
check_heap_status();
|
||||
|
||||
const auto old_speculations_count = m_texture_cache.get_num_cache_speculative_writes();
|
||||
if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer))
|
||||
{
|
||||
m_samplers_dirty.store(true);
|
||||
m_current_command_buffer->flags |= cb_has_blit_transfer;
|
||||
|
||||
if (m_texture_cache.get_num_cache_speculative_writes() > old_speculations_count)
|
||||
{
|
||||
// A speculative write happened, flush while the dma resource is valid
|
||||
// TODO: Deeper investigation as to why this can trigger problems
|
||||
flush_command_queue();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -50,7 +50,8 @@ extern u64 get_system_time();
|
||||
|
||||
enum command_buffer_data_flag
|
||||
{
|
||||
cb_has_occlusion_task = 1
|
||||
cb_has_occlusion_task = 1,
|
||||
cb_has_blit_transfer = 2
|
||||
};
|
||||
|
||||
struct command_buffer_chunk: public vk::command_buffer
|
||||
|
@ -654,6 +654,42 @@ namespace vk
|
||||
}
|
||||
}
|
||||
|
||||
// Busy-waits until `event` is reported as set, polling vkGetEventStatus on
// g_current_renderer.
//
// event   - the VkEvent to poll.
// timeout - maximum wait, in the units returned by get_system_time()
//           (presumably microseconds - confirm). 0 disables the timeout
//           check, so the loop waits indefinitely.
//
// Returns VK_SUCCESS once the event is set, or VK_TIMEOUT when more than
// `timeout` has elapsed since the first unsuccessful poll. Any status other
// than VK_EVENT_SET / VK_EVENT_RESET is passed to die_with_error() and then
// returned as-is.
VkResult wait_for_event(VkEvent event, u64 timeout)
|
||||
{
|
||||
// Start-of-wait timestamp; 0 doubles as the "not captured yet" marker
u64 t = 0;
|
||||
while (true)
|
||||
{
|
||||
switch (const auto status = vkGetEventStatus(*g_current_renderer, event))
|
||||
{
|
||||
case VK_EVENT_SET:
|
||||
return VK_SUCCESS;
|
||||
case VK_EVENT_RESET:
|
||||
// Not set yet; fall out of the switch and keep polling
break;
|
||||
default:
|
||||
// Neither set nor reset: report it, then hand the status back to the caller
die_with_error(HERE, status);
|
||||
return status;
|
||||
}
|
||||
|
||||
if (timeout)
|
||||
{
|
||||
// The clock is only read after the first poll has failed, so an already-set event costs no timing call
if (!t)
|
||||
{
|
||||
t = get_system_time();
|
||||
// Re-poll immediately; this iteration skips the pause below
continue;
|
||||
}
|
||||
|
||||
if ((get_system_time() - t) > timeout)
|
||||
{
|
||||
LOG_ERROR(RSX, "[vulkan] vk::wait_for_event has timed out!");
|
||||
return VK_TIMEOUT;
|
||||
}
|
||||
}
|
||||
|
||||
// Spin with a CPU pause hint between polls; the yield alternative is left disabled
//std::this_thread::yield();
|
||||
_mm_pause();
|
||||
}
|
||||
}
|
||||
|
||||
void die_with_error(const char* faulting_addr, VkResult error_code)
|
||||
{
|
||||
std::string error_message;
|
||||
|
@ -181,6 +181,7 @@ namespace vk
|
||||
// Fence reset with driver workarounds in place
|
||||
void reset_fence(VkFence *pFence);
|
||||
VkResult wait_for_fence(VkFence pFence, u64 timeout = 0ull);
|
||||
VkResult wait_for_event(VkEvent pEvent, u64 timeout = 0ull);
|
||||
|
||||
void die_with_error(const char* faulting_addr, VkResult error_code);
|
||||
|
||||
|
@ -36,7 +36,7 @@ namespace vk
|
||||
std::unique_ptr<vk::viewable_image> managed_texture = nullptr;
|
||||
|
||||
//DMA relevant data
|
||||
VkFence dma_fence = VK_NULL_HANDLE;
|
||||
VkEvent dma_fence = VK_NULL_HANDLE;
|
||||
vk::render_device* m_device = nullptr;
|
||||
vk::viewable_image *vram_texture = nullptr;
|
||||
std::unique_ptr<vk::buffer> dma_buffer;
|
||||
@ -82,9 +82,9 @@ namespace vk
|
||||
{
|
||||
dma_buffer.reset();
|
||||
|
||||
if (dma_fence != nullptr)
|
||||
if (dma_fence != VK_NULL_HANDLE)
|
||||
{
|
||||
vkDestroyFence(*m_device, dma_fence, nullptr);
|
||||
vkDestroyEvent(*m_device, dma_fence, nullptr);
|
||||
dma_fence = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
@ -164,9 +164,9 @@ namespace vk
|
||||
|
||||
if (dma_fence == VK_NULL_HANDLE)
|
||||
{
|
||||
VkFenceCreateInfo createInfo = {};
|
||||
createInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
vkCreateFence(*m_device, &createInfo, nullptr, &dma_fence);
|
||||
VkEventCreateInfo createInfo = {};
|
||||
createInfo.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
|
||||
vkCreateEvent(*m_device, &createInfo, nullptr, &dma_fence);
|
||||
}
|
||||
|
||||
if (dma_buffer.get() == nullptr)
|
||||
@ -297,16 +297,32 @@ namespace vk
|
||||
|
||||
if (manage_cb_lifetime)
|
||||
{
|
||||
cmd.end();
|
||||
cmd.submit(submit_queue, {}, dma_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
|
||||
VkFence submit_fence;
|
||||
VkFenceCreateInfo create_info{};
|
||||
create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
vkCreateFence(*m_device, &create_info, nullptr, &submit_fence);
|
||||
|
||||
//Now we need to restart the command-buffer to restore it to the way it was before...
|
||||
vk::wait_for_fence(dma_fence);
|
||||
vk::reset_fence(&dma_fence);
|
||||
cmd.end();
|
||||
cmd.submit(submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
|
||||
|
||||
// Now we need to restart the command-buffer to restore it to the way it was before...
|
||||
vk::wait_for_fence(submit_fence);
|
||||
CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
|
||||
|
||||
// Cleanup
|
||||
vkDestroyFence(*m_device, submit_fence, nullptr);
|
||||
vkSetEvent(*m_device, dma_fence);
|
||||
if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
|
||||
{
|
||||
// If this is a primary CB, restart it
|
||||
cmd.begin();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Only used when doing speculation
|
||||
verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
|
||||
vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
|
||||
}
|
||||
|
||||
synchronized = true;
|
||||
@ -333,6 +349,10 @@ namespace vk
|
||||
{
|
||||
AUDIT(synchronized);
|
||||
|
||||
// Synchronize, reset dma_fence after waiting
|
||||
vk::wait_for_event(dma_fence, GENERAL_WAIT_TIMEOUT);
|
||||
vkResetEvent(*m_device, dma_fence);
|
||||
|
||||
return dma_buffer->map(offset, size);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user