Mirror of https://github.com/RPCS3/rpcs3.git

vk: Implement double-buffered heaps for platforms without universal support for host visibility (APPLE)

kd-11 2018-08-23 21:56:06 +03:00 committed by kd-11
parent c6e35706a3
commit f0a91484a0
4 changed files with 163 additions and 21 deletions
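
In outline: on drivers without universal host-visible buffer support (notably MoltenVK on macOS), a heap whose usage flags fail the compatibility probe is double-buffered. CPU writes land in a persistently mapped host-visible shadow buffer, the touched ranges are recorded, and a transfer replays them into the real DEVICE_LOCAL heap before any work that reads them executes. A minimal sketch of the pattern under illustrative names (this is not the rpcs3 API):

    #include <vulkan/vulkan.h>
    #include <vector>

    // Double-buffered heap: the CPU writes `staging`, the GPU reads `local`.
    struct shadowed_heap
    {
        VkBuffer staging = VK_NULL_HANDLE; // HOST_VISIBLE | HOST_COHERENT, persistently mapped
        VkBuffer local = VK_NULL_HANDLE;   // DEVICE_LOCAL, created with usage | TRANSFER_DST
        void* mapped = nullptr;            // persistent mapping of `staging`
        std::vector<VkBufferCopy> dirty;   // ranges written since the last sync

        // Hand out a CPU pointer and remember the range for the next flush
        void* write_ptr(VkDeviceSize offset, VkDeviceSize size)
        {
            dirty.push_back({ offset, offset, size }); // same offset in both buffers
            return static_cast<char*>(mapped) + offset;
        }

        // Replay all pending writes into the device-local copy
        void sync(VkCommandBuffer cmd)
        {
            if (dirty.empty())
                return;

            vkCmdCopyBuffer(cmd, staging, local, (uint32_t)dirty.size(), dirty.data());
            dirty.clear();
            // A TRANSFER -> shader-read barrier must follow before the GPU consumes `local`.
        }
    };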

rpcs3/Emu/RSX/VK/VKGSRender.cpp

@@ -565,7 +565,7 @@ VKGSRender::VKGSRender() : GSRender()
 	//Create secondary command_buffer for parallel operations
 	m_secondary_command_buffer_pool.create((*m_device));
-	m_secondary_command_buffer.create(m_secondary_command_buffer_pool);
+	m_secondary_command_buffer.create(m_secondary_command_buffer_pool, true);
 	m_secondary_command_buffer.access_hint = vk::command_buffer::access_type_hint::all;

 	//Precalculated stuff
@@ -601,7 +601,7 @@ VKGSRender::VKGSRender() : GSRender()
 	}

 	const auto& memory_map = m_device->get_memory_mapping();
-	null_buffer = std::make_unique<vk::buffer>(*m_device, 32, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
+	null_buffer = std::make_unique<vk::buffer>(*m_device, 32, memory_map.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
 	null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R8_UINT, 0, 32);

 	vk::initialize_compiler_context();
@@ -2504,6 +2504,25 @@ void VKGSRender::write_buffers()
 void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
 {
+	if (m_attrib_ring_info.dirty() ||
+		m_uniform_buffer_ring_info.dirty() ||
+		m_index_buffer_ring_info.dirty() ||
+		m_transform_constants_ring_info.dirty() ||
+		m_texture_upload_buffer_ring_info.dirty())
+	{
+		std::lock_guard<shared_mutex> lock(m_secondary_cb_guard);
+		m_secondary_command_buffer.begin();
+
+		m_attrib_ring_info.sync(m_secondary_command_buffer);
+		m_uniform_buffer_ring_info.sync(m_secondary_command_buffer);
+		m_index_buffer_ring_info.sync(m_secondary_command_buffer);
+		m_transform_constants_ring_info.sync(m_secondary_command_buffer);
+		m_texture_upload_buffer_ring_info.sync(m_secondary_command_buffer);
+
+		m_secondary_command_buffer.end();
+		m_secondary_command_buffer.submit(m_swapchain->get_graphics_queue(), {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+	}
+
 	m_current_command_buffer->end();
 	m_current_command_buffer->tag();
 	m_current_command_buffer->submit(m_swapchain->get_graphics_queue(), semaphores, fence, pipeline_stage_flags);
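
Two properties make this safe: both command buffers target the same graphics queue, and Vulkan starts submissions on a single queue in submission order, so the heap copies are submitted ahead of the draws that read them; the barrier recorded in sync() then orders the memory accesses themselves. A hypothetical caller-side view of one frame (illustrative fragment; `heap_offset`, `upload_size` and `vertex_data` are placeholders):

    void* dst = m_attrib_ring_info.map(heap_offset, upload_size); // lands in the shadow buffer on affected drivers
    std::memcpy(dst, vertex_data, upload_size);
    m_attrib_ring_info.unmap();   // persistent mapping: a no-op unless forced
    // ... record draws into m_current_command_buffer ...
    close_and_submit_command_buffer(semaphores, fence, pipeline_stage_flags); // flushes dirty heaps first, as above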

rpcs3/Emu/RSX/VK/VKHelpers.cpp

@@ -19,6 +19,7 @@ namespace vk
 	atomic_t<bool> g_cb_no_interrupt_flag { false };

 	//Driver compatibility workarounds
+	VkFlags g_heap_compatible_buffer_types = 0;
 	driver_vendor g_driver_vendor = driver_vendor::unknown;
 	bool g_drv_no_primitive_restart_flag = false;
 	bool g_drv_sanitize_fp_values = false;
@@ -273,6 +274,7 @@ namespace vk
 		g_num_processed_frames = 0;
 		g_num_total_frames = 0;
 		g_driver_vendor = driver_vendor::unknown;
+		g_heap_compatible_buffer_types = 0;

 		const auto gpu_name = g_current_renderer->gpu().name();
@@ -313,6 +315,49 @@ namespace vk
 				LOG_WARNING(RSX, "Unknown driver vendor for device '%s'", gpu_name);
 			}
 		}

+		{
+			// Buffer memory tests, only useful for portability on macOS
+			VkBufferUsageFlags types[] =
+			{
+				VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+				VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+				VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
+				VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+				VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+				VK_BUFFER_USAGE_VERTEX_BUFFER_BIT
+			};
+
+			VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+			VkBuffer tmp;
+			VkMemoryRequirements memory_reqs;
+
+			VkBufferCreateInfo info = {};
+			info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+			info.size = 4096;
+			info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+			info.flags = 0;
+
+			for (const auto &usage : types)
+			{
+				info.usage = usage;
+				CHECK_RESULT(vkCreateBuffer(*g_current_renderer, &info, nullptr, &tmp));
+
+				vkGetBufferMemoryRequirements(*g_current_renderer, tmp, &memory_reqs);
+				if (g_current_renderer->get_compatible_memory_type(memory_reqs.memoryTypeBits, memory_flags, nullptr))
+				{
+					g_heap_compatible_buffer_types |= usage;
+				}
+
+				vkDestroyBuffer(*g_current_renderer, tmp, nullptr);
+			}
+		}
 	}

+	VkFlags get_heap_compatible_buffer_types()
+	{
+		return g_heap_compatible_buffer_types;
+	}
+
 	driver_vendor get_driver_vendor()
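
The probe above is plain Vulkan: create a throwaway buffer per usage flag, query its memory requirements, and check whether any permitted memory type is both HOST_VISIBLE and HOST_COHERENT. A freestanding equivalent (hypothetical helper name; standard API calls only):

    #include <vulkan/vulkan.h>

    bool usage_is_host_heap_compatible(VkDevice dev, const VkPhysicalDeviceMemoryProperties& props, VkBufferUsageFlags usage)
    {
        VkBufferCreateInfo info = {};
        info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
        info.size = 4096;
        info.usage = usage;
        info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

        VkBuffer tmp = VK_NULL_HANDLE;
        if (vkCreateBuffer(dev, &info, nullptr, &tmp) != VK_SUCCESS)
            return false;

        VkMemoryRequirements reqs;
        vkGetBufferMemoryRequirements(dev, tmp, &reqs);
        vkDestroyBuffer(dev, tmp, nullptr);

        // Accept only if some allowed memory type is host-visible and coherent
        const VkFlags wanted = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
        for (uint32_t i = 0; i < props.memoryTypeCount; ++i)
        {
            if ((reqs.memoryTypeBits & (1u << i)) && (props.memoryTypes[i].propertyFlags & wanted) == wanted)
                return true;
        }
        return false;
    }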

rpcs3/Emu/RSX/VK/VKHelpers.h

@@ -96,6 +96,7 @@ namespace vk
 	bool emulate_primitive_restart(rsx::primitive_type type);
 	bool sanitize_fp_values();
 	bool fence_reset_disabled();
+	VkFlags get_heap_compatible_buffer_types();
 	driver_vendor get_driver_vendor();

 	VkComponentMapping default_component_map();
@@ -535,7 +536,11 @@ namespace vk
 			{
 				if ((mem_infos.memoryTypes[i].propertyFlags & desired_mask) == desired_mask)
 				{
-					*type_index = i;
+					if (type_index)
+					{
+						*type_index = i;
+					}
+
 					return true;
 				}
 			}
@@ -1078,6 +1083,8 @@ namespace vk
 	{
 	private:
 		bool is_open = false;
+		bool is_pending = false;
+		VkFence m_submit_fence = VK_NULL_HANDLE;

 	protected:
 		vk::command_pool *pool = nullptr;
@@ -1095,21 +1102,33 @@ namespace vk
 		command_buffer() {}
 		~command_buffer() {}

-		void create(vk::command_pool &cmd_pool)
+		void create(vk::command_pool &cmd_pool, bool auto_reset = false)
 		{
 			VkCommandBufferAllocateInfo infos = {};
 			infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
 			infos.commandBufferCount = 1;
 			infos.commandPool = (VkCommandPool)cmd_pool;
 			infos.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
 			CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &infos, &commands));
+
+			if (auto_reset)
+			{
+				VkFenceCreateInfo info = {};
+				info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+				CHECK_RESULT(vkCreateFence(cmd_pool.get_owner(), &info, nullptr, &m_submit_fence));
+			}
+
 			pool = &cmd_pool;
 		}

 		void destroy()
 		{
 			vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands);
+
+			if (m_submit_fence)
+			{
+				vkDestroyFence(pool->get_owner(), m_submit_fence, nullptr);
+			}
 		}

 		vk::command_pool& get_command_pool() const
@@ -1124,6 +1143,15 @@
 		void begin()
 		{
+			if (m_submit_fence && is_pending)
+			{
+				while (vkGetFenceStatus(pool->get_owner(), m_submit_fence) != VK_SUCCESS);
+
+				is_pending = false;
+				CHECK_RESULT(vkResetFences(pool->get_owner(), 1, &m_submit_fence));
+				CHECK_RESULT(vkResetCommandBuffer(commands, 0));
+			}
+
 			if (is_open)
 				return;
@@ -1158,6 +1186,11 @@
 				return;
 			}

+			if (fence == VK_NULL_HANDLE)
+			{
+				fence = m_submit_fence;
+			}
+
 			VkSubmitInfo infos = {};
 			infos.commandBufferCount = 1;
 			infos.pCommandBuffers = &commands;
@@ -1169,6 +1202,8 @@
 			acquire_global_submit_lock();
 			CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence));
 			release_global_submit_lock();
+
+			is_pending = true;
 		}
 	};
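
Note the spin on vkGetFenceStatus in begin(): by the time a buffer is reused its previous submission has almost always retired, so the loop rarely iterates and avoids a heavier driver wait. The standard blocking alternative, sketched for comparison (not what this commit uses):

    #include <vulkan/vulkan.h>
    #include <cstdint>

    // Block until the previous submission retires, then make the fence reusable
    void wait_and_reset(VkDevice dev, VkFence fence)
    {
        vkWaitForFences(dev, 1, &fence, VK_TRUE, UINT64_MAX);
        vkResetFences(dev, 1, &fence);
    }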
@@ -2695,50 +2730,98 @@ public:
 		bool mapped = false;
 		void *_ptr = nullptr;

+		std::unique_ptr<buffer> shadow;
+		std::vector<VkBufferCopy> dirty_ranges;
+
 		// NOTE: Some drivers (RADV) use heavyweight OS map/unmap routines that are insanely slow
 		// Avoid mapping/unmapping to keep these drivers from stalling
 		// NOTE2: HOST_CACHED flag does not keep the mapped ptr around in the driver either

 		void create(VkBufferUsageFlags usage, size_t size, const char *name = "unnamed", size_t guard = 0x10000)
 		{
+			data_heap::init(size, name, guard);
+
 			const auto device = get_current_renderer();
 			const auto memory_map = device->get_memory_mapping();
-			const VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

-			data_heap::init(size, name, guard);
-			heap.reset(new buffer(*device, size, memory_map.host_visible_coherent, memory_flags, usage, 0));
+			VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+			auto memory_index = memory_map.host_visible_coherent;
+
+			if (!(get_heap_compatible_buffer_types() & usage))
+			{
+				LOG_WARNING(RSX, "Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", (u32)usage);
+
+				shadow.reset(new buffer(*device, size, memory_index, memory_flags, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));
+				usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+				memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+				memory_index = memory_map.device_local;
+			}
+
+			heap.reset(new buffer(*device, size, memory_index, memory_flags, usage, 0));
 		}

 		void destroy()
 		{
 			if (mapped)
 			{
-				heap->unmap();
-				mapped = false;
+				unmap(true);
 			}

 			heap.reset();
+			shadow.reset();
 		}

 		void* map(size_t offset, size_t size)
 		{
 			if (!_ptr)
 			{
-				_ptr = heap->map(0, heap->size());
+				if (shadow)
+					_ptr = shadow->map(0, shadow->size());
+				else
+					_ptr = heap->map(0, heap->size());
+
 				mapped = true;
 			}

+			if (shadow)
+			{
+				dirty_ranges.push_back({offset, offset, size});
+			}
+
 			return (u8*)_ptr + offset;
 		}

-		void unmap()
+		void unmap(bool force = false)
 		{
-			if (g_cfg.video.disable_vulkan_mem_allocator)
+			if (force || g_cfg.video.disable_vulkan_mem_allocator)
 			{
-				heap->unmap();
+				if (shadow)
+					shadow->unmap();
+				else
+					heap->unmap();
+
 				mapped = false;
 				_ptr = nullptr;
 			}
 		}

+		bool dirty()
+		{
+			return !dirty_ranges.empty();
+		}
+
+		void sync(const vk::command_buffer& cmd)
+		{
+			if (!dirty_ranges.empty())
+			{
+				verify (HERE), shadow, heap;
+
+				vkCmdCopyBuffer(cmd, shadow->value, heap->value, (u32)dirty_ranges.size(), dirty_ranges.data());
+				dirty_ranges.resize(0);
+
+				insert_buffer_memory_barrier(cmd, heap->value, 0, heap->size(),
+					VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
+					VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
+			}
+		}
 	};
 }
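
insert_buffer_memory_barrier() is an rpcs3 helper; judging from the argument order used in sync() above, it expands to roughly this raw-API barrier (a sketch, with `heap_buffer` standing in for heap->value):

    VkBufferMemoryBarrier barrier = {};
    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; // writes made by vkCmdCopyBuffer
    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;    // reads made by shaders
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.buffer = heap_buffer;
    barrier.offset = 0;
    barrier.size = VK_WHOLE_SIZE; // the diff passes heap->size() explicitly

    vkCmdPipelineBarrier(cmd,
        VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
        0, 0, nullptr, 1, &barrier, 0, nullptr);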

rpcs3/Emu/RSX/VK/VKOverlays.h

@@ -58,13 +58,8 @@ namespace vk
 		{
 			if (!m_vao.heap)
 			{
-				auto memory_types = vk::get_memory_mapping(m_device->gpu());
-
-				m_vao.init(1 * 0x100000, "overlays VAO", 128);
-				m_vao.heap = std::make_unique<vk::buffer>(*m_device, 1 * 0x100000, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 0);
-
-				m_ubo.init(8 * 0x100000, "overlays UBO", 128);
-				m_ubo.heap = std::make_unique<vk::buffer>(*m_device, 8 * 0x100000, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0);
+				m_vao.create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1 * 0x100000, "overlays VAO", 128);
+				m_ubo.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 8 * 0x100000, "overlays UBO", 128);
 			}
 		}