mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-26 04:32:35 +01:00
vk: Implement double-buffered heaps for platforms without universal support for host visibility (APPLE)
This commit is contained in:
parent
c6e35706a3
commit
f0a91484a0
@ -565,7 +565,7 @@ VKGSRender::VKGSRender() : GSRender()
|
||||
|
||||
//Create secondary command_buffer for parallel operations
|
||||
m_secondary_command_buffer_pool.create((*m_device));
|
||||
m_secondary_command_buffer.create(m_secondary_command_buffer_pool);
|
||||
m_secondary_command_buffer.create(m_secondary_command_buffer_pool, true);
|
||||
m_secondary_command_buffer.access_hint = vk::command_buffer::access_type_hint::all;
|
||||
|
||||
//Precalculated stuff
|
||||
@ -601,7 +601,7 @@ VKGSRender::VKGSRender() : GSRender()
|
||||
}
|
||||
|
||||
const auto& memory_map = m_device->get_memory_mapping();
|
||||
null_buffer = std::make_unique<vk::buffer>(*m_device, 32, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
|
||||
null_buffer = std::make_unique<vk::buffer>(*m_device, 32, memory_map.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
|
||||
null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R8_UINT, 0, 32);
|
||||
|
||||
vk::initialize_compiler_context();
|
||||
@ -2504,6 +2504,25 @@ void VKGSRender::write_buffers()
|
||||
|
||||
void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
|
||||
{
|
||||
if (m_attrib_ring_info.dirty() ||
|
||||
m_uniform_buffer_ring_info.dirty() ||
|
||||
m_index_buffer_ring_info.dirty() ||
|
||||
m_transform_constants_ring_info.dirty() ||
|
||||
m_texture_upload_buffer_ring_info.dirty())
|
||||
{
|
||||
std::lock_guard<shared_mutex> lock(m_secondary_cb_guard);
|
||||
m_secondary_command_buffer.begin();
|
||||
|
||||
m_attrib_ring_info.sync(m_secondary_command_buffer);
|
||||
m_uniform_buffer_ring_info.sync(m_secondary_command_buffer);
|
||||
m_index_buffer_ring_info.sync(m_secondary_command_buffer);
|
||||
m_transform_constants_ring_info.sync(m_secondary_command_buffer);
|
||||
m_texture_upload_buffer_ring_info.sync(m_secondary_command_buffer);
|
||||
|
||||
m_secondary_command_buffer.end();
|
||||
m_secondary_command_buffer.submit(m_swapchain->get_graphics_queue(), {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
|
||||
}
|
||||
|
||||
m_current_command_buffer->end();
|
||||
m_current_command_buffer->tag();
|
||||
m_current_command_buffer->submit(m_swapchain->get_graphics_queue(), semaphores, fence, pipeline_stage_flags);
|
||||
|
@ -19,6 +19,7 @@ namespace vk
|
||||
atomic_t<bool> g_cb_no_interrupt_flag { false };
|
||||
|
||||
//Driver compatibility workarounds
|
||||
VkFlags g_heap_compatible_buffer_types = 0;
|
||||
driver_vendor g_driver_vendor = driver_vendor::unknown;
|
||||
bool g_drv_no_primitive_restart_flag = false;
|
||||
bool g_drv_sanitize_fp_values = false;
|
||||
@ -273,6 +274,7 @@ namespace vk
|
||||
g_num_processed_frames = 0;
|
||||
g_num_total_frames = 0;
|
||||
g_driver_vendor = driver_vendor::unknown;
|
||||
g_heap_compatible_buffer_types = 0;
|
||||
|
||||
const auto gpu_name = g_current_renderer->gpu().name();
|
||||
|
||||
@ -313,6 +315,49 @@ namespace vk
|
||||
LOG_WARNING(RSX, "Unknown driver vendor for device '%s'", gpu_name);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// Buffer memory tests, only useful for portability on macOS
|
||||
VkBufferUsageFlags types[] =
|
||||
{
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
|
||||
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
|
||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
||||
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT
|
||||
};
|
||||
|
||||
VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
|
||||
VkBuffer tmp;
|
||||
VkMemoryRequirements memory_reqs;
|
||||
|
||||
VkBufferCreateInfo info = {};
|
||||
info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
info.size = 4096;
|
||||
info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
info.flags = 0;
|
||||
|
||||
for (const auto &usage : types)
|
||||
{
|
||||
info.usage = usage;
|
||||
CHECK_RESULT(vkCreateBuffer(*g_current_renderer, &info, nullptr, &tmp));
|
||||
|
||||
vkGetBufferMemoryRequirements(*g_current_renderer, tmp, &memory_reqs);
|
||||
if (g_current_renderer->get_compatible_memory_type(memory_reqs.memoryTypeBits, memory_flags, nullptr))
|
||||
{
|
||||
g_heap_compatible_buffer_types |= usage;
|
||||
}
|
||||
|
||||
vkDestroyBuffer(*g_current_renderer, tmp, nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the mask of VkBufferUsage bits collected by the buffer memory tests:
// a usage bit is set when a buffer created with that usage reported a memory
// type that is both HOST_VISIBLE and HOST_COHERENT.
VkFlags get_heap_compatible_buffer_types()
{
	return g_heap_compatible_buffer_types;
}
|
||||
|
||||
driver_vendor get_driver_vendor()
|
||||
|
@ -96,6 +96,7 @@ namespace vk
|
||||
bool emulate_primitive_restart(rsx::primitive_type type);
|
||||
bool sanitize_fp_values();
|
||||
bool fence_reset_disabled();
|
||||
VkFlags get_heap_compatible_buffer_types();
|
||||
driver_vendor get_driver_vendor();
|
||||
|
||||
VkComponentMapping default_component_map();
|
||||
@ -535,7 +536,11 @@ namespace vk
|
||||
{
|
||||
if ((mem_infos.memoryTypes[i].propertyFlags & desired_mask) == desired_mask)
|
||||
{
|
||||
*type_index = i;
|
||||
if (type_index)
|
||||
{
|
||||
*type_index = i;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -1078,6 +1083,8 @@ namespace vk
|
||||
{
|
||||
private:
|
||||
bool is_open = false;
|
||||
bool is_pending = false;
|
||||
VkFence m_submit_fence = VK_NULL_HANDLE;
|
||||
|
||||
protected:
|
||||
vk::command_pool *pool = nullptr;
|
||||
@ -1095,21 +1102,33 @@ namespace vk
|
||||
command_buffer() {}
|
||||
~command_buffer() {}
|
||||
|
||||
void create(vk::command_pool &cmd_pool)
|
||||
void create(vk::command_pool &cmd_pool, bool auto_reset = false)
|
||||
{
|
||||
VkCommandBufferAllocateInfo infos = {};
|
||||
infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
||||
infos.commandBufferCount = 1;
|
||||
infos.commandPool = (VkCommandPool)cmd_pool;
|
||||
infos.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||
|
||||
CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &infos, &commands));
|
||||
|
||||
if (auto_reset)
|
||||
{
|
||||
VkFenceCreateInfo info = {};
|
||||
info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
CHECK_RESULT(vkCreateFence(cmd_pool.get_owner(), &info, nullptr, &m_submit_fence));
|
||||
}
|
||||
|
||||
pool = &cmd_pool;
|
||||
}
|
||||
|
||||
void destroy()
|
||||
{
|
||||
vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands);
|
||||
|
||||
if (m_submit_fence)
|
||||
{
|
||||
vkDestroyFence(pool->get_owner(), m_submit_fence, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
vk::command_pool& get_command_pool() const
|
||||
@ -1124,6 +1143,15 @@ namespace vk
|
||||
|
||||
void begin()
|
||||
{
|
||||
if (m_submit_fence && is_pending)
|
||||
{
|
||||
while (vkGetFenceStatus(pool->get_owner(), m_submit_fence) != VK_SUCCESS);
|
||||
is_pending = false;
|
||||
|
||||
CHECK_RESULT(vkResetFences(pool->get_owner(), 1, &m_submit_fence));
|
||||
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
|
||||
}
|
||||
|
||||
if (is_open)
|
||||
return;
|
||||
|
||||
@ -1158,6 +1186,11 @@ namespace vk
|
||||
return;
|
||||
}
|
||||
|
||||
if (fence == VK_NULL_HANDLE)
|
||||
{
|
||||
fence = m_submit_fence;
|
||||
}
|
||||
|
||||
VkSubmitInfo infos = {};
|
||||
infos.commandBufferCount = 1;
|
||||
infos.pCommandBuffers = &commands;
|
||||
@ -1169,6 +1202,8 @@ namespace vk
|
||||
acquire_global_submit_lock();
|
||||
CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence));
|
||||
release_global_submit_lock();
|
||||
|
||||
is_pending = true;
|
||||
}
|
||||
};
|
||||
|
||||
@ -2695,50 +2730,98 @@ public:
|
||||
bool mapped = false;
|
||||
void *_ptr = nullptr;
|
||||
|
||||
std::unique_ptr<buffer> shadow;
|
||||
std::vector<VkBufferCopy> dirty_ranges;
|
||||
|
||||
// NOTE: Some drivers (RADV) use heavyweight OS map/unmap routines that are insanely slow
|
||||
// Avoid mapping/unmapping to keep these drivers from stalling
|
||||
// NOTE2: HOST_CACHED flag does not keep the mapped ptr around in the driver either
|
||||
|
||||
// Initialises the ring allocator and creates the backing Vulkan buffer(s).
//
// usage: buffer usage bits for the heap buffer.
// size:  size in bytes of the heap (and of the shadow buffer, if one is needed).
// name:  label forwarded to data_heap::init.
// guard: guard size forwarded to data_heap::init.
//
// If `usage` is reported as heap-compatible, the heap buffer itself is placed
// in host-visible coherent memory. Otherwise a host-visible TRANSFER_SRC
// "shadow" buffer is created for CPU writes, and the heap is created in
// device-local memory with TRANSFER_DST added so the shadow can be copied
// into it.
void create(VkBufferUsageFlags usage, size_t size, const char *name = "unnamed", size_t guard = 0x10000)
{
	data_heap::init(size, name, guard);

	const auto device = get_current_renderer();
	const auto memory_map = device->get_memory_mapping();

	VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
	auto memory_index = memory_map.host_visible_coherent;

	if (!(get_heap_compatible_buffer_types() & usage))
	{
		LOG_WARNING(RSX, "Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", (u32)usage);

		// The staging buffer takes over the host-visible role...
		shadow.reset(new buffer(*device, size, memory_index, memory_flags, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));

		// ...and the real heap becomes a device-local copy destination.
		usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
		memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
		memory_index = memory_map.device_local;
	}

	heap.reset(new buffer(*device, size, memory_index, memory_flags, usage, 0));
}
|
||||
|
||||
void destroy()
|
||||
{
|
||||
if (mapped)
|
||||
{
|
||||
heap->unmap();
|
||||
mapped = false;
|
||||
unmap(true);
|
||||
}
|
||||
|
||||
heap.reset();
|
||||
shadow.reset();
|
||||
}
|
||||
|
||||
// Returns a pointer `offset` bytes into the mapped backing buffer.
//
// The whole buffer is mapped on first use and the base pointer is cached in
// _ptr for later calls. When a shadow buffer exists it is the one that gets
// mapped, and every requested range is appended to dirty_ranges (same source
// and destination offset) so sync() can copy it into the heap.
void* map(size_t offset, size_t size)
{
	if (!_ptr)
	{
		// Map exactly one buffer: the shadow when staging is in use,
		// otherwise the heap itself.
		if (shadow)
			_ptr = shadow->map(0, shadow->size());
		else
			_ptr = heap->map(0, heap->size());

		mapped = true;
	}

	if (shadow)
	{
		dirty_ranges.push_back({offset, offset, size});
	}

	return (u8*)_ptr + offset;
}
|
||||
|
||||
void unmap()
|
||||
void unmap(bool force = false)
|
||||
{
|
||||
if (g_cfg.video.disable_vulkan_mem_allocator)
|
||||
if (force || g_cfg.video.disable_vulkan_mem_allocator)
|
||||
{
|
||||
heap->unmap();
|
||||
if (shadow)
|
||||
shadow->unmap();
|
||||
else
|
||||
heap->unmap();
|
||||
|
||||
mapped = false;
|
||||
_ptr = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
bool dirty()
|
||||
{
|
||||
return !dirty_ranges.empty();
|
||||
}
|
||||
|
||||
// Records the pending shadow -> heap copies into `cmd`.
//
// Does nothing when no ranges are queued. Otherwise one vkCmdCopyBuffer is
// recorded covering every queued range, the queue is emptied, and a buffer
// memory barrier over the whole heap is inserted (transfer write ->
// vertex-shader-stage shader read).
void sync(const vk::command_buffer& cmd)
{
	if (dirty_ranges.empty())
	{
		return;
	}

	// Ranges are only queued when a shadow exists; both buffers must be alive.
	verify (HERE), shadow, heap;

	const u32 region_count = (u32)dirty_ranges.size();
	vkCmdCopyBuffer(cmd, shadow->value, heap->value, region_count, dirty_ranges.data());
	dirty_ranges.clear();

	insert_buffer_memory_barrier(cmd, heap->value, 0, heap->size(),
		VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
		VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
}
|
||||
};
|
||||
}
|
||||
|
@ -58,13 +58,8 @@ namespace vk
|
||||
{
|
||||
if (!m_vao.heap)
|
||||
{
|
||||
auto memory_types = vk::get_memory_mapping(m_device->gpu());
|
||||
|
||||
m_vao.init(1 * 0x100000, "overlays VAO", 128);
|
||||
m_vao.heap = std::make_unique<vk::buffer>(*m_device, 1 * 0x100000, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 0);
|
||||
|
||||
m_ubo.init(8 * 0x100000, "overlays UBO", 128);
|
||||
m_ubo.heap = std::make_unique<vk::buffer>(*m_device, 8 * 0x100000, memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0);
|
||||
m_vao.create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1 * 0x100000, "overlays VAO", 128);
|
||||
m_ubo.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 8 * 0x100000, "overlays UBO", 128);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user