1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 18:53:28 +01:00

vk: Restructure gpu labels and add them as an event backend

This commit is contained in:
kd-11 2023-06-29 01:06:26 +03:00 committed by kd-11
parent 92992a5406
commit a047ddffc0
4 changed files with 237 additions and 85 deletions

View File

@ -56,6 +56,8 @@ namespace vk
std::deque<eid_scope_t> m_eid_map;
shared_mutex m_eid_map_lock;
std::vector<std::function<void()>> m_exit_handlers;
inline eid_scope_t& get_current_eid_scope()
{
const auto eid = current_event_id();
@ -77,6 +79,12 @@ namespace vk
void destroy()
{
flush();
// Run the on-exit callbacks
for (const auto& callback : m_exit_handlers)
{
callback();
}
}
void flush()
@ -121,6 +129,11 @@ namespace vk
return ret;
}
void add_exit_callback(std::function<void()> callback) override
{
m_exit_handlers.push_back(callback);
}
void dispose(vk::disposable_t& disposable) override
{
get_current_eid_scope().m_disposables.emplace_back(std::move(disposable));

View File

@ -44,6 +44,8 @@ namespace vk
{
virtual void dispose(vk::disposable_t& object) = 0;
virtual void add_exit_callback(std::function<void()> callback) = 0;
template<typename T>
void dispose(std::unique_ptr<T>& object)
{

View File

@ -15,6 +15,40 @@
namespace vk
{
namespace globals
{
static std::unique_ptr<gpu_debug_marker_pool> g_gpu_debug_marker_pool;
static std::unique_ptr<gpu_label_pool> g_gpu_label_pool;
gpu_debug_marker_pool& get_shared_marker_pool(const vk::render_device& dev)
{
if (!g_gpu_debug_marker_pool)
{
g_gpu_debug_marker_pool = std::make_unique<gpu_debug_marker_pool>(dev, 65536);
vk::get_gc()->add_exit_callback([]()
{
g_gpu_debug_marker_pool.reset();
});
}
return *g_gpu_debug_marker_pool;
}
gpu_label_pool& get_shared_label_pool(const vk::render_device& dev)
{
if (!g_gpu_label_pool)
{
g_gpu_label_pool = std::make_unique<gpu_label_pool>(dev, 65536);
vk::get_gc()->add_exit_callback([]()
{
g_gpu_label_pool.reset();
});
}
return *g_gpu_label_pool;
}
}
// Util
namespace v1_utils
{
@ -175,8 +209,23 @@ namespace vk
}
event::event(const render_device& dev, sync_domain domain)
: m_device(&dev), v2(dev.get_synchronization2_support())
: m_device(&dev)
{
m_backend = dev.get_synchronization2_support()
? sync_backend::events_v2
: sync_backend::events_v1;
if (domain != sync_domain::gpu &&
vk::get_driver_vendor() == vk::driver_vendor::AMD &&
vk::get_chip_family() < vk::chip_class::AMD_navi1x)
{
// Events don't work quite right on AMD drivers
m_backend = sync_backend::gpu_label;
m_label = std::make_unique<vk::gpu_label>(globals::get_shared_label_pool(dev));
return;
}
VkEventCreateInfo info
{
.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
@ -184,7 +233,7 @@ namespace vk
.flags = 0
};
if (v2 && domain == sync_domain::gpu)
if (domain == sync_domain::gpu && m_backend == sync_backend::events_v2)
{
info.flags = VK_EVENT_CREATE_DEVICE_ONLY_BIT_KHR;
}
@ -202,32 +251,40 @@ namespace vk
void event::resolve_dependencies(const command_buffer& cmd, const VkDependencyInfoKHR& dependency)
{
if (v2)
ensure(m_backend != sync_backend::gpu_label);
if (m_backend == sync_backend::events_v2)
{
m_device->_vkCmdPipelineBarrier2KHR(cmd, &dependency);
return;
}
else
{
const auto src_stages = v1_utils::gather_src_stages(dependency);
const auto dst_stages = v1_utils::gather_dst_stages(dependency);
const auto memory_barriers = v1_utils::get_memory_barriers(dependency);
const auto image_memory_barriers = v1_utils::get_image_memory_barriers(dependency);
const auto buffer_memory_barriers = v1_utils::get_buffer_memory_barriers(dependency);
vkCmdPipelineBarrier(cmd, src_stages, dst_stages, dependency.dependencyFlags,
::size32(memory_barriers), memory_barriers.data(),
::size32(buffer_memory_barriers), buffer_memory_barriers.data(),
::size32(image_memory_barriers), image_memory_barriers.data());
}
const auto src_stages = v1_utils::gather_src_stages(dependency);
const auto dst_stages = v1_utils::gather_dst_stages(dependency);
const auto memory_barriers = v1_utils::get_memory_barriers(dependency);
const auto image_memory_barriers = v1_utils::get_image_memory_barriers(dependency);
const auto buffer_memory_barriers = v1_utils::get_buffer_memory_barriers(dependency);
vkCmdPipelineBarrier(cmd, src_stages, dst_stages, dependency.dependencyFlags,
::size32(memory_barriers), memory_barriers.data(),
::size32(buffer_memory_barriers), buffer_memory_barriers.data(),
::size32(image_memory_barriers), image_memory_barriers.data());
}
void event::signal(const command_buffer& cmd, const VkDependencyInfoKHR& dependency)
{
if (m_backend == sync_backend::gpu_label)
{
// Fallback path
m_label->signal(cmd, dependency);
return;
}
// Resolve the actual dependencies on a pipeline barrier
resolve_dependencies(cmd, dependency);
// Signalling won't wait. The caller is responsible for setting up the dependencies correctly.
if (v2) [[ likely ]]
if (m_backend == sync_backend::events_v2)
{
// We need a memory barrier to keep AMDVLK from hanging
VkMemoryBarrier2KHR mem_barrier =
@ -243,12 +300,12 @@ namespace vk
.memoryBarrierCount = 1,
.pMemoryBarriers = &mem_barrier
};
m_device->_vkCmdSetEvent2KHR(cmd, m_vk_event, &empty_dependency);
return;
}
else
{
vkCmdSetEvent(cmd, m_vk_event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
}
vkCmdSetEvent(cmd, m_vk_event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
}
void event::host_signal() const
@ -261,29 +318,34 @@ namespace vk
{
ensure(m_vk_event);
if (v2) [[ likely ]]
if (m_backend == sync_backend::events_v2) [[ likely ]]
{
m_device->_vkCmdWaitEvents2KHR(cmd, 1, &m_vk_event, &dependency);
return;
}
else
{
const auto src_stages = v1_utils::gather_src_stages(dependency);
const auto dst_stages = v1_utils::gather_dst_stages(dependency);
const auto memory_barriers = v1_utils::get_memory_barriers(dependency);
const auto image_memory_barriers = v1_utils::get_image_memory_barriers(dependency);
const auto buffer_memory_barriers = v1_utils::get_buffer_memory_barriers(dependency);
vkCmdWaitEvents(cmd,
1, &m_vk_event,
src_stages, dst_stages,
::size32(memory_barriers), memory_barriers.data(),
::size32(buffer_memory_barriers), buffer_memory_barriers.data(),
::size32(image_memory_barriers), image_memory_barriers.data());
}
const auto src_stages = v1_utils::gather_src_stages(dependency);
const auto dst_stages = v1_utils::gather_dst_stages(dependency);
const auto memory_barriers = v1_utils::get_memory_barriers(dependency);
const auto image_memory_barriers = v1_utils::get_image_memory_barriers(dependency);
const auto buffer_memory_barriers = v1_utils::get_buffer_memory_barriers(dependency);
vkCmdWaitEvents(cmd,
1, &m_vk_event,
src_stages, dst_stages,
::size32(memory_barriers), memory_barriers.data(),
::size32(buffer_memory_barriers), buffer_memory_barriers.data(),
::size32(image_memory_barriers), image_memory_barriers.data());
}
void event::reset() const
{
if (m_backend == sync_backend::gpu_label)
{
m_label->reset();
return;
}
vkResetEvent(*m_device, m_vk_event);
}
@ -292,11 +354,11 @@ namespace vk
return vkGetEventStatus(*m_device, m_vk_event);
}
gpu_debug_marker_pool::gpu_debug_marker_pool(const vk::render_device& dev, u32 count)
: m_count(count), pdev(&dev)
gpu_label_pool::gpu_label_pool(const vk::render_device& dev, u32 count)
: pdev(&dev), m_count(count)
{}
std::tuple<VkBuffer, u64, volatile u32*> gpu_debug_marker_pool::allocate()
std::tuple<VkBuffer, u64, volatile u32*> gpu_label_pool::allocate()
{
if (!m_buffer || m_offset >= m_count)
{
@ -308,7 +370,7 @@ namespace vk
return { m_buffer->value, out_offset * 4, m_mapped + out_offset };
}
void gpu_debug_marker_pool::create_impl()
void gpu_label_pool::create_impl()
{
if (m_buffer)
{
@ -331,32 +393,68 @@ namespace vk
m_offset = 0;
}
gpu_debug_marker::gpu_debug_marker(gpu_debug_marker_pool& pool, std::string message)
: m_message(std::move(message)), m_device(*pool.pdev)
gpu_label::gpu_label(gpu_label_pool& pool)
{
std::tie(m_buffer, m_buffer_offset, m_value) = pool.allocate();
*m_value = 0xCAFEBABE;
std::tie(m_buffer_handle, m_buffer_offset, m_ptr) = pool.allocate();
reset();
}
gpu_label::~gpu_label()
{
m_ptr = nullptr;
m_buffer_offset = 0;
m_buffer_handle = VK_NULL_HANDLE;
}
void gpu_label::signal(const vk::command_buffer& cmd, const VkDependencyInfoKHR& dependency)
{
const auto src_stages = v1_utils::gather_src_stages(dependency);
auto dst_stages = v1_utils::gather_dst_stages(dependency);
auto memory_barriers = v1_utils::get_memory_barriers(dependency);
const auto image_memory_barriers = v1_utils::get_image_memory_barriers(dependency);
const auto buffer_memory_barriers = v1_utils::get_buffer_memory_barriers(dependency);
// Ensure wait before filling the label
dst_stages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
if (memory_barriers.empty())
{
const VkMemoryBarrier signal_barrier =
{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT
};
memory_barriers.push_back(signal_barrier);
}
else
{
auto& barrier = memory_barriers.front();
barrier.dstAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT;
}
vkCmdPipelineBarrier(cmd, src_stages, dst_stages, dependency.dependencyFlags,
::size32(memory_barriers), memory_barriers.data(),
::size32(buffer_memory_barriers), buffer_memory_barriers.data(),
::size32(image_memory_barriers), image_memory_barriers.data());
vkCmdFillBuffer(cmd, m_buffer_handle, m_buffer_offset, 4, label_constants::set_);
}
gpu_debug_marker::gpu_debug_marker(gpu_debug_marker_pool& pool, std::string message)
: gpu_label(pool), m_message(std::move(message))
{}
gpu_debug_marker::~gpu_debug_marker()
{
if (!m_printed)
{
dump();
}
m_value = nullptr;
}
void gpu_debug_marker::signal(const command_buffer& cmd, VkPipelineStageFlags stages, VkAccessFlags access)
{
insert_global_memory_barrier(cmd, stages, VK_PIPELINE_STAGE_TRANSFER_BIT, access, VK_ACCESS_TRANSFER_WRITE_BIT);
vkCmdFillBuffer(cmd, m_buffer, m_buffer_offset, 4, 0xDEADBEEF);
}
void gpu_debug_marker::dump()
{
if (*m_value == 0xCAFEBABE)
if (*m_ptr == gpu_label::label_constants::reset_)
{
rsx_log.error("DEBUG MARKER NOT REACHED: %s", m_message);
}
@ -366,7 +464,7 @@ namespace vk
void gpu_debug_marker::dump() const
{
if (*m_value == 0xCAFEBABE)
if (*m_ptr == gpu_label::label_constants::reset_)
{
rsx_log.error("DEBUG MARKER NOT REACHED: %s", m_message);
}
@ -376,18 +474,6 @@ namespace vk
}
}
// FIXME
static std::unique_ptr<gpu_debug_marker_pool> g_gpu_debug_marker_pool;
gpu_debug_marker_pool& get_shared_marker_pool(const vk::render_device& dev)
{
if (!g_gpu_debug_marker_pool)
{
g_gpu_debug_marker_pool = std::make_unique<gpu_debug_marker_pool>(dev, 65536);
}
return *g_gpu_debug_marker_pool;
}
void gpu_debug_marker::insert(
const vk::render_device& dev,
const vk::command_buffer& cmd,
@ -395,8 +481,24 @@ namespace vk
VkPipelineStageFlags stages,
VkAccessFlags access)
{
auto result = std::make_unique<gpu_debug_marker>(get_shared_marker_pool(dev), message);
result->signal(cmd, stages, access);
VkMemoryBarrier2KHR barrier =
{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
.srcStageMask = stages,
.srcAccessMask = access,
.dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT
};
VkDependencyInfoKHR dependency =
{
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
.memoryBarrierCount = 1,
.pMemoryBarriers = &barrier
};
auto result = std::make_unique<gpu_debug_marker>(globals::get_shared_marker_pool(dev), message);
result->signal(cmd, dependency);
vk::get_resource_manager()->dispose(result);
}

View File

@ -9,6 +9,7 @@
namespace vk
{
class command_buffer;
class gpu_label;
class image;
enum class sync_domain
@ -55,9 +56,21 @@ namespace vk
class event
{
enum class sync_backend
{
events_v1,
events_v2,
gpu_label
};
const vk::render_device* m_device = nullptr;
sync_backend m_backend = sync_backend::events_v1;
// For events_v1 and events_v2
VkEvent m_vk_event = VK_NULL_HANDLE;
bool v2 = true;
// For gpu_label
std::unique_ptr<gpu_label> m_label{};
void resolve_dependencies(const command_buffer& cmd, const VkDependencyInfoKHR& dependency);
@ -88,38 +101,60 @@ namespace vk
operator VkSemaphore() const;
};
class gpu_debug_marker_pool
// Custom primitives
class gpu_label_pool
{
std::unique_ptr<buffer> m_buffer;
public:
gpu_label_pool(const vk::render_device& dev, u32 count);
std::tuple<VkBuffer, u64, volatile u32*> allocate();
private:
void create_impl();
const vk::render_device* pdev = nullptr;
std::unique_ptr<buffer> m_buffer{};
volatile u32* m_mapped = nullptr;
u64 m_offset = 0;
u32 m_count = 0;
void create_impl();
public:
gpu_debug_marker_pool(const vk::render_device& dev, u32 count);
std::tuple<VkBuffer, u64, volatile u32*> allocate();
const vk::render_device* pdev = nullptr;
};
class gpu_debug_marker
class gpu_label
{
protected:
enum label_constants : u32
{
set_ = 0xCAFEBABE,
reset_ = 0xDEADBEEF
};
VkBuffer m_buffer_handle = VK_NULL_HANDLE;
u32 m_buffer_offset = 0;
volatile u32* m_ptr = nullptr;
public:
gpu_label(gpu_label_pool& pool);
virtual ~gpu_label();
void signal(const vk::command_buffer& cmd, const VkDependencyInfoKHR& dependency);
void reset() { *m_ptr = label_constants::reset_; }
bool signaled() const { return label_constants::set_ == *m_ptr; }
};
class gpu_debug_marker_pool : public gpu_label_pool
{
using gpu_label_pool::gpu_label_pool;
};
class gpu_debug_marker : public gpu_label
{
std::string m_message;
bool m_printed = false;
VkDevice m_device = VK_NULL_HANDLE;
VkBuffer m_buffer = VK_NULL_HANDLE;
u64 m_buffer_offset = 0;
volatile u32* m_value = nullptr;
public:
gpu_debug_marker(gpu_debug_marker_pool& pool, std::string message);
~gpu_debug_marker();
gpu_debug_marker(const event&) = delete;
void signal(const command_buffer& cmd, VkPipelineStageFlags stages, VkAccessFlags access);
void dump();
void dump() const;