mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-25 20:22:30 +01:00

rsx: Move the host job management into common code to share with GL

Authored by kd-11 on 2024-10-19 03:14:29 +03:00, committed by kd-11
parent 7fdfbe3c52
commit 6ce1816e3f
13 changed files with 251 additions and 53 deletions
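For orientation, here is a minimal sketch of how a backend might drive the shared rsx::RSXDMAWriter this commit introduces. The types and methods (RSXDMAWriter, host_dispatch_handler_t, host_gpu_write_op_t, register_handler, enqueue, update) come from the new header in the diff below; the function name, dispatch class value and memory source are illustrative only.

#include "Emu/RSX/Host/RSXDMAWriter.h"

#include <memory>

// Illustrative usage only; 'backend_host_memory' and dispatch class 1 are hypothetical.
void init_host_dma(void* backend_host_memory)
{
	// Placement-constructs an rsx::host_gpu_context_t inside backend-visible memory
	auto dma = std::make_unique<rsx::RSXDMAWriter>(backend_host_memory);

	// A handler returns true once it has consumed the job, false to have it retried later
	dma->register_handler(rsx::host_dispatch_handler_t
	{
		.dispatch_class = 1,
		.handler = [](const volatile rsx::host_gpu_context_t*, const rsx::host_gpu_write_op_t*)
		{
			return true;
		}
	});

	// Jobs queued here are drained by update(), which the common RSX code now calls
	// alongside zcull_ctrl->update() (see the RSXThread hunk further down)
	dma->enqueue({ .dispatch_class = 1, .userdata = nullptr });
	dma->update();
}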

View File

@ -3,6 +3,7 @@
#include "../Overlays/Shaders/shader_loading_dialog_native.h"
#include "GLGSRender.h"
#include "GLCompute.h"
#include "GLDMA.h"
#include "Emu/Memory/vm_locking.h"
#include "Emu/RSX/rsx_methods.h"
@ -180,6 +181,20 @@ void GLGSRender::on_init_thread()
backend_config.supports_normalized_barycentrics = false;
}
if (gl_caps.AMD_pinned_memory)
{
backend_config.supports_host_gpu_labels = true;
if (g_cfg.video.host_label_synchronization)
{
m_host_gpu_context_data = std::make_unique<gl::buffer>();
m_host_gpu_context_data->create(gl::buffer::target::array, 4096);
auto host_context_ptr = reinterpret_cast<rsx::host_gpu_context_t*>(m_host_gpu_context_data->map(0, 4096, gl::buffer::access::read));
m_host_dma_ctrl = std::make_unique<rsx::RSXDMAWriter>(host_context_ptr);
}
}
// Use industry standard resource alignment values as defaults
m_uniform_buffer_offset_align = 256;
m_min_texbuffer_alignment = 256;
@ -397,6 +412,7 @@ void GLGSRender::on_exit()
// TODO: Move these
gl::destroy_compute_tasks();
gl::destroy_overlay_passes();
gl::clear_dma_resources();
gl::destroy_global_texture_resources();
@ -407,6 +423,9 @@ void GLGSRender::on_exit()
m_prog_buffer.clear();
m_rtts.destroy();
m_host_dma_ctrl.reset();
m_host_gpu_context_data.reset();
for (auto &fbo : m_framebuffer_cache)
{
fbo.remove();

View File

@ -128,7 +128,7 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
GLProgramBuffer m_prog_buffer;
//buffer
// Draw Buffers
gl::fbo* m_draw_fbo = nullptr;
std::list<gl::framebuffer_holder> m_framebuffer_cache;
std::unique_ptr<gl::texture> m_flip_tex_color[2];
@ -137,7 +137,7 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
std::unique_ptr<gl::upscaler> m_upscaler;
output_scaling_mode m_output_scaling = output_scaling_mode::bilinear;
//vaos are mandatory for core profile
// VAOs are mandatory for core profile
gl::vao m_vao;
shared_mutex m_sampler_mutex;
@ -150,6 +150,9 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
// Occlusion query type, can be SAMPLES_PASSED or ANY_SAMPLES_PASSED
GLenum m_occlusion_type = GL_ANY_SAMPLES_PASSED;
// Host context for GPU-driven work
std::unique_ptr<gl::buffer> m_host_gpu_context_data;
public:
u64 get_cycles() final;

View File

@ -0,0 +1,67 @@
#include "stdafx.h"
#include "RSXDMAWriter.h"
#include "Utilities//Thread.h"
#include <util/asm.hpp>
namespace rsx
{
void RSXDMAWriter::update()
{
if (m_dispatch_handlers.empty())
{
m_job_queue.clear();
return;
}
while (!m_job_queue.empty())
{
const auto job = m_job_queue.front();
if (const auto dispatch = m_dispatch_handlers.find(job.dispatch_class);
dispatch == m_dispatch_handlers.end() || dispatch->second.handler(m_host_context_ptr, &job))
{
// No handler registered, or callback consumed the job
m_job_queue.pop_front();
continue;
}
// Dispatcher found and rejected the job. Stop, we'll try again later.
break;
}
}
void RSXDMAWriter::register_handler(host_dispatch_handler_t handler)
{
m_dispatch_handlers[handler.dispatch_class] = handler;
}
void RSXDMAWriter::deregister_handler(int dispatch_class)
{
m_dispatch_handlers.erase(dispatch_class);
}
void RSXDMAWriter::enqueue(const host_gpu_write_op_t& request)
{
m_job_queue.push_back(request);
}
void RSXDMAWriter::drain_label_queue()
{
if (!m_host_context_ptr)
{
return;
}
// FIXME: This is a busy wait; consider yielding to improve responsiveness on weak devices.
while (!m_host_context_ptr->in_flight_commands_completed())
{
utils::pause();
if (thread_ctrl::state() == thread_state::aborting)
{
break;
}
}
}
}

View File

@ -0,0 +1,115 @@
#pragma once
#include <util/types.hpp>
#include <unordered_map>
#include <functional>
#include <deque>
namespace rsx
{
struct host_gpu_context_t
{
u64 magic = 0xCAFEBABE;
u64 event_counter = 0;
u64 texture_load_request_event = 0;
u64 texture_load_complete_event = 0;
u64 last_label_acquire_event = 0;
u64 last_label_release2_event = 0;
u64 commands_complete_event = 0;
inline u64 inc_counter() volatile
{
// Workaround for volatile increment warning. GPU can see this value directly, but currently we do not modify it on the device.
event_counter = event_counter + 1;
return event_counter;
}
inline bool in_flight_commands_completed() const volatile
{
return last_label_release2_event == commands_complete_event;
}
inline bool texture_loads_completed() const volatile
{
// Return true if all texture load requests are done.
return texture_load_complete_event == texture_load_request_event;
}
inline bool has_unflushed_texture_loads() const volatile
{
return texture_load_request_event > last_label_release2_event;
}
inline u64 on_texture_load_acquire() volatile
{
texture_load_request_event = inc_counter();
return texture_load_request_event;
}
inline void on_texture_load_release() volatile
{
// Normally released by the host device, but implemented nonetheless for software fallback
texture_load_complete_event = texture_load_request_event;
}
inline u64 on_label_acquire() volatile
{
last_label_acquire_event = inc_counter();
return last_label_acquire_event;
}
inline void on_label_release() volatile
{
last_label_release2_event = last_label_acquire_event;
}
inline bool needs_label_release() const volatile
{
return last_label_acquire_event > last_label_release2_event;
}
};
struct host_gpu_write_op_t
{
int dispatch_class = 0;
void* userdata = nullptr;
};
struct host_dispatch_handler_t
{
int dispatch_class = 0;
std::function<bool(const volatile host_gpu_context_t*, const host_gpu_write_op_t*)> handler;
};
class RSXDMAWriter
{
public:
RSXDMAWriter(void* mem)
: m_host_context_ptr(new (mem)host_gpu_context_t)
{}
RSXDMAWriter(host_gpu_context_t* pctx)
: m_host_context_ptr(pctx)
{}
void update();
void register_handler(host_dispatch_handler_t handler);
void deregister_handler(int dispatch_class);
void enqueue(const host_gpu_write_op_t& request);
void drain_label_queue();
volatile host_gpu_context_t* host_ctx() const
{
return m_host_context_ptr;
}
private:
std::unordered_map<int, host_dispatch_handler_t> m_dispatch_handlers;
volatile host_gpu_context_t* m_host_context_ptr = nullptr;
std::deque<host_gpu_write_op_t> m_job_queue;
};
}
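The label helpers above form a small monotonic event protocol between the emulator and the host GPU. Below is a minimal CPU-only sketch of the intended flow, assuming only the header added in this commit; the function name is illustrative, and the device-side write is faked by assigning commands_complete_event directly (in practice a backend writes it from a submitted command buffer, as the Vulkan changes further down do with vkCmdUpdateBuffer).

#include "Emu/RSX/Host/RSXDMAWriter.h"

void label_flow_sketch()
{
	rsx::host_gpu_context_t ctx;

	// CPU: allocate a fresh event id before submitting the label write
	const u64 event_id = ctx.on_label_acquire();   // needs_label_release() is now true

	// Host GPU (faked here): the submitted commands retire and report the event id
	ctx.commands_complete_event = event_id;

	// CPU: mark the acquire as flushed; in_flight_commands_completed() is now true,
	// so RSXDMAWriter::drain_label_queue() would return immediately
	ctx.on_label_release();
}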

View File

@ -1162,6 +1162,7 @@ namespace rsx
// Update other sub-units
zcull_ctrl->update(this);
m_host_dma_ctrl->update();
}
// Execute FIFO queue

View File

@ -35,6 +35,8 @@
#include "NV47/FW/GRAPH_backend.h"
#include "Host/RSXDMAWriter.h"
extern atomic_t<bool> g_user_asked_for_frame_capture;
extern atomic_t<bool> g_disable_frame_limit;
extern rsx::frame_trace_data frame_debug;
@ -212,6 +214,9 @@ namespace rsx
// Context
context* m_ctx = nullptr;
// Host DMA
std::unique_ptr<RSXDMAWriter> m_host_dma_ctrl;
public:
atomic_t<u64> new_get_put = u64{umax};
u32 restore_point = 0;

View File

@ -867,8 +867,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0,
VMM_ALLOCATION_POOL_SYSTEM);
m_host_data_ptr = new (m_host_object_data->map(0, 0x100000)) vk::host_data_t();
ensure(m_host_data_ptr->magic == 0xCAFEBABE);
m_host_dma_ctrl = std::make_unique<rsx::RSXDMAWriter>(m_host_object_data->map(0, 0x10000));
}
else
{
@ -1784,6 +1783,11 @@ void VKGSRender::flush_command_queue(bool hard_sync, bool do_not_switch)
m_current_command_buffer->begin();
}
std::pair<volatile vk::host_data_t*, VkBuffer> VKGSRender::map_host_object_data() const
{
return { m_host_dma_ctrl->host_ctx(), m_host_object_data->value};
}
bool VKGSRender::release_GCM_label(u32 address, u32 args)
{
if (!backend_config.supports_host_gpu_labels)
@ -1791,25 +1795,13 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
return false;
}
auto drain_label_queue = [this]()
{
while (m_host_data_ptr->last_label_release_event > m_host_data_ptr->commands_complete_event)
{
utils::pause();
auto host_ctx = ensure(m_host_dma_ctrl->host_ctx());
if (thread_ctrl::state() == thread_state::aborting)
{
break;
}
}
};
ensure(m_host_data_ptr);
if (m_host_data_ptr->texture_load_complete_event == m_host_data_ptr->texture_load_request_event)
if (host_ctx->texture_loads_completed())
{
// All texture loads already seen by the host GPU
// Wait for all previously submitted labels to be flushed
drain_label_queue();
m_host_dma_ctrl->drain_label_queue();
return false;
}
@ -1821,13 +1813,13 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
// NVIDIA GPUs can disappoint when DMA blocks straddle VirtualAlloc boundaries.
// Take the L and try the fallback.
rsx_log.warning("Host label update at 0x%x was not possible.", address);
drain_label_queue();
m_host_dma_ctrl->drain_label_queue();
return false;
}
m_host_data_ptr->last_label_release_event = m_host_data_ptr->inc_counter();
const auto release_event_id = host_ctx->on_label_acquire();
if (m_host_data_ptr->texture_load_request_event > m_host_data_ptr->last_label_submit_event)
if (host_ctx->has_unflushed_texture_loads())
{
if (vk::is_renderpass_open(*m_current_command_buffer))
{
@ -1842,14 +1834,15 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
auto cmd = m_secondary_cb_list.next();
cmd->begin();
vkCmdUpdateBuffer(*cmd, mapping.second->value, mapping.first, 4, &write_data);
vkCmdUpdateBuffer(*cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::commands_complete_event), 8, const_cast<u64*>(&m_host_data_ptr->last_label_release_event));
vkCmdUpdateBuffer(*cmd, m_host_object_data->value, ::offset32(&vk::host_data_t::commands_complete_event), 8, &release_event_id);
cmd->end();
vk::queue_submit_t submit_info = { m_device->get_graphics_queue(), nullptr };
cmd->submit(submit_info);
m_host_data_ptr->last_label_submit_event = m_host_data_ptr->last_label_release_event;
host_ctx->on_label_release();
}
return true;
}
@ -2516,15 +2509,15 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
m_current_command_buffer->flags &= ~vk::command_buffer::cb_has_open_query;
}
if (m_host_data_ptr && m_host_data_ptr->last_label_release_event > m_host_data_ptr->last_label_submit_event)
if (m_host_dma_ctrl && m_host_dma_ctrl->host_ctx()->needs_label_release())
{
vkCmdUpdateBuffer(*m_current_command_buffer,
m_host_object_data->value,
::offset32(&vk::host_data_t::commands_complete_event),
sizeof(u64),
const_cast<u64*>(&m_host_data_ptr->last_label_release_event));
const_cast<u64*>(&m_host_dma_ctrl->host_ctx()->last_label_acquire_event));
m_host_data_ptr->last_label_submit_event = m_host_data_ptr->last_label_release_event;
m_host_dma_ctrl->host_ctx()->on_label_release();
}
m_current_command_buffer->end();

View File

@ -1,6 +1,4 @@
#pragma once
#include "Emu/RSX/GSRender.h"
#include "Emu/Cell/timers.hpp"
#include "upscalers/upscaling.h"
@ -19,15 +17,23 @@
#include "VKFramebuffer.h"
#include "VKShaderInterpreter.h"
#include "VKQueryPool.h"
#include "../GCM.h"
#include "util/asm.hpp"
#include "Emu/RSX/GCM.h"
#include "Emu/RSX/GSRender.h"
#include "Emu/RSX/Host/RSXDMAWriter.h"
#include <thread>
#include <optional>
using namespace vk::vmm_allocation_pool_; // clang workaround.
using namespace vk::upscaling_flags_; // ditto
namespace vk
{
using host_data_t = rsx::host_gpu_context_t;
}
class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
private:
@ -118,7 +124,6 @@ private:
vk::command_buffer_chain<VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
vk::command_buffer_chunk* m_current_command_buffer = nullptr;
volatile vk::host_data_t* m_host_data_ptr = nullptr;
std::unique_ptr<vk::buffer> m_host_object_data;
vk::descriptor_pool m_descriptor_pool;
@ -274,7 +279,7 @@ public:
void end_conditional_rendering() override;
// Host sync object
inline std::pair<volatile vk::host_data_t*, VkBuffer> map_host_object_data() { return { m_host_data_ptr, m_host_object_data->value }; }
std::pair<volatile vk::host_data_t*, VkBuffer> map_host_object_data() const;
// GRAPH backend
void patch_transform_constants(rsx::context* ctx, u32 index, u32 count) override;

View File

@ -6,6 +6,7 @@
#include "Emu/RSX/Common/simple_array.hpp"
#include "Emu/RSX/rsx_utils.h"
#include "Emu/RSX/rsx_cache.h"
#include "Utilities/mutex.h"
#include "util/asm.hpp"

View File

@ -1246,8 +1246,8 @@ namespace vk
// Queue a sync update on the CB doing the load
auto [host_data, host_buffer] = static_cast<VKGSRender*>(rsxthr)->map_host_object_data();
ensure(host_data);
const auto event_id = host_data->inc_counter();
host_data->texture_load_request_event = event_id;
const auto event_id = host_data->on_texture_load_acquire();
vkCmdUpdateBuffer(cmd2, host_buffer, ::offset32(&vk::host_data_t::texture_load_complete_event), sizeof(u64), &event_id);
}
}

View File

@ -18,25 +18,6 @@ namespace vk
gpu = 1
};
struct host_data_t // Pick a better name
{
u64 magic = 0xCAFEBABE;
u64 event_counter = 0;
u64 texture_load_request_event = 0;
u64 texture_load_complete_event = 0;
u64 last_label_release_event = 0;
u64 last_label_submit_event = 0;
u64 commands_complete_event = 0;
u64 last_label_request_timestamp = 0;
inline u64 inc_counter() volatile
{
// Workaround for volatile increment warning. GPU can see this value directly, but currently we do not modify it on the device.
event_counter = event_counter + 1;
return event_counter;
}
};
struct fence
{
atomic_t<bool> flushed = false;

View File

@ -104,6 +104,7 @@
<ClCompile Include="Emu\perf_monitor.cpp" />
<ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp" />
<ClCompile Include="Emu\RSX\NV47\FW\draw_call.cpp" />
<ClCompile Include="Emu\RSX\NV47\FW\reg_context.cpp" />
<ClCompile Include="Emu\RSX\NV47\HW\common.cpp" />
@ -617,6 +618,7 @@
<ClInclude Include="Emu\RSX\Core\RSXDisplay.h" />
<ClInclude Include="Emu\RSX\Core\RSXReservationLock.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXVertexTypes.h" />
<ClInclude Include="Emu\RSX\Host\RSXDMAWriter.h" />
<ClInclude Include="Emu\RSX\NV47\FW\draw_call.hpp" />
<ClInclude Include="Emu\RSX\NV47\FW\draw_call.inc.h" />
<ClInclude Include="Emu\RSX\NV47\FW\GRAPH_backend.h" />

View File

@ -1300,6 +1300,9 @@
<ClCompile Include="Emu\RSX\gcm_enums.cpp">
<Filter>Emu\GPU\RSX\NV47\FW</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Host\RSXDMAWriter.cpp">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -2620,6 +2623,9 @@
<ClInclude Include="Emu\RSX\color_utils.h">
<Filter>Emu\GPU\RSX\Utils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Host\RSXDMAWriter.h">
<Filter>Emu\GPU\RSX\Host Mini-Driver</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">