From 1b305bf789f95d18f382249ac972cbc73f4f9717 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 28 May 2022 22:28:40 +0300 Subject: [PATCH] gl: Workaround for poor AMD OpenGL performance - Turns out the AMD driver really hates it if you render with a mapped index buffer. The driver internally seems to make a copy of the consumed indices and uses that. Very slow. I was able to isolate this after observing that glDrawArrays is not entirely shit, but glDrawElements duration scaled linearly with the number of vertices. --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 4 +- rpcs3/Emu/RSX/GL/GLHelpers.h | 69 +++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index c9904a9e16..a30b477c45 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -236,14 +236,14 @@ void GLGSRender::on_init_thread() m_vertex_env_buffer = std::make_unique(); m_texture_parameters_buffer = std::make_unique(); m_vertex_layout_buffer = std::make_unique(); - m_index_ring_buffer = std::make_unique(); + m_index_ring_buffer = gl_caps.vendor_AMD ? 
std::make_unique() : std::make_unique(); m_vertex_instructions_buffer = std::make_unique(); m_fragment_instructions_buffer = std::make_unique(); m_raster_env_ring_buffer = std::make_unique(); } m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000); - m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000); + m_index_ring_buffer->create(gl::buffer::target::element_array, 16 * 0x100000); m_transform_constants_buffer->create(gl::buffer::target::uniform, 64 * 0x100000); m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); m_fragment_env_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index c91da6a9fb..6d9e605c4b 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -812,6 +812,11 @@ namespace gl public: + virtual void bind() + { + buffer::bind(); + } + virtual void recreate(GLsizeiptr size, const void* data = nullptr) { if (m_id) @@ -890,6 +895,8 @@ namespace gl virtual void unmap() {} + virtual void flush() {} + //Notification of a draw command virtual void notify() { @@ -1011,6 +1018,68 @@ namespace gl void notify() override {} }; + // A non-persistent ring buffer + // Internally maps and unmaps data. 
Uses persistent storage just like the regular persistent variant + // Works around drivers that have issues using mapped data for specific sources (e.g. AMD proprietary driver with index buffers). Each allocation is mapped on demand; flush() unmaps it again and is invoked by bind(), unmap() and the next allocation + class transient_ring_buffer : public ring_buffer + { + bool dirty = false; + + void* map_internal(u32 offset, u32 length) + { + flush(); + + dirty = true; + return DSA_CALL2_RET(MapNamedBufferRange, m_id, offset, length, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + } + + public: + + void bind() override + { + flush(); + buffer::bind(); + } + + void recreate(GLsizeiptr size, const void* data = nullptr) override + { + if (m_id) + { + m_fence.wait_for_signal(); + remove(); + } + + buffer::create(); + save_binding_state save(current_target(), *this); + DSA_CALL2(NamedBufferStorage, m_id, size, data, GL_MAP_WRITE_BIT); + + m_data_loc = 0; + m_size = ::narrow<u32>(size); + m_memory_type = memory_type::host_visible; + } + + std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override + { + ensure(m_memory_mapping == nullptr); + const auto allocation = ring_buffer::alloc_from_heap(alloc_size, alignment); + return { map_internal(allocation.second, alloc_size), allocation.second }; + } + + void flush() override + { + if (dirty) + { + buffer::unmap(); + dirty = false; + } + } + + void unmap() override + { + flush(); + } + }; + class buffer_view { buffer* m_buffer = nullptr;