1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-23 03:02:53 +01:00

rsx/gl/vk: Improvements

- gl: Do not call makeCurrent every flip - it is already called in set_current()
- gl: Improve ring buffer behaviour; use sliding window to view buffers larger than maximum viewable hardware range
  NV hardware can only view 128M at a time
- gl/vk: Bump transform constant heap size. When lots of draw calls are issued, the heap is exhausted very fast (8k per draw)
- gl: Remove CLIENT_STORAGE_BIT from ring buffers. Performance is marginally better without this flag (at least on windows)
This commit is contained in:
kd-11 2018-02-22 11:13:01 +03:00
parent 07cbf3da48
commit 6b23e733d0
6 changed files with 120 additions and 32 deletions

View File

@ -736,14 +736,16 @@ void GLGSRender::on_init_thread()
m_index_ring_buffer.reset(new gl::ring_buffer());
}
m_attrib_ring_buffer->create(gl::buffer::target::texture, std::min<GLsizeiptr>(m_max_texbuffer_size, 256 * 0x100000));
m_index_ring_buffer->create(gl::buffer::target::element_array, std::min<GLsizeiptr>(m_max_texbuffer_size, 64 * 0x100000));
m_transform_constants_buffer->create(gl::buffer::target::uniform, std::min<GLsizeiptr>(m_max_texbuffer_size, 16 * 0x100000));
m_fragment_constants_buffer->create(gl::buffer::target::uniform, std::min<GLsizeiptr>(m_max_texbuffer_size, 16 * 0x100000));
m_vertex_state_buffer->create(gl::buffer::target::uniform, std::min<GLsizeiptr>(m_max_texbuffer_size, 16 * 0x100000));
m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000);
m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000);
m_transform_constants_buffer->create(gl::buffer::target::uniform, 64 * 0x100000);
m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, 0, (u32)m_attrib_ring_buffer->size());
m_gl_volatile_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, 0, (u32)m_attrib_ring_buffer->size());
m_persistent_stream_view.update(m_attrib_ring_buffer.get(), 0, m_max_texbuffer_size);
m_volatile_stream_view.update(m_attrib_ring_buffer.get(), 0, m_max_texbuffer_size);
m_gl_persistent_stream_buffer.copy_from(m_persistent_stream_view);
m_gl_volatile_stream_buffer.copy_from(m_volatile_stream_view);
m_vao.element_array_buffer = *m_index_ring_buffer;

View File

@ -279,6 +279,8 @@ private:
gl::texture_cache m_gl_texture_cache;
gl::buffer_view m_persistent_stream_view;
gl::buffer_view m_volatile_stream_view;
gl::texture m_gl_persistent_stream_buffer;
gl::texture m_gl_volatile_stream_buffer;

View File

@ -87,6 +87,7 @@ namespace gl
bool initialized = false;
bool vendor_INTEL = false;
bool vendor_AMD = false;
bool vendor_NVIDIA = false;
void initialize()
{
@ -104,35 +105,35 @@ namespace gl
if (ext_name == "GL_ARB_shader_draw_parameters")
{
ARB_shader_draw_parameters_supported = true;
find_count --;
find_count--;
continue;
}
if (ext_name == "GL_EXT_direct_state_access")
{
EXT_dsa_supported = true;
find_count --;
find_count--;
continue;
}
if (ext_name == "GL_ARB_direct_state_access")
{
ARB_dsa_supported = true;
find_count --;
find_count--;
continue;
}
if (ext_name == "GL_ARB_buffer_storage")
{
ARB_buffer_storage_supported = true;
find_count --;
find_count--;
continue;
}
if (ext_name == "GL_ARB_texture_buffer_object")
{
ARB_texture_buffer_supported = true;
find_count --;
find_count--;
continue;
}
@ -195,6 +196,10 @@ namespace gl
if (!EXT_dsa_supported && glGetTextureImageEXT && glTextureBufferRangeEXT)
EXT_dsa_supported = true;
}
else if (vendor_string.find("nvidia") != std::string::npos)
{
vendor_NVIDIA = true;
}
#ifdef _WIN32
else if (vendor_string.find("amd") != std::string::npos || vendor_string.find("ati") != std::string::npos)
{
@ -864,7 +869,7 @@ namespace gl
buffer::create();
glBindBuffer((GLenum)m_target, m_id);
glBufferStorage((GLenum)m_target, size, data, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_CLIENT_STORAGE_BIT | GL_MAP_COHERENT_BIT);
glBufferStorage((GLenum)m_target, size, data, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
m_memory_mapping = glMapBufferRange((GLenum)m_target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
verify(HERE), m_memory_mapping != nullptr;
@ -886,15 +891,19 @@ namespace gl
if ((offset + alloc_size) > m_size)
{
if (!m_fence.is_empty())
{
m_fence.wait_for_signal();
}
else
{
LOG_ERROR(RSX, "OOM Error: Ring buffer was likely being used without notify() being called");
glFinish();
}
m_data_loc = 0;
offset = 0;
}
if (!m_data_loc)
m_fence.reset();
//Align data loc to 256; allows some "guard" region so we don't trample our own data inadvertently
m_data_loc = align(offset + alloc_size, 256);
return std::make_pair(((char*)m_memory_mapping) + offset, offset);
@ -928,7 +937,8 @@ namespace gl
//Notification of a draw command
virtual void notify()
{
if (m_fence.is_empty())
//Insert fence about 25% into the buffer
if (m_fence.is_empty() && (m_data_loc > (m_size >> 2)))
m_fence.reset();
}
};
@ -1046,6 +1056,69 @@ namespace gl
void notify() override {}
};
class buffer_view
{
buffer* m_buffer = nullptr;
u32 m_offset = 0;
u32 m_range = 0;
GLenum m_format = GL_R8UI;
public:
buffer_view(buffer *_buffer, u32 offset, u32 range, GLenum format = GL_R8UI)
: m_buffer(_buffer), m_offset(offset), m_range(range), m_format(format)
{}
buffer_view()
{}
void update(buffer *_buffer, u32 offset, u32 range, GLenum format = GL_R8UI)
{
m_buffer = _buffer;
m_offset = offset;
m_range = range;
m_format = format;
}
u32 offset() const
{
return m_offset;
}
u32 range() const
{
return m_range;
}
u32 format() const
{
return m_format;
}
buffer* buffer() const
{
return m_buffer;
}
bool in_range(u32 address, u32 size, u32& new_offset) const
{
if (address < m_offset)
return false;
const u32 _offset = address - m_offset;
if (m_range < _offset)
return false;
const auto remaining = m_range - _offset;
if (size <= remaining)
{
new_offset = _offset;
return true;
}
return false;
}
};
class vao
{
template<buffer::target BindId, uint GetStateId>
@ -1681,6 +1754,11 @@ namespace gl
__glcheck glTextureBufferRange(id(), gl_format_type, buf.id(), offset, length);
}
void copy_from(buffer_view &view)
{
copy_from(*view.buffer(), view.format(), view.offset(), view.range());
}
void copy_from(const buffer& buf, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings)
{
buffer::save_binding_state save_buffer(buffer::target::pixel_unpack, buf);
@ -2765,17 +2843,4 @@ namespace gl
set_id(0);
}
};
//Exposes an already existing object id through the buffer interface.
//NOTE(review): the destructor zeroes the id before the base class destructor runs,
//presumably so that the base does not release an object this wrapper does not own -
//confirm against the buffer destructor.
class buffer_view : public buffer
{
public:
//Hands the given id straight to the buffer(GLuint) constructor
buffer_view(GLuint id) : buffer(id)
{
}
~buffer_view()
{
//Clear the stored id on destruction
set_id(0);
}
};
}

View File

@ -233,12 +233,32 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
m_vertex_cache->store_range(storage_address, GL_R8UI, required.first, persistent_mapping.second);
}
}
if (!m_persistent_stream_view.in_range(upload_info.persistent_mapping_offset, required.first, upload_info.persistent_mapping_offset))
{
const size_t view_size = ((upload_info.persistent_mapping_offset + m_max_texbuffer_size) > m_attrib_ring_buffer->size()) ?
(m_attrib_ring_buffer->size() - upload_info.persistent_mapping_offset) : m_max_texbuffer_size;
m_persistent_stream_view.update(m_attrib_ring_buffer.get(), upload_info.persistent_mapping_offset, (u32)view_size);
m_gl_persistent_stream_buffer.copy_from(m_persistent_stream_view);
upload_info.persistent_mapping_offset = 0;
}
}
if (required.second > 0)
{
volatile_mapping = m_attrib_ring_buffer->alloc_from_heap(required.second, m_min_texbuffer_alignment);
upload_info.volatile_mapping_offset = volatile_mapping.second;
if (!m_volatile_stream_view.in_range(upload_info.volatile_mapping_offset, required.second, upload_info.volatile_mapping_offset))
{
const size_t view_size = ((upload_info.volatile_mapping_offset + m_max_texbuffer_size) > m_attrib_ring_buffer->size()) ?
(m_attrib_ring_buffer->size() - upload_info.volatile_mapping_offset) : m_max_texbuffer_size;
m_volatile_stream_view.update(m_attrib_ring_buffer.get(), upload_info.volatile_mapping_offset, (u32)view_size);
m_gl_volatile_stream_buffer.copy_from(m_volatile_stream_view);
upload_info.volatile_mapping_offset = 0;
}
}
//Write all the data

View File

@ -40,7 +40,7 @@ namespace vk
//NOTE: Texture uploads can be huge, up to 16MB for a single texture (4096x4096px)
#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256
#define VK_UBO_RING_BUFFER_SIZE_M 64
#define VK_UBO_RING_BUFFER_SIZE_M 128
#define VK_INDEX_RING_BUFFER_SIZE_M 64
#define VK_MAX_ASYNC_CB_COUNT 64

View File

@ -69,6 +69,5 @@ void gl_gs_frame::flip(draw_context_t context, bool skip_frame)
//Do not swap buffers if frame skip is active
if (skip_frame) return;
((QOpenGLContext*)context)->makeCurrent(this);
((QOpenGLContext*)context)->swapBuffers(this);
}