1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 02:32:36 +01:00

gl: Avoid UBO/SSBO binding index collisions

- Some drivers don't like this. Actually only RADV.
- Almost all GPUs going back 15 years have a large number of UBO slots but limited SSBO slots.
  Move UBO slots up as we have tons more headroom there.
This commit is contained in:
kd-11 2022-09-18 01:54:07 +03:00 committed by kd-11
parent c4db65cc08
commit df36c44bc2
6 changed files with 41 additions and 11 deletions

View File

@ -4,6 +4,26 @@
namespace gl
{
// Scoped guard: binds an image view to a texture layer on construction and,
// on destruction, re-binds whatever the command context reported as bound to
// that layer/target beforehand.
//
// NOTE: the guard has to be stored in a named local for as long as the binding
// is required. An unnamed temporary is destroyed at the end of its own
// statement, which would undo the bind immediately.
struct bind_image_view_safe
{
	GLuint m_layer;                   // Layer (texture unit index) the view is bound to
	GLenum m_target;                  // Target reported by the view
	GLuint m_value;                   // Binding recorded for (layer, target) before the bind
	gl::command_context& m_commands;  // Context used to restore the binding

	// Records the current binding for (layer, value->target()), then binds 'value' to 'layer'.
	// 'value' is dereferenced unconditionally and must not be null.
	bind_image_view_safe(gl::command_context& cmd, GLuint layer, gl::texture_view* value)
		: m_layer(layer), m_target(value->target()), m_commands(cmd)
	{
		m_value = cmd->get_bound_texture(layer, m_target);
		value->bind(cmd, layer);
	}

	// Non-copyable: a copy would run the restore in the destructor a second time.
	bind_image_view_safe(const bind_image_view_safe&) = delete;
	bind_image_view_safe& operator=(const bind_image_view_safe&) = delete;

	// Puts the previously recorded binding back.
	~bind_image_view_safe()
	{
		m_commands->bind_texture(m_layer, m_target, m_value);
	}
};
void compute_task::initialize()
{
// Set up optimal kernel size
@ -311,11 +331,13 @@ namespace gl
m_sampler.apply_defaults();
}
// This method is callable in sensitive code and must restore the GL state on exit
gl::saved_sampler_state save_0(GL_COMPUTE_BUFFER_SLOT(0), m_sampler);
gl::saved_sampler_state save_1(GL_COMPUTE_BUFFER_SLOT(1), m_sampler);
depth_view->bind(cmd, GL_COMPUTE_BUFFER_SLOT(0));
stencil_view->bind(cmd, GL_COMPUTE_BUFFER_SLOT(1));
gl::bind_image_view_safe(cmd, GL_COMPUTE_BUFFER_SLOT(0), depth_view);
gl::bind_image_view_safe(cmd, GL_COMPUTE_BUFFER_SLOT(1), stencil_view);
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(2), out_offset, row_pitch * 4 * region.height);
const int num_invocations = utils::aligned_div(region.width * region.height, optimal_kernel_size * optimal_group_size);
@ -360,9 +382,10 @@ namespace gl
m_sampler.apply_defaults();
}
// This method is callable in sensitive code and must restore the GL state on exit
gl::saved_sampler_state save(GL_COMPUTE_BUFFER_SLOT(0), m_sampler);
gl::bind_image_view_safe(cmd, GL_COMPUTE_BUFFER_SLOT(0), data_view);
data_view->bind(cmd, GL_COMPUTE_BUFFER_SLOT(0));
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(1), out_offset, row_pitch * 4 * region.height);
const int num_invocations = utils::aligned_div(region.width * region.height, optimal_kernel_size * optimal_group_size);
@ -380,7 +403,8 @@ namespace gl
const std::pair<std::string_view, std::string> repl_list[] =
{
{ "%set, ", "" },
{ "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0)) },
{ "%image_slot", std::to_string(GL_COMPUTE_IMAGE_SLOT(0)) },
{ "%ssbo_slot", std::to_string(GL_COMPUTE_BUFFER_SLOT(0)) },
{ "%ws", std::to_string(optimal_group_size) },
{ "%wks", std::to_string(optimal_kernel_size) }
};

View File

@ -31,7 +31,7 @@ std::string GLVertexDecompilerThread::compareFunction(COMPARE f, const std::stri
void GLVertexDecompilerThread::insertHeader(std::stringstream &OS)
{
OS << "#version 430\n";
OS << "layout(std140, binding = 0) uniform VertexContextBuffer\n";
OS << "layout(std140, binding = " << GL_VERTEX_PARAMS_BIND_SLOT << ") uniform VertexContextBuffer\n";
OS << "{\n";
OS << " mat4 scale_offset_mat;\n";
OS << " ivec4 user_clip_enabled[2];\n";
@ -42,7 +42,7 @@ void GLVertexDecompilerThread::insertHeader(std::stringstream &OS)
OS << " float z_far;\n";
OS << "};\n\n";
OS << "layout(std140, binding = 1) uniform VertexLayoutBuffer\n";
OS << "layout(std140, binding = " << GL_VERTEX_LAYOUT_BIND_SLOT << ") uniform VertexLayoutBuffer\n";
OS << "{\n";
OS << " uint vertex_base_index;\n";
OS << " uint vertex_index_offset;\n";
@ -66,7 +66,7 @@ void GLVertexDecompilerThread::insertConstants(std::stringstream& OS, const std:
{
if (PI.name.starts_with("vc["))
{
OS << "layout(std140, binding = 2) uniform VertexConstantsBuffer\n";
OS << "layout(std140, binding = " << GL_VERTEX_CONSTANT_BUFFERS_BIND_SLOT << ") uniform VertexConstantsBuffer\n";
OS << "{\n";
OS << " vec4 " << PI.name << ";\n";
OS << "};\n\n";

View File

@ -8,7 +8,7 @@
#define GL_STREAM_BUFFER_START (GL_STENCIL_MIRRORS_START + 16)
#define GL_TEMP_IMAGE_SLOT 31
#define UBO_SLOT(x) (x)
// UBO binding indices are shifted up by 8 so they do not overlap the SSBO binding indices.
// The argument is parenthesised so that expression arguments (e.g. shifts or ternaries)
// are evaluated before the offset is added.
#define UBO_SLOT(x) ((x) + 8)
#define SSBO_SLOT(x) (x)
#define GL_VERTEX_PARAMS_BIND_SLOT UBO_SLOT(0)

View File

@ -287,6 +287,12 @@ namespace gl
glUseProgram(program);
}
// Looks up the entry stored in bound_textures for the given layer and target.
// ensure() is used to check that 'layer' is below 48 before the lookup.
GLuint get_bound_texture(GLuint layer, GLenum target)
{
	ensure(layer < 48);

	auto& layer_bindings = bound_textures[layer];
	return layer_bindings[target];
}
void bind_texture(GLuint layer, GLenum target, GLuint name, GLboolean force = GL_FALSE)
{
ensure(layer < 48);

View File

@ -2,8 +2,8 @@ R"(
#version 450
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
#define SSBO_LOCATION(x) (x + %loc)
#define IMAGE_LOCATION(x) (x)
#define IMAGE_LOCATION(x) (x + %image_slot)
#define SSBO_LOCATION(x) (x + %ssbo_slot)
layout(%set, binding=IMAGE_LOCATION(0)) uniform writeonly restrict image2D output2D;

View File

@ -250,7 +250,7 @@ namespace vk
VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
const u32 src_queue_family = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? current_queue_family : VK_QUEUE_FAMILY_IGNORED;
const u32 dst_queue_family2 = info.sharingMode == VK_SHARING_MODE_EXCLUSIVE ? dst_queue_family : VK_QUEUE_FAMILY_IGNORED;
change_image_layout(src_queue_cmd, value, current_layout, new_layout, range, current_queue_family, dst_queue_family2, ~0u, 0u);
change_image_layout(src_queue_cmd, value, current_layout, new_layout, range, src_queue_family, dst_queue_family2, ~0u, 0u);
}
current_layout = new_layout;