1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-23 03:02:53 +01:00

vk: Improve overlay passes for resolve/unresolve

- Refactor overlays and resolve passes to support use of push constants instead of relying on buffer map/unmap
- Add support for nvidia resolve (NV is the only vendor not supporting shader_stencil_export)
This commit is contained in:
kd-11 2019-06-08 20:26:27 +03:00 committed by kd-11
parent c655036920
commit ca82dd7200
5 changed files with 278 additions and 94 deletions

View File

@ -2015,10 +2015,6 @@ void VKGSRender::clear_surface(u32 mask)
else
fmt::throw_exception("Unreachable" HERE);
}
//Fush unconditionally - parameters might not persist
//TODO: Better parameter management for overlay passes
flush_command_queue();
}
for (auto &rtt : m_rtts.m_bound_render_targets)

View File

@ -586,12 +586,14 @@ namespace vk
std::unordered_map<VkFormat, VkFormatProperties> m_format_properties;
gpu_formats_support m_formats_support{};
gpu_shader_types_support m_shader_types_support{};
bool m_stencil_export_support = false;
std::unique_ptr<mem_allocator_base> m_allocator;
VkDevice dev = VK_NULL_HANDLE;
void get_physical_device_features(VkPhysicalDeviceFeatures& features)
{
supported_extensions instance_extensions(supported_extensions::instance);
supported_extensions device_extensions(supported_extensions::device, nullptr, pgpu);
if (!instance_extensions.is_supported("VK_KHR_get_physical_device_properties2"))
{
@ -599,8 +601,6 @@ namespace vk
}
else
{
supported_extensions device_extensions(supported_extensions::device, nullptr, pgpu);
VkPhysicalDeviceFeatures2KHR features2;
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = nullptr;
@ -621,6 +621,8 @@ namespace vk
m_shader_types_support.allow_int8 = !!shader_support_info.shaderInt8;
features = features2.features;
}
m_stencil_export_support = device_extensions.is_supported("VK_EXT_shader_stencil_export");
}
public:
@ -774,6 +776,11 @@ namespace vk
return m_shader_types_support;
}
// Reports whether the device advertises the VK_EXT_shader_stencil_export
// extension. The flag is captured in get_physical_device_features() from the
// device extension list and defaults to false.
bool get_shader_stencil_export_support() const
{
return m_stencil_export_support;
}
mem_allocator_base* get_allocator() const
{
return m_allocator.get();

View File

@ -105,10 +105,17 @@ namespace vk
layout_info.setLayoutCount = 1;
layout_info.pSetLayouts = &m_descriptor_layout;
std::vector<VkPushConstantRange> push_constants = get_push_constants();
if (!push_constants.empty())
{
layout_info.pushConstantRangeCount = u32(push_constants.size());
layout_info.pPushConstantRanges = push_constants.data();
}
CHECK_RESULT(vkCreatePipelineLayout(*m_device, &layout_info, nullptr, &m_pipeline_layout));
}
virtual void update_uniforms(vk::glsl::program* /*program*/)
// Per-draw parameter upload hook. load_program() invokes it with the active
// command buffer right before the uniform buffer range is bound, so an
// override may either fill the UBO or record push constants into `cmd`.
// The base implementation intentionally uploads nothing.
virtual void update_uniforms(vk::command_buffer& /*cmd*/, vk::glsl::program* /*program*/)
{
}
@ -131,6 +138,14 @@ namespace vk
return fs_inputs;
}
// Extension point for passes that need extra dynamic pipeline state.
// It is called during pipeline setup after VK_DYNAMIC_STATE_VIEWPORT and
// VK_DYNAMIC_STATE_SCISSOR have been appended; an override appends entries via
// state_descriptors[info.dynamicStateCount++] = <state>.
// NOTE(review): the capacity of state_descriptors is not visible here - confirm
// it before adding more than one extra entry.
// The base implementation adds nothing.
virtual void get_dynamic_state_entries(VkDynamicState* state_descriptors, VkPipelineDynamicStateCreateInfo& info)
{}
// Push-constant ranges to declare on this pass's pipeline layout.
// The layout builder only fills pushConstantRangeCount/pPushConstantRanges
// when the returned list is non-empty; by default a pass declares none.
virtual std::vector<VkPushConstantRange> get_push_constants()
{
	std::vector<VkPushConstantRange> no_ranges;
	return no_ranges;
}
void upload_vertex_data(f32 *data, u32 count)
{
check_heap();
@ -171,6 +186,8 @@ namespace vk
dynamic_state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
dynamic_state_descriptors[dynamic_state_info.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
get_dynamic_state_entries(dynamic_state_descriptors, dynamic_state_info);
dynamic_state_info.pDynamicStates = dynamic_state_descriptors;
VkVertexInputBindingDescription vb = { 0, 16, VK_VERTEX_INPUT_RATE_VERTEX };
@ -214,7 +231,7 @@ namespace vk
return result;
}
void load_program(const vk::command_buffer& cmd, VkRenderPass pass, const std::vector<vk::image_view*>& src)
void load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector<vk::image_view*>& src)
{
vk::glsl::program *program = nullptr;
auto found = m_program_cache.find(pass);
@ -241,7 +258,7 @@ namespace vk
VK_FALSE, 0.f, 1.f, 0.f, 0.f, m_sampler_filter, m_sampler_filter, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);
}
update_uniforms(program);
update_uniforms(cmd, program);
program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, m_descriptor_set);
@ -400,7 +417,7 @@ namespace vk
renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS);
}
void update_uniforms(vk::glsl::program* /*program*/) override
void update_uniforms(vk::command_buffer& /*cmd*/, vk::glsl::program* /*program*/) override
{
m_ubo_offset = (u32)m_ubo.alloc<256>(128);
auto dst = (f32*)m_ubo.map(m_ubo_offset, 128);
@ -694,7 +711,7 @@ namespace vk
false, true, desc->data, owner_uid);
}
void update_uniforms(vk::glsl::program* /*program*/) override
void update_uniforms(vk::command_buffer& /*cmd*/, vk::glsl::program* /*program*/) override
{
m_ubo_offset = (u32)m_ubo.alloc<256>(128);
auto dst = (f32*)m_ubo.map(m_ubo_offset, 128);
@ -808,7 +825,7 @@ namespace vk
{
"#version 450\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"layout(std140, set=0, binding=0) uniform static_data{ vec4 regs[8]; };\n"
"layout(push_constant) uniform static_data{ vec4 regs[2]; };\n"
"layout(location=0) out vec2 tc0;\n"
"layout(location=1) out vec4 color;\n"
"layout(location=2) out vec4 mask;\n"
@ -846,19 +863,29 @@ namespace vk
renderpass_config.set_attachment_count(1);
}
void update_uniforms(vk::glsl::program* /*program*/) override
std::vector<VkPushConstantRange> get_push_constants() override
{
m_ubo_offset = (u32)m_ubo.alloc<256>(128);
auto dst = (f32*)m_ubo.map(m_ubo_offset, 128);
dst[0] = clear_color.r;
dst[1] = clear_color.g;
dst[2] = clear_color.b;
dst[3] = clear_color.a;
dst[4] = colormask.r;
dst[5] = colormask.g;
dst[6] = colormask.b;
dst[7] = colormask.a;
m_ubo.unmap();
VkPushConstantRange constant;
constant.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
constant.offset = 0;
constant.size = 32;
return { constant };
}
// Records the clear parameters as vertex-stage push constants.
// The payload is eight floats (32 bytes): the clear colour (r, g, b, a)
// followed by the colour mask (r, g, b, a), matching the 32-byte range
// declared by get_push_constants() and the shader's `vec4 regs[2]` block.
void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) override
{
	const f32 data[8] =
	{
		static_cast<f32>(clear_color.r),
		static_cast<f32>(clear_color.g),
		static_cast<f32>(clear_color.b),
		static_cast<f32>(clear_color.a),
		static_cast<f32>(colormask.r),
		static_cast<f32>(colormask.g),
		static_cast<f32>(colormask.b),
		static_cast<f32>(colormask.a)
	};

	vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 32, data);
}
void set_up_viewport(vk::command_buffer &cmd, u16 max_w, u16 max_h) override

View File

@ -37,8 +37,20 @@ namespace vk
std::unordered_map<VkFormat, std::unique_ptr<vk::cs_unresolve_task>> g_unresolve_helpers;
std::unique_ptr<vk::depthonly_resolve> g_depth_resolver;
std::unique_ptr<vk::depthonly_unresolve> g_depth_unresolver;
std::unique_ptr<vk::depthstencil_resolve_AMD> g_depthstencil_resolverAMD;
std::unique_ptr<vk::depthstencil_unresolve_AMD> g_depthstencil_unresolverAMD;
std::unique_ptr<vk::stencilonly_resolve> g_stencil_resolver;
std::unique_ptr<vk::stencilonly_unresolve> g_stencil_unresolver;
std::unique_ptr<vk::depthstencil_resolve_EXT> g_depthstencil_resolver;
std::unique_ptr<vk::depthstencil_unresolve_EXT> g_depthstencil_unresolver;
// Lazily constructs a helper pass.
// If `ptr` already owns a pass nothing happens; otherwise a T is constructed
// from `extras` (perfectly forwarded), stored in `ptr`, and then T::create(dev)
// is called on it.
template <typename T, typename ...Args>
void initialize_pass(std::unique_ptr<T>& ptr, vk::render_device& dev, Args&&... extras)
{
	if (ptr)
	{
		// Already set up by an earlier call
		return;
	}

	ptr = std::make_unique<T>(std::forward<Args>(extras)...);
	ptr->create(dev);
}
void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src)
{
@ -63,22 +75,23 @@ namespace vk
if (src->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
{
if (!g_depthstencil_resolverAMD)
if (dev.get_shader_stencil_export_support())
{
g_depthstencil_resolverAMD.reset(new vk::depthstencil_resolve_AMD());
g_depthstencil_resolverAMD->create(dev);
initialize_pass(g_depthstencil_resolver, dev);
g_depthstencil_resolver->run(cmd, src, dst, renderpass);
}
else
{
initialize_pass(g_depth_resolver, dev);
initialize_pass(g_stencil_resolver, dev);
g_depthstencil_resolverAMD->run(cmd, src, dst, renderpass);
g_depth_resolver->run(cmd, src, dst, renderpass);
g_stencil_resolver->run(cmd, src, dst, renderpass);
}
}
else
{
if (!g_depth_resolver)
{
g_depth_resolver.reset(new vk::depthonly_resolve());
g_depth_resolver->create(dev);
}
initialize_pass(g_depth_resolver, dev);
g_depth_resolver->run(cmd, src, dst, renderpass);
}
}
@ -107,22 +120,23 @@ namespace vk
if (src->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
{
if (!g_depthstencil_unresolverAMD)
if (dev.get_shader_stencil_export_support())
{
g_depthstencil_unresolverAMD.reset(new vk::depthstencil_unresolve_AMD());
g_depthstencil_unresolverAMD->create(dev);
initialize_pass(g_depthstencil_unresolver, dev);
g_depthstencil_unresolver->run(cmd, dst, src, renderpass);
}
else
{
initialize_pass(g_depth_unresolver, dev);
initialize_pass(g_stencil_unresolver, dev);
g_depthstencil_unresolverAMD->run(cmd, dst, src, renderpass);
g_depth_unresolver->run(cmd, dst, src, renderpass);
g_stencil_unresolver->run(cmd, dst, src, renderpass);
}
}
else
{
if (!g_depth_unresolver)
{
g_depth_unresolver.reset(new vk::depthonly_unresolve());
g_depth_unresolver->create(dev);
}
initialize_pass(g_depth_unresolver, dev);
g_depth_unresolver->run(cmd, dst, src, renderpass);
}
}
@ -149,10 +163,16 @@ namespace vk
g_depth_resolver.reset();
}
if (g_depthstencil_resolverAMD)
if (g_stencil_resolver)
{
g_depthstencil_resolverAMD->destroy();
g_depthstencil_resolverAMD.reset();
g_stencil_resolver->destroy();
g_stencil_resolver.reset();
}
if (g_depthstencil_resolver)
{
g_depthstencil_resolver->destroy();
g_depthstencil_resolver.reset();
}
if (g_depth_unresolver)
@ -161,10 +181,16 @@ namespace vk
g_depth_unresolver.reset();
}
if (g_depthstencil_unresolverAMD)
if (g_stencil_unresolver)
{
g_depthstencil_unresolverAMD->destroy();
g_depthstencil_unresolverAMD.reset();
g_stencil_unresolver->destroy();
g_stencil_unresolver.reset();
}
if (g_depthstencil_unresolver)
{
g_depthstencil_unresolver->destroy();
g_depthstencil_unresolver.reset();
}
}
@ -175,7 +201,9 @@ namespace vk
if (g_depth_resolver) g_depth_resolver->free_resources();
if (g_depth_unresolver) g_depth_unresolver->free_resources();
if (g_depthstencil_resolverAMD) g_depthstencil_resolverAMD->free_resources();
if (g_depthstencil_unresolverAMD) g_depthstencil_unresolverAMD->free_resources();
if (g_stencil_resolver) g_stencil_resolver->free_resources();
if (g_stencil_unresolver) g_stencil_unresolver->free_resources();
if (g_depthstencil_resolver) g_depthstencil_resolver->free_resources();
if (g_depthstencil_unresolver) g_depthstencil_unresolver->free_resources();
}
}

View File

@ -165,6 +165,7 @@ namespace vk
{
u8 samples_x = 1;
u8 samples_y = 1;
s32 static_parameters[4];
depth_resolve_base()
{
@ -172,7 +173,7 @@ namespace vk
renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS);
}
void build(const std::string& kernel, const std::string& extensions, bool stencil_texturing, bool input_is_multisampled)
void build(const std::string& kernel, const std::string& extensions, const std::vector<const char*>& inputs)
{
vs_src =
"#version 450\n"
@ -187,17 +188,14 @@ namespace vk
fs_src =
"#version 420\n"
"#extension GL_ARB_separate_shader_objects : enable\n";
fs_src += extensions +
fs_src += extensions +
"\n"
"layout(std140, set=0, binding=0) uniform static_data{ ivec4 regs[8]; };\n"
"layout(set=0, binding=1) uniform sampler2D fs0;\n";
"layout(push_constant) uniform static_data{ ivec4 regs[1]; };\n";
if (stencil_texturing)
int binding = 1;
for (const auto& input : inputs)
{
m_num_usable_samplers = 2;
fs_src +=
"layout(set=0, binding=2) uniform usampler2D fs1;\n";
fs_src += "layout(set=0, binding=" + std::to_string(binding++) + ") uniform " + input + ";\n";
}
fs_src +=
@ -208,28 +206,22 @@ namespace vk
fs_src += kernel +
"}\n";
if (input_is_multisampled)
{
auto sampler_loc = fs_src.find("sampler2D fs0");
fs_src.insert(sampler_loc + 9, "MS");
if (stencil_texturing)
{
sampler_loc = fs_src.find("sampler2D fs1");
fs_src.insert(sampler_loc + 9, "MS");
}
}
LOG_ERROR(RSX, "Resolve shader:\n%s", fs_src);
}
void update_uniforms(vk::glsl::program* /*program*/) override
std::vector<VkPushConstantRange> get_push_constants() override
{
m_ubo_offset = (u32)m_ubo.alloc<256>(8);
auto dst = (s32*)m_ubo.map(m_ubo_offset, 128);
dst[0] = samples_x;
dst[1] = samples_y;
m_ubo.unmap();
VkPushConstantRange constant;
constant.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
constant.offset = 0;
constant.size = 16;
return { constant };
}
// Records the sample-grid configuration as fragment-stage push constants.
// Only the first 8 bytes of static_parameters are pushed here, i.e. the two
// s32 values written by update_sample_configuration() (samples_x, samples_y).
// The remaining bytes of the 16-byte range declared by get_push_constants()
// are left for derived passes to push separately.
void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) override
{
vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8, static_parameters);
}
void update_sample_configuration(vk::image* msaa_image)
@ -248,6 +240,9 @@ namespace vk
default:
fmt::throw_exception("Unsupported sample count %d" HERE, msaa_image->samples());
}
static_parameters[0] = samples_x;
static_parameters[1] = samples_y;
}
};
@ -258,13 +253,12 @@ namespace vk
build(
" ivec2 out_coord = ivec2(gl_FragCoord.xy);\n"
" ivec2 in_coord = (out_coord / regs[0].xy);\n"
" ivec2 sample_loc = out_coord % ivec2(regs[0].xy);\n"
" ivec2 sample_loc = out_coord % regs[0].xy;\n"
" int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n"
" float frag_depth = texelFetch(fs0, in_coord, sample_index).x;\n"
" gl_FragDepth = frag_depth;\n",
"",
false,
true);
{ "sampler2DMS fs0" });
}
void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
@ -292,8 +286,7 @@ namespace vk
" float frag_depth = texelFetch(fs0, pixel_coord, 0).x;\n"
" gl_FragDepth = frag_depth;\n",
"",
false,
false);
{ "sampler2D fs0" });
}
void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
@ -312,9 +305,142 @@ namespace vk
}
};
struct depthstencil_resolve_AMD : depth_resolve_base
struct stencilonly_resolve : depth_resolve_base
{
depthstencil_resolve_AMD()
VkClearRect region{};
VkClearAttachment clear_info{};
stencilonly_resolve()
{
renderpass_config.enable_stencil_test(
VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, // Always replace
VK_COMPARE_OP_ALWAYS, // Always pass
0xFF, // Full write-through
0xFF); // Write active bit
renderpass_config.set_stencil_mask(0xFF);
renderpass_config.set_depth_mask(false);
clear_info.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
region.baseArrayLayer = 0;
region.layerCount = 1;
build(
" ivec2 out_coord = ivec2(gl_FragCoord.xy);\n"
" ivec2 in_coord = (out_coord / regs[0].xy);\n"
" ivec2 sample_loc = out_coord % regs[0].xy;\n"
" int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n"
" uint frag_stencil = texelFetch(fs0, in_coord, sample_index).x;\n"
" if ((frag_stencil & uint(regs[0].z)) == 0) discard;\n",
"",
{"usampler2DMS fs0"});
}
void get_dynamic_state_entries(VkDynamicState* state_descriptors, VkPipelineDynamicStateCreateInfo& info) override
{
state_descriptors[info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
}
void emit_geometry(vk::command_buffer& cmd) override
{
vkCmdClearAttachments(cmd, 1, &clear_info, 1, &region);
for (s32 write_mask = 0x1; write_mask <= 0x80; write_mask <<= 1)
{
vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, write_mask);
vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask);
overlay_pass::emit_geometry(cmd);
}
}
void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
{
update_sample_configuration(msaa_image);
auto stencil_view = msaa_image->get_view(0xDEADBEEF, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT);
region.rect.extent.width = resolve_image->width();
region.rect.extent.height = resolve_image->height();
overlay_pass::run(
cmd,
(u16)resolve_image->width(), (u16)resolve_image->height(),
resolve_image, stencil_view,
render_pass);
}
};
struct stencilonly_unresolve : depth_resolve_base
{
VkClearRect region{};
VkClearAttachment clear_info{};
stencilonly_unresolve()
{
renderpass_config.enable_stencil_test(
VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, // Always replace
VK_COMPARE_OP_ALWAYS, // Always pass
0xFF, // Full write-through
0xFF); // Write active bit
renderpass_config.set_stencil_mask(0xFF);
renderpass_config.set_depth_mask(false);
clear_info.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
region.baseArrayLayer = 0;
region.layerCount = 1;
build(
" ivec2 pixel_coord = ivec2(gl_FragCoord.xy);\n"
" pixel_coord *= regs[0].xy;\n"
" pixel_coord.x += (gl_SampleID % regs[0].x);\n"
" pixel_coord.y += (gl_SampleID / regs[0].x);\n"
" uint frag_stencil = texelFetch(fs0, pixel_coord, 0).x;\n"
" if ((frag_stencil & uint(regs[0].z)) == 0) discard;\n",
"",
{ "usampler2D fs0" });
}
void get_dynamic_state_entries(VkDynamicState* state_descriptors, VkPipelineDynamicStateCreateInfo& info) override
{
state_descriptors[info.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
}
void emit_geometry(vk::command_buffer& cmd) override
{
vkCmdClearAttachments(cmd, 1, &clear_info, 1, &region);
for (s32 write_mask = 0x1; write_mask <= 0x80; write_mask <<= 1)
{
vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, write_mask);
vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask);
overlay_pass::emit_geometry(cmd);
}
}
void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
{
renderpass_config.set_multisample_state(msaa_image->samples(), 0xFFFF, true, false, false);
renderpass_config.set_multisample_shading_rate(1.f);
update_sample_configuration(msaa_image);
auto stencil_view = resolve_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT);
region.rect.extent.width = resolve_image->width();
region.rect.extent.height = resolve_image->height();
overlay_pass::run(
cmd,
(u16)msaa_image->width(), (u16)msaa_image->height(),
msaa_image, stencil_view,
render_pass);
}
};
struct depthstencil_resolve_EXT : depth_resolve_base
{
depthstencil_resolve_EXT()
{
renderpass_config.enable_stencil_test(
VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, // Always replace
@ -322,6 +448,7 @@ namespace vk
0xFF, // Full write-through
0); // Unused
renderpass_config.set_stencil_mask(0xFF);
m_num_usable_samplers = 2;
build(
@ -336,8 +463,7 @@ namespace vk
"#extension GL_ARB_shader_stencil_export : enable\n",
true,
true);
{ "sampler2DMS fs0", "usampler2DMS fs1" });
}
void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
@ -354,9 +480,9 @@ namespace vk
}
};
struct depthstencil_unresolve_AMD : depth_resolve_base
struct depthstencil_unresolve_EXT : depth_resolve_base
{
depthstencil_unresolve_AMD()
depthstencil_unresolve_EXT()
{
renderpass_config.enable_stencil_test(
VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, // Always replace
@ -364,6 +490,7 @@ namespace vk
0xFF, // Full write-through
0); // Unused
renderpass_config.set_stencil_mask(0xFF);
m_num_usable_samplers = 2;
build(
@ -378,8 +505,7 @@ namespace vk
"#extension GL_ARB_shader_stencil_export : enable\n",
true,
false);
{ "sampler2D fs0", "usampler2D fs1" });
}
void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)