From a229e30b08c0330bad0cf4e5eaf56069cc5ddace Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 10 Oct 2022 18:06:39 +0300 Subject: [PATCH] rsx: Implement RSX-compliant polygon offset --- rpcs3/Emu/RSX/GL/GLDraw.cpp | 26 ++++++++++++++++--- rpcs3/Emu/RSX/VK/VKDraw.cpp | 18 ++++++++++++- rpcs3/Emu/RSX/VK/vkutils/chip_class.h | 2 ++ .../Emu/RSX/VK/vkutils/framebuffer_object.hpp | 12 +++++++++ 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index 60bfc1c1e9..49a19acd3b 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -256,9 +256,29 @@ void GLGSRender::update_draw_state() gl_state.enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE); gl_state.enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL); - //offset_bias is the constant factor, multiplied by the implementation factor R - //offset_scale is the slope factor, multiplied by the triangle slope factor M - gl_state.polygon_offset(rsx::method_registers.poly_offset_scale(), rsx::method_registers.poly_offset_bias()); + // offset_bias is the constant factor, multiplied by the implementation factor R + // offset_scale is the slope factor, multiplied by the triangle slope factor M + const auto poly_offset_scale = rsx::method_registers.poly_offset_scale(); + auto poly_offset_bias = rsx::method_registers.poly_offset_bias(); + + if (auto ds = m_rtts.m_bound_depth_stencil.second; + ds && ds->get_internal_format() == gl::texture::internal_format::depth24_stencil8) + { + // Check details in VKDraw.cpp about behaviour of RSX vs desktop D24X8 implementations + // TLDR, RSX expects R = 16,777,215 (2^24 - 1) + const auto& caps = gl::get_driver_caps(); + if (caps.vendor_NVIDIA) + { + // R derived to be 8388607 (2^23 - 1) + poly_offset_bias *= 0.5f; + } + else if (caps.vendor_AMD) + { + // R derived to be 4194303 (2^22 - 1) + poly_offset_bias *= 0.25f; + } + } + 
gl_state.polygon_offset(poly_offset_scale, poly_offset_bias); if (gl_state.enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE)) { diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 2d52bf765d..fd59711099 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -5,6 +5,7 @@ #include "VKAsyncScheduler.h" #include "VKGSRender.h" #include "vkutils/buffer_object.h" +#include "vkutils/chip_class.h" namespace vk { @@ -173,7 +174,22 @@ void VKGSRender::update_draw_state() { // offset_bias is the constant factor, multiplied by the implementation factor R // offst_scale is the slope factor, multiplied by the triangle slope factor M - vkCmdSetDepthBias(*m_current_command_buffer, rsx::method_registers.poly_offset_bias(), 0.f, rsx::method_registers.poly_offset_scale()); + // R is implementation dependent and has to be derived empirically for supported implementations. + // Lucky for us, only NVIDIA currently supports fixed-point 24-bit depth buffers. + + const auto polygon_offset_scale = rsx::method_registers.poly_offset_scale(); + auto polygon_offset_bias = rsx::method_registers.poly_offset_bias(); + + if (m_draw_fbo->depth_format() == VK_FORMAT_D24_UNORM_S8_UINT && is_NVIDIA(vk::get_chip_family())) + { + // Empirically derived to be 0.5 * (2^24 - 1) for fixed type on Pascal. The same seems to apply for other NVIDIA GPUs. + // RSX seems to be using 2^24 - 1 instead, making the biases twice as large when using fixed type Z-buffer on NVIDIA. + // Note that the formula for floating point is complicated, but actually works out for us. + // Since the exponent range for a polygon is around 0, and we have 23 (+1) mantissa bits, R just works out to the same range by chance \o/. 
+ polygon_offset_bias *= 0.5f; + } + + vkCmdSetDepthBias(*m_current_command_buffer, polygon_offset_bias, 0.f, polygon_offset_scale); } else { diff --git a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h index f6229f7d89..8aa6e221be 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h +++ b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h @@ -50,4 +50,6 @@ namespace vk chip_class get_chip_family(); chip_class get_chip_family(u32 vendor_id, u32 device_id); + + static inline bool is_NVIDIA(chip_class chip) { return chip >= chip_class::NV_generic && chip <= chip_class::NV_ampere; } } diff --git a/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp b/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp index 3ee09e9f32..ecafa1d827 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp +++ b/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp @@ -63,6 +63,18 @@ namespace vk return attachments[0]->image()->samples(); } + VkFormat format() + { + ensure(!attachments.empty()); + return attachments[0]->image()->format(); + } + + VkFormat depth_format() + { + ensure(!attachments.empty()); + return attachments.back()->image()->format(); + } + bool matches(std::vector fbo_images, u32 width, u32 height) { if (m_width != width || m_height != height)