rsx: Implement RSX-compliant polygon offset

2024-11-25 04:02:42 +01:00 · 2022-10-10 18:06:39 +03:00 · 2022-10-10 18:06:39 +03:00 · a229e30b08
commit a229e30b08
parent d246a37b11
4 changed files with 54 additions and 4 deletions
--- a/rpcs3/Emu/RSX/GL/GLDraw.cpp
+++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp
@ -256,9 +256,29 @@ void GLGSRender::update_draw_state()
 		gl_state.enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE);
 		gl_state.enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL);

-		//offset_bias is the constant factor, multiplied by the implementation factor R
-		//offset_scale is the slope factor, multiplied by the triangle slope factor M
-		gl_state.polygon_offset(rsx::method_registers.poly_offset_scale(), rsx::method_registers.poly_offset_bias());
+		// offset_bias is the constant factor, multiplied by the implementation factor R
+		// offset_scale is the slope factor, multiplied by the triangle slope factor M
+		const auto poly_offset_scale = rsx::method_registers.poly_offset_scale();
+		auto poly_offset_bias = rsx::method_registers.poly_offset_bias();
+
+		if (auto ds = m_rtts.m_bound_depth_stencil.second;
+			ds && ds->get_internal_format() == gl::texture::internal_format::depth24_stencil8)
+		{
+			// Check details in VKDraw.cpp about behaviour of RSX vs desktop D24X8 implementations
+			// TLDR, RSX expects R = 16,777,215 (2^24 - 1)
+			const auto& caps = gl::get_driver_caps();
+			if (caps.vendor_NVIDIA)
+			{
+				// R derived to be 8388607 (2^23 - 1)
+				poly_offset_bias *= 0.5f;
+			}
+			else if (caps.vendor_AMD)
+			{
+				// R derived to be 4194303 (2^22 - 1)
+				poly_offset_bias *= 0.25f;
+			}
+		}
+		gl_state.polygon_offset(poly_offset_scale, poly_offset_bias);

 		if (gl_state.enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE))
 		{
--- a/rpcs3/Emu/RSX/VK/VKDraw.cpp
+++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp
@ -5,6 +5,7 @@
 #include "VKAsyncScheduler.h"
 #include "VKGSRender.h"
 #include "vkutils/buffer_object.h"
+#include "vkutils/chip_class.h"

 namespace vk
 {
@ -173,7 +174,22 @@ void VKGSRender::update_draw_state()
 	{
 		// offset_bias is the constant factor, multiplied by the implementation factor R
 		// offset_scale is the slope factor, multiplied by the triangle slope factor M
-		vkCmdSetDepthBias(*m_current_command_buffer, rsx::method_registers.poly_offset_bias(), 0.f, rsx::method_registers.poly_offset_scale());
+		// R is implementation dependent and has to be derived empirically for supported implementations.
+		// Lucky for us, only NVIDIA currently supports fixed-point 24-bit depth buffers.
+
+		const auto polygon_offset_scale = rsx::method_registers.poly_offset_scale();
+		auto polygon_offset_bias = rsx::method_registers.poly_offset_bias();
+
+		if (m_draw_fbo->depth_format() == VK_FORMAT_D24_UNORM_S8_UINT && is_NVIDIA(vk::get_chip_family()))
+		{
+			// R was empirically derived to be 0.5 * (2^24 - 1) for fixed-point depth on Pascal. The same seems to apply to other NVIDIA GPUs.
+			// RSX seems to be using 2^24 - 1 instead, which makes the biases twice as large when using a fixed-point Z-buffer on NVIDIA.
+			// Note that the formula for floating-point depth is complicated, but it actually works out for us.
+			// Since the exponent range for a polygon is around 0, and we have 23 (+1) mantissa bits, R just works out to the same range by chance \o/.
+			polygon_offset_bias *= 0.5f;
+		}
+
+		vkCmdSetDepthBias(*m_current_command_buffer, polygon_offset_bias, 0.f, polygon_offset_scale);
 	}
 	else
 	{
--- a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
@ -50,4 +50,6 @@ namespace vk

 	chip_class get_chip_family();
 	chip_class get_chip_family(u32 vendor_id, u32 device_id);
+
+	static inline bool is_NVIDIA(chip_class chip) { return chip >= chip_class::NV_generic && chip <= chip_class::NV_ampere; } // True when chip is within the inclusive range [NV_generic, NV_ampere]. NOTE(review): presumes NVIDIA entries are contiguous in chip_class with NV_ampere last; confirm against the enum.
 }
--- a/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp
@ -63,6 +63,18 @@ namespace vk
 			return attachments[0]->image()->samples();
 		}

+		VkFormat format() // Returns the format of the image behind the first attachment.
+		{
+			ensure(!attachments.empty()); // attachments[0] below needs at least one attachment
+			return attachments[0]->image()->format();
+		}
+
+		VkFormat depth_format() // Returns the format of the image behind the last attachment.
+		{
+			ensure(!attachments.empty()); // back() below needs at least one attachment
+			return attachments.back()->image()->format(); // NOTE(review): presumes a depth-stencil attachment is bound and stored last; confirm against callers
+		}
+
 		bool matches(std::vector<vk::image*> fbo_images, u32 width, u32 height)
 		{
 			if (m_width != width || m_height != height)