From a229e30b08c0330bad0cf4e5eaf56069cc5ddace Mon Sep 17 00:00:00 2001
From: kd-11 <karokidii@gmail.com>
Date: Mon, 10 Oct 2022 18:06:39 +0300
Subject: [PATCH] rsx: Implement RSX-compliant polygon offset

---
 rpcs3/Emu/RSX/GL/GLDraw.cpp                   | 26 ++++++++++++++++---
 rpcs3/Emu/RSX/VK/VKDraw.cpp                   | 18 ++++++++++++-
 rpcs3/Emu/RSX/VK/vkutils/chip_class.h         |  2 ++
 .../Emu/RSX/VK/vkutils/framebuffer_object.hpp | 12 +++++++++
 4 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp
index 60bfc1c1e9..49a19acd3b 100644
--- a/rpcs3/Emu/RSX/GL/GLDraw.cpp
+++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp
@@ -256,9 +256,29 @@ void GLGSRender::update_draw_state()
 		gl_state.enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE);
 		gl_state.enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL);
 
-		//offset_bias is the constant factor, multiplied by the implementation factor R
-		//offset_scale is the slope factor, multiplied by the triangle slope factor M
-		gl_state.polygon_offset(rsx::method_registers.poly_offset_scale(), rsx::method_registers.poly_offset_bias());
+		// offset_bias is the constant factor, multiplied by the implementation factor R
+		// offset_scale is the slope factor, multiplied by the triangle slope factor M
+		const auto poly_offset_scale = rsx::method_registers.poly_offset_scale();
+		auto poly_offset_bias = rsx::method_registers.poly_offset_bias();
+
+		if (auto ds = m_rtts.m_bound_depth_stencil.second;
+			ds && ds->get_internal_format() == gl::texture::internal_format::depth24_stencil8)
+		{
+				// See VKDraw.cpp for details on how RSX behaves compared to desktop D24X8 implementations.
+				// TL;DR: RSX expects R = 16,777,215 (2^24 - 1)
+			const auto& caps = gl::get_driver_caps();
+			if (caps.vendor_NVIDIA)
+			{
+				// R derived to be 8388607 (2^23 - 1)
+				poly_offset_bias *= 0.5f;
+			}
+			else if (caps.vendor_AMD)
+			{
+				// R derived to be 4194303 (2^22 - 1)
+				poly_offset_bias *= 0.25f;
+			}
+		}
+		gl_state.polygon_offset(poly_offset_scale, poly_offset_bias);
 
 		if (gl_state.enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE))
 		{
diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp
index 2d52bf765d..fd59711099 100644
--- a/rpcs3/Emu/RSX/VK/VKDraw.cpp
+++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp
@@ -5,6 +5,7 @@
 #include "VKAsyncScheduler.h"
 #include "VKGSRender.h"
 #include "vkutils/buffer_object.h"
+#include "vkutils/chip_class.h"
 
 namespace vk
 {
@@ -173,7 +174,22 @@ void VKGSRender::update_draw_state()
 	{
 		// offset_bias is the constant factor, multiplied by the implementation factor R
 		// offst_scale is the slope factor, multiplied by the triangle slope factor M
-		vkCmdSetDepthBias(*m_current_command_buffer, rsx::method_registers.poly_offset_bias(), 0.f, rsx::method_registers.poly_offset_scale());
+		// R is implementation-dependent and has to be derived empirically for each supported implementation.
+		// Luckily for us, only NVIDIA currently supports fixed-point 24-bit depth buffers.
+
+		const auto polygon_offset_scale = rsx::method_registers.poly_offset_scale();
+		auto polygon_offset_bias = rsx::method_registers.poly_offset_bias();
+
+		if (m_draw_fbo->depth_format() == VK_FORMAT_D24_UNORM_S8_UINT && is_NVIDIA(vk::get_chip_family()))
+		{
+			// R was empirically derived to be 0.5 * (2^24 - 1) for the fixed-point type on Pascal. The same seems to apply to other NVIDIA GPUs.
+			// RSX seems to be using 2^24 - 1 instead, making the biases twice as large when using a fixed-point Z-buffer on NVIDIA.
+			// Note that the formula for floating point is complicated, but it actually works out for us.
+			// Since the exponent range for a polygon is around 0, and we have 23 (+1) mantissa bits, R just works out to the same range by chance \o/.
+			polygon_offset_bias *= 0.5f;
+		}
+
+		vkCmdSetDepthBias(*m_current_command_buffer, polygon_offset_bias, 0.f, polygon_offset_scale);
 	}
 	else
 	{
diff --git a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
index f6229f7d89..8aa6e221be 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
@@ -50,4 +50,6 @@ namespace vk
 
 	chip_class get_chip_family();
 	chip_class get_chip_family(u32 vendor_id, u32 device_id);
+
+	static inline bool is_NVIDIA(chip_class chip) { return chip >= chip_class::NV_generic && chip <= chip_class::NV_ampere; } // True when chip lies in [NV_generic, NV_ampere]; NOTE(review): assumes the NV_* enumerators are contiguous and NV_ampere is the last one - confirm when adding newer families
 }
diff --git a/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp b/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp
index 3ee09e9f32..ecafa1d827 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/framebuffer_object.hpp
@@ -63,6 +63,18 @@ namespace vk
 			return attachments[0]->image()->samples();
 		}
 
+		VkFormat format() // Returns the VkFormat of the image behind the first attachment (attachments[0])
+		{
+			ensure(!attachments.empty()); // attachments[0] is indexed below, so the list must not be empty
+			return attachments[0]->image()->format();
+		}
+
+		VkFormat depth_format() // Returns the VkFormat of the image behind the last attachment (attachments.back())
+		{
+			ensure(!attachments.empty()); // back() is called below, so the list must not be empty
+			return attachments.back()->image()->format(); // NOTE(review): presumably the depth-stencil attachment is always stored last - confirm; with a colour-only framebuffer this would return a colour format
+		}
+
 		bool matches(std::vector<vk::image*> fbo_images, u32 width, u32 height)
 		{
 			if (m_width != width || m_height != height)