fp32 precision on GPUs is embarrassing

- Division seems to suffer from drift easily on GPUs due to limited precision
2024-11-22 18:53:28 +01:00 · 2017-11-18 13:02:10 +03:00 · 2017-11-18 13:02:10 +03:00 · 5cf2d774f3
commit 5cf2d774f3
parent ccc0383f75
1 changed files with 6 additions and 4 deletions
--- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp
+++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp
@@ -306,13 +306,15 @@ void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
 	//It is therefore critical that this step is done post-transform and the result re-scaled by w
 	//SEE Naruto: UNS
 	
-	OS << "	float ndc_z = gl_Position.z / gl_Position.w;\n";
-	OS << "	ndc_z = (ndc_z * 2.) - 1.;\n";
-	OS << "	gl_Position.z = ndc_z * gl_Position.w;\n";
+	//NOTE: On GPUs, poor fp32 precision means dividing z by w, then multiplying by w again gives slightly incorrect results
+	//This equation is simplified algebraically to an addition and a subtraction, which gives more accurate results (Fixes flickering skybox in Dark Souls 2)
+	//OS << "	float ndc_z = gl_Position.z / gl_Position.w;\n";
+	//OS << "	ndc_z = (ndc_z * 2.) - 1.;\n";
+	//OS << "	gl_Position.z = ndc_z * gl_Position.w;\n";
+	OS << "	gl_Position.z = (gl_Position.z + gl_Position.z) - gl_Position.w;\n";
 	OS << "}\n";
 }

-
 void GLVertexDecompilerThread::Task()
 {
 	m_shader = Decompile();