1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-23 03:02:53 +01:00

rsx: Compensate for nvidia's low precision attribute interpolation

- The hardware generates inaccurate values when doing perspective-correct
  interpolation of vertex output attributes, which makes the comparison
  (a == b) fail even when both sides are the same fixed constant value.
- Increase equality tolerance when doing comparisons in fragment
  shaders for NV cards only to work around this issue.
- Typo fix ("subreaction" -> "subtraction" in a vertex decompiler comment)
This commit is contained in:
kd-11 2019-04-16 13:50:39 +03:00 committed by kd-11
parent 463b1b220d
commit 60f3059d22
7 changed files with 120 additions and 37 deletions

View File

@ -733,7 +733,7 @@ bool FragmentProgramDecompiler::handle_sct_scb(u32 opcode)
case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true;
case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true;
case RSX_FP_OPCODE_LIT:
SetDst("lit_legacy($0)");
SetDst("_builtin_lit($0)");
properties.has_lit_op = true;
return true;
case RSX_FP_OPCODE_LIF: SetDst("$Ty(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)", OPFLAGS::skip_type_cast); return true;

View File

@ -5,24 +5,55 @@
namespace program_common
{
// Writes the scalar shader helper `bool comparison_passes(float a, float b, uint func)` to OS.
//
// OS            - stream receiving the generated shader source.
// low_precision - when true, an extra `int compare(float, float)` helper is emitted and
//                 equality is tested with an absolute tolerance of 0.000001 instead of `==`;
//                 when false, the plain relational operators are used.
//
// `func` values handled by the generated code: 0 never, 1 less, 2 equal, 3 lequal,
// 4 greater, 5 nequal, 6 gequal, 7 always. Any other value yields false.
//
// In the tolerant variant `compare` returns 1 (less), 2 (equal), 4 (greater) or 0 (unordered).
// The exact `a == b` check runs before the tolerance check because `inf - inf` is NaN, which
// would otherwise make two equal infinities fall through and be reported as "less".
// The unordered result keeps NaN operands from passing less/lequal, matching what the
// plain relational operators of the precise variant do.
static void insert_compare_op(std::ostream& OS, bool low_precision)
{
	if (low_precision)
	{
		OS <<
		"int compare(float a, float b)\n"
		"{\n"
		" if (a == b || abs(a - b) < 0.000001) return 2;\n"
		" if (a > b) return 4;\n"
		" if (a < b) return 1;\n"
		" return 0; // unordered\n"
		"}\n\n"
		"bool comparison_passes(float a, float b, uint func)\n"
		"{\n"
		" if (func == 0) return false; // never\n"
		" if (func == 7) return true; // always\n\n"
		" int op = compare(a, b);\n"
		" switch (func)\n"
		" {\n"
		" case 1: return op == 1; // less\n"
		" case 2: return op == 2; // equal\n"
		" case 3: return (op == 1 || op == 2); // lequal\n"
		" case 4: return op == 4; // greater\n"
		" case 5: return op != 2; // nequal\n"
		" case 6: return (op == 4 || op == 2); // gequal\n"
		" }\n\n"
		" return false; // unknown func\n"
		"}\n\n";
		return;
	}

	// Precise variant: direct use of the relational operators
	OS <<
	"bool comparison_passes(float a, float b, uint func)\n"
	"{\n"
	" switch (func)\n"
	" {\n"
	" default:\n"
	" case 0: return false; //never\n"
	" case 1: return (a < b); //less\n"
	" case 2: return (a == b); //equal\n"
	" case 3: return (a <= b); //lequal\n"
	" case 4: return (a > b); //greater\n"
	" case 5: return (a != b); //nequal\n"
	" case 6: return (a >= b); //gequal\n"
	" case 7: return true; //always\n"
	" }\n"
	"}\n\n";
}
static void insert_compare_op_vector(std::ostream& OS)
@ -469,13 +500,27 @@ namespace glsl
" ocol3 = " << reg3 << ";\n\n";
}
static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false, bool require_texture_ops = true, bool emulate_pcf = false)
// Feature flags consumed by insert_glsl_legacy_function to decide which GLSL helper
// code is emitted. Every flag defaults to false so that a default-constructed instance
// (the pattern used at the call sites) never carries indeterminate values, even if a
// caller forgets to assign a field.
struct shader_properties
{
	// Program domain the helpers are generated for.
	// Value-initialized here; callers are expected to assign it explicitly.
	glsl::program_domain domain{};

	// Applicable in both vertex and fragment stages: emit the lit_legacy() helper
	bool require_lit_emulation = false;

	// Only relevant for fragment programs
	bool require_wpos = false;             // emit get_wpos()
	bool require_depth_conversion = false; // emit the depth conversion helpers
	bool require_texture_ops = false;      // emit the texture sampling helpers/macros
	bool emulate_shadow_compare = false;   // emit shadowCompare() and the vector compare op
	bool low_precision_tests = false;      // forwarded to program_common::insert_compare_op
};
static void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props)
{
OS << "#define _select mix\n";
OS << "#define _saturate(x) clamp(x, 0., 1.)\n";
OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n";
if (require_lit_emulation)
if (props.require_lit_emulation)
{
OS <<
"vec4 lit_legacy(vec4 val)"
@ -492,7 +537,7 @@ namespace glsl
"}\n\n";
}
if (domain == glsl::program_domain::glsl_vertex_program)
if (props.domain == glsl::program_domain::glsl_vertex_program)
{
OS <<
"vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n"
@ -512,9 +557,9 @@ namespace glsl
return;
}
program_common::insert_compare_op(OS);
program_common::insert_compare_op(OS, props.low_precision_tests);
if (require_texture_ops && emulate_pcf)
if (props.require_texture_ops && props.emulate_shadow_compare)
{
program_common::insert_compare_op_vector(OS);
}
@ -550,7 +595,7 @@ namespace glsl
" return pow((cs + 0.055) / 1.055, 2.4);\n"
"}\n\n";
if (require_depth_conversion)
if (props.require_depth_conversion)
{
//NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
//The A component (Z) is useless (should contain stencil8 or just 1)
@ -594,9 +639,9 @@ namespace glsl
"}\n\n";
}
if (require_texture_ops)
if (props.require_texture_ops)
{
if (emulate_pcf)
if (props.emulate_shadow_compare)
{
OS <<
"vec4 shadowCompare(sampler2D tex, vec3 p, uint func)\n"
@ -671,7 +716,7 @@ namespace glsl
"#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].xy, texture_parameters[index].z), floatBitsToUint(texture_parameters[index].w))\n";
if (emulate_pcf)
if (props.emulate_shadow_compare)
{
OS <<
"#define TEX2D_SHADOW(index, coord3) shadowCompare(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.), floatBitsToUint(texture_parameters[index].w) >> 8)\n"
@ -692,7 +737,7 @@ namespace glsl
"#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4), floatBitsToUint(texture_parameters[index].w))\n\n";
}
if (require_wpos)
if (props.require_wpos)
{
OS <<
"vec4 get_wpos()\n"

View File

@ -132,7 +132,7 @@ void insert_d3d12_legacy_function(std::ostream& OS, bool is_fragment_program)
if (!is_fragment_program)
return;
program_common::insert_compare_op(OS);
program_common::insert_compare_op(OS, false);
OS << "uint packSnorm2x16(float2 val)";
OS << "{\n";

View File

@ -196,8 +196,16 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
// Emits the shared GLSL helper functions for an OpenGL fragment program into OS.
// The feature flags are gathered into a glsl::shader_properties instance which is then
// handed to glsl::insert_glsl_legacy_function.
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{
// NOTE(review): the next two lines look like the pre-change positional-argument call that
// the diff view lists next to its replacement further down - confirm before building.
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op,
m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare);
glsl::shader_properties properties2;
properties2.domain = glsl::glsl_fragment_program;
properties2.require_lit_emulation = properties.has_lit_op;
// Depth conversion is requested whenever m_prog.redirected_textures is non-zero
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
properties2.require_wpos = properties.has_wpos_input;
properties2.require_texture_ops = properties.has_tex_op;
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
// Tolerance-based comparisons are requested only when the GL driver caps report an NVIDIA vendor
properties2.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
glsl::insert_glsl_legacy_function(OS, properties2);
}
void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)

View File

@ -154,8 +154,20 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{
insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, gl::get_driver_caps().vendor_INTEL==false);
const auto& dev_caps = gl::get_driver_caps();
glsl::shader_properties properties2;
properties2.domain = glsl::glsl_vertex_program;
properties2.require_lit_emulation = properties.has_lit_op;
// Unused
properties2.require_depth_conversion = false;
properties2.require_wpos = false;
properties2.require_texture_ops = false;
properties2.emulate_shadow_compare = false;
properties2.low_precision_tests = false;
insert_glsl_legacy_function(OS, properties2);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, dev_caps.vendor_INTEL == false);
std::string parameters = "";
for (int i = 0; i < 16; ++i)
@ -306,7 +318,7 @@ void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
//SEE Naruto: UNS
//NOTE: On GPUs, poor fp32 precision means dividing z by w, then multiplying by w again gives slightly incorrect results
//This equation is simplified algebraically to an addition and subreaction which gives more accurate results (Fixes flickering skybox in Dark Souls 2)
//This equation is simplified algebraically to an addition and subtraction which gives more accurate results (Fixes flickering skybox in Dark Souls 2)
//OS << " float ndc_z = gl_Position.z / gl_Position.w;\n";
//OS << " ndc_z = (ndc_z * 2.) - 1.;\n";
//OS << " gl_Position.z = ndc_z * gl_Position.w;\n";

View File

@ -228,8 +228,16 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
// Emits the shared GLSL helper functions for a Vulkan fragment program into OS.
// The feature flags are gathered into a glsl::shader_properties instance which is then
// handed to glsl::insert_glsl_legacy_function.
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
{
// NOTE(review): the next two lines look like the pre-change positional-argument call that
// the diff view lists next to its replacement further down - confirm before building.
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op,
m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare);
glsl::shader_properties properties2;
properties2.domain = glsl::glsl_fragment_program;
properties2.require_lit_emulation = properties.has_lit_op;
// Depth conversion is requested whenever m_prog.redirected_textures is non-zero
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
properties2.require_wpos = properties.has_wpos_input;
properties2.require_texture_ops = properties.has_tex_op;
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
// Tolerance-based comparisons are requested only when the current renderer's GPU reports the NVIDIA driver vendor
properties2.low_precision_tests = vk::get_current_renderer()->gpu().get_driver_vendor() == vk::driver_vendor::NVIDIA;
glsl::insert_glsl_legacy_function(OS, properties2);
}
void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)

View File

@ -194,7 +194,17 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
{
glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op);
glsl::shader_properties properties2;
properties2.domain = glsl::glsl_vertex_program;
properties2.require_lit_emulation = properties.has_lit_op;
// Unused
properties2.require_depth_conversion = false;
properties2.require_wpos = false;
properties2.require_texture_ops = false;
properties2.emulate_shadow_compare = false;
properties2.low_precision_tests = false;
glsl::insert_glsl_legacy_function(OS, properties2);
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv);
std::string parameters = "";