mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 18:53:28 +01:00
rsx: Compensate for nvidia's low precision attribute interpolation
- The hardware generates inaccurate values when doing perspective-correct interpolation of vertex output attributes, which makes the comparison (a == b) fail even when the attributes are a fixed constant value.
- Increase the equality tolerance for comparisons in fragment shaders, on NVIDIA cards only, to work around this issue.
- Teepo fix
This commit is contained in:
parent
463b1b220d
commit
60f3059d22
@ -733,7 +733,7 @@ bool FragmentProgramDecompiler::handle_sct_scb(u32 opcode)
|
||||
case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true;
|
||||
case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true;
|
||||
case RSX_FP_OPCODE_LIT:
|
||||
SetDst("lit_legacy($0)");
|
||||
SetDst("_builtin_lit($0)");
|
||||
properties.has_lit_op = true;
|
||||
return true;
|
||||
case RSX_FP_OPCODE_LIF: SetDst("$Ty(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)", OPFLAGS::skip_type_cast); return true;
|
||||
|
@ -5,24 +5,55 @@
|
||||
|
||||
namespace program_common
|
||||
{
|
||||
static void insert_compare_op(std::ostream& OS)
|
||||
static void insert_compare_op(std::ostream& OS, bool low_precision)
|
||||
{
|
||||
OS <<
|
||||
"bool comparison_passes(float a, float b, uint func)\n"
|
||||
"{\n"
|
||||
" switch (func)\n"
|
||||
" {\n"
|
||||
" default:\n"
|
||||
" case 0: return false; //never\n"
|
||||
" case 1: return (a < b); //less\n"
|
||||
" case 2: return (a == b); //equal\n"
|
||||
" case 3: return (a <= b); //lequal\n"
|
||||
" case 4: return (a > b); //greater\n"
|
||||
" case 5: return (a != b); //nequal\n"
|
||||
" case 6: return (a >= b); //gequal\n"
|
||||
" case 7: return true; //always\n"
|
||||
" }\n"
|
||||
"}\n\n";
|
||||
if (low_precision)
|
||||
{
|
||||
OS <<
|
||||
"int compare(float a, float b)\n"
|
||||
"{\n"
|
||||
" if (abs(a - b) < 0.000001) return 2;\n"
|
||||
" return (a > b)? 4 : 1;\n"
|
||||
"}\n\n"
|
||||
|
||||
"bool comparison_passes(float a, float b, uint func)\n"
|
||||
"{\n"
|
||||
" if (func == 0) return false; // never\n"
|
||||
" if (func == 7) return true; // always\n\n"
|
||||
|
||||
" int op = compare(a, b);\n"
|
||||
" switch (func)\n"
|
||||
" {\n"
|
||||
" case 1: return op == 1; // less\n"
|
||||
" case 2: return op == 2; // equal\n"
|
||||
" case 3: return op <= 2; // lequal\n"
|
||||
" case 4: return op == 4; // greater\n"
|
||||
" case 5: return op != 2; // nequal\n"
|
||||
" case 6: return (op == 4 || op == 2); // gequal\n"
|
||||
" }\n\n"
|
||||
|
||||
" return false; // unreachable\n"
|
||||
"}\n\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
OS <<
|
||||
"bool comparison_passes(float a, float b, uint func)\n"
|
||||
"{\n"
|
||||
" switch (func)\n"
|
||||
" {\n"
|
||||
" default:\n"
|
||||
" case 0: return false; //never\n"
|
||||
" case 1: return (a < b); //less\n"
|
||||
" case 2: return (a == b); //equal\n"
|
||||
" case 3: return (a <= b); //lequal\n"
|
||||
" case 4: return (a > b); //greater\n"
|
||||
" case 5: return (a != b); //nequal\n"
|
||||
" case 6: return (a >= b); //gequal\n"
|
||||
" case 7: return true; //always\n"
|
||||
" }\n"
|
||||
"}\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
static void insert_compare_op_vector(std::ostream& OS)
|
||||
@ -469,13 +500,27 @@ namespace glsl
|
||||
" ocol3 = " << reg3 << ";\n\n";
|
||||
}
|
||||
|
||||
static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false, bool require_texture_ops = true, bool emulate_pcf = false)
|
||||
struct shader_properties
|
||||
{
|
||||
glsl::program_domain domain;
|
||||
// Applicable in vertex stage
|
||||
bool require_lit_emulation;
|
||||
|
||||
// Only relevant for fragment programs
|
||||
bool require_wpos;
|
||||
bool require_depth_conversion;
|
||||
bool require_texture_ops;
|
||||
bool emulate_shadow_compare;
|
||||
bool low_precision_tests;
|
||||
};
|
||||
|
||||
static void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props)
|
||||
{
|
||||
OS << "#define _select mix\n";
|
||||
OS << "#define _saturate(x) clamp(x, 0., 1.)\n";
|
||||
OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n";
|
||||
|
||||
if (require_lit_emulation)
|
||||
if (props.require_lit_emulation)
|
||||
{
|
||||
OS <<
|
||||
"vec4 lit_legacy(vec4 val)"
|
||||
@ -492,7 +537,7 @@ namespace glsl
|
||||
"}\n\n";
|
||||
}
|
||||
|
||||
if (domain == glsl::program_domain::glsl_vertex_program)
|
||||
if (props.domain == glsl::program_domain::glsl_vertex_program)
|
||||
{
|
||||
OS <<
|
||||
"vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n"
|
||||
@ -512,9 +557,9 @@ namespace glsl
|
||||
return;
|
||||
}
|
||||
|
||||
program_common::insert_compare_op(OS);
|
||||
program_common::insert_compare_op(OS, props.low_precision_tests);
|
||||
|
||||
if (require_texture_ops && emulate_pcf)
|
||||
if (props.require_texture_ops && props.emulate_shadow_compare)
|
||||
{
|
||||
program_common::insert_compare_op_vector(OS);
|
||||
}
|
||||
@ -550,7 +595,7 @@ namespace glsl
|
||||
" return pow((cs + 0.055) / 1.055, 2.4);\n"
|
||||
"}\n\n";
|
||||
|
||||
if (require_depth_conversion)
|
||||
if (props.require_depth_conversion)
|
||||
{
|
||||
//NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
|
||||
//The A component (Z) is useless (should contain stencil8 or just 1)
|
||||
@ -594,9 +639,9 @@ namespace glsl
|
||||
"}\n\n";
|
||||
}
|
||||
|
||||
if (require_texture_ops)
|
||||
if (props.require_texture_ops)
|
||||
{
|
||||
if (emulate_pcf)
|
||||
if (props.emulate_shadow_compare)
|
||||
{
|
||||
OS <<
|
||||
"vec4 shadowCompare(sampler2D tex, vec3 p, uint func)\n"
|
||||
@ -671,7 +716,7 @@ namespace glsl
|
||||
|
||||
"#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].xy, texture_parameters[index].z), floatBitsToUint(texture_parameters[index].w))\n";
|
||||
|
||||
if (emulate_pcf)
|
||||
if (props.emulate_shadow_compare)
|
||||
{
|
||||
OS <<
|
||||
"#define TEX2D_SHADOW(index, coord3) shadowCompare(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.), floatBitsToUint(texture_parameters[index].w) >> 8)\n"
|
||||
@ -692,7 +737,7 @@ namespace glsl
|
||||
"#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4), floatBitsToUint(texture_parameters[index].w))\n\n";
|
||||
}
|
||||
|
||||
if (require_wpos)
|
||||
if (props.require_wpos)
|
||||
{
|
||||
OS <<
|
||||
"vec4 get_wpos()\n"
|
||||
|
@ -132,7 +132,7 @@ void insert_d3d12_legacy_function(std::ostream& OS, bool is_fragment_program)
|
||||
if (!is_fragment_program)
|
||||
return;
|
||||
|
||||
program_common::insert_compare_op(OS);
|
||||
program_common::insert_compare_op(OS, false);
|
||||
|
||||
OS << "uint packSnorm2x16(float2 val)";
|
||||
OS << "{\n";
|
||||
|
@ -196,8 +196,16 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
||||
|
||||
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
||||
{
|
||||
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op,
|
||||
m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare);
|
||||
glsl::shader_properties properties2;
|
||||
properties2.domain = glsl::glsl_fragment_program;
|
||||
properties2.require_lit_emulation = properties.has_lit_op;
|
||||
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
|
||||
properties2.require_wpos = properties.has_wpos_input;
|
||||
properties2.require_texture_ops = properties.has_tex_op;
|
||||
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
|
||||
properties2.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA;
|
||||
|
||||
glsl::insert_glsl_legacy_function(OS, properties2);
|
||||
}
|
||||
|
||||
void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
|
||||
|
@ -154,8 +154,20 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
|
||||
|
||||
void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS)
|
||||
{
|
||||
insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op);
|
||||
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, gl::get_driver_caps().vendor_INTEL==false);
|
||||
const auto& dev_caps = gl::get_driver_caps();
|
||||
|
||||
glsl::shader_properties properties2;
|
||||
properties2.domain = glsl::glsl_vertex_program;
|
||||
properties2.require_lit_emulation = properties.has_lit_op;
|
||||
// Unused
|
||||
properties2.require_depth_conversion = false;
|
||||
properties2.require_wpos = false;
|
||||
properties2.require_texture_ops = false;
|
||||
properties2.emulate_shadow_compare = false;
|
||||
properties2.low_precision_tests = false;
|
||||
|
||||
insert_glsl_legacy_function(OS, properties2);
|
||||
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, dev_caps.vendor_INTEL == false);
|
||||
|
||||
std::string parameters = "";
|
||||
for (int i = 0; i < 16; ++i)
|
||||
@ -306,7 +318,7 @@ void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
|
||||
//SEE Naruto: UNS
|
||||
|
||||
//NOTE: On GPUs, poor fp32 precision means dividing z by w, then multiplying by w again gives slightly incorrect results
|
||||
//This equation is simplified algebraically to an addition and subreaction which gives more accurate results (Fixes flickering skybox in Dark Souls 2)
|
||||
//This equation is simplified algebraically to an addition and subtraction which gives more accurate results (Fixes flickering skybox in Dark Souls 2)
|
||||
//OS << " float ndc_z = gl_Position.z / gl_Position.w;\n";
|
||||
//OS << " ndc_z = (ndc_z * 2.) - 1.;\n";
|
||||
//OS << " gl_Position.z = ndc_z * gl_Position.w;\n";
|
||||
|
@ -228,8 +228,16 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
|
||||
|
||||
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
|
||||
{
|
||||
glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op,
|
||||
m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare);
|
||||
glsl::shader_properties properties2;
|
||||
properties2.domain = glsl::glsl_fragment_program;
|
||||
properties2.require_lit_emulation = properties.has_lit_op;
|
||||
properties2.require_depth_conversion = m_prog.redirected_textures != 0;
|
||||
properties2.require_wpos = properties.has_wpos_input;
|
||||
properties2.require_texture_ops = properties.has_tex_op;
|
||||
properties2.emulate_shadow_compare = device_props.emulate_depth_compare;
|
||||
properties2.low_precision_tests = vk::get_current_renderer()->gpu().get_driver_vendor() == vk::driver_vendor::NVIDIA;
|
||||
|
||||
glsl::insert_glsl_legacy_function(OS, properties2);
|
||||
}
|
||||
|
||||
void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
|
||||
|
@ -194,7 +194,17 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
|
||||
|
||||
void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
|
||||
{
|
||||
glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op);
|
||||
glsl::shader_properties properties2;
|
||||
properties2.domain = glsl::glsl_vertex_program;
|
||||
properties2.require_lit_emulation = properties.has_lit_op;
|
||||
// Unused
|
||||
properties2.require_depth_conversion = false;
|
||||
properties2.require_wpos = false;
|
||||
properties2.require_texture_ops = false;
|
||||
properties2.emulate_shadow_compare = false;
|
||||
properties2.low_precision_tests = false;
|
||||
|
||||
glsl::insert_glsl_legacy_function(OS, properties2);
|
||||
glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv);
|
||||
|
||||
std::string parameters = "";
|
||||
|
Loading…
Reference in New Issue
Block a user