diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp index 455444ae4b..17f5e02383 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp @@ -733,7 +733,7 @@ bool FragmentProgramDecompiler::handle_sct_scb(u32 opcode) case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true; case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true; case RSX_FP_OPCODE_LIT: - SetDst("lit_legacy($0)"); + SetDst("_builtin_lit($0)"); properties.has_lit_op = true; return true; case RSX_FP_OPCODE_LIF: SetDst("$Ty(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)", OPFLAGS::skip_type_cast); return true; diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index e017299278..180efc0e11 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -5,24 +5,55 @@ namespace program_common { - static void insert_compare_op(std::ostream& OS) + static void insert_compare_op(std::ostream& OS, bool low_precision) { - OS << - "bool comparison_passes(float a, float b, uint func)\n" - "{\n" - " switch (func)\n" - " {\n" - " default:\n" - " case 0: return false; //never\n" - " case 1: return (a < b); //less\n" - " case 2: return (a == b); //equal\n" - " case 3: return (a <= b); //lequal\n" - " case 4: return (a > b); //greater\n" - " case 5: return (a != b); //nequal\n" - " case 6: return (a >= b); //gequal\n" - " case 7: return true; //always\n" - " }\n" - "}\n\n"; + if (low_precision) + { + OS << + "int compare(float a, float b)\n" + "{\n" + " if (abs(a - b) < 0.000001) return 2;\n" + " return (a > b)? 4 : 1;\n" + "}\n\n" + + "bool comparison_passes(float a, float b, uint func)\n" + "{\n" + " if (func == 0) return false; // never\n" + " if (func == 7) return true; // always\n\n" + + " int op = compare(a, b);\n" + " switch (func)\n" + " {\n" + " case 1: return op == 1; // less\n" + " case 2: return op == 2; // equal\n" + " case 3: return op <= 2; // lequal\n" + " case 4: return op == 4; // greater\n" + " case 5: return op != 2; // nequal\n" + " case 6: return (op == 4 || op == 2); // gequal\n" + " }\n\n" + + " return false; // unreachable\n" + "}\n\n"; + } + else + { + OS << + "bool comparison_passes(float a, float b, uint func)\n" + "{\n" + " switch (func)\n" + " {\n" + " default:\n" + " case 0: return false; //never\n" + " case 1: return (a < b); //less\n" + " case 2: return (a == b); //equal\n" + " case 3: return (a <= b); //lequal\n" + " case 4: return (a > b); //greater\n" + " case 5: return (a != b); //nequal\n" + " case 6: return (a >= b); //gequal\n" + " case 7: return true; //always\n" + " }\n" + "}\n\n"; + } } static void insert_compare_op_vector(std::ostream& OS) @@ -469,13 +500,27 @@ namespace glsl " ocol3 = " << reg3 << ";\n\n"; } - static void insert_glsl_legacy_function(std::ostream& OS, glsl::program_domain domain, bool require_lit_emulation, bool require_depth_conversion = false, bool require_wpos = false, bool require_texture_ops = true, bool emulate_pcf = false) + struct shader_properties + { + glsl::program_domain domain; + // Applicable in vertex stage + bool require_lit_emulation; + + // Only relevant for fragment programs + bool require_wpos; + bool require_depth_conversion; + bool require_texture_ops; + bool emulate_shadow_compare; + bool low_precision_tests; + }; + + static void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props) { OS << "#define _select mix\n"; OS << "#define _saturate(x) clamp(x, 0., 1.)\n"; OS << "#define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f)\n\n"; - if (require_lit_emulation) + if (props.require_lit_emulation) { OS << "vec4 lit_legacy(vec4 val)" @@ -492,7 +537,7 @@ namespace glsl "}\n\n"; } - if (domain == glsl::program_domain::glsl_vertex_program) + if (props.domain == glsl::program_domain::glsl_vertex_program) { OS << "vec4 apply_zclip_xform(vec4 pos, float near_plane, float far_plane)\n" @@ -512,9 +557,9 @@ namespace glsl return; } - program_common::insert_compare_op(OS); + program_common::insert_compare_op(OS, props.low_precision_tests); - if (require_texture_ops && emulate_pcf) + if (props.require_texture_ops && props.emulate_shadow_compare) { program_common::insert_compare_op_vector(OS); } @@ -550,7 +595,7 @@ namespace glsl " return pow((cs + 0.055) / 1.055, 2.4);\n" "}\n\n"; - if (require_depth_conversion) + if (props.require_depth_conversion) { //NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS) //The A component (Z) is useless (should contain stencil8 or just 1) @@ -594,9 +639,9 @@ namespace glsl "}\n\n"; } - if (require_texture_ops) + if (props.require_texture_ops) { - if (emulate_pcf) + if (props.emulate_shadow_compare) { OS << "vec4 shadowCompare(sampler2D tex, vec3 p, uint func)\n" @@ -671,7 +716,7 @@ namespace glsl "#define TEX2D_DEPTH_RGBA8(index, coord2) process_texel(texture2DReconstruct(TEX_NAME(index), TEX_NAME_STENCIL(index), coord2 * texture_parameters[index].xy, texture_parameters[index].z), floatBitsToUint(texture_parameters[index].w))\n"; - if (emulate_pcf) + if (props.emulate_shadow_compare) { OS << "#define TEX2D_SHADOW(index, coord3) shadowCompare(TEX_NAME(index), coord3 * vec3(texture_parameters[index].xy, 1.), floatBitsToUint(texture_parameters[index].w) >> 8)\n" @@ -692,7 +737,7 @@ namespace glsl "#define TEX3D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), coord4), floatBitsToUint(texture_parameters[index].w))\n\n"; } - if (require_wpos) + if (props.require_wpos) { OS << "vec4 get_wpos()\n" diff --git a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp index 719f932e11..7ca92801c7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp @@ -132,7 +132,7 @@ void insert_d3d12_legacy_function(std::ostream& OS, bool is_fragment_program) if (!is_fragment_program) return; - program_common::insert_compare_op(OS); + program_common::insert_compare_op(OS, false); OS << "uint packSnorm2x16(float2 val)"; OS << "{\n"; diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 6736753ad3..9d7a473c0f 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -196,8 +196,16 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) { - glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, - m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare); + glsl::shader_properties properties2; + properties2.domain = glsl::glsl_fragment_program; + properties2.require_lit_emulation = properties.has_lit_op; + properties2.require_depth_conversion = m_prog.redirected_textures != 0; + properties2.require_wpos = properties.has_wpos_input; + properties2.require_texture_ops = properties.has_tex_op; + properties2.emulate_shadow_compare = device_props.emulate_depth_compare; + properties2.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA; + + glsl::insert_glsl_legacy_function(OS, properties2); } void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index 738d1e9511..c4a9c0a353 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -154,8 +154,20 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std:: void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) { - insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op); - glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, gl::get_driver_caps().vendor_INTEL==false); + const auto& dev_caps = gl::get_driver_caps(); + + glsl::shader_properties properties2; + properties2.domain = glsl::glsl_vertex_program; + properties2.require_lit_emulation = properties.has_lit_op; + // Unused + properties2.require_depth_conversion = false; + properties2.require_wpos = false; + properties2.require_texture_ops = false; + properties2.emulate_shadow_compare = false; + properties2.low_precision_tests = false; + + insert_glsl_legacy_function(OS, properties2); + glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_opengl4, dev_caps.vendor_INTEL == false); std::string parameters = ""; for (int i = 0; i < 16; ++i) @@ -306,7 +318,7 @@ void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS) //SEE Naruto: UNS //NOTE: On GPUs, poor fp32 precision means dividing z by w, then multiplying by w again gives slightly incorrect results - //This equation is simplified algebraically to an addition and subreaction which gives more accurate results (Fixes flickering skybox in Dark Souls 2) + //This equation is simplified algebraically to an addition and subtraction which gives more accurate results (Fixes flickering skybox in Dark Souls 2) //OS << " float ndc_z = gl_Position.z / gl_Position.w;\n"; //OS << " ndc_z = (ndc_z * 2.) - 1.;\n"; //OS << " gl_Position.z = ndc_z * gl_Position.w;\n"; diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 076030cfbc..9e0b9611a3 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -228,8 +228,16 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) { - glsl::insert_glsl_legacy_function(OS, glsl::glsl_fragment_program, properties.has_lit_op, - m_prog.redirected_textures != 0, properties.has_wpos_input, properties.has_tex_op, device_props.emulate_depth_compare); + glsl::shader_properties properties2; + properties2.domain = glsl::glsl_fragment_program; + properties2.require_lit_emulation = properties.has_lit_op; + properties2.require_depth_conversion = m_prog.redirected_textures != 0; + properties2.require_wpos = properties.has_wpos_input; + properties2.require_texture_ops = properties.has_tex_op; + properties2.emulate_shadow_compare = device_props.emulate_depth_compare; + properties2.low_precision_tests = vk::get_current_renderer()->gpu().get_driver_vendor() == vk::driver_vendor::NVIDIA; + + glsl::insert_glsl_legacy_function(OS, properties2); } void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index e49bde0b91..197e970575 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -194,7 +194,17 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std:: void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS) { - glsl::insert_glsl_legacy_function(OS, glsl::glsl_vertex_program, properties.has_lit_op); + glsl::shader_properties properties2; + properties2.domain = glsl::glsl_vertex_program; + properties2.require_lit_emulation = properties.has_lit_op; + // Unused + properties2.require_depth_conversion = false; + properties2.require_wpos = false; + properties2.require_texture_ops = false; + properties2.emulate_shadow_compare = false; + properties2.low_precision_tests = false; + + glsl::insert_glsl_legacy_function(OS, properties2); glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_rpirv); std::string parameters = "";