diff --git a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp index f073f6e1e3..edf78acd31 100644 --- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp @@ -216,7 +216,8 @@ std::string VertexProgramDecompiler::Format(const std::string& code) return "if(" + cond + ") "; } }, - { "$cond", std::bind(std::mem_fn(&VertexProgramDecompiler::GetCond), this) } + { "$cond", std::bind(std::mem_fn(&VertexProgramDecompiler::GetCond), this) }, + { "$ifbcond", std::bind(std::mem_fn(&VertexProgramDecompiler::GetOptionalBranchCond), this) } }; return fmt::replace_all(code, repl_list); @@ -257,6 +258,14 @@ std::string VertexProgramDecompiler::GetCond() return "any(" + compareFunction(cond_string_table[d0.cond], "cc" + std::to_string(d0.cond_reg_sel_1) + swizzle, getFloatTypeName(4) + "(0., 0., 0., 0.)" + swizzle) + ")"; } +std::string VertexProgramDecompiler::GetOptionalBranchCond() +{ + std::string cond_operator = d3.brb_cond_true ? " != " : " == "; + std::string cond = "(transform_branch_bits & (1 << " + std::to_string(d3.branch_index) + "))" + cond_operator + "0"; + + return "if (" + cond + ")"; +} + void VertexProgramDecompiler::AddCodeCond(const std::string& dst, const std::string& src) { enum @@ -476,15 +485,15 @@ std::string VertexProgramDecompiler::Decompile() switch (d1.sca_opcode) { - case 0x08: //BRA - LOG_ERROR(RSX, "BRA found. 
Please report to RPCS3 team."); + case RSX_SCA_OPCODE_BRA: is_has_BRA = true; m_jump_lvls.clear(); d3.HEX = m_data[++i]; i += 4; break; - case 0x09: //BRI + case RSX_SCA_OPCODE_BRB: + case RSX_SCA_OPCODE_BRI: d2.HEX = m_data[i++]; d3.HEX = m_data[i]; i += 2; @@ -526,6 +535,21 @@ std::string VertexProgramDecompiler::Decompile() m_cur_instr->open_scopes++; } + auto find_jump_lvl = [this](u32 address) + { + u32 jump = 1; + + for (auto pos : m_jump_lvls) + { + if (address == pos) + break; + + ++jump; + } + + return jump; + }; + for (u32 i = 0; i < m_instr_count; ++i) { m_cur_instr = &m_instructions[i]; @@ -605,7 +629,7 @@ std::string VertexProgramDecompiler::Decompile() case RSX_SCA_OPCODE_LIT: SetDSTSca("lit_legacy($s)"); break; case RSX_SCA_OPCODE_BRA: { - AddCode("$if ($cond)"); + AddCode("$if ($cond) //BRA"); AddCode("{"); m_cur_instr->open_scopes++; AddCode("jump_position = $a$am;"); @@ -616,26 +640,9 @@ std::string VertexProgramDecompiler::Decompile() break; case RSX_SCA_OPCODE_BRI: // works differently (BRI o[1].x(TR) L0;) { - u32 jump_position = 1; + u32 jump_position = find_jump_lvl(GetAddr()); - if (is_has_BRA) - { - jump_position = GetAddr(); - } - else - { - u32 addr = GetAddr(); - - for (auto pos : m_jump_lvls) - { - if (addr == pos) - break; - - ++jump_position; - } - } - - AddCode("$ifcond "); + AddCode("$ifcond //BRI"); AddCode("{"); m_cur_instr->open_scopes++; AddCode(fmt::format("jump_position = %u;", jump_position)); @@ -662,11 +669,31 @@ std::string VertexProgramDecompiler::Decompile() case RSX_SCA_OPCODE_COS: SetDSTSca("cos($s)"); break; case RSX_SCA_OPCODE_BRB: // works differently (BRB o[1].x !b0, L0;) - LOG_ERROR(RSX, "Unimplemented sca_opcode BRB"); + { + LOG_WARNING(RSX, "sca_opcode BRB, d0=0x%X, d1=0x%X, d2=0x%X, d3=0x%X", d0.HEX, d1.HEX, d2.HEX, d3.HEX); + AddCode(fmt::format("//BRB opcode, d0=0x%X, d1=0x%X, d2=0x%X, d3=0x%X", d0.HEX, d1.HEX, d2.HEX, d3.HEX)); + + u32 jump_position = find_jump_lvl(GetAddr()); + + AddCode("$ifbcond //BRB"); + 
AddCode("{"); + m_cur_instr->open_scopes++; + AddCode(fmt::format("jump_position = %u;", jump_position)); + AddCode("continue;"); + m_cur_instr->close_scopes++; + AddCode("}"); + AddCode(""); + break; + } case RSX_SCA_OPCODE_CLB: break; // works same as BRB - LOG_ERROR(RSX, "Unimplemented sca_opcode CLB"); + LOG_WARNING(RSX, "sca_opcode CLB, d0=0x%X, d1=0x%X, d2=0x%X, d3=0x%X", d0.HEX, d1.HEX, d2.HEX, d3.HEX); + AddCode("//CLB"); + + AddCode("$ifbcond $f(); //CLB"); + AddCode(""); + break; case RSX_SCA_OPCODE_PSH: break; // works differently (PSH o[1].x A0;) @@ -703,5 +730,6 @@ std::string VertexProgramDecompiler::Decompile() { m_funcs.erase(m_funcs.begin() + 2, m_funcs.end()); } + return result; } diff --git a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h index d0d99c72fb..a40fff855e 100644 --- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h @@ -69,6 +69,7 @@ struct VertexProgramDecompiler std::string GetFunc(); std::string GetTex(); std::string GetCond(); + std::string GetOptionalBranchCond(); //Conditional branch expression modified externally at runtime std::string AddAddrMask(); std::string AddAddrReg(); std::string AddAddrRegWithoutMask(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 96a275c73b..ca5145fda2 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -82,18 +82,18 @@ namespace void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex) { - size_t heap_offset = m_buffer_data.alloc(256); + size_t heap_offset = m_buffer_data.alloc(512); // Scale offset buffer // Separate constant buffer - void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + 256)); + void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + 512)); fill_scale_offset_data(mapped_buffer); fill_fragment_state_buffer((char 
*)mapped_buffer + 64, m_fragment_program); - m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + 256)); + m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + 512)); D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = { m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, - 256 + 512 }; m_device->CreateConstantBufferView(&constant_buffer_view_desc, CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetCPUDescriptorHandleForHeapStart()) @@ -108,6 +108,7 @@ void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_in void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); fill_vertex_program_constants_data(mapped_buffer); + *(reinterpret_cast<u32*>((char *)mapped_buffer + (468 * 4 * sizeof(float)))) = rsx::method_registers.transform_branch_bits(); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 5c29b017bd..e2058af742 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -6,6 +6,7 @@ #include "D3D12GSRender.h" #include "D3D12Formats.h" #include "../rsx_methods.h" +#include "../rsx_utils.h" #define TO_STRING(x) #x @@ -88,21 +89,7 @@ void D3D12GSRender::load_program() D3D12_BLEND d3d_sfactor_alpha = get_blend_factor_alpha(sfactor_a); D3D12_BLEND d3d_dfactor_alpha = get_blend_factor_alpha(dfactor_a); - FLOAT BlendColor[4]; - - //TODO: Check surface color format for u16 colors - { - u8 blend_color_r = rsx::method_registers.blend_color_8b_r(); - u8 blend_color_g = rsx::method_registers.blend_color_8b_g(); - u8 blend_color_b = rsx::method_registers.blend_color_8b_b(); - u8 blend_color_a = rsx::method_registers.blend_color_8b_a(); - - BlendColor[0] = blend_color_r / 255.f; - BlendColor[1] = blend_color_g / 255.f; 
- BlendColor[2] = blend_color_b / 255.f; - BlendColor[3] = blend_color_a / 255.f; - } - + auto BlendColor = rsx::get_constant_blend_colors(); bool color_blend_possible = true; if (sfactor_rgb == rsx::blend_factor::constant_alpha || @@ -144,7 +131,7 @@ void D3D12GSRender::load_program() } else { - get_current_resource_storage().command_list->OMSetBlendFactor(BlendColor); + get_current_resource_storage().command_list->OMSetBlendFactor(BlendColor.data()); } prop.Blend.RenderTarget[0].BlendEnable = true; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 8a249f0913..f1db01bc6e 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -79,11 +79,8 @@ void D3D12VertexProgramDecompiler::insertConstants(std::stringstream & OS, const { OS << "cbuffer CONSTANT_BUFFER : register(b1)" << std::endl; OS << "{" << std::endl; - for (const ParamType PT : constants) - { - for (const ParamItem &PI : PT.items) - OS << " " << PT.type << " " << PI.name << ";" << std::endl; - } + OS << " float4 vc[468];" << std::endl; + OS << " uint transform_branch_bits;" << std::endl; OS << "};" << std::endl; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 6b5a7afbe1..0221527a3b 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -226,24 +226,8 @@ void GLGSRender::begin() blend_factor(rsx::method_registers.blend_func_sfactor_a()), blend_factor(rsx::method_registers.blend_func_dfactor_a())); - if (rsx::method_registers.surface_color() == rsx::surface_color_format::w16z16y16x16) //TODO: check another color formats - { - u16 blend_color_r = rsx::method_registers.blend_color_16b_r(); - u16 blend_color_g = rsx::method_registers.blend_color_16b_g(); - u16 blend_color_b = rsx::method_registers.blend_color_16b_b(); - u16 blend_color_a = rsx::method_registers.blend_color_16b_a(); - - __glcheck 
glBlendColor(blend_color_r / 65535.f, blend_color_g / 65535.f, blend_color_b / 65535.f, blend_color_a / 65535.f); - } - else - { - u8 blend_color_r = rsx::method_registers.blend_color_8b_r(); - u8 blend_color_g = rsx::method_registers.blend_color_8b_g(); - u8 blend_color_b = rsx::method_registers.blend_color_8b_b(); - u8 blend_color_a = rsx::method_registers.blend_color_8b_a(); - - __glcheck glBlendColor(blend_color_r / 255.f, blend_color_g / 255.f, blend_color_b / 255.f, blend_color_a / 255.f); - } + auto blend_colors = rsx::get_constant_blend_colors(); + __glcheck glBlendColor(blend_colors[0], blend_colors[1], blend_colors[2], blend_colors[3]); __glcheck glBlendEquationSeparate(blend_equation(rsx::method_registers.blend_equation_rgb()), blend_equation(rsx::method_registers.blend_equation_a())); @@ -733,6 +717,7 @@ bool GLGSRender::load_program() m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr); m_program->use(); + if (old_program == m_program && !m_transform_constants_dirty) { //This path is taken alot so the savings are tangible @@ -743,6 +728,7 @@ bool GLGSRender::load_program() float fog0, fog1; u32 alpha_tested; float alpha_ref; + u32 transform_branch_bits; } tmp = {}; @@ -758,7 +744,9 @@ bool GLGSRender::load_program() tmp.fog1 = rsx::method_registers.fog_params_1(); tmp.alpha_tested = rsx::method_registers.alpha_test_enabled(); tmp.alpha_ref = rsx::method_registers.alpha_ref(); + tmp.transform_branch_bits = rsx::method_registers.transform_branch_bits(); + //TODO: Faster comparison algorithm size_t old_hash = m_transform_buffer_hash; m_transform_buffer_hash = 0; @@ -795,6 +783,7 @@ bool GLGSRender::load_program() buf = static_cast<u8*>(mapping.first); vertex_constants_offset = mapping.second; fill_vertex_program_constants_data(buf); + *(reinterpret_cast<u32*>(buf + (468 * 4 * sizeof(float)))) = rsx::method_registers.transform_branch_bits(); // Fragment constants mapping = 
m_uniform_ring_buffer->alloc_from_heap(fragment_buffer_size, m_uniform_buffer_offset_align); diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index fff8cee9c7..0227b24b5e 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -92,6 +92,7 @@ void GLVertexDecompilerThread::insertConstants(std::stringstream & OS, const std OS << "layout(std140, binding = 1) uniform VertexConstantsBuffer" << std::endl; OS << "{" << std::endl; OS << " vec4 vc[468];" << std::endl; + OS << " uint transform_branch_bits;" << std::endl; OS << "};" << std::endl << std::endl; for (const ParamType &PT: constants) diff --git a/rpcs3/Emu/RSX/RSXVertexProgram.h b/rpcs3/Emu/RSX/RSXVertexProgram.h index 9b74ccf687..9167420c78 100644 --- a/rpcs3/Emu/RSX/RSXVertexProgram.h +++ b/rpcs3/Emu/RSX/RSXVertexProgram.h @@ -121,7 +121,7 @@ union D2 struct { u32 : 8; - u32 tex_num : 2; /* Actual field may be 4 bits wide, but we only have 4 TIUs */ + u32 tex_num : 2; // Actual field may be 4 bits wide, but we only have 4 TIUs u32 : 22; }; }; @@ -146,9 +146,12 @@ union D3 u32 sca_writemask_x : 1; u32 src2l : 11; }; + struct { - u32 : 29; + u32 : 23; + u32 branch_index : 5; //Index into transform_program_branch_bits [x] + u32 brb_cond_true : 1; //If set, branch is taken if (b[x]) else if (!b[x]) u32 iaddrl : 3; }; }; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 63bcfbc8db..b7fcf1972c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -3,6 +3,7 @@ #include "Emu/System.h" #include "VKGSRender.h" #include "../rsx_methods.h" +#include "../rsx_utils.h" #include "../Common/BufferUtils.h" #include "VKFormats.h" @@ -1041,7 +1042,6 @@ bool VKGSRender::load_program() VkBlendOp equation_rgb = vk::get_blend_op(rsx::method_registers.blend_equation_rgb()); VkBlendOp equation_a = vk::get_blend_op(rsx::method_registers.blend_equation_a()); - //TODO: Separate target blending 
for (u8 idx = 0; idx < m_draw_buffers_count; ++idx) { properties.att_state[render_targets[idx]].blendEnable = VK_TRUE; @@ -1052,6 +1052,12 @@ bool VKGSRender::load_program() properties.att_state[render_targets[idx]].colorBlendOp = equation_rgb; properties.att_state[render_targets[idx]].alphaBlendOp = equation_a; } + + auto blend_colors = rsx::get_constant_blend_colors(); + properties.cs.blendConstants[0] = blend_colors[0]; + properties.cs.blendConstants[1] = blend_colors[1]; + properties.cs.blendConstants[2] = blend_colors[2]; + properties.cs.blendConstants[3] = blend_colors[3]; } else { @@ -1181,6 +1187,7 @@ bool VKGSRender::load_program() const size_t vertex_constants_offset = m_uniform_buffer_ring_info.alloc<256>(512 * 4 * sizeof(float)); buf = (u8*)m_uniform_buffer_ring_info.map(vertex_constants_offset, 512 * 4 * sizeof(float)); fill_vertex_program_constants_data(buf); + *(reinterpret_cast<u32*>(buf + (468 * 4 * sizeof(float)))) = rsx::method_registers.transform_branch_bits(); m_uniform_buffer_ring_info.unmap(); const size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 9cfb163920..cebc400a43 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -102,6 +102,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std OS << "layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer" << std::endl; OS << "{" << std::endl; OS << " vec4 vc[468];" << std::endl; + OS << " uint transform_branch_bits;" << std::endl; OS << "};" << std::endl << std::endl; vk::glsl::program_input in; diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index e6b42ce28d..1141b14887 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -1140,6 +1140,11 @@ namespace rsx { return decode().transform_constant_load(); } + + u32 
transform_branch_bits() + { + return registers[NV4097_SET_TRANSFORM_BRANCH_BITS]; + } }; extern rsx_state method_registers; diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp index af68d725c2..2b231da3c8 100644 --- a/rpcs3/Emu/RSX/rsx_utils.cpp +++ b/rpcs3/Emu/RSX/rsx_utils.cpp @@ -105,4 +105,28 @@ namespace rsx fill_scale_offset_matrix(buffer, transpose, offset_x, offset_y, offset_z, scale_x, scale_y, scale_z); } + + //Convert decoded integer values for CONSTANT_BLEND_FACTOR into f32 array in 0-1 range + std::array<float, 4> get_constant_blend_colors() + { + //TODO: check another color formats (probably all integer formats with > 8-bits wide channels) + if (rsx::method_registers.surface_color() == rsx::surface_color_format::w16z16y16x16) + { + u16 blend_color_r = rsx::method_registers.blend_color_16b_r(); + u16 blend_color_g = rsx::method_registers.blend_color_16b_g(); + u16 blend_color_b = rsx::method_registers.blend_color_16b_b(); + u16 blend_color_a = rsx::method_registers.blend_color_16b_a(); + + return { blend_color_r / 65535.f, blend_color_g / 65535.f, blend_color_b / 65535.f, blend_color_a / 65535.f }; + } + else + { + u8 blend_color_r = rsx::method_registers.blend_color_8b_r(); + u8 blend_color_g = rsx::method_registers.blend_color_8b_g(); + u8 blend_color_b = rsx::method_registers.blend_color_8b_b(); + u8 blend_color_a = rsx::method_registers.blend_color_8b_a(); + + return { blend_color_r / 255.f, blend_color_g / 255.f, blend_color_b / 255.f, blend_color_a / 255.f }; + } + } } diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index b6a5be8332..1357484c13 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -120,4 +120,6 @@ namespace rsx float scale_x, float scale_y, float scale_z); void fill_window_matrix(void *dest, bool transpose); void fill_viewport_matrix(void *buffer, bool transpose); + + std::array<float, 4> get_constant_blend_colors(); }