From 3fc471eb322c0902f2559383e99f4c4753763437 Mon Sep 17 00:00:00 2001 From: raven02 Date: Wed, 17 Dec 2014 22:01:59 +0800 Subject: [PATCH] RSX: shaders fix and vertex texture implementation --- Utilities/StrFmt.cpp | 9 +- rpcs3/Emu/FS/VFS.cpp | 15 +- rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 517 +++++++++++++--------- rpcs3/Emu/RSX/GL/GLFragmentProgram.h | 1 + rpcs3/Emu/RSX/GL/GLGSRender.cpp | 51 ++- rpcs3/Emu/RSX/GL/GLGSRender.h | 1 + rpcs3/Emu/RSX/GL/GLProgram.cpp | 7 + rpcs3/Emu/RSX/GL/GLProgram.h | 1 + rpcs3/Emu/RSX/GL/GLShaderParam.h | 108 ++++- rpcs3/Emu/RSX/GL/GLVertexProgram.cpp | 216 ++++++--- rpcs3/Emu/RSX/GL/GLVertexProgram.h | 3 + rpcs3/Emu/RSX/RSXTexture.cpp | 203 +++++++++ rpcs3/Emu/RSX/RSXTexture.h | 63 ++- rpcs3/Emu/RSX/RSXThread.cpp | 52 ++- rpcs3/Emu/RSX/RSXThread.h | 1 + rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp | 4 +- rpcs3/Emu/SysCalls/Modules/sceNp.cpp | 11 +- rpcs3/Gui/MainFrame.cpp | 4 + rpcs3/Ini.h | 4 + rpcs3/rpcs3.cpp | 164 ++++++- 20 files changed, 1101 insertions(+), 334 deletions(-) diff --git a/Utilities/StrFmt.cpp b/Utilities/StrFmt.cpp index 991f63ec35..8fbdeda31c 100644 --- a/Utilities/StrFmt.cpp +++ b/Utilities/StrFmt.cpp @@ -137,14 +137,17 @@ std::vector fmt::split(const std::string& source, std::initializer_ std::string fmt::merge(std::vector source, const std::string& separator) { + if (!source.size()) + return ""; + std::string result; - for (auto &s : source) + for (int i = 0; i < source.size() - 1; ++i) { - result += s + separator; + result += source[i] + separator; } - return result; + return result + source[source.size() - 1]; } std::string fmt::merge(std::initializer_list> sources, const std::string& separator) diff --git a/rpcs3/Emu/FS/VFS.cpp b/rpcs3/Emu/FS/VFS.cpp index d963d9c91f..01489759a6 100644 --- a/rpcs3/Emu/FS/VFS.cpp +++ b/rpcs3/Emu/FS/VFS.cpp @@ -32,21 +32,12 @@ std::string simplify_path(const std::string& path, bool is_dir) { std::vector path_blocks = simplify_path_blocks(path); - std::string result; - if (path_blocks.empty()) - return result; + return ""; - if (is_dir) - { - result = fmt::merge(path_blocks, "/"); - } - else - { - result = fmt::merge(std::vector(path_blocks.begin(), path_blocks.end() - 1), "/") + path_blocks[path_blocks.size() - 1]; - } + std::string result = fmt::merge(path_blocks, "/"); - return result; + return is_dir ? result + "/" : result; } VFS::~VFS() diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index f72d5f9350..e62608832d 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -6,9 +6,9 @@ void GLFragmentDecompilerThread::SetDst(std::string code, bool append_mask) { - if(!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) return; + if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) return; - switch(src1.scale) + switch (src1.scale) { case 0: break; case 1: code = "(" + code + " * 2.0)"; break; @@ -21,27 +21,39 @@ void GLFragmentDecompilerThread::SetDst(std::string code, bool append_mask) default: LOG_ERROR(RSX, "Bad scale: %d", fmt::by_value(src1.scale)); Emu.Pause(); - break; + break; } - if(dst.saturate) + if (dst.saturate) { code = "clamp(" + code + ", 0.0, 1.0)"; } - std::string dest; + code += (append_mask ? "$m" : ""); + + if (dst.no_dest) + { + if (dst.set_cond) + { + AddCode("$ifcond " + m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + code + ";"); + } + else + { + AddCode("$ifcond " + code + ";"); + } + + return; + } + + std::string dest = AddReg(dst.dest_reg, dst.fp16) + "$m"; + + AddCodeCond(dest, code); + //AddCode("$ifcond " + dest + code + (append_mask ? "$m;" : ";")); if (dst.set_cond) { - dest += m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = "; + AddCode(m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(src0.cond_mod_reg_index)) + "$m = " + dest + ";"); } - - if (!dst.no_dest) - { - dest += AddReg(dst.dest_reg, dst.fp16) + "$m = "; - } - - AddCode("$ifcond " + dest + code + (append_mask ? "$m;" : ";")); } void GLFragmentDecompilerThread::AddCode(const std::string& code) @@ -58,17 +70,17 @@ std::string GLFragmentDecompilerThread::GetMask() 'x', 'y', 'z', 'w', }; - if(dst.mask_x) ret += dst_mask[0]; - if(dst.mask_y) ret += dst_mask[1]; - if(dst.mask_z) ret += dst_mask[2]; - if(dst.mask_w) ret += dst_mask[3]; + if (dst.mask_x) ret += dst_mask[0]; + if (dst.mask_y) ret += dst_mask[1]; + if (dst.mask_z) ret += dst_mask[2]; + if (dst.mask_w) ret += dst_mask[3]; return ret.empty() || strncmp(ret.c_str(), dst_mask, 4) == 0 ? "" : ("." + ret); } std::string GLFragmentDecompilerThread::AddReg(u32 index, int fp16) { - return m_parr.AddParam(PARAM_NONE, "vec4", std::string(fp16 ? "h" : "r") + std::to_string(index), "vec4(0.0, 0.0, 0.0, 0.0)"); + return m_parr.AddParam(PARAM_NONE, "vec4", std::string(fp16 ? "h" : "r") + std::to_string(index), "vec4(0.0)"); } bool GLFragmentDecompilerThread::HasReg(u32 index, int fp16) @@ -79,13 +91,13 @@ bool GLFragmentDecompilerThread::HasReg(u32 index, int fp16) std::string GLFragmentDecompilerThread::AddCond() { - return m_parr.AddParam(PARAM_NONE , "vec4", "cc" + std::to_string(src0.cond_reg_index)); + return m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(src0.cond_reg_index)); } std::string GLFragmentDecompilerThread::AddConst() { std::string name = std::string("fc") + std::to_string(m_size + 4 * 4); - if(m_parr.HasParam(PARAM_UNIFORM, "vec4", name)) + if (m_parr.HasParam(PARAM_UNIFORM, "vec4", name)) { return name; } @@ -118,11 +130,11 @@ std::string GLFragmentDecompilerThread::Format(const std::string& code) { "$t", std::bind(std::mem_fn(&GLFragmentDecompilerThread::AddTex), this) }, { "$m", std::bind(std::mem_fn(&GLFragmentDecompilerThread::GetMask), this) }, { "$ifcond ", [this]() -> std::string - { - const std::string& cond = GetCond(); - if (cond == "true") return ""; - return "if(" + cond + ") "; - } + { + const std::string& cond = GetCond(); + if (cond == "true") return ""; + return "if(" + cond + ") "; + } }, { "$cond", std::bind(std::mem_fn(&GLFragmentDecompilerThread::GetCond), this) }, { "$c", std::bind(std::mem_fn(&GLFragmentDecompilerThread::AddConst), this) } @@ -179,15 +191,83 @@ std::string GLFragmentDecompilerThread::GetCond() return "any(" + cond + "(" + AddCond() + swizzle + ", vec4(0.0)))"; } +void GLFragmentDecompilerThread::AddCodeCond(const std::string& dst, const std::string& src) +{ + if (src0.exec_if_gr && src0.exec_if_lt && src0.exec_if_eq) + { + AddCode(dst + " = " + src + ";"); + return; + } + + if (!src0.exec_if_gr && !src0.exec_if_lt && !src0.exec_if_eq) + { + AddCode("//" + dst + " = " + src + ";"); + return; + } + + static const char f[4] = { 'x', 'y', 'z', 'w' }; + + std::string swizzle, cond; + swizzle += f[src0.cond_swizzle_x]; + swizzle += f[src0.cond_swizzle_y]; + swizzle += f[src0.cond_swizzle_z]; + swizzle += f[src0.cond_swizzle_w]; + swizzle = swizzle == "xyzw" ? "" : "." + swizzle; + + if (src0.exec_if_gr && src0.exec_if_eq) + { + cond = "greaterThanEqual"; + } + else if (src0.exec_if_lt && src0.exec_if_eq) + { + cond = "lessThanEqual"; + } + else if (src0.exec_if_gr && src0.exec_if_lt) + { + cond = "notEqual"; + } + else if (src0.exec_if_gr) + { + cond = "greaterThan"; + } + else if (src0.exec_if_lt) + { + cond = "lessThan"; + } + else //if(src0.exec_if_eq) + { + cond = "equal"; + } + + cond = cond + "(" + AddCond() + swizzle + ", vec4(0.0))"; + + ShaderVar dst_var(dst); + dst_var.symplify(); + + //const char *c_mask = f; + + if (dst_var.swizzles[0].length() == 1) + { + AddCode("if (" + cond + ".x) " + dst + " = vec4(" + src + ").x;"); + } + else + { + for (int i = 0; i < dst_var.swizzles[0].length(); ++i) + { + AddCode("if (" + cond + "." + f[i] + ") " + dst + "." + f[i] + " = " + src + "." + f[i] + ";"); + } + } +} + template std::string GLFragmentDecompilerThread::GetSRC(T src) { std::string ret; - switch(src.reg_type) + switch (src.reg_type) { case 0: //tmp ret += AddReg(src.tmp_reg_index, src.fp16); - break; + break; case 1: //input { @@ -200,11 +280,11 @@ template std::string GLFragmentDecompilerThread::GetSRC(T src) "ssa" }; - switch(dst.src_attr_reg_num) + switch (dst.src_attr_reg_num) { case 0x00: ret += reg_table[0]; break; default: - if(dst.src_attr_reg_num < sizeof(reg_table)/sizeof(reg_table[0])) + if (dst.src_attr_reg_num < sizeof(reg_table) / sizeof(reg_table[0])) { ret += m_parr.AddParam(PARAM_IN, "vec4", reg_table[dst.src_attr_reg_num]); } @@ -214,22 +294,22 @@ template std::string GLFragmentDecompilerThread::GetSRC(T src) ret += m_parr.AddParam(PARAM_IN, "vec4", "unk"); Emu.Pause(); } - break; + break; } } break; case 2: //const ret += AddConst(); - break; + break; default: LOG_ERROR(RSX, "Bad src type %d", fmt::by_value(src.reg_type)); Emu.Pause(); - break; + break; } - static const char f[4] = {'x', 'y', 'z', 'w'}; + static const char f[4] = { 'x', 'y', 'z', 'w' }; std::string swizzle = ""; swizzle += f[src.swizzle_x]; @@ -237,10 +317,10 @@ template std::string GLFragmentDecompilerThread::GetSRC(T src) swizzle += f[src.swizzle_z]; swizzle += f[src.swizzle_w]; - if(strncmp(swizzle.c_str(), f, 4) != 0) ret += "." + swizzle; + if (strncmp(swizzle.c_str(), f, 4) != 0) ret += "." + swizzle; - if(src.abs) ret = "abs(" + ret + ")"; - if(src.neg) ret = "-" + ret; + if (src.abs) ret = "abs(" + ret + ")"; + if (src.neg) ret = "-" + ret; return ret; } @@ -251,10 +331,9 @@ std::string GLFragmentDecompilerThread::BuildCode() const std::pair table[] = { { "ocol0", m_ctrl & 0x40 ? "r0" : "h0" }, - { "ocol1", m_ctrl & 0x40 ? "r2" : "h2" }, - { "ocol2", m_ctrl & 0x40 ? "r3" : "h4" }, - { "ocol3", m_ctrl & 0x40 ? "r4" : "h6" }, - { "ocol4", m_ctrl & 0x40 ? "r5" : "h8" }, + { "ocol1", m_ctrl & 0x40 ? "r2" : "h4" }, + { "ocol2", m_ctrl & 0x40 ? "r3" : "h6" }, + { "ocol3", m_ctrl & 0x40 ? "r4" : "h8" }, }; for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) @@ -263,7 +342,7 @@ std::string GLFragmentDecompilerThread::BuildCode() AddCode(m_parr.AddParam(PARAM_OUT, "vec4", table[i].first, i) + " = " + table[i].second + ";"); } - if (m_ctrl & 0xe) main += "\tgl_FragDepth = r1.z;\n"; + if (m_ctrl & 0xe) main += m_ctrl & 0x40 ? "\tgl_FragDepth = r1.z;\n" : "\tgl_FragDepth = h2.z;\n"; std::string p; @@ -285,7 +364,16 @@ void GLFragmentDecompilerThread::Task() m_loop_count = 0; m_code_level = 1; - while(true) + enum + { + FORCE_NONE, + FORCE_SCT, + FORCE_SCB, + }; + + int forced_unit = FORCE_NONE; + + while (true) { for (auto finded = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size); finded != m_end_offsets.end(); @@ -318,164 +406,195 @@ void GLFragmentDecompilerThread::Task() const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); - switch(opcode) + auto SCT = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; + case RSX_FP_OPCODE_DIV: SetDst("($0 / $1)"); break; + case RSX_FP_OPCODE_DIVSQ: SetDst("($0 / sqrt($1))"); break; + case RSX_FP_OPCODE_DP2: SetDst("vec4(dot($0.xy, $1.xy))"); break; + case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; + case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; + case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * $1.y + $2.x)"); break; + case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); break; + case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); break; + case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); break; + case RSX_FP_OPCODE_MOV: SetDst("$0"); break; + case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); break; + case RSX_FP_OPCODE_RCP: SetDst("1 / $0"); break; + case RSX_FP_OPCODE_RSQ: SetDst("inversesqrt(abs($0))"); break; + case RSX_FP_OPCODE_SEQ: SetDst("vec4(equal($0, $1))"); break; + case RSX_FP_OPCODE_SFL: SetDst("vec4(0.0)"); break; + case RSX_FP_OPCODE_SGE: SetDst("vec4(greaterThanEqual($0, $1))"); break; + case RSX_FP_OPCODE_SGT: SetDst("vec4(greaterThan($0, $1))"); break; + case RSX_FP_OPCODE_SLE: SetDst("vec4(lessThanEqual($0, $1))"); break; + case RSX_FP_OPCODE_SLT: SetDst("vec4(lessThan($0, $1))"); break; + case RSX_FP_OPCODE_SNE: SetDst("vec4(notEqual($0, $1))"); break; + case RSX_FP_OPCODE_STR: SetDst("vec4(1.0)"); break; + + default: + return false; + } + + return true; + }; + + auto SCB = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; + case RSX_FP_OPCODE_COS: SetDst("cos($0)"); break; + case RSX_FP_OPCODE_DP2: SetDst("vec4(dot($0.xy, $1.xy))"); break; + case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; + case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; + case RSX_FP_OPCODE_DP2A: SetDst("vec4($0.x * $1.x + $0.y * $1.y + $2.x)"); break; + case RSX_FP_OPCODE_DST: SetDst("vec4(distance($0, $1))"); break; + case RSX_FP_OPCODE_EX2: SetDst("exp2($0)"); break; + case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); break; + case RSX_FP_OPCODE_FRC: SetDst("fract($0)"); break; + case RSX_FP_OPCODE_LIF: SetDst("vec4(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); break; + case RSX_FP_OPCODE_LG2: SetDst("log2($0)"); break; + case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); break; + case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); break; + case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); break; + case RSX_FP_OPCODE_MOV: SetDst("$0"); break; + case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); break; + case RSX_FP_OPCODE_PK2: LOG_ERROR(RSX, "Unimplemented SCB instruction: PK2"); break; + case RSX_FP_OPCODE_PK4: LOG_ERROR(RSX, "Unimplemented SCB instruction: PK4"); break; + case RSX_FP_OPCODE_PK16: LOG_ERROR(RSX, "Unimplemented SCB instruction: PK16"); break; + case RSX_FP_OPCODE_PKB: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKB"); break; + case RSX_FP_OPCODE_PKG: LOG_ERROR(RSX, "Unimplemented SCB instruction: PKG"); break; + case RSX_FP_OPCODE_SEQ: SetDst("vec4(equal($0, $1))"); break; + case RSX_FP_OPCODE_SFL: SetDst("vec4(0.0)"); break; + case RSX_FP_OPCODE_SGE: SetDst("vec4(greaterThanEqual($0, $1))"); break; + case RSX_FP_OPCODE_SGT: SetDst("vec4(greaterThan($0, $1))"); break; + case RSX_FP_OPCODE_SIN: SetDst("sin($0)"); break; + case RSX_FP_OPCODE_SLE: SetDst("vec4(lessThanEqual($0, $1))"); break; + case RSX_FP_OPCODE_SLT: SetDst("vec4(lessThan($0, $1))"); break; + case RSX_FP_OPCODE_SNE: SetDst("vec4(notEqual($0, $1))"); break; + case RSX_FP_OPCODE_STR: SetDst("vec4(1.0)"); break; + + default: + return false; + } + + return true; + }; + + auto TEX_SRB = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_DDX: SetDst("dFdx($0)"); break; + case RSX_FP_OPCODE_DDY: SetDst("dFdy($0)"); break; + case RSX_FP_OPCODE_NRM: SetDst("normalize($0)"); break; + case RSX_FP_OPCODE_TEX: SetDst("texture($t, $0.xy)"); break; + case RSX_FP_OPCODE_TXP: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: TXP"); break; + case RSX_FP_OPCODE_TXD: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: TXD"); break; + case RSX_FP_OPCODE_TXB: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: TXB"); break; + case RSX_FP_OPCODE_TXL: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: TXL"); break; + case RSX_FP_OPCODE_UP2: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UP2"); break; + case RSX_FP_OPCODE_UP4: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UP4"); break; + case RSX_FP_OPCODE_UP16: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UP16"); break; + case RSX_FP_OPCODE_UPB: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPB"); break; + case RSX_FP_OPCODE_UPG: LOG_ERROR(RSX, "Unimplemented TEX_SRB instruction: UPG"); break; + + default: + return false; + } + + return true; + }; + + auto SIP = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_BRK: SetDst("break"); break; + case RSX_FP_OPCODE_CAL: LOG_ERROR(RSX, "Unimplemented SIP instruction: CAL"); break; + case RSX_FP_OPCODE_FENCT: forced_unit = FORCE_SCT; break; + case RSX_FP_OPCODE_FENCB: forced_unit = FORCE_SCB; break; + case RSX_FP_OPCODE_IFE: + AddCode("if($cond)"); + m_else_offsets.push_back(src1.else_offset << 2); + m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + break; + case RSX_FP_OPCODE_LOOP: + if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) + { + AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); + } + else + { + AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + m_loop_count++; + m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + } + break; + case RSX_FP_OPCODE_REP: + if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) + { + AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); + } + else + { + AddCode(fmt::Format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + m_loop_count++; + m_end_offsets.push_back(src2.end_offset << 2); + AddCode("{"); + m_code_level++; + } + break; + case RSX_FP_OPCODE_RET: SetDst("return"); break; + + default: + return false; + } + + return true; + }; + + switch (opcode) { case RSX_FP_OPCODE_NOP: break; - case RSX_FP_OPCODE_MOV: SetDst("$0"); break; - case RSX_FP_OPCODE_MUL: SetDst("($0 * $1)"); break; - case RSX_FP_OPCODE_ADD: SetDst("($0 + $1)"); break; - case RSX_FP_OPCODE_MAD: SetDst("($0 * $1 + $2)"); break; - case RSX_FP_OPCODE_DP3: SetDst("vec4(dot($0.xyz, $1.xyz))"); break; - case RSX_FP_OPCODE_DP4: SetDst("vec4(dot($0, $1))"); break; - case RSX_FP_OPCODE_DST: SetDst("vec4(distance($0, $1))"); break; - case RSX_FP_OPCODE_MIN: SetDst("min($0, $1)"); break; - case RSX_FP_OPCODE_MAX: SetDst("max($0, $1)"); break; - case RSX_FP_OPCODE_SLT: SetDst("vec4(lessThan($0, $1))"); break; - case RSX_FP_OPCODE_SGE: SetDst("vec4(greaterThanEqual($0, $1))"); break; - case RSX_FP_OPCODE_SLE: SetDst("vec4(lessThanEqual($0, $1))"); break; - case RSX_FP_OPCODE_SGT: SetDst("vec4(greaterThan($0, $1))"); break; - case RSX_FP_OPCODE_SNE: SetDst("vec4(notEqual($0, $1))"); break; - case RSX_FP_OPCODE_SEQ: SetDst("vec4(equal($0, $1))"); break; - case RSX_FP_OPCODE_FRC: SetDst("fract($0)"); break; - case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); break; case RSX_FP_OPCODE_KIL: SetDst("discard", false); break; - case RSX_FP_OPCODE_PK4: - LOG_ERROR(RSX, "Unimplemented fp_opcode PK4"); - break; - case RSX_FP_OPCODE_UP4: - LOG_ERROR(RSX, "Unimplemented fp_opcode UP4"); - break; - case RSX_FP_OPCODE_DDX: SetDst("dFdx($0)"); break; - case RSX_FP_OPCODE_DDY: SetDst("dFdy($0)"); break; - case RSX_FP_OPCODE_TEX: SetDst("texture($t, $0.xy)"); break; - case RSX_FP_OPCODE_TXP: - LOG_ERROR(RSX, "Unimplemented fp_opcode TXP"); - break; - case RSX_FP_OPCODE_TXD: - LOG_ERROR(RSX, "Unimplemented fp_opcode TXD"); - break; - case RSX_FP_OPCODE_RCP: SetDst("(1 / $0)"); break; - case RSX_FP_OPCODE_RSQ: SetDst("inversesqrt(abs($0))"); break; - case RSX_FP_OPCODE_EX2: SetDst("exp2($0)"); break; - case RSX_FP_OPCODE_LG2: SetDst("log2($0)"); break; - case RSX_FP_OPCODE_LIT: SetDst("vec4(1.0, $0.x, ($0.x > 0 ? exp2($0.w * log2($0.y)) : 0.0), 1.0)"); break; - case RSX_FP_OPCODE_LRP: SetDst("($0 * ($1 - $2) + $2)"); break; - case RSX_FP_OPCODE_STR: SetDst("vec4(equal($0, vec4(1.0)))"); break; - case RSX_FP_OPCODE_SFL: SetDst("vec4(equal($0, vec4(0.0)))"); break; - case RSX_FP_OPCODE_COS: SetDst("cos($0)"); break; - case RSX_FP_OPCODE_SIN: SetDst("sin($0)"); break; - case RSX_FP_OPCODE_PK2: - LOG_ERROR(RSX, "Unimplemented fp_opcode PK2"); - break; - case RSX_FP_OPCODE_UP2: - LOG_ERROR(RSX, "Unimplemented fp_opcode UP2"); - break; - case RSX_FP_OPCODE_POW: SetDst("pow($0, $1)"); break; - case RSX_FP_OPCODE_PKB: - LOG_ERROR(RSX, "Unimplemented fp_opcode PKB"); - break; - case RSX_FP_OPCODE_UPB: - LOG_ERROR(RSX, "Unimplemented fp_opcode UPB"); - break; - case RSX_FP_OPCODE_PK16: - LOG_ERROR(RSX, "Unimplemented fp_opcode PK16"); - break; - case RSX_FP_OPCODE_UP16: - LOG_ERROR(RSX, "Unimplemented fp_opcode UP16"); - break; - case RSX_FP_OPCODE_BEM: - LOG_ERROR(RSX, "Unimplemented fp_opcode BEM"); - break; - case RSX_FP_OPCODE_PKG: - LOG_ERROR(RSX, "Unimplemented fp_opcode PKG"); - break; - case RSX_FP_OPCODE_UPG: - LOG_ERROR(RSX, "Unimplemented fp_opcode UPG"); - break; - case RSX_FP_OPCODE_DP2A: SetDst("($0.x * $1.x + $0.y * $1.y + $2.x)"); break; - case RSX_FP_OPCODE_TXL: break; - LOG_ERROR(RSX, "Unimplemented fp_opcode TXL"); - break; - case RSX_FP_OPCODE_TXB: break; - LOG_ERROR(RSX, "Unimplemented fp_opcode TXB"); - break; - case RSX_FP_OPCODE_TEXBEM: - LOG_ERROR(RSX, "Unimplemented fp_opcode TEXBEM"); - break; - case RSX_FP_OPCODE_TXPBEM: - LOG_ERROR(RSX, "Unimplemented fp_opcode TXPBEM"); - break; - case RSX_FP_OPCODE_BEMLUM: - LOG_ERROR(RSX, "Unimplemented fp_opcode BEMLUM"); - break; - case RSX_FP_OPCODE_REFL: SetDst("($0 - 2.0 * $1 * dot($0, $1))"); break; - case RSX_FP_OPCODE_TIMESWTEX: - LOG_ERROR(RSX, "Unimplemented fp_opcode TIMESWTEX"); - break; - case RSX_FP_OPCODE_DP2: SetDst("vec4(dot($0.xy, $1.xy))"); break; - case RSX_FP_OPCODE_NRM: SetDst("normalize($0.xyz)"); break; - case RSX_FP_OPCODE_DIV: SetDst("($0 / $1)"); break; - case RSX_FP_OPCODE_DIVSQ: SetDst("($0 / sqrt($1))"); break; - case RSX_FP_OPCODE_LIF: SetDst("vec4(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); break; - case RSX_FP_OPCODE_FENCT: break; - case RSX_FP_OPCODE_FENCB: break; - case RSX_FP_OPCODE_BRK: SetDst("break"); break; - case RSX_FP_OPCODE_CAL: - LOG_ERROR(RSX, "Unimplemented fp_opcode CAL"); - break; - case RSX_FP_OPCODE_IFE: - AddCode("if($cond)"); - m_else_offsets.push_back(src1.else_offset << 2); - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - break; - - case RSX_FP_OPCODE_LOOP: - if(!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_REP: - if(!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::Format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::Format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_RET: - SetDst("return"); - break; default: - LOG_ERROR(RSX, "Unknown fp opcode 0x%x (inst %d)", opcode, m_size / (4 * 4)); - Emu.Pause(); - break; + if (forced_unit == FORCE_NONE) + { + if (SIP()) break; + if (SCT()) break; + if (TEX_SRB()) break; + if (SCB()) break; + } + else if (forced_unit == FORCE_SCT) + { + forced_unit = FORCE_NONE; + if (SCT()) break; + } + else if (forced_unit == FORCE_SCB) + { + forced_unit = FORCE_NONE; + if (SCB()) break; + } + + LOG_ERROR(RSX, "Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, forced_unit); + break; } m_size += m_offset; - if(dst.end) break; + if (dst.end) break; assert(m_offset % sizeof(u32) == 0); data += m_offset / sizeof(u32); @@ -488,7 +607,7 @@ void GLFragmentDecompilerThread::Task() m_parr.params.clear(); } -GLShaderProgram::GLShaderProgram() +GLShaderProgram::GLShaderProgram() : m_decompiler_thread(nullptr) , m_id(0) { @@ -496,14 +615,14 @@ GLShaderProgram::GLShaderProgram() GLShaderProgram::~GLShaderProgram() { - if(m_decompiler_thread) + if (m_decompiler_thread) { Wait(); - if(m_decompiler_thread->IsAlive()) + if (m_decompiler_thread->IsAlive()) { m_decompiler_thread->Stop(); } - + delete m_decompiler_thread; m_decompiler_thread = nullptr; } @@ -513,7 +632,7 @@ GLShaderProgram::~GLShaderProgram() void GLShaderProgram::Wait() { - if(m_decompiler_thread && m_decompiler_thread->IsAlive()) + if (m_decompiler_thread && m_decompiler_thread->IsAlive()) { m_decompiler_thread->Join(); } @@ -527,14 +646,14 @@ void GLShaderProgram::Decompile(RSXShaderProgram& prog) void GLShaderProgram::DecompileAsync(RSXShaderProgram& prog) { - if(m_decompiler_thread) + if (m_decompiler_thread) { Wait(); - if(m_decompiler_thread->IsAlive()) + if (m_decompiler_thread->IsAlive()) { m_decompiler_thread->Stop(); } - + delete m_decompiler_thread; m_decompiler_thread = nullptr; } diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.h b/rpcs3/Emu/RSX/GL/GLFragmentProgram.h index f05dfc30e0..9666035d07 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.h +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.h @@ -155,6 +155,7 @@ struct GLFragmentDecompilerThread : public ThreadBase std::string AddTex(); std::string Format(const std::string& code); + void AddCodeCond(const std::string& dst, const std::string& src); std::string GetCond(); template std::string GetSRC(T src); std::string BuildCode(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 9e9fdd70de..c55c1962a4 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -425,7 +425,7 @@ void GLTexture::Init(RSXTexture& tex) free(unswizzledPixels); } - break; break; + break; case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8 & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN): { @@ -450,7 +450,7 @@ void GLTexture::Init(RSXTexture& tex) free(unswizzledPixels); } - break; break; + break; default: LOG_ERROR(RSX, "Init tex error: Bad tex format (0x%x | %s | 0x%x)", format, (is_swizzled ? "swizzled" : "linear"), tex.GetFormat() & 0x40); @@ -1647,6 +1647,21 @@ void GLGSRender::InitDrawBuffers() LOG_ERROR(RSX, "Bad surface color target: %d", m_surface_color_target); break; } + + if (m_read_buffer) + { + u32 format = GL_BGRA; + CellGcmDisplayInfo* buffers = vm::get_ptr(m_gcm_buffers_addr); + u32 addr = GetAddress(buffers[m_gcm_current_buffer].offset, CELL_GCM_LOCATION_LOCAL); + + if (Memory.IsGoodAddr(addr)) + { + u32 width = buffers[m_gcm_current_buffer].width; + u32 height = buffers[m_gcm_current_buffer].height; + + glDrawPixels(width, height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, vm::get_ptr(addr)); + } + } } void GLGSRender::ExecCMD(u32 cmd) @@ -1721,24 +1736,6 @@ void GLGSRender::ExecCMD() checkForGlError("glColorMask"); } - if (!m_indexed_array.m_count && !m_draw_array_count) - { - u32 min_vertex_size = ~0; - for(auto &i : m_vertex_data) - { - if (!i.size) - continue; - - u32 vertex_size = i.data.size() / (i.size * i.GetTypeSize()); - - if (min_vertex_size > vertex_size) - min_vertex_size = vertex_size; - } - - m_draw_array_count = min_vertex_size; - m_draw_array_first = 0; - } - Enable(m_set_depth_test, GL_DEPTH_TEST); Enable(m_set_alpha_test, GL_ALPHA_TEST); Enable(m_set_depth_bounds_test, GL_DEPTH_BOUNDS_TEST_EXT); @@ -1995,6 +1992,20 @@ void GLGSRender::ExecCMD() checkForGlError(fmt::Format("m_gl_textures[%d].Init", i)); } + for (u32 i = 0; i < m_textures_count; ++i) + { + if (!m_vertex_textures[i].IsEnabled()) continue; + + glActiveTexture(GL_TEXTURE0 + m_textures_count + i); + checkForGlError("glActiveTexture"); + m_gl_vertex_textures[i].Create(); + m_gl_vertex_textures[i].Bind(); + checkForGlError(fmt::Format("m_gl_vertex_textures[%d].Bind", i)); + m_program.SetTex(i); + m_gl_vertex_textures[i].Init(m_vertex_textures[i]); + checkForGlError(fmt::Format("m_gl_vertex_textures[%d].Init", i)); + } + m_vao.Bind(); if(m_indexed_array.m_count) { diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index a598404c8a..b89484fb87 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -148,6 +148,7 @@ private: GLVertexProgram m_vertex_prog; GLTexture m_gl_textures[m_textures_count]; + GLTexture m_gl_vertex_textures[m_textures_count]; GLvao m_vao; GLvbo m_vbo; diff --git a/rpcs3/Emu/RSX/GL/GLProgram.cpp b/rpcs3/Emu/RSX/GL/GLProgram.cpp index e7e7fdaa0a..32f9d709bd 100644 --- a/rpcs3/Emu/RSX/GL/GLProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLProgram.cpp @@ -98,6 +98,13 @@ void GLProgram::SetTex(u32 index) checkForGlError(fmt::Format("SetTex(%u - %d - %d)", id, index, loc)); } +void GLProgram::SetVTex(u32 index) +{ + int loc = GetLocation(fmt::Format("vtex%u", index)); + glProgramUniform1i(id, loc, index); + checkForGlError(fmt::Format("SetVTex(%u - %d - %d)", id, index, loc)); +} + void GLProgram::Delete() { if(!IsCreated()) return; diff --git a/rpcs3/Emu/RSX/GL/GLProgram.h b/rpcs3/Emu/RSX/GL/GLProgram.h index 321a2e3cf5..076b3ce01f 100644 --- a/rpcs3/Emu/RSX/GL/GLProgram.h +++ b/rpcs3/Emu/RSX/GL/GLProgram.h @@ -24,5 +24,6 @@ public: void Use(); void UnUse(); void SetTex(u32 index); + void SetVTex(u32 index); void Delete(); }; diff --git a/rpcs3/Emu/RSX/GL/GLShaderParam.h b/rpcs3/Emu/RSX/GL/GLShaderParam.h index 80fa5a3dec..f90402eda0 100644 --- a/rpcs3/Emu/RSX/GL/GLShaderParam.h +++ b/rpcs3/Emu/RSX/GL/GLShaderParam.h @@ -1,5 +1,6 @@ #pragma once #include "OpenGL.h" +#include enum GLParamFlag { @@ -41,9 +42,9 @@ struct GLParamType bool SearchName(const std::string& name) { - for(u32 i=0; iSearchName(name)) t->items.emplace_back(name, -1, value); + if (!t->SearchName(name)) t->items.emplace_back(name, -1, value); } else { @@ -125,9 +126,9 @@ struct GLParamArray type = GetParamFlag(flag) + type; GLParamType* t = SearchParam(type); - if(t) + if (t) { - if(!t->SearchName(name)) t->items.emplace_back(name, location); + if (!t->SearchName(name)) t->items.emplace_back(name, location); } else { @@ -139,3 +140,90 @@ struct GLParamArray return name; } }; + +class ShaderVar +{ +public: + std::string name; + std::vector swizzles; + + ShaderVar() = default; + ShaderVar(const std::string& var) + { + auto var_blocks = fmt::split(var, { "." }); + + if (var_blocks.size() == 0) + { + assert(0); + } + + name = var_blocks[0]; + + if (var_blocks.size() == 1) + { + swizzles.push_back("xyzw"); + } + else + { + swizzles = std::vector(var_blocks.begin() + 1, var_blocks.end()); + } + } + + int get_vector_size() const + { + return swizzles[swizzles.size() - 1].length(); + } + + ShaderVar& symplify() + { + std::unordered_map swizzle; + + static std::unordered_map pos_to_swizzle = + { + { 0, 'x' }, + { 1, 'y' }, + { 2, 'z' }, + { 3, 'w' } + }; + + for (auto &i : pos_to_swizzle) + { + swizzle[i.second] = swizzles[0].length() > i.first ? swizzles[0][i.first] : 0; + } + + for (int i = 1; i < swizzles.size(); ++i) + { + std::unordered_map new_swizzle; + + for (auto &sw : pos_to_swizzle) + { + new_swizzle[sw.second] = swizzle[swizzles[i].length() <= sw.first ? '\0' : swizzles[i][sw.first]]; + } + + swizzle = new_swizzle; + } + + swizzles.clear(); + std::string new_swizzle; + + for (auto &i : pos_to_swizzle) + { + if (swizzle[i.second] != '\0') + new_swizzle += swizzle[i.second]; + } + + swizzles.push_back(new_swizzle); + + return *this; + } + + std::string get() const + { + if (swizzles.size() == 1 && swizzles[0] == "xyzw") + { + return name; + } + + return name + "." + fmt::merge({ swizzles }, "."); + } +}; \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index fffaf8f42a..fe99a79847 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -8,19 +8,19 @@ std::string GLVertexDecompilerThread::GetMask(bool is_sca) { std::string ret; - if(is_sca) + if (is_sca) { - if(d3.sca_writemask_x) ret += "x"; - if(d3.sca_writemask_y) ret += "y"; - if(d3.sca_writemask_z) ret += "z"; - if(d3.sca_writemask_w) ret += "w"; + if (d3.sca_writemask_x) ret += "x"; + if (d3.sca_writemask_y) ret += "y"; + if (d3.sca_writemask_z) ret += "z"; + if (d3.sca_writemask_w) ret += "w"; } else { - if(d3.vec_writemask_x) ret += "x"; - if(d3.vec_writemask_y) ret += "y"; - if(d3.vec_writemask_z) ret += "z"; - if(d3.vec_writemask_w) ret += "w"; + if (d3.vec_writemask_x) ret += "x"; + if (d3.vec_writemask_y) ret += "y"; + if (d3.vec_writemask_z) ret += "z"; + if (d3.vec_writemask_w) ret += "w"; } return ret.empty() || ret == "xyzw" ? "" : ("." + ret); @@ -40,17 +40,17 @@ std::string GLVertexDecompilerThread::GetDST(bool isSca) { std::string ret; - switch(isSca ? 0x1f : d3.dst) + switch (isSca ? 0x1f : d3.dst) { case 0x1f: ret += m_parr.AddParam(PARAM_NONE, "vec4", std::string("tmp") + std::to_string(isSca ? d3.sca_dst_tmp : d0.dst_tmp)); - break; + break; default: if (d3.dst > 15) LOG_ERROR(RSX, "dst index out of range: %u", d3.dst); ret += m_parr.AddParam(PARAM_NONE, "vec4", std::string("dst_reg") + std::to_string(d3.dst), d3.dst == 0 ? "vec4(0.0f, 0.0f, 0.0f, 1.0f)" : "vec4(0.0)"); - break; + break; } return ret; @@ -58,7 +58,7 @@ std::string GLVertexDecompilerThread::GetDST(bool isSca) std::string GLVertexDecompilerThread::GetSRC(const u32 n) { - static const std::string reg_table[] = + static const std::string reg_table[] = { "in_pos", "in_weight", "in_normal", "in_diff_color", "in_spec_color", @@ -70,13 +70,13 @@ std::string GLVertexDecompilerThread::GetSRC(const u32 n) std::string ret; - switch(src[n].reg_type) + switch (src[n].reg_type) { case 1: //temp ret += m_parr.AddParam(PARAM_NONE, "vec4", "tmp" + std::to_string(src[n].tmp_src)); - break; + break; case 2: //input - if (d1.input_src < (sizeof(reg_table)/sizeof(reg_table[0]))) + if (d1.input_src < (sizeof(reg_table) / sizeof(reg_table[0]))) { ret += m_parr.AddParam(PARAM_IN, "vec4", reg_table[d1.input_src], d1.input_src); } @@ -85,16 +85,16 @@ std::string GLVertexDecompilerThread::GetSRC(const u32 n) LOG_ERROR(RSX, "Bad input src num: %d", fmt::by_value(d1.input_src)); ret += m_parr.AddParam(PARAM_IN, "vec4", "in_unk", d1.input_src); } - break; + break; case 3: //const m_parr.AddParam(PARAM_UNIFORM, "vec4", std::string("vc[468]")); ret += std::string("vc[") + std::to_string(d1.const_src) + (d3.index_const ? " + " + AddAddrReg() : "") + "]"; - break; + break; default: LOG_ERROR(RSX, "Bad src%u reg type: %d", n, fmt::by_value(src[n].reg_type)); Emu.Pause(); - break; + break; } static const std::string f = "xyzw"; @@ -106,26 +106,26 @@ std::string GLVertexDecompilerThread::GetSRC(const u32 n) swizzle += f[src[n].swz_z]; swizzle += f[src[n].swz_w]; - if(swizzle != f) ret += '.' + swizzle; + if (swizzle != f) ret += '.' + swizzle; bool abs; - - switch(n) + + switch (n) { case 0: abs = d0.src0_abs; break; case 1: abs = d0.src1_abs; break; case 2: abs = d0.src2_abs; break; } - - if(abs) ret = "abs(" + ret + ")"; - if(src[n].neg) ret = "-" + ret; + + if (abs) ret = "abs(" + ret + ")"; + if (src[n].neg) ret = "-" + ret; return ret; } void GLVertexDecompilerThread::SetDST(bool is_sca, std::string value) { - if(d0.cond == 0) return; + if (d0.cond == 0) return; enum { @@ -138,12 +138,12 @@ void GLVertexDecompilerThread::SetDST(bool is_sca, std::string value) value += mask; - if(is_sca && d0.vec_result) + if (is_sca && d0.vec_result) { - value = "vec4(" + value + ")" + mask; + //value = "vec4(" + value + ")"; } - if(d0.staturate) + if (d0.staturate) { value = "clamp(" + value + ", 0.0, 1.0)"; } @@ -152,22 +152,20 @@ void GLVertexDecompilerThread::SetDST(bool is_sca, std::string value) if (d0.cond_update_enable_0 && d0.cond_update_enable_1) { - dest += m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(d0.cond_reg_sel_1), "vec4(0.0)") + mask + " = "; + dest = m_parr.AddParam(PARAM_NONE, "vec4", "cc" + std::to_string(d0.cond_reg_sel_1), "vec4(0.0)") + mask; } - - if (d3.dst != 0x1f || (is_sca ? d3.sca_dst_tmp != 0x3f : d0.dst_tmp != 0x3f)) + else if (d3.dst != 0x1f || (is_sca ? d3.sca_dst_tmp != 0x3f : d0.dst_tmp != 0x3f)) { - dest += GetDST(is_sca) + mask + " = "; + dest = GetDST(is_sca) + mask; } - std::string code; + //std::string code; + //if (d0.cond_test_enable) + // code += "$ifcond "; + //code += dest + value; + //AddCode(code + ";"); - if (d0.cond_test_enable) - code += "$ifcond "; - - code += dest + value; - - AddCode(code + ";"); + AddCodeCond(dest, value); } std::string GLVertexDecompilerThread::GetFunc() @@ -188,6 +186,11 @@ std::string GLVertexDecompilerThread::GetFunc() return name + "()"; } +std::string GLVertexDecompilerThread::GetTex() +{ + return m_parr.AddParam(PARAM_UNIFORM, "sampler2D", std::string("vtex") + std::to_string(/*?.tex_num*/0)); +} + std::string GLVertexDecompilerThread::Format(const std::string& code) { const std::pair> repl_list[] = @@ -200,14 +203,16 @@ std::string GLVertexDecompilerThread::Format(const std::string& code) { "$am", std::bind(std::mem_fn(&GLVertexDecompilerThread::AddAddrMask), this) }, { "$a", std::bind(std::mem_fn(&GLVertexDecompilerThread::AddAddrReg), this) }, - { "$fa", [this]()->std::string {return std::to_string(GetAddr()); } }, + { "$t", std::bind(std::mem_fn(&GLVertexDecompilerThread::GetTex), this) }, + + { "$fa", [this]()->std::string { return std::to_string(GetAddr()); } }, { "$f()", std::bind(std::mem_fn(&GLVertexDecompilerThread::GetFunc), this) }, { "$ifcond ", [this]() -> std::string - { - const std::string& cond = GetCond(); - if (cond == "true") return ""; - return "if(" + cond + ") "; - } + { + const std::string& cond = GetCond(); + if (cond == "true") return ""; + return "if(" + cond + ") "; + } }, { "$cond", std::bind(std::mem_fn(&GLVertexDecompilerThread::GetCond), this) } }; @@ -252,6 +257,70 @@ std::string GLVertexDecompilerThread::GetCond() return fmt::Format("any(%s(cc%d%s, vec4(0.0)%s))", cond_string_table[d0.cond], d0.cond_reg_sel_1, swizzle.c_str(), swizzle.c_str()); } +void GLVertexDecompilerThread::AddCodeCond(const std::string& dst, const std::string& src) +{ + enum + { + lt = 0x1, + eq = 0x2, + gt = 0x4, + }; + + + if (!d0.cond_test_enable || d0.cond == (lt | gt | eq)) + { + AddCode(dst + " = " + src + ";"); + return; + } + + if (d0.cond == 0) + { + AddCode("//" + dst + " = " + src + ";"); + return; + } + + static const char* cond_string_table[(lt | gt | eq) + 1] = + { + "error", + "lessThan", + "equal", + "lessThanEqual", + "greaterThan", + "notEqual", + "greaterThanEqual", + "error" + }; + + static const char f[4] = { 'x', 'y', 'z', 'w' }; + + std::string swizzle; + swizzle += f[d0.mask_x]; + swizzle += f[d0.mask_y]; + swizzle += f[d0.mask_z]; + swizzle += f[d0.mask_w]; + + swizzle = swizzle == "xyzw" ? "" : "." + swizzle; + + std::string cond = fmt::Format("%s(cc%d%s, vec4(0.0))", cond_string_table[d0.cond], d0.cond_reg_sel_1, swizzle.c_str()); + + ShaderVar dst_var(dst); + dst_var.symplify(); + + //const char *c_mask = f; + + if (dst_var.swizzles[0].length() == 1) + { + AddCode("if (" + cond + ".x) " + dst + " = vec4(" + src + ").x;"); + } + else + { + for (int i = 0; i < dst_var.swizzles[0].length(); ++i) + { + AddCode("if (" + cond + "." + f[i] + ") " + dst + "." + f[i] + " = " + src + "." + f[i] + ";"); + } + } +} + std::string GLVertexDecompilerThread::AddAddrMask() { @@ -290,21 +359,21 @@ std::string GLVertexDecompilerThread::BuildFuncBody(const FuncInfo& func) { std::string result; - for(uint i=func.offset; i 0 ? exp2($s.w * log2($s.y)) : 0.0), 1.0)"); break; + case RSX_SCA_OPCODE_LIT: SetDSTSca("vec4(1.0, $s.x, ($s.x > 0.0 ? exp($s.w * log2($s.y)) : 0.0), 1.0)"); break; case RSX_SCA_OPCODE_BRA: { AddCode("$if ($cond)"); @@ -599,33 +668,33 @@ void GLVertexDecompilerThread::Task() break; case RSX_SCA_OPCODE_CAL: // works same as BRI - AddCode("$ifcond $f(); //CAL"); + AddCode("$ifcond $f(); //CAL"); break; - case RSX_SCA_OPCODE_CLI: + case RSX_SCA_OPCODE_CLI: // works same as BRI - AddCode("$ifcond $f(); //CLI"); + AddCode("$ifcond $f(); //CLI"); break; case RSX_SCA_OPCODE_RET: // works like BRI but shorter (RET o[1].x(TR);) - AddCode("$ifcond return;"); + AddCode("$ifcond return;"); break; case RSX_SCA_OPCODE_LG2: SetDSTSca("log2($s)"); break; case RSX_SCA_OPCODE_EX2: SetDSTSca("exp2($s)"); break; case RSX_SCA_OPCODE_SIN: SetDSTSca("sin($s)"); break; case RSX_SCA_OPCODE_COS: SetDSTSca("cos($s)"); break; - case RSX_SCA_OPCODE_BRB: + case RSX_SCA_OPCODE_BRB: // works differently (BRB o[1].x !b0, L0;) LOG_ERROR(RSX, "Unimplemented sca_opcode BRB"); break; - case RSX_SCA_OPCODE_CLB: break; + case RSX_SCA_OPCODE_CLB: break; // works same as BRB LOG_ERROR(RSX, "Unimplemented sca_opcode CLB"); break; - case RSX_SCA_OPCODE_PSH: break; + case RSX_SCA_OPCODE_PSH: break; // works differently (PSH o[1].x A0;) LOG_ERROR(RSX, "Unimplemented sca_opcode PSH"); break; - case RSX_SCA_OPCODE_POP: break; + case RSX_SCA_OPCODE_POP: break; // works differently (POP o[1].x;) LOG_ERROR(RSX, "Unimplemented sca_opcode POP"); break; @@ -634,7 +703,7 @@ void GLVertexDecompilerThread::Task() AddCode(fmt::Format("//Unknown vp sca_opcode 0x%x", fmt::by_value(d1.sca_opcode))); LOG_ERROR(RSX, "Unknown vp sca_opcode 0x%x", fmt::by_value(d1.sca_opcode)); Emu.Pause(); - break; + break; } switch (d1.vec_opcode) @@ -662,12 +731,13 @@ void GLVertexDecompilerThread::Task() case RSX_VEC_OPCODE_SNE: SetDSTVec("vec4(notEqual($0, $1))"); break; case RSX_VEC_OPCODE_STR: SetDSTVec("vec4(equal($0, vec4(1.0)))"); break; case RSX_VEC_OPCODE_SSG: SetDSTVec("sign($0)"); break; + case RSX_VEC_OPCODE_TEX: SetDSTVec("texture($t, $0.xy)"); break; default: AddCode(fmt::Format("//Unknown vp opcode 0x%x", fmt::by_value(d1.vec_opcode))); LOG_ERROR(RSX, "Unknown vp opcode 0x%x", fmt::by_value(d1.vec_opcode)); Emu.Pause(); - break; + break; } if (d3.end) @@ -710,10 +780,10 @@ GLVertexProgram::GLVertexProgram() GLVertexProgram::~GLVertexProgram() { - if(m_decompiler_thread) + if (m_decompiler_thread) { Wait(); - if(m_decompiler_thread->IsAlive()) + if (m_decompiler_thread->IsAlive()) { m_decompiler_thread->Stop(); } @@ -727,7 +797,7 @@ GLVertexProgram::~GLVertexProgram() void GLVertexProgram::Wait() { - if(m_decompiler_thread && m_decompiler_thread->IsAlive()) + if (m_decompiler_thread && m_decompiler_thread->IsAlive()) { m_decompiler_thread->Join(); } @@ -759,7 +829,7 @@ void GLVertexProgram::DecompileAsync(RSXVertexProgram& prog) void GLVertexProgram::Compile() { - if(id) glDeleteShader(id); + if (id) glDeleteShader(id); id = glCreateShader(GL_VERTEX_SHADER); @@ -768,16 +838,16 @@ void GLVertexProgram::Compile() glShaderSource(id, 1, &str, &strlen); glCompileShader(id); - + GLint r = GL_FALSE; glGetShaderiv(id, GL_COMPILE_STATUS, &r); - if(r != GL_TRUE) + if (r != GL_TRUE) { glGetShaderiv(id, GL_INFO_LOG_LENGTH, &r); - if(r) + if (r) { - char* buf = new char[r+1](); + char* buf = new char[r + 1](); GLsizei len; glGetShaderInfoLog(id, r, &len, buf); LOG_ERROR(RSX, "Failed to compile vertex shader: %s", buf); @@ -796,7 +866,7 @@ void GLVertexProgram::Delete() parr.params.clear(); shader.clear(); - if(id) + if (id) { if (Emu.IsStopped()) { diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.h b/rpcs3/Emu/RSX/GL/GLVertexProgram.h index af8a666200..de4c3d07cd 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.h +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.h @@ -54,6 +54,7 @@ enum vec_opcode RSX_VEC_OPCODE_SNE = 0x14, RSX_VEC_OPCODE_STR = 0x15, RSX_VEC_OPCODE_SSG = 0x16, + RSX_VEC_OPCODE_TEX = 0x19, }; struct GLVertexDecompilerThread : public ThreadBase @@ -237,12 +238,14 @@ struct GLVertexDecompilerThread : public ThreadBase std::string GetDST(bool is_sca = false); std::string GetSRC(const u32 n); std::string GetFunc(); + std::string GetTex(); std::string GetCond(); std::string AddAddrMask(); std::string AddAddrReg(); u32 GetAddr(); std::string Format(const std::string& code); + void AddCodeCond(const std::string& dst, const std::string& src); void AddCode(const std::string& code); void SetDST(bool is_sca, std::string value); void SetDSTVec(const std::string& code); diff --git a/rpcs3/Emu/RSX/RSXTexture.cpp b/rpcs3/Emu/RSX/RSXTexture.cpp index 6904da2420..7a21e076d3 100644 --- a/rpcs3/Emu/RSX/RSXTexture.cpp +++ b/rpcs3/Emu/RSX/RSXTexture.cpp @@ -210,3 +210,206 @@ void RSXTexture::SetControl3(u16 depth, u32 pitch) m_depth = depth; m_pitch = pitch; } + +RSXVertexTexture::RSXVertexTexture() : RSXTexture() +{ +} + +RSXVertexTexture::RSXVertexTexture(u8 index) : RSXTexture(index) +{ +} + +void RSXVertexTexture::Init() +{ + // Offset + methodRegisters[NV4097_SET_VERTEX_TEXTURE_OFFSET + (m_index * 32)] = 0; + + // Format + methodRegisters[NV4097_SET_VERTEX_TEXTURE_FORMAT + (m_index * 32)] = 0; + + // Address + methodRegisters[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 32)] = + ((/*wraps*/1) | ((/*anisoBias*/0) << 4) | ((/*wrapt*/1) << 8) | ((/*unsignedRemap*/0) << 12) | + ((/*wrapr*/3) << 16) | ((/*gamma*/0) << 20) | ((/*signedRemap*/0) << 24) | ((/*zfunc*/0) << 28)); + + // Control0 + methodRegisters[NV4097_SET_VERTEX_TEXTURE_CONTROL0 + (m_index * 32)] = + (((/*alphakill*/0) << 2) | (/*maxaniso*/0) << 4) | ((/*maxlod*/0xc00) << 7) | ((/*minlod*/0) << 19) | ((/*enable*/0) << 31); + + // Control1 + //methodRegisters[NV4097_SET_VERTEX_TEXTURE_CONTROL1 + (m_index * 32)] = 0xE4; + + // Filter + methodRegisters[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 32)] = + ((/*bias*/0) | ((/*conv*/1) << 13) | ((/*min*/5) << 16) | ((/*mag*/2) << 24) + | ((/*as*/0) << 28) | ((/*rs*/0) << 29) | ((/*gs*/0) << 30) | ((/*bs*/0) << 31)); + + // Image Rect + methodRegisters[NV4097_SET_VERTEX_TEXTURE_IMAGE_RECT + (m_index * 32)] = (/*height*/1) | ((/*width*/1) << 16); + + // Border Color + methodRegisters[NV4097_SET_VERTEX_TEXTURE_BORDER_COLOR + (m_index * 32)] = 0; +} + +u32 RSXVertexTexture::GetOffset() const +{ + return methodRegisters[NV4097_SET_VERTEX_TEXTURE_OFFSET + (m_index * 32)]; +} + +u8 RSXVertexTexture::GetLocation() const +{ + return (methodRegisters[NV4097_SET_VERTEX_TEXTURE_FORMAT + (m_index * 32)] & 0x3) - 1; +} + +bool RSXVertexTexture::isCubemap() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FORMAT + (m_index * 32)] >> 2) & 0x1); +} + +u8 RSXVertexTexture::GetBorderType() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FORMAT + (m_index * 32)] >> 3) & 0x1); +} + +u8 RSXVertexTexture::GetDimension() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FORMAT + (m_index * 32)] >> 4) & 0xf); +} + +u8 RSXVertexTexture::GetFormat() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FORMAT + (m_index * 32)] >> 8) & 0xff); +} + +u16 RSXVertexTexture::GetMipmap() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FORMAT + (m_index * 32)] >> 16) & 0xffff); +} + +u8 RSXVertexTexture::GetWrapS() const +{ + return 1; + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 32)]) & 0xf); +} + +u8 RSXVertexTexture::GetWrapT() const +{ + return 1; + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 32)] >> 8) & 0xf); +} + +u8 RSXVertexTexture::GetWrapR() const +{ + return 1; + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 32)] >> 16) & 0xf); +} + +u8 RSXVertexTexture::GetUnsignedRemap() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 32)] >> 12) & 0xf); +} + +u8 RSXVertexTexture::GetZfunc() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 32)] >> 28) & 0xf); +} + +u8 RSXVertexTexture::GetGamma() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 32)] >> 20) & 0xf); +} + +u8 RSXVertexTexture::GetAnisoBias() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 32)] >> 4) & 0xf); +} + +u8 RSXVertexTexture::GetSignedRemap() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 32)] >> 24) & 0xf); +} + +bool RSXVertexTexture::IsEnabled() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_CONTROL0 + (m_index * 32)] >> 31) & 0x1); +} + +u16 RSXVertexTexture::GetMinLOD() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_CONTROL0 + (m_index * 32)] >> 19) & 0xfff); +} + +u16 RSXVertexTexture::GetMaxLOD() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_CONTROL0 + (m_index * 32)] >> 7) & 0xfff); +} + +u8 RSXVertexTexture::GetMaxAniso() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_CONTROL0 + (m_index * 32)] >> 4) & 0x7); +} + +bool RSXVertexTexture::IsAlphaKillEnabled() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_CONTROL0 + (m_index * 32)] >> 2) & 0x1); +} + +u32 RSXVertexTexture::GetRemap() const +{ + return 0 | (1 << 2) | (2 << 4) | (3 << 6);//(methodRegisters[NV4097_SET_VERTEX_TEXTURE_CONTROL1 + (m_index * 32)]); +} + +u16 RSXVertexTexture::GetBias() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 32)]) & 0x1fff); +} + +u8 RSXVertexTexture::GetMinFilter() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 32)] >> 16) & 0x7); +} + +u8 RSXVertexTexture::GetMagFilter() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 32)] >> 24) & 0x7); +} + +u8 RSXVertexTexture::GetConvolutionFilter() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 32)] >> 13) & 0xf); +} + +bool RSXVertexTexture::isASigned() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 32)] >> 28) & 0x1); +} + +bool RSXVertexTexture::isRSigned() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 32)] >> 29) & 0x1); +} + +bool RSXVertexTexture::isGSigned() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 32)] >> 30) & 0x1); +} + +bool RSXVertexTexture::isBSigned() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_FILTER + (m_index * 32)] >> 31) & 0x1); +} + +u16 RSXVertexTexture::GetWidth() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_IMAGE_RECT + (m_index * 32)] >> 16) & 0xffff); +} + +u16 RSXVertexTexture::GetHeight() const +{ + return ((methodRegisters[NV4097_SET_VERTEX_TEXTURE_IMAGE_RECT + (m_index * 32)]) & 0xffff); +} + +u32 RSXVertexTexture::GetBorderColor() const +{ + return methodRegisters[NV4097_SET_VERTEX_TEXTURE_BORDER_COLOR + (m_index * 32)]; +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/RSXTexture.h b/rpcs3/Emu/RSX/RSXTexture.h index d5db3480fb..c595dba082 100644 --- a/rpcs3/Emu/RSX/RSXTexture.h +++ b/rpcs3/Emu/RSX/RSXTexture.h @@ -2,6 +2,7 @@ class RSXTexture { +protected: u8 m_index; public: @@ -11,6 +12,64 @@ public: public: RSXTexture(); RSXTexture(u8 index); + virtual void Init(); + + // Offset + virtual u32 GetOffset() const; + + // Format + virtual u8 GetLocation() const; + virtual bool isCubemap() const; + virtual u8 GetBorderType() const; + virtual u8 GetDimension() const; + virtual u8 GetFormat() const; + virtual u16 GetMipmap() const; + + // Address + virtual u8 GetWrapS() const; + virtual u8 GetWrapT() const; + virtual u8 GetWrapR() const; + virtual u8 GetUnsignedRemap() const; + virtual u8 GetZfunc() const; + virtual u8 GetGamma() const; + virtual u8 GetAnisoBias() const; + virtual u8 GetSignedRemap() const; + + // Control0 + virtual bool IsEnabled() const; + virtual u16 GetMinLOD() const; + virtual u16 GetMaxLOD() const; + virtual u8 GetMaxAniso() const; + virtual bool IsAlphaKillEnabled() const; + + // Control1 + virtual u32 GetRemap() const; + + // Filter + virtual u16 GetBias() const; + virtual u8 GetMinFilter() const; + virtual u8 GetMagFilter() const; + virtual u8 GetConvolutionFilter() const; + virtual bool isASigned() const; + virtual bool isRSigned() const; + virtual bool isGSigned() const; + virtual bool isBSigned() const; + + // Image Rect + virtual u16 GetWidth() const; + virtual u16 GetHeight() const; + + // Border Color + virtual u32 GetBorderColor() const; + + void SetControl3(u16 depth, u32 pitch); +}; + +class RSXVertexTexture : public RSXTexture +{ +public: + RSXVertexTexture(); + RSXVertexTexture(u8 index); void Init(); // Offset @@ -60,6 +119,4 @@ public: // Border Color u32 GetBorderColor() const; - - void SetControl3(u16 depth, u32 pitch); -}; +}; \ No newline at end of file diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 311dd3143c..62606796a8 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -181,6 +181,12 @@ u32 RSXThread::OutOfArgsCount(const uint x, const u32 cmd, const u32 count, cons return 0; } +#define case_4(a, m) \ + case a + m: \ + case a + m * 2: \ + case a + m * 3: \ + index = (cmd - a) / m; \ + case a \ #define case_16(a, m) \ case a + m: \ @@ -252,8 +258,6 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const m_used_gcm_commands.insert(cmd); - //static u32 draw_array_count = 0; - switch(cmd) { // NV406E @@ -392,6 +396,29 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const } break; + // Vertex Texture + case_4(NV4097_SET_VERTEX_TEXTURE_FORMAT, 0x20) : + case_4(NV4097_SET_VERTEX_TEXTURE_OFFSET, 0x20) : + case_4(NV4097_SET_VERTEX_TEXTURE_FILTER, 0x20) : + case_4(NV4097_SET_VERTEX_TEXTURE_ADDRESS, 0x20) : + case_4(NV4097_SET_VERTEX_TEXTURE_IMAGE_RECT, 0x20) : + case_4(NV4097_SET_VERTEX_TEXTURE_BORDER_COLOR, 0x20) : + case_4(NV4097_SET_VERTEX_TEXTURE_CONTROL0, 0x20) : + { + // Done using methodRegisters in RSXTexture.cpp + } + break; + + case_4(NV4097_SET_VERTEX_TEXTURE_CONTROL3, 0x20) : + { + RSXVertexTexture& tex = m_vertex_textures[index]; + const u32 a0 = ARGS(0); + u32 pitch = a0 & 0xFFFFF; + u16 depth = a0 >> 20; + tex.SetControl3(depth, pitch); + } + break; + // Vertex data case_16(NV4097_SET_VERTEX_DATA4UB_M, 4): { @@ -971,10 +998,27 @@ void RSXThread::DoCmd(const u32 fcmd, const u32 cmd, const u32 args_addr, const const u32 a0 = ARGS(0); //LOG_WARNING(RSX, "NV4097_SET_BEGIN_END: 0x%x", a0); + if (!m_indexed_array.m_count && !m_draw_array_count) + { + u32 min_vertex_size = ~0; + for (auto &i : m_vertex_data) + { + if (!i.size) + continue; - m_read_buffer = false; + u32 vertex_size = i.data.size() / (i.size * i.GetTypeSize()); - if(a0) + if (min_vertex_size > vertex_size) + min_vertex_size = vertex_size; + } + + m_draw_array_count = min_vertex_size; + m_draw_array_first = 0; + } + + m_read_buffer = Ini.GSReadColorBuffer.GetValue() || (!m_indexed_array.m_count && !m_draw_array_count); + + if (a0) { Begin(a0); } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index d38b45fec1..e93af15a85 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -106,6 +106,7 @@ public: GcmTileInfo m_tiles[m_tiles_count]; GcmZcullInfo m_zculls[m_zculls_count]; RSXTexture m_textures[m_textures_count]; + RSXVertexTexture m_vertex_textures[m_textures_count]; RSXVertexData m_vertex_data[m_vertex_count]; RSXIndexArrayData m_indexed_array; std::vector m_fragment_constants; diff --git a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp index 2f6b5a5bd0..8e08a1faf7 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellGcmSys.cpp @@ -369,7 +369,7 @@ s32 _cellGcmInitBody(vm::ptr context, u32 cmdSize, u32 ioSiz u32 ctx_begin = ioAddress/* + 0x1000*/; u32 ctx_size = 0x6ffc; current_context.begin = ctx_begin; - current_context.end = ctx_begin + ctx_size; + current_context.end = ctx_begin + ctx_size - 4; current_context.current = current_context.begin; current_context.callback.set(be_t::make(Emu.GetRSXCallback() - 4)); @@ -1172,7 +1172,7 @@ s32 cellGcmCallback(vm::ptr context, u32 count) GSLockCurrent gslock(GS_LOCK_WAIT_FLUSH); - if (1) + if (0) { auto& ctrl = vm::get_ref(gcm_info.control_addr); be_t res = be_t::make(context->current - context->begin - ctrl.put.read_relaxed()); diff --git a/rpcs3/Emu/SysCalls/Modules/sceNp.cpp b/rpcs3/Emu/SysCalls/Modules/sceNp.cpp index ab3645a1cb..a05edc415c 100644 --- a/rpcs3/Emu/SysCalls/Modules/sceNp.cpp +++ b/rpcs3/Emu/SysCalls/Modules/sceNp.cpp @@ -116,17 +116,16 @@ int npDrmIsAvailable(u32 k_licensee_addr, vm::ptr drm_path) std::string rap_path("/dev_hdd0/home/" + pf_str + "/exdata/"); // Search dev_usb000 for a compatible RAP file. - vfsDir *raps_dir = new vfsDir(rap_path); - if (!raps_dir->IsOpened()) + vfsDir raps_dir(rap_path); + if (!raps_dir.IsOpened()) sceNp->Warning("npDrmIsAvailable: Can't find RAP file for DRM!"); else { - const std::vector &entries = raps_dir->GetEntries(); - for (auto &entry : entries) + for (const DirEntryInfo *entry : raps_dir) { - if (entry.name.find(titleID) != std::string::npos) + if (entry->name.find(titleID) != std::string::npos) { - rap_path += entry.name; + rap_path += entry->name; break; } } diff --git a/rpcs3/Gui/MainFrame.cpp b/rpcs3/Gui/MainFrame.cpp index 6c096cda47..b64f1aad56 100644 --- a/rpcs3/Gui/MainFrame.cpp +++ b/rpcs3/Gui/MainFrame.cpp @@ -414,6 +414,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) wxCheckBox* chbox_gs_log_prog = new wxCheckBox(p_graphics, wxID_ANY, "Log vertex/fragment programs"); wxCheckBox* chbox_gs_dump_depth = new wxCheckBox(p_graphics, wxID_ANY, "Write Depth Buffer"); wxCheckBox* chbox_gs_dump_color = new wxCheckBox(p_graphics, wxID_ANY, "Write Color Buffers"); + wxCheckBox* chbox_gs_read_color = new wxCheckBox(p_graphics, wxID_ANY, "Read Color Buffer"); wxCheckBox* chbox_gs_vsync = new wxCheckBox(p_graphics, wxID_ANY, "VSync"); wxCheckBox* chbox_gs_3dmonitor = new wxCheckBox(p_graphics, wxID_ANY, "3D Monitor"); wxCheckBox* chbox_audio_dump = new wxCheckBox(p_audio, wxID_ANY, "Dump to file"); @@ -501,6 +502,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) chbox_gs_log_prog ->SetValue(Ini.GSLogPrograms.GetValue()); chbox_gs_dump_depth ->SetValue(Ini.GSDumpDepthBuffer.GetValue()); chbox_gs_dump_color ->SetValue(Ini.GSDumpColorBuffers.GetValue()); + chbox_gs_read_color ->SetValue(Ini.GSReadColorBuffer.GetValue()); chbox_gs_vsync ->SetValue(Ini.GSVSyncEnable.GetValue()); chbox_gs_3dmonitor ->SetValue(Ini.GS3DTV.GetValue()); chbox_audio_dump ->SetValue(Ini.AudioDumpToFile.GetValue()); @@ -568,6 +570,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) s_subpanel_graphics->Add(chbox_gs_log_prog, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics->Add(chbox_gs_dump_depth, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics->Add(chbox_gs_dump_color, wxSizerFlags().Border(wxALL, 5).Expand()); + s_subpanel_graphics->Add(chbox_gs_read_color, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics->Add(chbox_gs_vsync, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics->Add(chbox_gs_3dmonitor, wxSizerFlags().Border(wxALL, 5).Expand()); @@ -628,6 +631,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) Ini.GSLogPrograms.SetValue(chbox_gs_log_prog->GetValue()); Ini.GSDumpDepthBuffer.SetValue(chbox_gs_dump_depth->GetValue()); Ini.GSDumpColorBuffers.SetValue(chbox_gs_dump_color->GetValue()); + Ini.GSReadColorBuffer.SetValue(chbox_gs_read_color->GetValue()); Ini.GSVSyncEnable.SetValue(chbox_gs_vsync->GetValue()); Ini.GS3DTV.SetValue(chbox_gs_3dmonitor->GetValue()); Ini.PadHandlerMode.SetValue(cbox_pad_handler->GetSelection()); diff --git a/rpcs3/Ini.h b/rpcs3/Ini.h index cbfc6d9fe4..732d2b43d6 100644 --- a/rpcs3/Ini.h +++ b/rpcs3/Ini.h @@ -107,6 +107,7 @@ public: IniEntry GSLogPrograms; IniEntry GSDumpColorBuffers; IniEntry GSDumpDepthBuffer; + IniEntry GSReadColorBuffer; IniEntry GSVSyncEnable; IniEntry GS3DTV; @@ -182,6 +183,7 @@ public: GSLogPrograms.Init("GS_LogPrograms", path); GSDumpColorBuffers.Init("GS_DumpColorBuffers", path); GSDumpDepthBuffer.Init("GS_DumpDepthBuffer", path); + GSReadColorBuffer.Init("GS_GSReadColorBuffer", path); GSVSyncEnable.Init("GS_VSyncEnable", path); GS3DTV.Init("GS_3DTV", path); @@ -253,6 +255,7 @@ public: GSLogPrograms.Load(false); GSDumpColorBuffers.Load(false); GSDumpDepthBuffer.Load(false); + GSReadColorBuffer.Load(false); GSVSyncEnable.Load(false); GS3DTV.Load(false); @@ -325,6 +328,7 @@ public: GSLogPrograms.Save(); GSDumpColorBuffers.Save(); GSDumpDepthBuffer.Save(); + GSReadColorBuffer.Save(); GSVSyncEnable.Save(); GS3DTV.Save(); diff --git a/rpcs3/rpcs3.cpp b/rpcs3/rpcs3.cpp index 2ec10d0ce1..93ad015b74 100644 --- a/rpcs3/rpcs3.cpp +++ b/rpcs3/rpcs3.cpp @@ -42,6 +42,163 @@ Rpcs3App* TheApp; std::string simplify_path(const std::string& path, bool is_dir); +typedef be_t CGprofile; +typedef int CGbool; +typedef be_t CGresource; +typedef be_t CGenum; +typedef be_t CGtype; + +typedef be_t CgBinaryOffset; +typedef CgBinaryOffset CgBinaryEmbeddedConstantOffset; +typedef CgBinaryOffset CgBinaryFloatOffset; +typedef CgBinaryOffset CgBinaryStringOffset; +typedef CgBinaryOffset CgBinaryParameterOffset; + +// a few typedefs +typedef struct CgBinaryParameter CgBinaryParameter; +typedef struct CgBinaryEmbeddedConstant CgBinaryEmbeddedConstant; +typedef struct CgBinaryVertexProgram CgBinaryVertexProgram; +typedef struct CgBinaryFragmentProgram CgBinaryFragmentProgram; +typedef struct CgBinaryProgram CgBinaryProgram; + +// fragment programs have their constants embedded in the microcode +struct CgBinaryEmbeddedConstant +{ + be_t ucodeCount; // occurances + be_t ucodeOffset[1]; // offsets that need to be patched follow +}; + +// describe a binary program parameter (CgParameter is opaque) +struct CgBinaryParameter +{ + CGtype type; // cgGetParameterType() + CGresource res; // cgGetParameterResource() + CGenum var; // cgGetParameterVariability() + be_t resIndex; // cgGetParameterResourceIndex() + CgBinaryStringOffset name; // cgGetParameterName() + CgBinaryFloatOffset defaultValue; // default constant value + CgBinaryEmbeddedConstantOffset embeddedConst; // embedded constant information + CgBinaryStringOffset semantic; // cgGetParameterSemantic() + CGenum direction; // cgGetParameterDirection() + be_t paramno; // 0..n: cgGetParameterIndex() -1: globals + CGbool isReferenced; // cgIsParameterReferenced() + CGbool isShared; // cgIsParameterShared() +}; + +// attributes needed for vshaders +struct CgBinaryVertexProgram +{ + be_t instructionCount; // #instructions + be_t instructionSlot; // load address (indexed reads!) + be_t registerCount; // R registers count + be_t attributeInputMask; // attributes vs reads from + be_t attributeOutputMask; // attributes vs writes (uses SET_VERTEX_ATTRIB_OUTPUT_MASK bits) + be_t userClipMask; // user clip plane enables (for SET_USER_CLIP_PLANE_CONTROL) +}; + +typedef enum { + CgBinaryPTTNone = 0, + CgBinaryPTT2x16 = 1, + CgBinaryPTT1x32 = 2, +} CgBinaryPartialTexType; + +// attributes needed for pshaders +struct CgBinaryFragmentProgram +{ + be_t instructionCount; // #instructions + be_t attributeInputMask; // attributes fp reads (uses SET_VERTEX_ATTRIB_OUTPUT_MASK bits) + be_t partialTexType; // texid 0..15 use two bits each marking whether the texture format requires partial load: see CgBinaryPartialTexType + be_t texCoordsInputMask; // tex coords used by frag prog. (tex is bit n) + be_t texCoords2D; // tex coords that are 2d (tex is bit n) + be_t texCoordsCentroid; // tex coords that are centroid (tex is bit n) + unsigned char registerCount; // R registers count + unsigned char outputFromH0; // final color from R0 or H0 + unsigned char depthReplace; // fp generated z epth value + unsigned char pixelKill; // fp uses kill operations +}; + +#include "Emu/RSX/GL/GLFragmentProgram.h" +#include "Emu/RSX/GL/GLVertexProgram.h" +// defines a binary program -- *all* address/offsets are relative to the begining of CgBinaryProgram +struct CgBinaryProgram +{ + // vertex/pixel shader identification (BE/LE as well) + CGprofile profile; + + // binary revision (used to verify binary and driver structs match) + be_t binaryFormatRevision; + + // total size of this struct including profile and totalSize field! + be_t totalSize; + + // parameter usually queried using cgGet[First/Next]LeafParameter + be_t parameterCount; + CgBinaryParameterOffset parameterArray; + + // depending on profile points to a CgBinaryVertexProgram or CgBinaryFragmentProgram struct + CgBinaryOffset program; + + // raw ucode data + be_t ucodeSize; + CgBinaryOffset ucode; + + // variable length data follows + unsigned char data[1]; +}; + +void compile_shader(std::string path) +{ + ShaderVar var("r0.yz.x"); + var.symplify(); + LOG_ERROR(GENERAL, var.get().c_str()); + + u32 ptr; + { + wxFile f(path); + + if (!f.IsOpened()) + return; + + size_t size = f.Length(); + vm::ps3::init(); + ptr = vm::alloc(size); + f.Read(vm::get_ptr(ptr), size); + f.Close(); + } + + CgBinaryProgram& prog = vm::get_ref(ptr); + LOG_ERROR(GENERAL, "%d - %x", (u32)prog.profile, (u32)prog.binaryFormatRevision); + + std::string shader; + GLParamArray param_array; + u32 size; + + if (prog.profile == 7004) + { + CgBinaryFragmentProgram& fprog = vm::get_ref(ptr + prog.program); + + u32 ctrl = (fprog.outputFromH0 ? 0 : 0x40) | (fprog.depthReplace ? 0xe : 0); + + GLFragmentDecompilerThread(shader, param_array, ptr + prog.ucode, size, ctrl).Task(); + } + else + { + CgBinaryVertexProgram& vprog = vm::get_ref(ptr + prog.program); + + std::vector data; + be_t* vdata = vm::get_ptr>(ptr + prog.ucode); + for (u32 i = 0; i < prog.ucodeSize; ++i, ++vdata) + { + data.push_back(vdata[i]); + } + + GLVertexDecompilerThread(data, shader, param_array).Task(); + } + + LOG_ERROR(GENERAL, shader.c_str()); + vm::close(); +} + bool Rpcs3App::OnInit() { SetSendDbgCommandCallback([](DbgCommand id, CPUThread* t) @@ -146,6 +303,9 @@ bool Rpcs3App::OnInit() OnArguments(); + compile_shader("compile_shader0.spo"); + compile_shader("compile_shader1.spo"); + return true; } @@ -187,9 +347,9 @@ Rpcs3App::Rpcs3App() timeBeginPeriod(1); #endif - #if defined(__unix__) && !defined(__APPLE__) +#if defined(__unix__) && !defined(__APPLE__) XInitThreads(); - #endif +#endif } GameInfo CurGameInfo;