diff --git a/.travis.yml b/.travis.yml index d5b5186333..d05e2b979d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,10 +20,6 @@ env: # Test mode is for testing if it's working with Coverity. Change to true if testing, to avoid reaching the quota. - coverity_scan_script_test_mode=false -branches: - except: - - ppu_recompiler - matrix: exclude: - os: osx diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index 94e8cb1684..e21023c2c0 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -102,6 +102,23 @@ enum x64_reg_t : u32 X64_IMM16, X64_IMM32, + X64_BIT_O = 0x90, + X64_BIT_NO, + X64_BIT_C, + X64_BIT_NC, + X64_BIT_Z, + X64_BIT_NZ, + X64_BIT_BE, + X64_BIT_NBE, + X64_BIT_S, + X64_BIT_NS, + X64_BIT_P, + X64_BIT_NP, + X64_BIT_L, + X64_BIT_NL, + X64_BIT_LE, + X64_BIT_NLE, + X64R_ECX = X64R_CL, }; @@ -109,16 +126,22 @@ enum x64_op_t : u32 { X64OP_NONE, X64OP_LOAD, // obtain and put the value into x64 register + X64OP_LOAD_BE, X64OP_STORE, // take the value from x64 register or an immediate and use it + X64OP_STORE_BE, X64OP_MOVS, X64OP_STOS, X64OP_XCHG, X64OP_CMPXCHG, - X64OP_LOAD_AND_STORE, // lock and [mem], reg - X64OP_LOAD_OR_STORE, // lock or [mem], reg (TODO) - X64OP_LOAD_XOR_STORE, // lock xor [mem], reg (TODO) - X64OP_INC, // lock inc [mem] (TODO) - X64OP_DEC, // lock dec [mem] (TODO) + X64OP_AND, // lock and [mem], ... + X64OP_OR, // lock or [mem], ... + X64OP_XOR, // lock xor [mem], ... + X64OP_INC, // lock inc [mem] + X64OP_DEC, // lock dec [mem] + X64OP_ADD, // lock add [mem], ... + X64OP_ADC, // lock adc [mem], ... + X64OP_SUB, // lock sub [mem], ... + X64OP_SBB, // lock sbb [mem], ... 
}; void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, size_t& out_size, size_t& out_length) @@ -321,6 +344,57 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz } break; } + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9a: + case 0x9b: + case 0x9c: + case 0x9d: + case 0x9e: + case 0x9f: + { + if (!lock) // SETcc + { + out_op = X64OP_STORE; + out_reg = x64_reg_t(X64_BIT_O + op2 - 0x90); // 0x90 .. 0x9f + out_size = 1; + out_length += get_modRM_size(code); + return; + } + break; + } + case 0x38: + { + out_length++, code++; + + switch (op3) + { + case 0xf0: + case 0xf1: + { + if (!repne) // MOVBE + { + out_op = op3 == 0xf0 ? X64OP_LOAD_BE : X64OP_STORE_BE; + out_reg = get_modRM_reg(code, rex); + out_size = get_op_size(rex, oso); + out_length += get_modRM_size(code); + return; + } + + break; + } + } + + break; + } } break; @@ -329,7 +402,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz { if (!oso) { - out_op = X64OP_LOAD_AND_STORE; + out_op = X64OP_AND; out_reg = rex & 8 ? 
get_modRM_reg(code, rex) : get_modRM_reg_lh(code); out_size = 1; out_length += get_modRM_size(code); @@ -341,7 +414,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz { if (true) { - out_op = X64OP_LOAD_AND_STORE; + out_op = X64OP_AND; out_reg = get_modRM_reg(code, rex); out_size = get_op_size(rex, oso); out_length += get_modRM_size(code); @@ -349,6 +422,63 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz } break; } + case 0x80: + { + switch (auto mod_code = get_modRM_reg(code, 0)) + { + //case 0: out_op = X64OP_ADD; break; // TODO: strange info in instruction manual + case 1: out_op = X64OP_OR; break; + case 2: out_op = X64OP_ADC; break; + case 3: out_op = X64OP_SBB; break; + case 4: out_op = X64OP_AND; break; + case 5: out_op = X64OP_SUB; break; + case 6: out_op = X64OP_XOR; break; + default: out_op = X64OP_NONE; break; // CMP + } + + out_reg = X64_IMM8; + out_size = 1; + out_length += get_modRM_size(code) + 1; + return; + } + case 0x81: + { + switch (auto mod_code = get_modRM_reg(code, 0)) + { + case 0: out_op = X64OP_ADD; break; + case 1: out_op = X64OP_OR; break; + case 2: out_op = X64OP_ADC; break; + case 3: out_op = X64OP_SBB; break; + case 4: out_op = X64OP_AND; break; + case 5: out_op = X64OP_SUB; break; + case 6: out_op = X64OP_XOR; break; + default: out_op = X64OP_NONE; break; // CMP + } + + out_reg = oso ? X64_IMM16 : X64_IMM32; + out_size = get_op_size(rex, oso); + out_length += get_modRM_size(code) + (oso ? 
2 : 4); + return; + } + case 0x83: + { + switch (auto mod_code = get_modRM_reg(code, 0)) + { + case 0: out_op = X64OP_ADD; break; + case 1: out_op = X64OP_OR; break; + case 2: out_op = X64OP_ADC; break; + case 3: out_op = X64OP_SBB; break; + case 4: out_op = X64OP_AND; break; + case 5: out_op = X64OP_SUB; break; + case 6: out_op = X64OP_XOR; break; + default: out_op = X64OP_NONE; break; // CMP + } + + out_reg = X64_IMM8; + out_size = get_op_size(rex, oso); + out_length += get_modRM_size(code) + 1; + return; + } case 0x86: { if (!oso) // XCHG r8/m8, r8 @@ -459,7 +589,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz } case 0xc6: { - if (!lock && !oso && get_modRM_reg(code, 0) == X64R_RAX) // MOV r8/m8, imm8 + if (!lock && !oso && get_modRM_reg(code, 0) == 0) // MOV r8/m8, imm8 { out_op = X64OP_STORE; out_reg = X64_IMM8; @@ -471,7 +601,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz } case 0xc7: { - if (!lock && get_modRM_reg(code, 0) == X64R_RAX) // MOV r/m, imm16/imm32 (16, 32, 64) + if (!lock && get_modRM_reg(code, 0) == 0) // MOV r/m, imm16/imm32 (16, 32, 64) { out_op = X64OP_STORE; out_reg = oso ? 
X64_IMM16 : X64_IMM32; @@ -599,6 +729,9 @@ bool get_x64_reg_value(x64_context* context, x64_reg_t reg, size_t d_size, size_ switch (d_size) { case 1: out_value = (u8)imm_value; return true; + case 2: out_value = (u16)imm_value; return true; // sign-extended + case 4: out_value = (u32)imm_value; return true; // sign-extended + case 8: out_value = (u64)imm_value; return true; // sign-extended } } else if (reg == X64_IMM16) @@ -625,6 +758,29 @@ bool get_x64_reg_value(x64_context* context, x64_reg_t reg, size_t d_size, size_ out_value = (u32)RCX(context); return true; } + else if (reg >= X64_BIT_O && reg <= X64_BIT_NLE) + { + const u32 _cf = EFLAGS(context) & 0x1; + const u32 _zf = EFLAGS(context) & 0x40; + const u32 _sf = EFLAGS(context) & 0x80; + const u32 _of = EFLAGS(context) & 0x800; + const u32 _pf = EFLAGS(context) & 0x4; + const u32 _l = (_sf << 4) ^ _of; // SF != OF + + switch (reg & ~1) + { + case X64_BIT_O: out_value = !!_of ^ (reg & 1); break; + case X64_BIT_C: out_value = !!_cf ^ (reg & 1); break; + case X64_BIT_Z: out_value = !!_zf ^ (reg & 1); break; + case X64_BIT_BE: out_value = !!(_cf | _zf) ^ (reg & 1); break; + case X64_BIT_S: out_value = !!_sf ^ (reg & 1); break; + case X64_BIT_P: out_value = !!_pf ^ (reg & 1); break; + case X64_BIT_L: out_value = !!_l ^ (reg & 1); break; + case X64_BIT_LE: out_value = !!(_l | _zf) ^ (reg & 1); break; + } + + return true; + } LOG_ERROR(MEMORY, "get_x64_reg_value(): invalid arguments (reg=%d, d_size=%lld, i_size=%lld)", reg, d_size, i_size); return false; @@ -649,7 +805,7 @@ bool put_x64_reg_value(x64_context* context, x64_reg_t reg, size_t d_size, u64 v return false; } -bool set_x64_cmp_flags(x64_context* context, size_t d_size, u64 x, u64 y) +bool set_x64_cmp_flags(x64_context* context, size_t d_size, u64 x, u64 y, bool carry = true) { switch (d_size) { @@ -664,11 +820,11 @@ bool set_x64_cmp_flags(x64_context* context, size_t d_size, u64 x, u64 y) const u64 diff = x - y; const u64 summ = x + y; - if (((x & y) | 
((x ^ y) & ~summ)) & sign) + if (carry && ((x & y) | ((x ^ y) & ~summ)) & sign) { EFLAGS(context) |= 0x1; // set CF } - else + else if (carry) { EFLAGS(context) &= ~0x1; // clear CF } @@ -833,9 +989,10 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) switch (op) { case X64OP_LOAD: + case X64OP_LOAD_BE: { u32 value; - if (is_writing || !thread->read_reg(addr, value) || !put_x64_reg_value(context, reg, d_size, se_storage::swap(value))) + if (is_writing || !thread->read_reg(addr, value) || !put_x64_reg_value(context, reg, d_size, op == X64OP_LOAD ? se_storage::swap(value) : value)) { return false; } @@ -843,9 +1000,10 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) break; } case X64OP_STORE: + case X64OP_STORE_BE: { u64 reg_value; - if (!is_writing || !get_x64_reg_value(context, reg, d_size, i_size, reg_value) || !thread->write_reg(addr, se_storage::swap((u32)reg_value))) + if (!is_writing || !get_x64_reg_value(context, reg, d_size, i_size, reg_value) || !thread->write_reg(addr, op == X64OP_STORE ? 
se_storage::swap((u32)reg_value) : (u32)reg_value)) { return false; } @@ -881,8 +1039,9 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) switch (op) { case X64OP_STORE: + case X64OP_STORE_BE: { - if (d_size == 16) + if (d_size == 16 && op == X64OP_STORE) { if (reg - X64R_XMM0 >= 16) { @@ -900,7 +1059,44 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) return false; } - std::memcpy(vm::base_priv(addr), &reg_value, d_size); + if (op == X64OP_STORE_BE && d_size == 2) + { + reg_value = se_storage::swap((u16)reg_value); + } + else if (op == X64OP_STORE_BE && d_size == 4) + { + reg_value = se_storage::swap((u32)reg_value); + } + else if (op == X64OP_STORE_BE && d_size == 8) + { + reg_value = se_storage::swap(reg_value); + } + else if (op == X64OP_STORE_BE) + { + return false; + } + + if (d_size == 1) + { + *(volatile u8*)vm::base_priv(addr) = (u8)reg_value; + } + else if (d_size == 2 && addr % 2 == 0) + { + *(volatile u16*)vm::base_priv(addr) = (u16)reg_value; + } + else if (d_size == 4 && addr % 4 == 0) + { + *(volatile u32*)vm::base_priv(addr) = (u32)reg_value; + } + else if (d_size == 8 && addr % 8 == 0) + { + *(volatile u64*)vm::base_priv(addr) = (u64)reg_value; + } + else + { + std::memcpy(vm::base_priv(addr), &reg_value, d_size); + } + break; } case X64OP_MOVS: @@ -1060,7 +1256,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) } break; } - case X64OP_LOAD_AND_STORE: + case X64OP_AND: { u64 value; if (!get_x64_reg_value(context, reg, d_size, i_size, value)) @@ -1083,6 +1279,182 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) } break; } + case X64OP_OR: + { + u64 value; + if (!get_x64_reg_value(context, reg, d_size, i_size, value)) + { + return false; + } + + switch (d_size) + { + case 1: value = *(atomic_t*)vm::base_priv(addr) |= (u8)value; break; + case 2: value = *(atomic_t*)vm::base_priv(addr) |= (u16)value; break; + case 4: value = 
*(atomic_t*)vm::base_priv(addr) |= (u32)value; break; + case 8: value = *(atomic_t*)vm::base_priv(addr) |= (u64)value; break; + default: return false; + } + + if (!set_x64_cmp_flags(context, d_size, value, 0)) + { + return false; + } + break; + } + case X64OP_XOR: + { + u64 value; + if (!get_x64_reg_value(context, reg, d_size, i_size, value)) + { + return false; + } + + switch (d_size) + { + case 1: value = *(atomic_t*)vm::base_priv(addr) ^= (u8)value; break; + case 2: value = *(atomic_t*)vm::base_priv(addr) ^= (u16)value; break; + case 4: value = *(atomic_t*)vm::base_priv(addr) ^= (u32)value; break; + case 8: value = *(atomic_t*)vm::base_priv(addr) ^= (u64)value; break; + default: return false; + } + + if (!set_x64_cmp_flags(context, d_size, value, 0)) + { + return false; + } + break; + } + case X64OP_INC: + { + u64 value; + + switch (d_size) + { + case 1: value = ++*(atomic_t*)vm::base_priv(addr); break; + case 2: value = ++*(atomic_t*)vm::base_priv(addr); break; + case 4: value = ++*(atomic_t*)vm::base_priv(addr); break; + case 8: value = ++*(atomic_t*)vm::base_priv(addr); break; + default: return false; + } + + if (!set_x64_cmp_flags(context, d_size, value, 1, false)) // ??? + { + return false; + } + break; + } + case X64OP_DEC: + { + u64 value; + + switch (d_size) + { + case 1: value = --*(atomic_t*)vm::base_priv(addr); break; + case 2: value = --*(atomic_t*)vm::base_priv(addr); break; + case 4: value = --*(atomic_t*)vm::base_priv(addr); break; + case 8: value = --*(atomic_t*)vm::base_priv(addr); break; + default: return false; + } + + if (!set_x64_cmp_flags(context, d_size, value, -1, false)) // ??? 
+ { + return false; + } + break; + } + case X64OP_ADD: + { + u64 value, new_value; + if (!get_x64_reg_value(context, reg, d_size, i_size, value)) + { + return false; + } + + switch (d_size) + { + case 1: new_value = *(atomic_t*)vm::base_priv(addr) += (u8)value; break; + case 2: new_value = *(atomic_t*)vm::base_priv(addr) += (u16)value; break; + case 4: new_value = *(atomic_t*)vm::base_priv(addr) += (u32)value; break; + case 8: new_value = *(atomic_t*)vm::base_priv(addr) += (u64)value; break; + default: return false; + } + + if (!set_x64_cmp_flags(context, d_size, new_value, value)) // ??? + { + return false; + } + break; + } + case X64OP_ADC: + { + u64 value, new_value; + if (!get_x64_reg_value(context, reg, d_size, i_size, value)) + { + return false; + } + + switch (d_size) + { + case 1: new_value = *(atomic_t*)vm::base_priv(addr) += (u8)(value + (EFLAGS(context) & 1)); break; + case 2: new_value = *(atomic_t*)vm::base_priv(addr) += (u16)(value + (EFLAGS(context) & 1)); break; + case 4: new_value = *(atomic_t*)vm::base_priv(addr) += (u32)(value + (EFLAGS(context) & 1)); break; + case 8: new_value = *(atomic_t*)vm::base_priv(addr) += (u64)(value + (EFLAGS(context) & 1)); break; + default: return false; + } + + if (!set_x64_cmp_flags(context, d_size, new_value, value + (EFLAGS(context) & 1))) // ??? + { + return false; + } + break; + } + case X64OP_SUB: + { + u64 value, new_value; + if (!get_x64_reg_value(context, reg, d_size, i_size, value)) + { + return false; + } + + switch (d_size) + { + case 1: new_value = *(atomic_t*)vm::base_priv(addr) -= (u8)value; break; + case 2: new_value = *(atomic_t*)vm::base_priv(addr) -= (u16)value; break; + case 4: new_value = *(atomic_t*)vm::base_priv(addr) -= (u32)value; break; + case 8: new_value = *(atomic_t*)vm::base_priv(addr) -= (u64)value; break; + default: return false; + } + + if (!set_x64_cmp_flags(context, d_size, new_value, 0 - value)) // ??? 
+ { + return false; + } + break; + } + case X64OP_SBB: + { + u64 value, new_value; + if (!get_x64_reg_value(context, reg, d_size, i_size, value)) + { + return false; + } + + switch (d_size) + { + case 1: new_value = *(atomic_t*)vm::base_priv(addr) -= (u8)(value + (EFLAGS(context) & 1)); break; + case 2: new_value = *(atomic_t*)vm::base_priv(addr) -= (u16)(value + (EFLAGS(context) & 1)); break; + case 4: new_value = *(atomic_t*)vm::base_priv(addr) -= (u32)(value + (EFLAGS(context) & 1)); break; + case 8: new_value = *(atomic_t*)vm::base_priv(addr) -= (u64)(value + (EFLAGS(context) & 1)); break; + default: return false; + } + + if (!set_x64_cmp_flags(context, d_size, new_value, 0 - (value + (EFLAGS(context) & 1)))) // ??? + { + return false; + } + break; + } default: { LOG_ERROR(MEMORY, "Invalid or unsupported operation (op=%d, reg=%d, d_size=%lld, a_size=0x%llx, i_size=%lld)", op, reg, d_size, a_size, i_size); diff --git a/Vulkan/CMakeLists.txt b/Vulkan/CMakeLists.txt index 099905a96a..9af4a0da0a 100644 --- a/Vulkan/CMakeLists.txt +++ b/Vulkan/CMakeLists.txt @@ -1,10 +1,10 @@ -if(APPLE OR WIN32 AND NOT MSVC) +if(APPLE) else() -add_subdirectory( glslang ) set(BUILD_TESTS OFF CACHE BOOL "Build tests" FORCE) set(BUILD_DEMOS OFF CACHE BOOL "Build demos" FORCE) # TravisCI break build with layers and vkjson set(BUILD_LAYERS OFF CACHE BOOL "Build demos" FORCE) set(BUILD_VKJSON OFF CACHE BOOL "Build demos" FORCE) add_subdirectory( Vulkan-LoaderAndValidationLayers ) +add_subdirectory( glslang ) endif() diff --git a/appveyor.yml b/appveyor.yml index 8ce9345d6d..9661ab661a 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -8,10 +8,6 @@ clone_folder: c:\rpcs3 clone_depth: 3 test: off -branches: - except: - - ppu_recompiler - before_build: # until git for win 2.5 release with commit checkout - git submodule update --init 3rdparty/ffmpeg 3rdparty/pugixml asmjit 3rdparty/GSL 3rdparty/libpng Vulkan/glslang Vulkan/Vulkan-LoaderAndValidationLayers Utilities/yaml-cpp @@ -25,7 +21,8 @@ 
build_script: install: - ps: Start-FileDownload 'https://402331b94f8e4b87ae2ef4677347f7956cf3861f.googledrive.com/host/0B6v_qtb9hkicfmt0NG0wTTRtUmF4X3VTQk5Oc2JidEVKVnUteDA1dXdrYlNsVW9kREpsSHc/wxWidgets.7z' - - ps: Start-FileDownload 'https://402331b94f8e4b87ae2ef4677347f7956cf3861f.googledrive.com/host/0B6v_qtb9hkicfmt0NG0wTTRtUmF4X3VTQk5Oc2JidEVKVnUteDA1dXdrYlNsVW9kREpsSHc/llvmlibs.7z' +# - ps: Start-FileDownload 'https://402331b94f8e4b87ae2ef4677347f7956cf3861f.googledrive.com/host/0B6v_qtb9hkicfmt0NG0wTTRtUmF4X3VTQk5Oc2JidEVKVnUteDA1dXdrYlNsVW9kREpsSHc/llvmlibs.7z' + - ps: Start-FileDownload 'https://drive.google.com/uc?export=download&id=0B-98fOyaZKJ5YWVnb29JZXFQWkU' -FileName llvmlibs.7z - ps: Start-FileDownload 'https://402331b94f8e4b87ae2ef4677347f7956cf3861f.googledrive.com/host/0B6v_qtb9hkicfmt0NG0wTTRtUmF4X3VTQk5Oc2JidEVKVnUteDA1dXdrYlNsVW9kREpsSHc/zlib.7z' - set WXWIN=C:\rpcs3\wxWidgets - set OPENALDIR=C:\rpcs3\3rdparty\OpenAL diff --git a/llvm b/llvm index 19ade095e8..051e787f26 160000 --- a/llvm +++ b/llvm @@ -1 +1 @@ -Subproject commit 19ade095e8c3ea61f84b71074433309f0c7c7b3b +Subproject commit 051e787f26dbfdc26cf61a57bc82ca00dcb812e8 diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 4b4cd91f49..3a2d74af8d 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -25,7 +25,6 @@ elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") endif() if(WIN32) add_compile_options(-pthread) - add_compile_options(-D__GXX_ABI_VERSION=1009) endif() endif() @@ -94,7 +93,7 @@ if(NOT MSVC) endif() find_package(OpenGL REQUIRED) find_package(OpenAL REQUIRED) -#find_package(LLVM CONFIG) # TODO +find_package(LLVM 3.8 CONFIG) include("${wxWidgets_USE_FILE}") @@ -136,16 +135,14 @@ if(WIN32) endif() if(NOT LLVM_FOUND) - Message("LLVM not found! LLVM 3.6 is required. 
RPCS3 will be compiled without LLVM support.") -elseif(${LLVM_PACKAGE_VERSION} VERSION_LESS "3.6" OR ${LLVM_PACKAGE_VERSION} VERSION_EQUAL "3.7" OR ${LLVM_PACKAGE_VERSION} VERSION_GREATER "3.7") - Message("LLVM ${LLVM_PACKAGE_VERSION} is not supported! LLVM 3.6 is required. RPCS3 will be compiled without LLVM support.") + Message("LLVM not found! LLVM 3.8 is required. RPCS3 will be compiled without LLVM support.") else() add_definitions(${LLVM_DEFINITIONS}) add_definitions(-DLLVM_AVAILABLE) if (CMAKE_BUILD_TYPE STREQUAL "Release") - llvm_map_components_to_libnames(LLVM_LIBS mcjit vectorize x86codegen x86disassembler) + llvm_map_components_to_libnames(LLVM_LIBS mcjit vectorize ipo x86codegen x86disassembler) else() - llvm_map_components_to_libnames(LLVM_LIBS mcjit vectorize x86codegen x86disassembler mcdisassembler) + llvm_map_components_to_libnames(LLVM_LIBS mcjit vectorize ipo x86codegen x86disassembler mcdisassembler) endif() endif() @@ -172,7 +169,7 @@ RPCS3_SRC "${RPCS3_SRC_DIR}/../asmjit/src/asmjit/*.cpp" ) -if(APPLE OR WIN32 AND NOT MSVC) +if(APPLE) set (EXCLUDE_DIR "/RSX/VK/") foreach (TMP_PATH ${RPCS3_SRC}) string (FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND) @@ -186,14 +183,15 @@ add_executable(rpcs3 ${RPCS3_SRC}) if(MSVC) - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /SUBSYSTEM:WINDOWS /NODEFAULTLIB:libc.lib /NODEFAULTLIB:libcmt.lib /NODEFAULTLIB:libcd.lib /NODEFAULTLIB:libcmtd.lib /NODEFAULTLIB:msvcrtd.lib") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:libc.lib /NODEFAULTLIB:libcmt.lib /NODEFAULTLIB:libcd.lib /NODEFAULTLIB:libcmtd.lib /NODEFAULTLIB:msvcrtd.lib") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /SUBSYSTEM:WINDOWS /DYNAMICBASE:NO /BASE:0x10000 /FIXED") endif() if(WIN32) - target_link_libraries(rpcs3 ws2_32.lib Winmm.lib) + target_link_libraries(rpcs3 ws2_32.lib Winmm.lib VKstatic.1 glslang OSDependent OGLCompiler SPIRV) if(NOT MSVC) - target_link_libraries(rpcs3 ${OPENGL_LIBRARIES} 
${GLEW_LIBRARY} opengl32.lib glu32.lib -lstdc++.dll -lpthread.dll) + target_link_libraries(rpcs3 ${OPENGL_LIBRARIES} ${GLEW_LIBRARY} opengl32.lib glu32.lib libpthread) else() - target_link_libraries(rpcs3 dxgi.lib d2d1.lib dwrite.lib VKstatic.1 glslang OSDependent OGLCompiler SPIRV) + target_link_libraries(rpcs3 dxgi.lib d2d1.lib dwrite.lib) endif() target_link_libraries(rpcs3 avformat.lib avcodec.lib avutil.lib swresample.lib swscale.lib png16_static ${wxWidgets_LIBRARIES} ${OPENAL_LIBRARY} ${ADDITIONAL_LIBS}) else() diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 89cbb179cf..63f8d50769 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -1,7 +1,7 @@ #pragma once -#include "Utilities/Thread.h" -#include "Utilities/BitSet.h" +#include "../Utilities/Thread.h" +#include "../Utilities/BitSet.h" // CPU Thread Type enum class cpu_type : u32 diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp new file mode 100644 index 0000000000..4f3ad59106 --- /dev/null +++ b/rpcs3/Emu/CPU/CPUTranslator.cpp @@ -0,0 +1 @@ +#include "CPUTranslator.h" diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h new file mode 100644 index 0000000000..6f70f09bee --- /dev/null +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -0,0 +1 @@ +#pragma once diff --git a/rpcs3/Emu/Cell/Modules/sysPrxForUser.cpp b/rpcs3/Emu/Cell/Modules/sysPrxForUser.cpp index d82f10ac30..41c61a4f0e 100644 --- a/rpcs3/Emu/Cell/Modules/sysPrxForUser.cpp +++ b/rpcs3/Emu/Cell/Modules/sysPrxForUser.cpp @@ -74,7 +74,7 @@ void sys_initialize_tls(PPUThread& ppu, u64 main_thread_id, u32 tls_seg_addr, u3 // Allocate TLS for main thread ppu.GPR[13] = ppu_alloc_tls() + 0x7000 + TLS_SYS; - sysPrxForUser.notice("TLS initialized (addr=0x%x, size=0x%x, max=0x%zu)", g_tls_addr - 0x30, g_tls_size, g_tls_max); + sysPrxForUser.notice("TLS initialized (addr=0x%x, size=0x%x, max=0x%x)", g_tls_addr - 0x30, g_tls_size, g_tls_max); // TODO g_spu_printf_agcb = 
vm::null; diff --git a/rpcs3/Emu/Cell/Modules/sys_ppu_thread_.cpp b/rpcs3/Emu/Cell/Modules/sys_ppu_thread_.cpp index b37289ec3f..07bc69d545 100644 --- a/rpcs3/Emu/Cell/Modules/sys_ppu_thread_.cpp +++ b/rpcs3/Emu/Cell/Modules/sys_ppu_thread_.cpp @@ -1,8 +1,10 @@ #include "stdafx.h" #include "Emu/System.h" #include "Emu/Cell/PPUModule.h" +#include "Emu/IdManager.h" #include "Emu/Cell/lv2/sys_ppu_thread.h" +#include "Emu/Cell/lv2/sys_event.h" #include "sysPrxForUser.h" extern logs::channel sysPrxForUser; @@ -29,8 +31,30 @@ s32 sys_ppu_thread_create(vm::ptr thread_id, u32 entry, u64 arg, s32 prio, return res; } + if (flags & SYS_PPU_THREAD_CREATE_INTERRUPT) + { + return CELL_OK; + } + // Run the thread - return flags & SYS_PPU_THREAD_CREATE_INTERRUPT ? CELL_OK : sys_ppu_thread_start(static_cast(*thread_id)); + if (s32 res = sys_ppu_thread_start(static_cast(*thread_id))) + { + return res; + } + + // Dirty hack for sound: confirm the creation of _mxr000 event queue + if (std::memcmp(threadname.get_ptr(), "_cellsurMixerMain", 18) == 0) + { + while (!idm::select([](u32, lv2_event_queue_t& eq) + { + return eq.name == "_mxr000\0"_u64; + })) + { + thread_ctrl::sleep(50000); + } + } + + return CELL_OK; } s32 sys_ppu_thread_get_id(PPUThread& ppu, vm::ptr thread_id) diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index b7b18ac650..3716028f5e 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -395,3 +395,401 @@ struct ppu_itype return value; } }; + +struct ppu_iname +{ + // Aggregate to store instruction name + struct type { const char* name; }; + + // Enable address-of operator for ppu_decoder<> + friend constexpr const char* operator &(type value) + { + return value.name; + } + +#define NAME(x) static constexpr type x{#x}; + + NAME(UNK) + NAME(MFVSCR) + NAME(MTVSCR) + NAME(VADDCUW) + NAME(VADDFP) + NAME(VADDSBS) + NAME(VADDSHS) + NAME(VADDSWS) + NAME(VADDUBM) + NAME(VADDUBS) + NAME(VADDUHM) + NAME(VADDUHS) + NAME(VADDUWM) + 
NAME(VADDUWS) + NAME(VAND) + NAME(VANDC) + NAME(VAVGSB) + NAME(VAVGSH) + NAME(VAVGSW) + NAME(VAVGUB) + NAME(VAVGUH) + NAME(VAVGUW) + NAME(VCFSX) + NAME(VCFUX) + NAME(VCMPBFP) + NAME(VCMPEQFP) + NAME(VCMPEQUB) + NAME(VCMPEQUH) + NAME(VCMPEQUW) + NAME(VCMPGEFP) + NAME(VCMPGTFP) + NAME(VCMPGTSB) + NAME(VCMPGTSH) + NAME(VCMPGTSW) + NAME(VCMPGTUB) + NAME(VCMPGTUH) + NAME(VCMPGTUW) + NAME(VCTSXS) + NAME(VCTUXS) + NAME(VEXPTEFP) + NAME(VLOGEFP) + NAME(VMADDFP) + NAME(VMAXFP) + NAME(VMAXSB) + NAME(VMAXSH) + NAME(VMAXSW) + NAME(VMAXUB) + NAME(VMAXUH) + NAME(VMAXUW) + NAME(VMHADDSHS) + NAME(VMHRADDSHS) + NAME(VMINFP) + NAME(VMINSB) + NAME(VMINSH) + NAME(VMINSW) + NAME(VMINUB) + NAME(VMINUH) + NAME(VMINUW) + NAME(VMLADDUHM) + NAME(VMRGHB) + NAME(VMRGHH) + NAME(VMRGHW) + NAME(VMRGLB) + NAME(VMRGLH) + NAME(VMRGLW) + NAME(VMSUMMBM) + NAME(VMSUMSHM) + NAME(VMSUMSHS) + NAME(VMSUMUBM) + NAME(VMSUMUHM) + NAME(VMSUMUHS) + NAME(VMULESB) + NAME(VMULESH) + NAME(VMULEUB) + NAME(VMULEUH) + NAME(VMULOSB) + NAME(VMULOSH) + NAME(VMULOUB) + NAME(VMULOUH) + NAME(VNMSUBFP) + NAME(VNOR) + NAME(VOR) + NAME(VPERM) + NAME(VPKPX) + NAME(VPKSHSS) + NAME(VPKSHUS) + NAME(VPKSWSS) + NAME(VPKSWUS) + NAME(VPKUHUM) + NAME(VPKUHUS) + NAME(VPKUWUM) + NAME(VPKUWUS) + NAME(VREFP) + NAME(VRFIM) + NAME(VRFIN) + NAME(VRFIP) + NAME(VRFIZ) + NAME(VRLB) + NAME(VRLH) + NAME(VRLW) + NAME(VRSQRTEFP) + NAME(VSEL) + NAME(VSL) + NAME(VSLB) + NAME(VSLDOI) + NAME(VSLH) + NAME(VSLO) + NAME(VSLW) + NAME(VSPLTB) + NAME(VSPLTH) + NAME(VSPLTISB) + NAME(VSPLTISH) + NAME(VSPLTISW) + NAME(VSPLTW) + NAME(VSR) + NAME(VSRAB) + NAME(VSRAH) + NAME(VSRAW) + NAME(VSRB) + NAME(VSRH) + NAME(VSRO) + NAME(VSRW) + NAME(VSUBCUW) + NAME(VSUBFP) + NAME(VSUBSBS) + NAME(VSUBSHS) + NAME(VSUBSWS) + NAME(VSUBUBM) + NAME(VSUBUBS) + NAME(VSUBUHM) + NAME(VSUBUHS) + NAME(VSUBUWM) + NAME(VSUBUWS) + NAME(VSUMSWS) + NAME(VSUM2SWS) + NAME(VSUM4SBS) + NAME(VSUM4SHS) + NAME(VSUM4UBS) + NAME(VUPKHPX) + NAME(VUPKHSB) + NAME(VUPKHSH) + NAME(VUPKLPX) + 
NAME(VUPKLSB) + NAME(VUPKLSH) + NAME(VXOR) + NAME(TDI) + NAME(TWI) + NAME(MULLI) + NAME(SUBFIC) + NAME(CMPLI) + NAME(CMPI) + NAME(ADDIC) + NAME(ADDI) + NAME(ADDIS) + NAME(BC) + NAME(HACK) + NAME(SC) + NAME(B) + NAME(MCRF) + NAME(BCLR) + NAME(CRNOR) + NAME(CRANDC) + NAME(ISYNC) + NAME(CRXOR) + NAME(CRNAND) + NAME(CRAND) + NAME(CREQV) + NAME(CRORC) + NAME(CROR) + NAME(BCCTR) + NAME(RLWIMI) + NAME(RLWINM) + NAME(RLWNM) + NAME(ORI) + NAME(ORIS) + NAME(XORI) + NAME(XORIS) + NAME(ANDI) + NAME(ANDIS) + NAME(RLDICL) + NAME(RLDICR) + NAME(RLDIC) + NAME(RLDIMI) + NAME(RLDCL) + NAME(RLDCR) + NAME(CMP) + NAME(TW) + NAME(LVSL) + NAME(LVEBX) + NAME(SUBFC) + NAME(ADDC) + NAME(MULHDU) + NAME(MULHWU) + NAME(MFOCRF) + NAME(LWARX) + NAME(LDX) + NAME(LWZX) + NAME(SLW) + NAME(CNTLZW) + NAME(SLD) + NAME(AND) + NAME(CMPL) + NAME(LVSR) + NAME(LVEHX) + NAME(SUBF) + NAME(LDUX) + NAME(DCBST) + NAME(LWZUX) + NAME(CNTLZD) + NAME(ANDC) + NAME(TD) + NAME(LVEWX) + NAME(MULHD) + NAME(MULHW) + NAME(LDARX) + NAME(DCBF) + NAME(LBZX) + NAME(LVX) + NAME(NEG) + NAME(LBZUX) + NAME(NOR) + NAME(STVEBX) + NAME(SUBFE) + NAME(ADDE) + NAME(MTOCRF) + NAME(STDX) + NAME(STWCX) + NAME(STWX) + NAME(STVEHX) + NAME(STDUX) + NAME(STWUX) + NAME(STVEWX) + NAME(SUBFZE) + NAME(ADDZE) + NAME(STDCX) + NAME(STBX) + NAME(STVX) + NAME(SUBFME) + NAME(MULLD) + NAME(ADDME) + NAME(MULLW) + NAME(DCBTST) + NAME(STBUX) + NAME(ADD) + NAME(DCBT) + NAME(LHZX) + NAME(EQV) + NAME(ECIWX) + NAME(LHZUX) + NAME(XOR) + NAME(MFSPR) + NAME(LWAX) + NAME(DST) + NAME(LHAX) + NAME(LVXL) + NAME(MFTB) + NAME(LWAUX) + NAME(DSTST) + NAME(LHAUX) + NAME(STHX) + NAME(ORC) + NAME(ECOWX) + NAME(STHUX) + NAME(OR) + NAME(DIVDU) + NAME(DIVWU) + NAME(MTSPR) + NAME(DCBI) + NAME(NAND) + NAME(STVXL) + NAME(DIVD) + NAME(DIVW) + NAME(LVLX) + NAME(LDBRX) + NAME(LSWX) + NAME(LWBRX) + NAME(LFSX) + NAME(SRW) + NAME(SRD) + NAME(LVRX) + NAME(LSWI) + NAME(LFSUX) + NAME(SYNC) + NAME(LFDX) + NAME(LFDUX) + NAME(STVLX) + NAME(STDBRX) + NAME(STSWX) + NAME(STWBRX) + NAME(STFSX) + 
NAME(STVRX) + NAME(STFSUX) + NAME(STSWI) + NAME(STFDX) + NAME(STFDUX) + NAME(LVLXL) + NAME(LHBRX) + NAME(SRAW) + NAME(SRAD) + NAME(LVRXL) + NAME(DSS) + NAME(SRAWI) + NAME(SRADI) + NAME(EIEIO) + NAME(STVLXL) + NAME(STHBRX) + NAME(EXTSH) + NAME(STVRXL) + NAME(EXTSB) + NAME(STFIWX) + NAME(EXTSW) + NAME(ICBI) + NAME(DCBZ) + NAME(LWZ) + NAME(LWZU) + NAME(LBZ) + NAME(LBZU) + NAME(STW) + NAME(STWU) + NAME(STB) + NAME(STBU) + NAME(LHZ) + NAME(LHZU) + NAME(LHA) + NAME(LHAU) + NAME(STH) + NAME(STHU) + NAME(LMW) + NAME(STMW) + NAME(LFS) + NAME(LFSU) + NAME(LFD) + NAME(LFDU) + NAME(STFS) + NAME(STFSU) + NAME(STFD) + NAME(STFDU) + NAME(LD) + NAME(LDU) + NAME(LWA) + NAME(STD) + NAME(STDU) + NAME(FDIVS) + NAME(FSUBS) + NAME(FADDS) + NAME(FSQRTS) + NAME(FRES) + NAME(FMULS) + NAME(FMADDS) + NAME(FMSUBS) + NAME(FNMSUBS) + NAME(FNMADDS) + NAME(MTFSB1) + NAME(MCRFS) + NAME(MTFSB0) + NAME(MTFSFI) + NAME(MFFS) + NAME(MTFSF) + NAME(FCMPU) + NAME(FRSP) + NAME(FCTIW) + NAME(FCTIWZ) + NAME(FDIV) + NAME(FSUB) + NAME(FADD) + NAME(FSQRT) + NAME(FSEL) + NAME(FMUL) + NAME(FRSQRTE) + NAME(FMSUB) + NAME(FMADD) + NAME(FNMSUB) + NAME(FNMADD) + NAME(FCMPO) + NAME(FNEG) + NAME(FMR) + NAME(FNABS) + NAME(FABS) + NAME(FCTID) + NAME(FCTIDZ) + NAME(FCFID) + +#undef NAME +}; diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index c15b27762d..e2c2acf35e 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -53,7 +53,7 @@ inline __m128i sse_cmpgt_epu32(__m128i A, __m128i B) return _mm_cmpgt_epi32(_mm_xor_si128(A, sign), _mm_xor_si128(B, sign)); } -inline __m128 sse_exp2_ps(__m128 A) +extern __m128 sse_exp2_ps(__m128 A) { const auto x0 = _mm_max_ps(_mm_min_ps(A, _mm_set1_ps(127.4999961f)), _mm_set1_ps(-127.4999961f)); const auto x1 = _mm_add_ps(x0, _mm_set1_ps(0.5f)); @@ -65,7 +65,7 @@ inline __m128 sse_exp2_ps(__m128 A) return _mm_mul_ps(_mm_add_ps(_mm_add_ps(x6, x6), _mm_set1_ps(1.0f)), _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(x2, 
_mm_set1_epi32(127)), 23))); } -inline __m128 sse_log2_ps(__m128 A) +extern __m128 sse_log2_ps(__m128 A) { const auto _1 = _mm_set1_ps(1.0f); const auto _c = _mm_set1_ps(1.442695040f); @@ -81,6 +81,125 @@ inline __m128 sse_log2_ps(__m128 A) return _mm_add_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(x5, x6), x7), x4), _c), _mm_add_ps(_mm_mul_ps(x4, _c), x8)); } +extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C) +{ + const auto index = _mm_andnot_si128(C, _mm_set1_epi8(0x1f)); + const auto mask = _mm_cmpgt_epi8(index, _mm_set1_epi8(0xf)); + const auto sa = _mm_shuffle_epi8(A, index); + const auto sb = _mm_shuffle_epi8(B, index); + return _mm_or_si128(_mm_and_si128(mask, sa), _mm_andnot_si128(mask, sb)); +} + +extern __m128i sse_altivec_lvsl(u64 addr) +{ + alignas(16) static const u64 lvsl_values[0x10][2] = + { + { 0x08090A0B0C0D0E0F, 0x0001020304050607 }, + { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, + { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, + { 0x0B0C0D0E0F101112, 0x030405060708090A }, + { 0x0C0D0E0F10111213, 0x0405060708090A0B }, + { 0x0D0E0F1011121314, 0x05060708090A0B0C }, + { 0x0E0F101112131415, 0x060708090A0B0C0D }, + { 0x0F10111213141516, 0x0708090A0B0C0D0E }, + { 0x1011121314151617, 0x08090A0B0C0D0E0F }, + { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, + { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, + { 0x131415161718191A, 0x0B0C0D0E0F101112 }, + { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, + { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, + { 0x161718191A1B1C1D, 0x0E0F101112131415 }, + { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, + }; + + return _mm_load_si128((__m128i*)lvsl_values[addr & 0xf]); +} + +extern __m128i sse_altivec_lvsr(u64 addr) +{ + alignas(16) static const u64 lvsr_values[0x10][2] = + { + { 0x18191A1B1C1D1E1F, 0x1011121314151617 }, + { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, + { 0x161718191A1B1C1D, 0x0E0F101112131415 }, + { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, + { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, + { 
0x131415161718191A, 0x0B0C0D0E0F101112 }, + { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, + { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, + { 0x1011121314151617, 0x08090A0B0C0D0E0F }, + { 0x0F10111213141516, 0x0708090A0B0C0D0E }, + { 0x0E0F101112131415, 0x060708090A0B0C0D }, + { 0x0D0E0F1011121314, 0x05060708090A0B0C }, + { 0x0C0D0E0F10111213, 0x0405060708090A0B }, + { 0x0B0C0D0E0F101112, 0x030405060708090A }, + { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, + { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, + }; + + return _mm_load_si128((__m128i*)lvsr_values[addr & 0xf]); +} + +static const __m128i lvlx_masks[0x10] = +{ + _mm_set_epi8(0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf), + _mm_set_epi8(0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1), + _mm_set_epi8(0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1), + _mm_set_epi8(0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1), + _mm_set_epi8(0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1), + _mm_set_epi8(0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1), + _mm_set_epi8(0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1), + _mm_set_epi8(0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1), + _mm_set_epi8(0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1), + _mm_set_epi8(0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1), + _mm_set_epi8(0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1), + _mm_set_epi8(0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1), + _mm_set_epi8(0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1), + _mm_set_epi8(0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1), + _mm_set_epi8(0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1), + _mm_set_epi8(0xf, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1), +}; + +static const __m128i lvrx_masks[0x10] = +{ + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1), + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0), + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1), + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2), + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3), + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4), + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5), + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6), + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7), + _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8), + _mm_set_epi8(-1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9), + _mm_set_epi8(-1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa), + _mm_set_epi8(-1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb), + _mm_set_epi8(-1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc), + _mm_set_epi8(-1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd), + _mm_set_epi8(-1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe), +}; + +extern __m128i sse_cellbe_lvlx(u64 addr) +{ + return _mm_shuffle_epi8(_mm_load_si128((__m128i*)vm::base(addr & ~0xf)), lvlx_masks[addr & 0xf]); +} + +extern void sse_cellbe_stvlx(u64 addr, __m128i a) +{ + _mm_maskmoveu_si128(_mm_shuffle_epi8(a, lvlx_masks[addr & 0xf]), lvrx_masks[addr & 0xf], (char*)vm::base(addr & ~0xf)); +} + +extern __m128i sse_cellbe_lvrx(u64 addr) +{ + return _mm_shuffle_epi8(_mm_load_si128((__m128i*)vm::base(addr & ~0xf)), lvrx_masks[addr & 0xf]); +} + +extern void 
sse_cellbe_stvrx(u64 addr, __m128i a) +{ + _mm_maskmoveu_si128(_mm_shuffle_epi8(a, lvrx_masks[addr & 0xf]), lvlx_masks[addr & 0xf], (char*)vm::base(addr & ~0xf)); +} + template struct add_flags_result_t { @@ -807,11 +926,7 @@ bool ppu_interpreter::VOR(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::VPERM(PPUThread& ppu, ppu_opcode_t op) { - const auto index = _mm_andnot_si128(ppu.VR[op.vc].vi, _mm_set1_epi8(0x1f)); - const auto mask = _mm_cmpgt_epi8(index, _mm_set1_epi8(0xf)); - const auto sa = _mm_shuffle_epi8(ppu.VR[op.va].vi, index); - const auto sb = _mm_shuffle_epi8(ppu.VR[op.vb].vi, index); - ppu.VR[op.vd].vi = _mm_or_si128(_mm_and_si128(mask, sa), _mm_andnot_si128(mask, sb)); + ppu.VR[op.vd].vi = sse_altivec_vperm(ppu.VR[op.va].vi, ppu.VR[op.vb].vi, ppu.VR[op.vc].vi); return true; } @@ -2052,29 +2167,7 @@ bool ppu_interpreter::TW(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::LVSL(PPUThread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.GPR[op.ra] + ppu.GPR[op.rb] : ppu.GPR[op.rb]; - - static const u64 lvsl_values[0x10][2] = - { - { 0x08090A0B0C0D0E0F, 0x0001020304050607 }, - { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, - { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, - { 0x0B0C0D0E0F101112, 0x030405060708090A }, - { 0x0C0D0E0F10111213, 0x0405060708090A0B }, - { 0x0D0E0F1011121314, 0x05060708090A0B0C }, - { 0x0E0F101112131415, 0x060708090A0B0C0D }, - { 0x0F10111213141516, 0x0708090A0B0C0D0E }, - { 0x1011121314151617, 0x08090A0B0C0D0E0F }, - { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, - { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, - { 0x131415161718191A, 0x0B0C0D0E0F101112 }, - { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, - { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, - { 0x161718191A1B1C1D, 0x0E0F101112131415 }, - { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, - }; - - ppu.VR[op.vd]._u64[0] = lvsl_values[addr & 0xf][0]; - ppu.VR[op.vd]._u64[1] = lvsl_values[addr & 0xf][1]; + ppu.VR[op.vd].vi = sse_altivec_lvsl(addr); return true; } @@ -2218,29 
+2311,7 @@ bool ppu_interpreter::CMPL(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::LVSR(PPUThread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.GPR[op.ra] + ppu.GPR[op.rb] : ppu.GPR[op.rb]; - - static const u64 lvsr_values[0x10][2] = - { - { 0x18191A1B1C1D1E1F, 0x1011121314151617 }, - { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, - { 0x161718191A1B1C1D, 0x0E0F101112131415 }, - { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, - { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, - { 0x131415161718191A, 0x0B0C0D0E0F101112 }, - { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, - { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, - { 0x1011121314151617, 0x08090A0B0C0D0E0F }, - { 0x0F10111213141516, 0x0708090A0B0C0D0E }, - { 0x0E0F101112131415, 0x060708090A0B0C0D }, - { 0x0D0E0F1011121314, 0x05060708090A0B0C }, - { 0x0C0D0E0F10111213, 0x0405060708090A0B }, - { 0x0B0C0D0E0F101112, 0x030405060708090A }, - { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, - { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, - }; - - ppu.VR[op.vd]._u64[0] = lvsr_values[addr & 0xf][0]; - ppu.VR[op.vd]._u64[1] = lvsr_values[addr & 0xf][1]; + ppu.VR[op.vd].vi = sse_altivec_lvsr(addr); return true; } @@ -2867,10 +2938,7 @@ bool ppu_interpreter::DIVW(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::LVLX(PPUThread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.GPR[op.ra] + ppu.GPR[op.rb] : ppu.GPR[op.rb]; - const u32 eb = addr & 0xf; - - ppu.VR[op.vd].clear(); - for (u32 i = 0; i < 16u - eb; ++i) ppu.VR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i, HERE)); + ppu.VR[op.vd].vi = sse_cellbe_lvlx(addr); return true; } @@ -2934,10 +3002,7 @@ bool ppu_interpreter::SRD(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::LVRX(PPUThread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? 
ppu.GPR[op.ra] + ppu.GPR[op.rb] : ppu.GPR[op.rb]; - const u8 eb = addr & 0xf; - - ppu.VR[op.vd].clear(); - for (u32 i = 16 - eb; i < 16; ++i) ppu.VR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i - 16, HERE)); + ppu.VR[op.vd].vi = sse_cellbe_lvrx(addr); return true; } @@ -3005,9 +3070,7 @@ bool ppu_interpreter::LFDUX(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::STVLX(PPUThread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.GPR[op.ra] + ppu.GPR[op.rb] : ppu.GPR[op.rb]; - const u32 eb = addr & 0xf; - - for (u32 i = 0; i < 16u - eb; ++i) vm::write8(vm::cast(addr + i, HERE), ppu.VR[op.vs]._u8[15 - i]); + sse_cellbe_stvlx(addr, ppu.VR[op.vs].vi); return true; } @@ -3055,9 +3118,7 @@ bool ppu_interpreter::STFSX(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::STVRX(PPUThread& ppu, ppu_opcode_t op) { const u64 addr = op.ra ? ppu.GPR[op.ra] + ppu.GPR[op.rb] : ppu.GPR[op.rb]; - const u8 eb = addr & 0xf; - - for (u32 i = 16 - eb; i < 16; ++i) vm::write8(vm::cast(addr + i - 16, HERE), ppu.VR[op.vs]._u8[15 - i]); + sse_cellbe_stvrx(addr, ppu.VR[op.vs].vi); return true; } @@ -3116,12 +3177,7 @@ bool ppu_interpreter::STFDUX(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::LVLXL(PPUThread& ppu, ppu_opcode_t op) { - const u64 addr = op.ra ? ppu.GPR[op.ra] + ppu.GPR[op.rb] : ppu.GPR[op.rb]; - const u32 eb = addr & 0xf; - - ppu.VR[op.vd].clear(); - for (u32 i = 0; i < 16u - eb; ++i) ppu.VR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i, HERE)); - return true; + return LVLX(ppu, op); } bool ppu_interpreter::LHBRX(PPUThread& ppu, ppu_opcode_t op) @@ -3171,12 +3227,7 @@ bool ppu_interpreter::SRAD(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::LVRXL(PPUThread& ppu, ppu_opcode_t op) { - const u64 addr = op.ra ? 
ppu.GPR[op.ra] + ppu.GPR[op.rb] : ppu.GPR[op.rb]; - const u8 eb = addr & 0xf; - - ppu.VR[op.vd].clear(); - for (u32 i = 16 - eb; i < 16; ++i) ppu.VR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i - 16, HERE)); - return true; + return LVRX(ppu, op); } bool ppu_interpreter::DSS(PPUThread& ppu, ppu_opcode_t op) @@ -3213,11 +3264,7 @@ bool ppu_interpreter::EIEIO(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::STVLXL(PPUThread& ppu, ppu_opcode_t op) { - const u64 addr = op.ra ? ppu.GPR[op.ra] + ppu.GPR[op.rb] : ppu.GPR[op.rb]; - const u32 eb = addr & 0xf; - - for (u32 i = 0; i < 16u - eb; ++i) vm::write8(vm::cast(addr + i, HERE), ppu.VR[op.vs]._u8[15 - i]); - return true; + return STVLX(ppu, op); } bool ppu_interpreter::STHBRX(PPUThread& ppu, ppu_opcode_t op) @@ -3236,11 +3283,7 @@ bool ppu_interpreter::EXTSH(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::STVRXL(PPUThread& ppu, ppu_opcode_t op) { - const u64 addr = op.ra ? ppu.GPR[op.ra] + ppu.GPR[op.rb] : ppu.GPR[op.rb]; - const u8 eb = addr & 0xf; - - for (u32 i = 16 - eb; i < 16; ++i) vm::write8(vm::cast(addr + i - 16, HERE), ppu.VR[op.vs]._u8[15 - i]); - return true; + return STVRX(ppu, op); } bool ppu_interpreter::EXTSB(PPUThread& ppu, ppu_opcode_t op) @@ -3529,7 +3572,9 @@ bool ppu_interpreter::FSQRTS(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::FRES(PPUThread& ppu, ppu_opcode_t op) { - ppu.FPR[op.frd] = f32(1.0 / ppu.FPR[op.frb]); + f32 value = f32(ppu.FPR[op.frb]); + _mm_store_ss(&value, _mm_rcp_ss(_mm_load_ss(&value))); + ppu.FPR[op.frd] = value; VERIFY(!op.rc); //if (UNLIKELY(op.rc)) ppu.SetCR(1, ppu.FG, ppu.FL, ppu.FE, ppu.FU); return true; } @@ -3633,14 +3678,14 @@ bool ppu_interpreter::FRSP(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::FCTIW(PPUThread& ppu, ppu_opcode_t op) { - (s32&)ppu.FPR[op.frd] = std::lrint(ppu.FPR[op.frb]); + (s32&)ppu.FPR[op.frd] = s32(ppu.FPR[op.frb]); VERIFY(!op.rc); //if (UNLIKELY(op.rc)) ppu.SetCR(1, ppu.FG, ppu.FL, ppu.FE, ppu.FU); return 
true; } bool ppu_interpreter::FCTIWZ(PPUThread& ppu, ppu_opcode_t op) { - (s32&)ppu.FPR[op.frd] = static_cast(ppu.FPR[op.frb]); + (s32&)ppu.FPR[op.frd] = _mm_cvttsd_si32(_mm_load_sd(&ppu.FPR[op.frb])); VERIFY(!op.rc); //if (UNLIKELY(op.rc)) ppu.SetCR(1, ppu.FG, ppu.FL, ppu.FE, ppu.FU); return true; } @@ -3689,7 +3734,9 @@ bool ppu_interpreter::FMUL(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::FRSQRTE(PPUThread& ppu, ppu_opcode_t op) { - ppu.FPR[op.frd] = 1.0 / std::sqrt(ppu.FPR[op.frb]); + f32 value = f32(ppu.FPR[op.frb]); + _mm_store_ss(&value, _mm_rsqrt_ss(_mm_load_ss(&value))); + ppu.FPR[op.frd] = value; VERIFY(!op.rc); //if (UNLIKELY(op.rc)) ppu.SetCR(1, ppu.FG, ppu.FL, ppu.FE, ppu.FU); return true; } @@ -3758,14 +3805,14 @@ bool ppu_interpreter::FABS(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::FCTID(PPUThread& ppu, ppu_opcode_t op) { - (s64&)ppu.FPR[op.frd] = std::llrint(ppu.FPR[op.frb]); + (s64&)ppu.FPR[op.frd] = s64(ppu.FPR[op.frb]); VERIFY(!op.rc); //if (UNLIKELY(op.rc)) ppu.SetCR(1, ppu.FG, ppu.FL, ppu.FE, ppu.FU); return true; } bool ppu_interpreter::FCTIDZ(PPUThread& ppu, ppu_opcode_t op) { - (s64&)ppu.FPR[op.frd] = static_cast(ppu.FPR[op.frb]); + (s64&)ppu.FPR[op.frd] = _mm_cvttsd_si64(_mm_load_sd(&ppu.FPR[op.frb])); VERIFY(!op.rc); //if (UNLIKELY(op.rc)) ppu.SetCR(1, ppu.FG, ppu.FL, ppu.FE, ppu.FU); return true; } @@ -3779,5 +3826,5 @@ bool ppu_interpreter::FCFID(PPUThread& ppu, ppu_opcode_t op) bool ppu_interpreter::UNK(PPUThread& ppu, ppu_opcode_t op) { - throw fmt::exception("Unknown/Illegal opcode: 0x%08x" HERE, op.opcode); + throw fmt::exception("Unknown/Illegal opcode: 0x%08x (pc=0x%x)" HERE, op.opcode, ppu.pc); } diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index 81942d22e2..09920d6d32 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -13,8 +13,11 @@ #include "Emu/Cell/lv2/sys_prx.h" #include +#include +#include "yaml-cpp/yaml.h" const ppu_decoder s_ppu_itype; 
+//const ppu_decoder s_ppu_iname; LOG_CHANNEL(cellAdec); LOG_CHANNEL(cellAtrac); @@ -117,6 +120,8 @@ extern std::string ppu_get_variable_name(const std::string& module, u32 vnid); extern void sys_initialize_tls(PPUThread&, u64, u32, u32, u32); +extern void ppu_initialize(const std::string& name, const std::vector>& set, u32 entry); + // Function lookup table. Not supposed to grow after emulation start. std::vector g_ppu_function_cache; @@ -138,20 +143,14 @@ extern void ppu_execute_function(PPUThread& ppu, u32 index) { func(ppu); } - catch (EmulationStopped) - { - LOG_WARNING(PPU, "Function '%s' aborted", ppu.last_function); - ppu.last_function = previous_function; - throw; - } catch (...) { - LOG_ERROR(PPU, "Function '%s' aborted", ppu.last_function); + logs::HLE.format(Emu.IsStopped() ? logs::level::warning : logs::level::error, "Function '%s' aborted", ppu.last_function); ppu.last_function = previous_function; throw; } - LOG_TRACE(PPU, "Function '%s' finished, r3=0x%llx", ppu.last_function, ppu.GPR[3]); + LOG_TRACE(HLE, "Function '%s' finished, r3=0x%llx", ppu.last_function, ppu.GPR[3]); ppu.last_function = previous_function; return; } @@ -657,12 +656,16 @@ static auto ppu_load_exports(const std::shared_ptr& link, u32 return result; } -static void ppu_load_imports(const std::shared_ptr& link, u32 imports_start, u32 imports_end) +static u32 ppu_load_imports(const std::shared_ptr& link, u32 imports_start, u32 imports_end) { + u32 result = imports_start; + for (u32 addr = imports_start; addr < imports_end;) { const auto& lib = vm::_ref(addr); + result = std::min(result, lib.name.addr()); + const std::string module_name(lib.name.get_ptr()); LOG_NOTICE(LOADER, "** Imported module '%s' (0x%x, 0x%x)", module_name, lib.unk4, lib.unk5); @@ -720,12 +723,404 @@ static void ppu_load_imports(const std::shared_ptr& link, u32 addr += lib.size ? 
lib.size : sizeof(ppu_prx_module_info); } + + return result; +} + +// Returns max branch address of jumptable +never_inline static u32 ppu_is_jumptable(vm::ptr& start_ptr, u32 start, u32 end) +{ + u32 max_addr = 0; + + if (end - start_ptr.addr() < 8) + { + return 0; + } + + for (vm::ptr ptr = start_ptr; ptr.addr() < end; ptr++) + { + const u32 addr = start_ptr.addr() + *ptr; + + if (addr % 4 || addr < start || addr >= end) + { + if (ptr - start_ptr < 2) + { + return 0; + } + + start_ptr = ptr; + return max_addr; + } + + max_addr = std::max(max_addr, addr); + } + + start_ptr = vm::cast(end); + return max_addr; +} + +// Guess whether the function cannot be divided at specific position `split` +static bool ppu_is_coherent(u32 start, u32 end, u32 split) +{ + // Check if the block before `split` is directly connected (can fall through) + for (vm::ptr rptr = vm::cast(split - 4);; rptr--) + { + const u32 _last = *rptr; + + // Skip NOPs + if (_last == ppu_instructions::NOP()) + { + if (rptr.addr() == start) return true; + continue; + } + + switch (const auto type = s_ppu_itype.decode(_last)) + { + case ppu_itype::UNK: + case ppu_itype::TD: + case ppu_itype::TDI: + case ppu_itype::TW: + case ppu_itype::TWI: + { + break; + } + + case ppu_itype::B: + { + if (ppu_opcode_t{_last}.lk) return true; + break; + } + + case ppu_itype::BC: + case ppu_itype::BCLR: + { + if (ppu_opcode_t{_last}.lk || (ppu_opcode_t{_last}.bo & 0x14) != 0x14) return true; + break; + } + + case ppu_itype::BCCTR: + { + if (ppu_opcode_t{_last}.lk || (ppu_opcode_t{_last}.bo & 0x10) == 0) return true; + break; + } + + default: + { + return true; + } + } + + break; + } + + // Find branches from one part to another + for (vm::ptr ptr = vm::cast(start); ptr.addr() < split; ptr++) + { + const u32 value = *ptr; + const auto type = s_ppu_itype.decode(value); + + const ppu_opcode_t op{value}; + + if (type == ppu_itype::B || type == ppu_itype::BC) + { + const u32 target = ppu_branch_target(op.aa ? 
0 : ptr.addr(), type == ppu_itype::B ? +op.ll : +op.simm16); + + if (target % 4 == 0 && target >= split && target < end) + { + return !op.lk; + } + } + + if (type == ppu_itype::BCCTR && !op.lk) + { + const u32 max = ppu_is_jumptable(++ptr, start, end); + + if (max && max >= split) + { + return true; + } + + ptr--; + } + } + + // TODO: ??? + return false; +} + +static std::vector> ppu_analyse(u32 start, u32 end, const std::vector>& segs, u32 rtoc) +{ + // Function entries (except the last one) + std::set result + { + end, + }; + + // Instruction usage stats + //std::unordered_map stats; + + // Jumptable entries (addr->size) + std::unordered_map jts; + + // Block entries + std::set blocks; + + // First pass; Detect branch + link instructions + for (vm::ptr ptr = vm::cast(start); ptr.addr() < end; ptr++) + { + const u32 value = *ptr; + const auto type = s_ppu_itype.decode(value); + //const auto name = s_ppu_iname.decode(value); + + const ppu_opcode_t op{value}; + + if (type == ppu_itype::B || type == ppu_itype::BC) + { + const u32 target = ppu_branch_target(op.aa ? 0 : ptr.addr(), type == ppu_itype::B ? +op.ll : +op.simm16); + + if (op.lk && target % 4 == 0 && target >= start && target < end && target != ptr.addr()) + { + LOG_NOTICE(PPU, "BCall: 0x%x -> 0x%x", ptr, target); + result.emplace(target); + } + + if (!op.lk && target % 4 == 0 && target >= start && target < end) + { + blocks.emplace(target); + } + } + + if (type == ppu_itype::BCCTR && !op.lk) + { + const auto jt = ++ptr; + + if (ppu_is_jumptable(ptr, start, end)) + { + LOG_NOTICE(PPU, "JTable: 0x%x .. 
0x%x", jt, ptr); + jts.emplace(jt.addr(), ptr.addr() - jt.addr()); + + for (auto _ptr = jt; _ptr != ptr; _ptr++) + { + blocks.emplace(jt.addr() + *_ptr); + } + } + else + { + LOG_NOTICE(PPU, "BCCTR: 0x%x", ptr - 1); + } + + ptr--; + } + + //stats[name]++; + } + + // Find OPD table + for (const auto& seg : segs) + { + for (vm::ptr ptr = vm::cast(seg.first); ptr.addr() < seg.first + seg.second; ptr++) + { + if (ptr[0] >= start && ptr[0] < end && ptr[0] % 4 == 0 && ptr[1] == rtoc) + { + while (ptr[0] >= start && ptr[0] < end && ptr[0] % 4 == 0 && !jts.count(ptr[0]) /*&& ptr[1] == rtoc*/) + { + LOG_NOTICE(PPU, "OPD: 0x%x -> 0x%x (rtoc=0x%x)", ptr, ptr[0], ptr[1]); + result.emplace(ptr[0]); + ptr += 2; + } + + break; + } + } + } + + // Find more block entries + for (const auto& seg : segs) + { + for (vm::ptr ptr = vm::cast(seg.first); ptr.addr() < seg.first + seg.second; ptr++) + { + const u32 value = *ptr; + + if (value % 4 == 0 && value >= start && value < end) + { + blocks.emplace(value); + } + } + } + + // Detect tail calls + std::deque task{result.begin(), result.end()}; + + while (!task.empty()) + { + const u32 f_start = task.front(); + const auto f_up = result.upper_bound(f_start); + + if (f_up != result.end()) for (vm::ptr ptr = vm::cast(f_start); ptr.addr() < *f_up; ptr++) + { + const u32 value = *ptr; + const auto type = s_ppu_itype.decode(value); + + const ppu_opcode_t op{value}; + + if (type == ppu_itype::B || type == ppu_itype::BC) + { + const u32 target = ppu_branch_target(op.aa ? 0 : ptr.addr(), type == ppu_itype::B ? 
+op.ll : +op.simm16); + + if (!op.lk && target % 4 == 0 && target >= start && target < end && (target < f_start || target >= *f_up)) + { + auto _lower = result.lower_bound(target); + + if (*_lower == target || _lower == result.begin()) + { + continue; + } + + const u32 f2_end = *_lower; + const u32 f2_start = *--_lower; + + if (ppu_is_coherent(f2_start, f2_end, target)) + { + continue; + } + + LOG_NOTICE(LOADER, "Tail call: 0x%x -> 0x%x", ptr, target); + result.emplace(target); + + // Rescan two new functions if the insertion took place + task.push_back(target); + task.push_back(f2_start); + } + } + } + + task.pop_front(); + } + + // Fill (addr, size) vector + std::vector> vr; + + for (auto it = result.begin(), end = --result.end(); it != end; it++) + { + const u32 addr = *it; + const auto _up = result.upper_bound(addr); + + // Set initial (addr, size) + vr.emplace_back(std::make_pair(addr, *_up - addr)); + + // Analyse function against its end + for (u32& size = vr.back().second; size;) + { + const auto next = result.upper_bound(addr + size); + + if (next != result.end() && ppu_is_coherent(addr, *next, addr + size)) + { + // Extend and check again + const u32 new_size = *next - addr; + + LOG_NOTICE(LOADER, "Extended: 0x%x (0x%x --> 0x%x)", addr, size, new_size); + size = new_size; + continue; + } + + break; + } + } + + // Add blocks as (addr, 0) // TODO + for (auto value : blocks) + { + vr.emplace_back(std::make_pair(value, 0)); + } + + // Print some stats + //{ + // std::multimap> sorted; + + // for (const auto& pair : stats) + // sorted.emplace(pair.second, pair.first); + + // for (const auto& stat : sorted) + // LOG_NOTICE(PPU, "Stats: (%llu) %s", stat.first, stat.second); + //} + + return vr; +} + +static void ppu_validate(const std::string& fname, const std::vector>& funcs, u32 reloc) +{ + // Load custom PRX configuration if available + if (fs::file yml{fname + ".yml"}) + { + const auto cfg = YAML::Load(yml.to_string()); + + u32 index = 0; + + // Validate 
detected functions using information provided + for (const auto func : cfg["functions"]) + { + const u32 addr = func["addr"].as(-1); + const u32 size = func["size"].as(0); + + if (addr != -1 && index < funcs.size()) + { + u32 found = funcs[index].first - reloc; + + while (addr > found && index + 1 < funcs.size()) + { + LOG_ERROR(LOADER, "%s.yml : validation failed at 0x%x (0x%x, 0x%x)", fname, found, addr, size); + index++; + found = funcs[index].first - reloc; + } + + if (addr < found) + { + LOG_ERROR(LOADER, "%s.yml : function not found (0x%x, 0x%x)", fname, addr, size); + continue; + } + + if (size && size < funcs[index].second) + { + LOG_WARNING(LOADER, "%s.yml : function size mismatch at 0x%x(size=0x%x) (0x%x, 0x%x)", fname, found, funcs[index].second, addr, size); + } + + if (size > funcs[index].second) + { + LOG_ERROR(LOADER, "%s.yml : function size mismatch at 0x%x(size=0x%x) (0x%x, 0x%x)", fname, found, funcs[index].second, addr, size); + } + + index++; + } + else + { + LOG_ERROR(LOADER, "%s.yml : function not found at the end (0x%x, 0x%x)", fname, addr, size); + break; + } + } + + if (!index) + { + return; // ??? 
+ } + + while (index < funcs.size()) + { + if (funcs[index].second) + { + LOG_ERROR(LOADER, "%s.yml : function not covered at 0x%x (size=0x%x)", fname, funcs[index].first, funcs[index].second); + } + + index++; + } + + LOG_SUCCESS(LOADER, "%s.yml : validation completed", fname); + } } template<> std::shared_ptr ppu_prx_loader::load() const { - std::vector segments; + std::vector> segments; for (const auto& prog : progs) { @@ -753,7 +1148,7 @@ std::shared_ptr ppu_prx_loader::load() const std::memcpy(vm::base(addr), prog.bin.data(), file_size); LOG_WARNING(LOADER, "**** Loaded to 0x%x (size=0x%x)", addr, mem_size); - segments.push_back(addr); + segments.emplace_back(std::make_pair(addr, mem_size)); } break; @@ -787,8 +1182,8 @@ std::shared_ptr ppu_prx_loader::load() const { const auto& rel = reinterpret_cast(prog.bin[i]); - const u32 raddr = vm::cast(segments.at(rel.index_addr) + rel.offset, HERE); - const u64 rdata = segments.at(rel.index_value) + rel.ptr.addr(); + const u32 raddr = vm::cast(segments.at(rel.index_addr).first + rel.offset, HERE); + const u64 rdata = segments.at(rel.index_value).first + rel.ptr.addr(); switch (const u32 type = rel.type) { @@ -853,14 +1248,24 @@ std::shared_ptr ppu_prx_loader::load() const }; // Access library information (TODO) - const auto& lib_info = vm::_ref(vm::cast(segments[0] + progs[0].p_paddr - progs[0].p_offset, HERE)); - const auto& lib_name = std::string(lib_info.name); + const auto& lib_info = vm::cptr(vm::cast(segments[0].first + progs[0].p_paddr - progs[0].p_offset, HERE)); + const auto& lib_name = std::string(lib_info->name); - LOG_WARNING(LOADER, "Library %s (toc=0x%x, rtoc=0x%x):", lib_name, lib_info.toc, lib_info.toc + segments[0]); + LOG_WARNING(LOADER, "Library %s (rtoc=0x%x):", lib_name, lib_info->toc); - prx->specials = ppu_load_exports(link, lib_info.exports_start, lib_info.exports_end); + prx->specials = ppu_load_exports(link, lib_info->exports_start, lib_info->exports_end); - ppu_load_imports(link, 
lib_info.imports_start, lib_info.imports_end); + const std::initializer_list addr_list + { + ppu_load_imports(link, lib_info->imports_start, lib_info->imports_end), + + lib_info.addr(), + lib_info->imports_start, + lib_info->exports_start, + }; + + // Get functions + prx->func = ppu_analyse(segments[0].first, std::min(addr_list), segments, lib_info->toc); } else { @@ -887,9 +1292,18 @@ void ppu_exec_loader::load() const // Access linkage information object const auto link = fxm::get_always(); + // Segment info + std::vector> segments; + + // Functions + std::vector> exec_set; + u32 exec_end{}; + // Allocate memory at fixed positions for (const auto& prog : progs) { + LOG_NOTICE(LOADER, "** Segment: p_type=0x%x, p_vaddr=0x%llx, p_filesz=0x%llx, p_memsz=0x%llx, flags=0x%x", prog.p_type, prog.p_vaddr, prog.p_filesz, prog.p_memsz, prog.p_flags); + const u32 addr = vm::cast(prog.p_vaddr, HERE); const u32 size = fmt::narrow("Invalid p_memsz: 0x%llx" HERE, prog.p_memsz); @@ -902,6 +1316,11 @@ void ppu_exec_loader::load() const throw fmt::exception("vm::falloc() failed (addr=0x%x, memsz=0x%x)", addr, size); std::memcpy(vm::base(addr), prog.bin.data(), prog.bin.size()); + + segments.emplace_back(std::make_pair(addr, size)); + + if (prog.p_flags & 1) // Test EXEC flag + exec_end = addr + size; } } @@ -987,13 +1406,19 @@ void ppu_exec_loader::load() const const auto& proc_prx_param = vm::_ref(vm::cast(prog.p_vaddr, HERE)); + LOG_NOTICE(LOADER, "* libent_start = *0x%x", proc_prx_param.libent_start); + LOG_NOTICE(LOADER, "* libstub_start = *0x%x", proc_prx_param.libstub_start); + if (proc_prx_param.magic != 0x1b434cec) { throw fmt::exception("Bad magic! 
(0x%x)", proc_prx_param.magic); } ppu_load_exports(link, proc_prx_param.libent_start, proc_prx_param.libent_end); - ppu_load_imports(link, proc_prx_param.libstub_start, proc_prx_param.libstub_end); + + const u32 min_addr = ppu_load_imports(link, proc_prx_param.libstub_start, proc_prx_param.libstub_end); + + exec_end = std::min(min_addr, exec_end); } break; } @@ -1035,10 +1460,16 @@ void ppu_exec_loader::load() const const auto prx = loader.load(); + // Register start function if (prx->start) { start_funcs.push_back(prx->start.addr()); } + + // Add functions + exec_set.insert(exec_set.end(), prx->func.begin(), prx->func.end()); + + ppu_validate(lle_dir + '/' + name, prx->func, prx->func[0].first); } else { @@ -1167,6 +1598,17 @@ void ppu_exec_loader::load() const } } + // Analyse executable + const u32 entry_rtoc = vm::read32(vm::cast(header.e_entry, HERE) + 4); + const auto funcs = ppu_analyse(segments[0].first, exec_end, segments, entry_rtoc); + + ppu_validate(vfs::get(Emu.GetPath()), funcs, 0); + + for (const auto& pair : funcs) + { + exec_set.emplace_back(pair); + } + // TODO: adjust for liblv2 loading option using namespace ppu_instructions; @@ -1177,7 +1619,7 @@ void ppu_exec_loader::load() const static const int branch_size = 10 * 4; - auto make_branch = [](vm::ptr& ptr, u32 addr) + auto make_branch = [](vm::ptr& ptr, u32 addr, bool last) { const u32 stub = vm::read32(addr); const u32 rtoc = vm::read32(addr + 4); @@ -1189,7 +1631,7 @@ void ppu_exec_loader::load() const *ptr++ = ORI(r2, r2, rtoc & 0xffff); *ptr++ = ORIS(r2, r2, rtoc >> 16); *ptr++ = MTCTR(r0); - *ptr++ = BCTRL(); + *ptr++ = last ? 
BCTR() : BCTRL(); }; auto entry = vm::ptr::make(vm::alloc(48 + branch_size * (::size32(start_funcs) + 1), vm::main)); @@ -1217,7 +1659,7 @@ void ppu_exec_loader::load() const // Reset arguments (TODO) *entry++ = LI(r3, 0); *entry++ = LI(r4, 0); - make_branch(entry, f); + make_branch(entry, f, false); } // Restore initialization args @@ -1229,7 +1671,13 @@ void ppu_exec_loader::load() const *entry++ = MR(r12, r19); // Branch to initialization - make_branch(entry, vm::cast(header.e_entry, HERE)); + make_branch(entry, static_cast(header.e_entry), true); + + // Register entry function (addr, size) + exec_set.emplace_back(std::make_pair(entry.addr() & -0x1000, entry.addr() & 0xfff)); + + // Initialize recompiler + ppu_initialize("", exec_set, static_cast(header.e_entry)); auto ppu = idm::make_ptr("main_thread"); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 4f7b33ba06..7a52b5ad08 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -5,6 +5,7 @@ #include "Emu/IdManager.h" #include "PPUThread.h" #include "PPUInterpreter.h" +#include "PPUAnalyser.h" #include "PPUModule.h" enum class ppu_decoder_type @@ -24,6 +25,18 @@ cfg::map_entry g_cfg_ppu_decoder(cfg::root.core, "PPU Decoder" const ppu_decoder s_ppu_interpreter_precise; const ppu_decoder s_ppu_interpreter_fast; +struct ppu_addr_hash +{ + u32 operator()(u32 value) const + { + return value / sizeof(32); + } +}; + +static std::unordered_map s_ppu_compiled; + + + std::string PPUThread::get_name() const { return fmt::format("PPU[0x%x] Thread (%s)", id, name); @@ -90,6 +103,16 @@ void PPUThread::cpu_task() return custom_task(*this); } + if (g_cfg_ppu_decoder.get() == ppu_decoder_type::llvm) + { + const auto found = s_ppu_compiled.find(pc); + + if (found != s_ppu_compiled.end()) + { + return found->second(); + } + } + g_tls_log_prefix = [] { const auto cpu = static_cast(get_current_cpu_thread()); @@ -298,3 +321,733 @@ void PPUThread::fast_call(u32 addr, u32 rtoc) // 
handle_interrupt(); //} } + +#ifdef LLVM_AVAILABLE +#ifdef _MSC_VER +#pragma warning(push, 0) +#endif +//#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +//#include "llvm/Support/Host.h" +#include "llvm/Support/FormattedStream.h" +//#include "llvm/Support/Debug.h" +//#include "llvm/CodeGen/CommandFlags.h" +//#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/LLVMContext.h" +//#include "llvm/IR/Dominators.h" +#include "llvm/IR/Verifier.h" +//#include "llvm/IR/InstIterator.h" +#include "llvm/IR/LegacyPassManager.h" +//#include "llvm/IR/Module.h" +//#include "llvm/IR/Function.h" +//#include "llvm/Analysis/Passes.h" +//#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +//#include "llvm/Analysis/LoopInfo.h" +//#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Vectorize.h" + +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/ExecutionEngine/JITEventListener.h" +//#include "llvm/Object/ObjectFile.h" +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#include "PPUTranslator.h" + +#ifdef _WIN32 +#include +#else +#include +#include +#include +#include +#include +#endif + +const ppu_decoder s_ppu_itype; + +extern u64 get_timebased_time(); +extern void ppu_execute_syscall(PPUThread& ppu, u64 code); +extern void ppu_execute_function(PPUThread& ppu, u32 index); + +extern __m128 sse_exp2_ps(__m128 A); +extern __m128 sse_log2_ps(__m128 A); +extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C); +extern __m128i sse_altivec_lvsl(u64 addr); +extern __m128i sse_altivec_lvsr(u64 addr); +extern __m128i sse_cellbe_lvlx(u64 addr); +extern __m128i sse_cellbe_lvrx(u64 addr); +extern void sse_cellbe_stvlx(u64 addr, __m128i a); +extern void sse_cellbe_stvrx(u64 addr, __m128i a); + +struct Listener final : 
llvm::JITEventListener +{ + virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override + { + const llvm::StringRef elf = obj.getData(); + fs::file(fs::get_config_dir() + "LLVM.obj", fs::rewrite) + .write(elf.data(), elf.size()); + } +}; + +static Listener s_listener; + +// Memory size: 512 MB +static const u64 s_memory_size = 0x20000000; + +// Try to reserve a portion of virtual memory in the first 2 GB address space, if possible. +static void* const s_memory = []() -> void* +{ +#ifdef _WIN32 + for (u64 addr = 0x1000000; addr <= 0x60000000; addr += 0x1000000) + { + if (VirtualAlloc((void*)addr, s_memory_size, MEM_RESERVE, PAGE_NOACCESS)) + { + return (void*)addr; + } + } + + return VirtualAlloc(NULL, s_memory_size, MEM_RESERVE, PAGE_NOACCESS); +#else + return ::mmap((void*)0x10000000, s_memory_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); +#endif +}(); + +// EH frames +static u8* s_unwind_info; +static u64 s_unwind_size; + +#ifdef _WIN32 +// Custom .pdata section replacement +static std::vector s_unwind; +#endif + +struct MemoryManager final : llvm::RTDyldMemoryManager +{ + static PPUThread* context(u64 addr) + { + //trace(addr); + return static_cast(get_current_cpu_thread()); + } + + [[noreturn]] static void trap(u64 addr) + { + LOG_ERROR(PPU, "Trap! (0x%llx)", addr); + throw fmt::exception("Trap! 
(0x%llx)", addr); + } + + static void trace(u64 addr) + { + LOG_NOTICE(PPU, "Trace: 0x%llx", addr); + } + + static void hack(u32 index) + { + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + ppu_execute_function(ppu, index); + if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily + } + + static void syscall(u64 code) + { + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + ppu_execute_syscall(ppu, code); + if (ppu.state.load() && ppu.check_status()) throw cpu_state::ret; // Temporarily + } + + static u32 tbl() + { + return (u32)get_timebased_time(); + } + + static void call(u32 addr) + { + const auto found = s_ppu_compiled.find(addr); + + if (found != s_ppu_compiled.end()) + { + return found->second(); + } + + const auto op = vm::read32(addr).value(); + const auto itype = s_ppu_itype.decode(op); + + // Allow HLE callbacks without compiling them + if (itype == ppu_itype::HACK && vm::read32(addr + 4) == ppu_instructions::BLR()) + { + return hack(op & 0x3ffffff); + } + + trap(addr); + } + + static __m128 sse_rcp_ps(__m128 A) + { + return _mm_rcp_ps(A); + } + + static __m128 sse_rsqrt_ps(__m128 A) + { + return _mm_rsqrt_ps(A); + } + + static float sse_rcp_ss(float A) + { + _mm_store_ss(&A, _mm_rcp_ss(_mm_load_ss(&A))); + return A; + } + + static float sse_rsqrt_ss(float A) + { + _mm_store_ss(&A, _mm_rsqrt_ss(_mm_load_ss(&A))); + return A; + } + + static u32 lwarx(u32 addr) + { + be_t reg_value; + vm::reservation_acquire(®_value, addr, sizeof(reg_value)); + return reg_value; + } + + static u64 ldarx(u32 addr) + { + be_t reg_value; + vm::reservation_acquire(®_value, addr, sizeof(reg_value)); + return reg_value; + } + + static bool stwcx(u32 addr, u32 reg_value) + { + const be_t data = reg_value; + return vm::reservation_update(addr, &data, sizeof(data)); + } + + static bool stdcx(u32 addr, u64 reg_value) + { + const be_t data = reg_value; + return vm::reservation_update(addr, &data, sizeof(data)); + } + + static bool 
sraw_carry(s32 arg, u64 shift) + { + return (arg < 0) && (shift > 31 || (arg >> shift << shift) != arg); + } + + static bool srad_carry(s64 arg, u64 shift) + { + return (arg < 0) && (shift > 63 || (arg >> shift << shift) != arg); + } + + static bool adde_carry(u64 a, u64 b, bool c) + { + return _addcarry_u64(c, a, b, nullptr) != 0; + } + + // Interpreter call for simple vector instructions + static __m128i vec3op(decltype(&ppu_interpreter::UNK) func, __m128i _a, __m128i _b, __m128i _c) + { + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + ppu.VR[21].vi = _a; + ppu.VR[22].vi = _b; + ppu.VR[23].vi = _c; + + ppu_opcode_t op{}; + op.vd = 20; + op.va = 21; + op.vb = 22; + op.vc = 23; + func(ppu, op); + + return ppu.VR[20].vi; + } + + // Interpreter call for simple vector instructions with immediate + static __m128i veciop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, __m128i _b) + { + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + ppu.VR[22].vi = _b; + + op.vd = 20; + op.vb = 22; + func(ppu, op); + + return ppu.VR[20].vi; + } + + // Interpreter call for FP instructions + static f64 fpop(decltype(&ppu_interpreter::UNK) func, f64 _a, f64 _b, f64 _c) + { + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + ppu.FPR[21] = _a; + ppu.FPR[22] = _b; + ppu.FPR[23] = _c; + + ppu_opcode_t op{}; + op.frd = 20; + op.fra = 21; + op.frb = 22; + op.frc = 23; + func(ppu, op); + + return ppu.FPR[20]; + } + + // Interpreter call for GPR instructions writing result to RA + static u64 aimmop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s) + { + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + const u64 a = ppu.GPR[op.ra]; + const u64 s = ppu.GPR[op.rs]; + ppu.GPR[op.rs] = _s; + + func(ppu, op); + + const u64 r = ppu.GPR[op.ra]; + ppu.GPR[op.ra] = a; + ppu.GPR[op.rs] = s; + return r; + } + + // Interpreter call for GPR instructions writing result to RA + static u64 aimmbop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, 
u64 _s, u64 _b) + { + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + const u64 a = ppu.GPR[op.ra]; + const u64 s = ppu.GPR[op.rs]; + const u64 b = ppu.GPR[op.rb]; + ppu.GPR[op.rs] = _s; + ppu.GPR[op.rb] = _b; + + func(ppu, op); + + const u64 r = ppu.GPR[op.ra]; + ppu.GPR[op.ra] = a; + ppu.GPR[op.rs] = s; + ppu.GPR[op.rb] = b; + return r; + } + + // Interpreter call for GPR instructions writing result to RA (destructive) + static u64 aaimmop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _s, u64 _a) + { + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + const u64 s = ppu.GPR[op.rs]; + const u64 a = ppu.GPR[op.ra]; + ppu.GPR[op.rs] = _s; + ppu.GPR[op.ra] = _a; + + func(ppu, op); + + const u64 r = ppu.GPR[op.ra]; + ppu.GPR[op.rs] = s; + ppu.GPR[op.ra] = a; + return r; + } + + static u64 immaop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _a) + { + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + const u64 a = ppu.GPR[op.ra]; + const u64 d = ppu.GPR[op.rd]; + ppu.GPR[op.ra] = _a; + + func(ppu, op); + + const u64 r = ppu.GPR[op.rd]; + ppu.GPR[op.ra] = a; + ppu.GPR[op.rd] = d; + return r; + } + + static u64 immabop(decltype(&ppu_interpreter::UNK) func, ppu_opcode_t op, u64 _a, u64 _b) + { + PPUThread& ppu = static_cast(*get_current_cpu_thread()); + const u64 a = ppu.GPR[op.ra]; + const u64 b = ppu.GPR[op.rb]; + const u64 d = ppu.GPR[op.rd]; + ppu.GPR[op.ra] = _a; + ppu.GPR[op.rb] = _b; + + func(ppu, op); + + const u64 r = ppu.GPR[op.rd]; + ppu.GPR[op.ra] = a; + ppu.GPR[op.rb] = b; + ppu.GPR[op.rd] = d; + return r; + } + + // No operation on specific u64 value (silly optimization barrier) + static u64 nop64(u64 value) + { + return value; + } + + std::unordered_map table + { + { "__memory", (u64)vm::base(0) }, + { "__context", (u64)&context }, + { "__trap", (u64)&trap }, + { "__trace", (u64)&trace }, + { "__hlecall", (u64)&hack }, + { "__syscall", (u64)&syscall }, + { "__get_tbl", (u64)&tbl }, + { "__call", 
(u64)&call }, + { "__lwarx", (u64)&lwarx }, + { "__ldarx", (u64)&ldarx }, + { "__stwcx", (u64)&stwcx }, + { "__stdcx", (u64)&stdcx }, + { "__sraw_get_ca", (u64)&sraw_carry }, + { "__srad_get_ca", (u64)&srad_carry }, + { "__adde_get_ca", (u64)&adde_carry }, + { "__vexptefp", (u64)&sse_exp2_ps }, + { "__vlogefp", (u64)&sse_log2_ps }, + { "__vperm", (u64)&sse_altivec_vperm }, + { "__vrefp", (u64)&sse_rcp_ps }, + { "__vrsqrtefp", (u64)&sse_rsqrt_ps }, + { "__vec3op", (u64)&vec3op }, + { "__veciop", (u64)&veciop }, + { "__aimmop", (u64)&aimmop }, + { "__aimmbop", (u64)&aimmbop }, + { "__aaimmop", (u64)&aaimmop }, + { "__immaop", (u64)&immaop }, + { "__immabop", (u64)&immabop }, + { "__fpop", (u64)&fpop }, + { "__nop64", (u64)&nop64 }, + { "__lvsl", (u64)&sse_altivec_lvsl }, + { "__lvsr", (u64)&sse_altivec_lvsr }, + { "__lvlx", (u64)&sse_cellbe_lvlx }, + { "__lvrx", (u64)&sse_cellbe_lvrx }, + { "__stvlx", (u64)&sse_cellbe_stvlx }, + { "__stvrx", (u64)&sse_cellbe_stvrx }, + { "__fre", (u64)&sse_rcp_ss }, + { "__frsqrte", (u64)&sse_rsqrt_ss }, + }; + + virtual u64 getSymbolAddress(const std::string& name) override + { + if (u64 addr = RTDyldMemoryManager::getSymbolAddress(name)) + { + LOG_ERROR(GENERAL, "LLVM: Linkage requested %s -> 0x%016llx", name, addr); + return addr; + } + + const auto found = table.find(name); + + if (found != table.end()) + { + return found->second; + } + + LOG_FATAL(GENERAL, "LLVM: Linkage failed for %s", name); + return (u64)trap; + } + + virtual u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override + { + // Simple allocation (TODO) + const auto ptr = m_next; m_next = (void*)::align((u64)m_next + size, 4096); + +#ifdef _WIN32 + if (!VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE)) +#else + if (::mprotect(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC)) +#endif + { + LOG_FATAL(GENERAL, "LLVM: Failed to allocate code section '%s', error %u", sec_name.data(), GetLastError()); + return 
nullptr; + } + + LOG_SUCCESS(GENERAL, "LLVM: Code section '%s' allocated -> 0x%p", sec_name.data(), ptr); + return (u8*)ptr; + } + + virtual u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override + { + // Simple allocation (TODO) + const auto ptr = m_next; m_next = (void*)::align((u64)m_next + size, 4096); + +#ifdef _WIN32 + if (!VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE)) +#else + if (::mprotect(ptr, size, PROT_READ | PROT_WRITE)) +#endif + { + LOG_FATAL(GENERAL, "LLVM: Failed to allocate data section '%s', error %u", sec_name.data(), GetLastError()); + return nullptr; + } + + LOG_SUCCESS(GENERAL, "LLVM: Data section '%s' allocated -> 0x%p", sec_name.data(), ptr); + return (u8*)ptr; + } + + virtual bool finalizeMemory(std::string* = nullptr) override + { + // TODO: make sections read-only when necessary + return false; + } + + virtual void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override + { + s_unwind_info = addr; + s_unwind_size = size; + + return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size); + } + + virtual void deregisterEHFrames(u8* addr, u64 load_addr, std::size_t size) override + { + LOG_ERROR(GENERAL, "deregisterEHFrames() called"); // Not expected + + return RTDyldMemoryManager::deregisterEHFrames(addr, load_addr, size); + } + + ~MemoryManager() + { +#ifdef _WIN32 + if (!RtlDeleteFunctionTable(s_unwind.data())) + { + LOG_FATAL(GENERAL, "RtlDeleteFunctionTable(addr=0x%p) failed! Error %u", s_unwind_info, GetLastError()); + } + + if (!VirtualFree(s_memory, 0, MEM_DECOMMIT)) + { + LOG_FATAL(GENERAL, "VirtualFree(0x%p) failed! Error %u", s_memory, GetLastError()); + } +#else + if (::mprotect(s_memory, s_memory_size, PROT_NONE)) + { + LOG_FATAL(GENERAL, "mprotect(0x%p) failed! 
Error %d", s_memory, errno); + } + + // TODO: unregister EH frames if necessary +#endif + } + +private: + void* m_next = s_memory; +}; + +llvm::LLVMContext g_context; + +extern void ppu_initialize(const std::string& name, const std::vector>& funcs, u32 entry) +{ + if (!s_memory) + { + throw std::runtime_error("LLVM: Memory not allocated, report to the developers." HERE); + } + + if (g_cfg_ppu_decoder.get() != ppu_decoder_type::llvm || funcs.empty()) + { + return; + } + + using namespace llvm; + + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + LLVMLinkInMCJIT(); + + // Initialization + const auto _pi8 = Type::getInt8PtrTy(g_context); + const auto _void = Type::getVoidTy(g_context); + const auto _func = FunctionType::get(Type::getVoidTy(g_context), false); + + // Create LLVM module + std::unique_ptr module = std::make_unique(name, g_context); + + // Initialize target + module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); + + // Initialize translator + std::unique_ptr translator = std::make_unique(g_context, module.get(), 0, entry); + + // Initialize function list + for (const auto& info : funcs) + { + if (info.second) + { + translator->AddFunction(info.first, cast(module->getOrInsertFunction(fmt::format("__sub_%x", info.first), _func))); + } + + translator->AddBlockInfo(info.first); + } + + legacy::FunctionPassManager pm(module.get()); + + // Basic optimizations + pm.add(createCFGSimplificationPass()); + pm.add(createPromoteMemoryToRegisterPass()); + pm.add(createEarlyCSEPass()); + pm.add(createTailCallEliminationPass()); + pm.add(createReassociatePass()); + pm.add(createInstructionCombiningPass()); + //pm.add(new DominatorTreeWrapperPass()); + //pm.add(createInstructionCombiningPass()); + //pm.add(new MemoryDependenceAnalysis()); + pm.add(createDeadStoreEliminationPass()); + //pm.add(createGVNPass()); + //pm.add(createBBVectorizePass()); + //pm.add(new LoopInfo()); + //pm.add(new ScalarEvolution()); + + pm.add(createSCCPPass()); + 
//pm.addPass(new SyscallAnalysisPass()); // Requires constant propagation + pm.add(createInstructionCombiningPass()); + pm.add(createAggressiveDCEPass()); + pm.add(createCFGSimplificationPass()); + //pm.add(createLintPass()); // Check + + // Translate functions + for (const auto& info : funcs) + { + if (info.second) + { + pm.run(*translator->TranslateToIR(info.first, info.first + info.second, vm::_ptr(info.first))); + } + } + + //static auto s_current = &PPUTranslator::UNK; + + //for (const auto& info : s_test) + //{ + // const u64 pseudo_addr = (u64)&info.first + INT64_MIN; + + // s_current = info.second; + // const auto func = translator->TranslateToIR(pseudo_addr, pseudo_addr, nullptr, [](PPUTranslator* _this) + // { + // (_this->*s_current)(op); + // _this->ReturnFromFunction(); + // }); + + // pm.run(*func); + //} + + legacy::PassManager mpm; + + // Remove unused functions, structs, global variables, etc + mpm.add(createStripDeadPrototypesPass()); + mpm.run(*module); + + std::string result; + raw_string_ostream out(result); + + out << *module; // print IR + fs::file(fs::get_config_dir() + "LLVM.log", fs::rewrite) + .write(out.str()); + + result.clear(); + + if (verifyModule(*module, &out)) + { + out.flush(); + LOG_ERROR(PPU, "{%s} LLVM: Translation failed:\n%s", name, result); + return; + } + + LOG_SUCCESS(PPU, "LLVM: %zu functions generated", module->getFunctionList().size()); + + Module* module_ptr = module.get(); + + std::shared_ptr engine(EngineBuilder(std::move(module)) + .setErrorStr(&result) + .setMCJITMemoryManager(std::make_unique()) + .setOptLevel(llvm::CodeGenOpt::Aggressive) + .setRelocationModel(Reloc::PIC_) + .setCodeModel((u64)s_memory <= 0x60000000 ? 
CodeModel::Medium : CodeModel::Large) + .setMCPU(sys::getHostCPUName()) + .create()); + + if (!engine) + { + throw fmt::exception("LLVM: Failed to create ExecutionEngine: %s", result); + } + + engine->setProcessAllSections(true); + //engine->setVerifyModules(true); + engine->RegisterJITEventListener(&s_listener); + engine->finalizeObject(); + + s_ppu_compiled.clear(); + + // Get function addresses + for (const auto& info : funcs) + { + if (info.second) + { + const std::uintptr_t link = engine->getFunctionAddress(fmt::format("__sub_%x", info.first)); + s_ppu_compiled.emplace(info.first, (void(*)())link); + + LOG_NOTICE(PPU, "** Function __sub_%x -> 0x%llx (addr=0x%x, size=0x%x)", info.first, link, info.first, info.second); + } + } + + // Delete IR to lower memory consumption + for (auto& func : module_ptr->functions()) + { + func.deleteBody(); + } + +#ifdef _WIN32 + // Register .xdata UNWIND_INFO (.pdata section is empty for some reason) + std::set func_set; + + for (const auto& pair : s_ppu_compiled) + { + // Get addresses + func_set.emplace((u64)pair.second); + } + + func_set.emplace(::align(*--func_set.end() + 4096, 4096)); + + const u64 base = (u64)s_memory; + const u8* bits = s_unwind_info; + + s_unwind.clear(); + s_unwind.reserve(s_ppu_compiled.size()); + + for (auto it = func_set.begin(), end = --func_set.end(); it != end; it++) + { + const u64 addr = *it; + const u64 next = *func_set.upper_bound(addr); + + // Generate RUNTIME_FUNCTION record + RUNTIME_FUNCTION uw; + uw.BeginAddress = static_cast(addr - base); + uw.EndAddress = static_cast(next - base); + uw.UnwindData = static_cast((u64)bits - base); + s_unwind.emplace_back(uw); + + // Parse .xdata record + VERIFY(*bits++ == 1); // Version and flags + bits++; // Size of prolog + const u8 count = *bits++; // Count of unwind codes + bits++; // Frame Reg + Off + bits += ::align(count, 2) * sizeof(u16); // UNWIND_CODE array + while (!*bits && bits < s_unwind_info + s_unwind_size) bits++; // Skip strange zero 
padding (???) + } + + VERIFY(bits == s_unwind_info + s_unwind_size); + VERIFY(RtlAddFunctionTable(s_unwind.data(), (DWORD)s_unwind.size(), base)); + LOG_SUCCESS(GENERAL, "LLVM: UNWIND_INFO registered (addr=0x%p, size=0x%llx)", s_unwind_info, s_unwind_size); +#endif + + fxm::import(WRAP_EXPR(engine)); + + LOG_SUCCESS(PPU, "LLVM: Compilation finished (%s)", sys::getHostCPUName().data()); +} + +#else + +extern void ppu_initialize(const std::string& name, const std::vector>& funcs, u32 entry) +{ +} + +#endif diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 8307245196..6e3ae11888 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -1,8 +1,8 @@ #pragma once -#include "Emu/Cell/Common.h" -#include "Emu/CPU/CPUThread.h" -#include "Emu/Memory/vm.h" +#include "Common.h" +#include "../CPU/CPUThread.h" +#include "../Memory/vm.h" class PPUThread final : public cpu_thread { diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp new file mode 100644 index 0000000000..ceb0487640 --- /dev/null +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -0,0 +1,4247 @@ +#ifdef LLVM_AVAILABLE + +#include "PPUTranslator.h" +#include "PPUThread.h" +#include "PPUInterpreter.h" + +#include "../Utilities/Log.h" + +using namespace llvm; + +const ppu_decoder s_ppu_decoder; + +/* Interpreter Call Macro */ + +#define VEC3OP(name) SetVr(op.vd, Call(GetType(), "__vec3op",\ + m_ir->getInt64((u64)&ppu_interpreter_fast::name),\ + GetVr(op.va, VrType::vi32),\ + GetVr(op.vb, VrType::vi32),\ + GetVr(op.vc, VrType::vi32))) + +#define VEC2OP(name) SetVr(op.vd, Call(GetType(), "__vec3op",\ + m_ir->getInt64((u64)&ppu_interpreter_fast::name),\ + GetVr(op.va, VrType::vi32),\ + GetVr(op.vb, VrType::vi32),\ + GetUndef())) + +#define VECIOP(name) SetVr(op.vd, Call(GetType(), "__veciop",\ + m_ir->getInt64((u64)&ppu_interpreter_fast::name),\ + m_ir->getInt32(op.opcode),\ + GetVr(op.vb, VrType::vi32))) + +#define FPOP(name) SetFpr(op.frd, Call(GetType(), 
"__fpop",\ + m_ir->getInt64((u64)&ppu_interpreter_fast::name),\ + GetFpr(op.fra),\ + GetFpr(op.frb),\ + GetFpr(op.frc))) + +#define AIMMOP(name) SetGpr(op.ra, Call(GetType(), "__aimmop",\ + m_ir->getInt64((u64)&ppu_interpreter_fast::name),\ + m_ir->getInt32(op.opcode),\ + GetGpr(op.rs))) + +#define AIMMBOP(name) SetGpr(op.ra, Call(GetType(), "__aimmbop",\ + m_ir->getInt64((u64)&ppu_interpreter_fast::name),\ + m_ir->getInt32(op.opcode),\ + GetGpr(op.rs),\ + GetGpr(op.rb))) + +#define AAIMMOP(name) SetGpr(op.ra, Call(GetType(), "__aaimmop",\ + m_ir->getInt64((u64)&ppu_interpreter_fast::name),\ + m_ir->getInt32(op.opcode),\ + GetGpr(op.rs),\ + GetGpr(op.ra))) + +#define IMMAOP(name) SetGpr(op.rd, Call(GetType(), "__immaop",\ + m_ir->getInt64((u64)&ppu_interpreter_fast::name),\ + m_ir->getInt32(op.opcode),\ + GetGpr(op.ra))) + +#define IMMABOP(name) SetGpr(op.rd, Call(GetType(), "__immabop",\ + m_ir->getInt64((u64)&ppu_interpreter_fast::name),\ + m_ir->getInt32(op.opcode),\ + GetGpr(op.ra),\ + GetGpr(op.rb))) + +PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, u64 base, u64 entry) + : m_context(context) + , m_module(module) + , m_base_addr(base) + , m_is_be(false) + , m_pure_attr(AttributeSet::get(m_context, AttributeSet::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadNone})) +{ + // Memory base + m_base = new GlobalVariable(*module, ArrayType::get(GetType(), 0x100000000), false, GlobalValue::ExternalLinkage, 0, "__memory"); + + // Thread context struct (TODO: safer member access) + std::vector thread_struct{ArrayType::get(GetType(), OFFSET_32(PPUThread, GPR))}; + + thread_struct.insert(thread_struct.end(), 32, GetType()); // GPR[0..31] + thread_struct.insert(thread_struct.end(), 32, GetType()); // FPR[0..31] + thread_struct.insert(thread_struct.end(), 32, GetType()); // VR[0..31] + thread_struct.insert(thread_struct.end(), 32, GetType()); // CR[0..31] + + m_thread_type = StructType::create(m_context, thread_struct, "context_t"); +} + 
+PPUTranslator::~PPUTranslator() +{ +} + +void PPUTranslator::AddFunction(u64 addr, Function* func, FunctionType* type) +{ + if (!m_func_types.emplace(addr, type).second || !m_func_list.emplace(addr, func).second) + { + throw fmt::exception("AddFunction(0x%08llx: %s) failed: function already exists", addr, func->getName().data()); + } +} + +void PPUTranslator::AddBlockInfo(u64 addr) +{ + m_block_info.emplace(addr); +} + +Function* PPUTranslator::TranslateToIR(u64 start_addr, u64 end_addr, be_t* bin, void(*custom)(PPUTranslator*)) +{ + m_function = m_func_list[start_addr]; + m_function_type = m_func_types[start_addr]; + m_start_addr = start_addr; + m_end_addr = end_addr; + m_blocks.clear(); + m_value_usage.clear(); + + IRBuilder<> builder(BasicBlock::Create(m_context, "__entry", m_function)); + m_ir = &builder; + + /* Create context variables */ + m_thread = Call(m_thread_type->getPointerTo(), AttributeSet::get(m_context, AttributeSet::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadOnly}), "__context", m_ir->getInt64(start_addr)); + + // Non-volatile registers with special meaning (TODO) + m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 1, ".sp"); + m_g_gpr[2] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 2, ".rtoc"); + m_g_gpr[13] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 13, ".tls"); + + // Registers used for args or results (TODO) + for (u32 i = 3; i <= 10; i++) m_g_gpr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + i, fmt::format(".r%u", i)); + for (u32 i = 1; i <= 13; i++) m_g_fpr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 33 + i, fmt::format(".f%u", i)); + for (u32 i = 2; i <= 13; i++) m_g_vr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 65 + i, fmt::format(".v%u", i)); + + /* Create local variables */ + for (u32 i = 0; i < 32; i++) m_gpr[i] = m_g_gpr[i] ? m_g_gpr[i] : m_ir->CreateAlloca(GetType(), nullptr, fmt::format(".r%d", i)); + for (u32 i = 0; i < 32; i++) m_fpr[i] = m_g_fpr[i] ? 
m_g_fpr[i] : m_ir->CreateAlloca(GetType(), nullptr, fmt::format(".f%d", i)); + for (u32 i = 0; i < 32; i++) m_vr[i] = m_g_vr[i] ? m_g_vr[i] : m_ir->Insert(new AllocaInst(GetType(), nullptr, 16, fmt::format(".v%d", i))); + + for (u32 i = 0; i < 32; i++) + { + static const char* const names[] + { + "lt", + "gt", + "eq", + "so", + }; + + //m_cr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 97 + i, fmt::format("cr%u.%s", i / 4, names[i % 4])); + m_cr[i] = m_ir->CreateAlloca(GetType(), 0, fmt::format("cr%u.%s", i / 4, names[i % 4])); + } + + m_reg_lr = m_ir->CreateAlloca(GetType(), nullptr, ".lr"); + m_reg_ctr = m_ir->CreateAlloca(GetType(), nullptr, ".ctr"); + m_reg_vrsave = m_ir->CreateAlloca(GetType(), nullptr, ".vrsave"); + + m_xer_so = m_ir->CreateAlloca(GetType(), nullptr, "xer.so"); + m_xer_ov = m_ir->CreateAlloca(GetType(), nullptr, "xer.ov"); + m_xer_ca = m_ir->CreateAlloca(GetType(), nullptr, ".carry"); + m_xer_count = m_ir->CreateAlloca(GetType(), nullptr, "xer.count"); + + m_vscr_nj = m_ir->CreateAlloca(GetType(), nullptr, "vscr.nj"); + m_vscr_sat = m_ir->CreateAlloca(GetType(), nullptr, "vscr.sat"); + + //m_fpscr_fx = m_fpscr[0] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.fx"); + //m_fpscr_ox = m_fpscr[3] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ox"); + //m_fpscr_ux = m_fpscr[4] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ux"); + //m_fpscr_zx = m_fpscr[5] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.zx"); + //m_fpscr_xx = m_fpscr[6] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.xx"); + //m_fpscr_vxsnan = m_fpscr[7] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxsnan"); + //m_fpscr_vxisi = m_fpscr[8] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxisi"); + //m_fpscr_vxidi = m_fpscr[9] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxidi"); + //m_fpscr_vxzdz = m_fpscr[10] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxzdz"); + //m_fpscr_vximz = m_fpscr[11] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vximz"); + 
//m_fpscr_vxvc = m_fpscr[12] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxvc"); + //m_fpscr_fr = m_fpscr[13] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.fr"); + //m_fpscr_fi = m_fpscr[14] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.fi"); + //m_fpscr_c = m_fpscr[15] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.c"); + m_fpscr_lt = m_fpscr[16] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.lt"); + m_fpscr_gt = m_fpscr[17] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.gt"); + m_fpscr_eq = m_fpscr[18] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.eq"); + m_fpscr_un = m_fpscr[19] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.un"); + //m_fpscr_reserved = m_fpscr[20] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.52"); + //m_fpscr_vxsoft = m_fpscr[21] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxsoft"); + //m_fpscr_vxsqrt = m_fpscr[22] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxsqrt"); + //m_fpscr_vxcvi = m_fpscr[23] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.vxcvi"); + //m_fpscr_ve = m_fpscr[24] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ve"); + //m_fpscr_oe = m_fpscr[25] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.oe"); + //m_fpscr_ue = m_fpscr[26] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ue"); + //m_fpscr_ze = m_fpscr[27] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ze"); + //m_fpscr_xe = m_fpscr[28] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.xe"); + //m_fpscr_ni = m_fpscr[29] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.ni"); + //m_fpscr_rnh = m_fpscr[30] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.rn.msb"); + //m_fpscr_rnl = m_fpscr[31] = m_ir->CreateAlloca(GetType(), nullptr, "fpscr.rn.lsb"); + + /* Initialize local variables */ + m_ir->CreateStore(m_ir->getFalse(), m_xer_so); // XER.SO + m_ir->CreateStore(m_ir->getFalse(), m_vscr_sat); // VSCR.SAT + m_ir->CreateStore(m_ir->getTrue(), m_vscr_nj); + + // TODO: only loaded r12 (extended argument for program initialization) + 
m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 12)), m_gpr[12]); + + m_jtr = BasicBlock::Create(m_context, "__jtr", m_function); + + /* Convert each instruction to LLVM IR */ + const auto start = GetBasicBlock(m_start_addr); + m_ir->CreateBr(start); + m_ir->SetInsertPoint(start); + + for (m_current_addr = start_addr; m_current_addr < end_addr;) + { + // Preserve current address (m_current_addr may be changed by the decoder) + const u64 addr = m_current_addr; + + // Translate opcode + const u32 op = *(m_bin = bin + (addr - start_addr) / sizeof(u32)); + (this->*(s_ppu_decoder.decode(op)))({op}); + + // Calculate next address if necessary + if (m_current_addr == addr) m_current_addr += sizeof(u32); + + // Get next block + const auto next = GetBasicBlock(m_current_addr); + + // Finalize current block if necessary (create branch to next address) + if (!m_ir->GetInsertBlock()->getTerminator()) + { + m_ir->CreateBr(next); + } + + // Start next block + m_ir->SetInsertPoint(next); + } + + // Run custom IR generation function + if (custom) custom(this); + + // Finalize past-the-end block + if (!m_ir->GetInsertBlock()->getTerminator()) + { + Call(GetType(), "__end", m_ir->getInt64(end_addr)); + m_ir->CreateUnreachable(); + } + + m_ir->SetInsertPoint(m_jtr); + + if (m_jtr->use_empty()) + { + m_ir->CreateUnreachable(); + } + else + { + // Get block entries + const std::vector cases{m_block_info.upper_bound(start_addr), m_block_info.lower_bound(end_addr)}; + + const auto _ctr = m_ir->CreateLoad(m_reg_ctr); + const auto _default = BasicBlock::Create(m_context, "__jtr.def", m_function); + const auto _switch = m_ir->CreateSwitch(_ctr, _default, ::size32(cases)); + + for (const u64 addr : cases) + { + _switch->addCase(m_ir->getInt64(addr), GetBasicBlock(addr)); + } + + m_ir->SetInsertPoint(_default); + Call(GetType(), "__call", _ctr); + m_ir->CreateRetVoid(); + } + + //for (auto i = inst_begin(*m_function), end = inst_end(*m_function); i != 
end;) + //{ + // const auto inst = &*i++; + + // // Remove unnecessary stores of global variables created by PrepareGlobalArguments() and similar functions + // if (const auto si = dyn_cast(inst)) + // { + // const auto g = dyn_cast(si->getOperand(1)); + + // if (g && m_value_usage[g] == 0) + // { + // si->eraseFromParent(); + // continue; + // } + // } + //} + + return m_function; +} + +Type* PPUTranslator::ScaleType(Type* type, s32 pow2) +{ + EXPECTS(type->getScalarType()->isIntegerTy()); + + const auto new_type = m_ir->getIntNTy(type->getScalarSizeInBits() * std::pow(2, pow2)); + return type->isVectorTy() ? VectorType::get(new_type, type->getVectorNumElements()) : cast(new_type); +} + +Value* PPUTranslator::DuplicateExt(Value* arg) +{ + const auto extended = ZExt(arg); + return m_ir->CreateOr(extended, m_ir->CreateShl(extended, arg->getType()->getScalarSizeInBits())); +} + +Value* PPUTranslator::RotateLeft(Value* arg, u64 n) +{ + return !n ? arg : m_ir->CreateOr(m_ir->CreateShl(arg, n), m_ir->CreateLShr(arg, arg->getType()->getScalarSizeInBits() - n)); +} + +Value* PPUTranslator::RotateLeft(Value* arg, Value* n) +{ + const u64 mask = arg->getType()->getScalarSizeInBits() - 1; + + return m_ir->CreateOr(m_ir->CreateShl(arg, m_ir->CreateAnd(n, mask)), m_ir->CreateLShr(arg, m_ir->CreateAnd(m_ir->CreateNeg(n), mask))); +} + +void PPUTranslator::CallFunction(u64 target, bool tail, Value* indirect) +{ + const auto func = indirect ? nullptr : m_func_list[target]; + + const auto callee_type = func ? m_func_types[target] : nullptr; + + // Prepare function arguments + std::vector args; + + if (!callee_type) + { + // Prepare args for untyped function + } + + // Call the function + const auto result = func ? m_ir->CreateCall(func, args) : Call(GetType(), "__call", indirect ? 
indirect : m_ir->getInt64(target)); + + if (!tail) + { + UndefineVolatileRegisters(); + } + + if (!callee_type) + { + // Get result from untyped function + } + + if (tail) + { + m_ir->CreateRetVoid(); + } +} + +void PPUTranslator::UndefineVolatileRegisters() +{ + const auto undef_i64 = GetUndef(); + const auto undef_f64 = GetUndef(); + const auto undef_vec = GetUndef(); + const auto undef_bool = GetUndef(); + + // Undefine local volatile registers + SetGpr(0, undef_i64); // r0 + SetFpr(0, undef_f64); // f0: volatile scratch register + SetVr(0, undef_vec); // v0: volatile scratch register + SetVr(1, undef_vec); // v1: volatile scratch register + + m_ir->CreateStore(undef_i64, m_reg_lr); // LR + m_ir->CreateStore(undef_i64, m_reg_ctr); // CTR + m_ir->CreateStore(undef_bool, m_xer_ca); // XER.CA + + m_ir->CreateStore(undef_bool, m_fpscr_lt); + m_ir->CreateStore(undef_bool, m_fpscr_gt); + m_ir->CreateStore(undef_bool, m_fpscr_eq); + m_ir->CreateStore(undef_bool, m_fpscr_un); + + SetCrField(0, undef_bool, undef_bool, undef_bool, undef_bool); // cr0 + SetCrField(1, undef_bool, undef_bool, undef_bool, undef_bool); // cr1 + SetCrField(5, undef_bool, undef_bool, undef_bool, undef_bool); // cr5 + SetCrField(6, undef_bool, undef_bool, undef_bool, undef_bool); // cr6 + SetCrField(7, undef_bool, undef_bool, undef_bool, undef_bool); // cr7 + + // Cannot undef sticky flags because it makes |= op meaningless + //m_ir->CreateStore(m_ir->getFalse(), m_xer_so); // XER.SO + //m_ir->CreateStore(m_ir->getFalse(), m_vscr_sat); // VSCR.SAT +} + +BasicBlock* PPUTranslator::GetBasicBlock(u64 address) +{ + if (auto& block = m_blocks[address]) + { + return block; + } + else + { + return block = BasicBlock::Create(m_context, fmt::format("loc_%llx", address/* - m_start_addr*/), m_function); + } +} + +Value* PPUTranslator::Solid(Value* value) +{ + const u32 size = value->getType()->getPrimitiveSizeInBits(); + + /* Workarounds (casting bool vectors directly may produce invalid code) */ + + if 
(value->getType() == GetType()) + { + return m_ir->CreateBitCast(SExt(value, GetType()), m_ir->getIntNTy(128)); + } + + if (value->getType() == GetType()) + { + return m_ir->CreateBitCast(SExt(value, GetType()), m_ir->getIntNTy(128)); + } + + if (value->getType() == GetType()) + { + return m_ir->CreateBitCast(SExt(value, GetType()), m_ir->getIntNTy(128)); + } + + return m_ir->CreateBitCast(value, m_ir->getIntNTy(size)); +} + +Value* PPUTranslator::IsZero(Value* value) +{ + return m_ir->CreateIsNull(Solid(value)); +} + +Value* PPUTranslator::IsNotZero(Value* value) +{ + return m_ir->CreateIsNotNull(Solid(value)); +} + +Value* PPUTranslator::IsOnes(Value* value) +{ + value = Solid(value); + return m_ir->CreateICmpEQ(value, ConstantInt::getSigned(value->getType(), -1)); +} + +Value* PPUTranslator::IsNotOnes(Value* value) +{ + value = Solid(value); + return m_ir->CreateICmpNE(value, ConstantInt::getSigned(value->getType(), -1)); +} + +Value* PPUTranslator::Broadcast(Value* value, u32 count) +{ + if (const auto cv = dyn_cast(value)) + { + return ConstantVector::getSplat(count, cv); + } + + return m_ir->CreateVectorSplat(count, value); +} + +std::pair PPUTranslator::Saturate(Value* value, CmpInst::Predicate inst, Value* extreme) +{ + // Modify args + if (value->getType()->isVectorTy() && !extreme->getType()->isVectorTy()) + extreme = Broadcast(extreme, value->getType()->getVectorNumElements()); + if (extreme->getType()->isVectorTy() && !value->getType()->isVectorTy()) + value = Broadcast(value, extreme->getType()->getVectorNumElements()); + + // Compare args + const auto cmp = m_ir->CreateICmp(inst, value, extreme); + + // Return saturated result and saturation bitmask + return{m_ir->CreateSelect(cmp, extreme, value), cmp}; +} + +std::pair PPUTranslator::SaturateSigned(Value* value, u64 min, u64 max) +{ + const auto type = value->getType()->getScalarType(); + const auto sat_l = Saturate(value, ICmpInst::ICMP_SLT, ConstantInt::get(type, min, true)); + const auto sat_h = 
Saturate(sat_l.first, ICmpInst::ICMP_SGT, ConstantInt::get(type, max, true)); + + // Return saturated result and saturation bitmask + return{sat_h.first, m_ir->CreateOr(sat_l.second, sat_h.second)}; +} + +Value* PPUTranslator::Scale(Value* value, s32 scale) +{ + if (scale) + { + const auto type = value->getType(); + const auto power = std::pow(2, scale); + + if (type->isVectorTy()) + { + return m_ir->CreateFMul(value, ConstantVector::getSplat(type->getVectorNumElements(), ConstantFP::get(type->getVectorElementType(), power))); + } + else + { + return m_ir->CreateFMul(value, ConstantFP::get(type, power)); + } + } + + return value; +} + +Value* PPUTranslator::Shuffle(Value* left, Value* right, std::initializer_list indices) +{ + const auto type = left->getType(); + + if (!right) + { + right = UndefValue::get(type); + } + + if (!m_is_be) + { + std::vector data; data.reserve(indices.size()); + + const u32 mask = type->getVectorNumElements() - 1; + + // Transform indices (works for vectors with size 2^N) + for (std::size_t i = 0; i < indices.size(); i++) + { + data.push_back(indices.end()[~i] ^ mask); + } + + return m_ir->CreateShuffleVector(left, right, ConstantDataVector::get(m_context, data)); + } + + return m_ir->CreateShuffleVector(left, right, ConstantDataVector::get(m_context, { indices.begin(), indices.end() })); +} + +Value* PPUTranslator::SExt(Value* value, Type* type) +{ + return m_ir->CreateSExt(value, type ? type : ScaleType(value->getType(), 1)); +} + +Value* PPUTranslator::ZExt(Value* value, Type* type) +{ + return m_ir->CreateZExt(value, type ? type : ScaleType(value->getType(), 1)); +} + +Value* PPUTranslator::Add(std::initializer_list args) +{ + Value* result{}; + for (auto arg : args) + { + result = result ? m_ir->CreateAdd(result, arg) : arg; + } + + return result; +} + +Value* PPUTranslator::Trunc(Value* value, Type* type) +{ + return m_ir->CreateTrunc(value, type ? 
type : ScaleType(value->getType(), -1)); +} + +void PPUTranslator::UseCondition(Value* cond) +{ + if (cond) + { + const auto local = BasicBlock::Create(m_context, fmt::format("loc_%llx.cond", m_current_addr/* - m_start_addr*/), m_function); + m_ir->CreateCondBr(cond, local, GetBasicBlock(m_current_addr + 4)); + m_ir->SetInsertPoint(local); + } +} + +llvm::Value* PPUTranslator::GetMemory(llvm::Value* addr, llvm::Type* type) +{ + return m_ir->CreateBitCast(m_ir->CreateGEP(m_base, {m_ir->getInt64(0), addr}), type->getPointerTo()); +} + +Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align) +{ + const auto size = type->getPrimitiveSizeInBits(); + + if (is_be ^ m_is_be && size > 8) + { + // Read, byteswap, bitcast + const auto int_type = m_ir->getIntNTy(size); + const auto value = m_ir->CreateAlignedLoad(GetMemory(addr, int_type), align, !IsStackAddr(addr)); + return m_ir->CreateBitCast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type); + } + + // Read normally + return m_ir->CreateAlignedLoad(GetMemory(addr, type), align, !IsStackAddr(addr)); +} + +void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align) +{ + const auto type = value->getType(); + const auto size = type->getPrimitiveSizeInBits(); + + if (is_be ^ m_is_be && size > 8) + { + // Bitcast, byteswap + const auto int_type = m_ir->getIntNTy(size); + value = Call(int_type, fmt::format("llvm.bswap.i%u", size), m_ir->CreateBitCast(value, int_type)); + } + + // Write + m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), align, !IsStackAddr(addr)); +} + +void PPUTranslator::CompilationError(const std::string& error) +{ + LOG_ERROR(PPU, "0x%08llx: Error: %s", m_current_addr, error); +} + + +void PPUTranslator::MFVSCR(ppu_opcode_t op) +{ + const auto vscr = m_ir->CreateOr(ZExt(m_ir->CreateLoad(m_vscr_sat), GetType()), m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_vscr_nj), GetType()), 16)); + SetVr(op.vd, 
m_ir->CreateInsertElement(ConstantVector::getSplat(4, m_ir->getInt32(0)), vscr, m_ir->getInt32(m_is_be ? 3 : 0))); +} + +void PPUTranslator::MTVSCR(ppu_opcode_t op) +{ + const auto vscr = m_ir->CreateExtractElement(GetVr(op.vb, VrType::vi32), m_ir->getInt32(m_is_be ? 3 : 0)); + m_ir->CreateStore(Trunc(m_ir->CreateLShr(vscr, 16), GetType()), m_vscr_nj); + m_ir->CreateStore(Trunc(vscr, GetType()), m_vscr_sat); +} + +void PPUTranslator::VADDCUW(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi32, op.va, op.vb)); + SetVr(op.vd, m_ir->CreateLShr(m_ir->CreateAdd(ab[0], ab[1]), 32)); +} + +void PPUTranslator::VADDFP(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vf, op.va, op.vb); + SetVr(op.vd, m_ir->CreateFAdd(ab[0], ab[1])); +} + +void PPUTranslator::VADDSBS(ppu_opcode_t op) +{ + const auto ab = SExt(GetVrs(VrType::vi8, op.va, op.vb)); + const auto result = m_ir->CreateAdd(ab[0], ab[1]); + const auto saturated = SaturateSigned(result, -0x80, 0x7f); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VADDSHS(ppu_opcode_t op) +{ + const auto ab = SExt(GetVrs(VrType::vi16, op.va, op.vb)); + const auto result = m_ir->CreateAdd(ab[0], ab[1]); + const auto saturated = SaturateSigned(result, -0x8000, 0x7fff); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VADDSWS(ppu_opcode_t op) +{ + const auto ab = SExt(GetVrs(VrType::vi32, op.va, op.vb)); + const auto result = m_ir->CreateAdd(ab[0], ab[1]); + const auto saturated = SaturateSigned(result, -0x80000000ll, 0x7fffffff); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VADDUBM(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + SetVr(op.vd, m_ir->CreateAdd(ab[0], ab[1])); +} + +void PPUTranslator::VADDUBS(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi8, op.va, op.vb)); + const auto result = m_ir->CreateAdd(ab[0], ab[1]); + const 
auto saturated = Saturate(result, ICmpInst::ICMP_UGT, m_ir->getInt16(0xff)); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VADDUHM(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, m_ir->CreateAdd(ab[0], ab[1])); +} + +void PPUTranslator::VADDUHS(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi16, op.va, op.vb)); + const auto result = m_ir->CreateAdd(ab[0], ab[1]); + const auto saturated = Saturate(result, ICmpInst::ICMP_UGT, m_ir->getInt32(0xffff)); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VADDUWM(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateAdd(ab[0], ab[1])); +} + +void PPUTranslator::VADDUWS(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi32, op.va, op.vb)); + const auto result = m_ir->CreateAdd(ab[0], ab[1]); + const auto saturated = Saturate(result, ICmpInst::ICMP_UGT, m_ir->getInt64(0xffffffff)); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VAND(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateAnd(ab[0], ab[1])); +} + +void PPUTranslator::VANDC(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateAnd(ab[0], m_ir->CreateNot(ab[1]))); +} + +#define AVG_OP(a, b) m_ir->CreateLShr(m_ir->CreateSub(a, m_ir->CreateNot(b)), 1) /* (a + b + 1) >> 1 */ + +void PPUTranslator::VAVGSB(ppu_opcode_t op) +{ + const auto ab = SExt(GetVrs(VrType::vi8, op.va, op.vb)); + SetVr(op.vd, AVG_OP(ab[0], ab[1])); +} + +void PPUTranslator::VAVGSH(ppu_opcode_t op) +{ + const auto ab = SExt(GetVrs(VrType::vi16, op.va, op.vb)); + SetVr(op.vd, AVG_OP(ab[0], ab[1])); +} + +void PPUTranslator::VAVGSW(ppu_opcode_t op) +{ + const auto ab = SExt(GetVrs(VrType::vi32, op.va, op.vb)); + SetVr(op.vd, AVG_OP(ab[0], ab[1])); +} 
+ +void PPUTranslator::VAVGUB(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi8, op.va, op.vb)); + SetVr(op.vd, AVG_OP(ab[0], ab[1])); +} + +void PPUTranslator::VAVGUH(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi16, op.va, op.vb)); + SetVr(op.vd, AVG_OP(ab[0], ab[1])); +} + +void PPUTranslator::VAVGUW(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi32, op.va, op.vb)); + SetVr(op.vd, AVG_OP(ab[0], ab[1])); +} + +void PPUTranslator::VCFSX(ppu_opcode_t op) +{ + const auto b = GetVr(op.vb, VrType::vi32); + SetVr(op.vd, Scale(m_ir->CreateSIToFP(b, GetType()), 0 - op.vuimm)); +} + +void PPUTranslator::VCFUX(ppu_opcode_t op) +{ + const auto b = GetVr(op.vb, VrType::vi32); + SetVr(op.vd, Scale(m_ir->CreateUIToFP(b, GetType()), 0 - op.vuimm)); +} + +void PPUTranslator::VCMPBFP(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vf, op.va, op.vb); + const auto nle = m_ir->CreateFCmpUGT(ab[0], ab[1]); + const auto nge = m_ir->CreateFCmpULT(ab[0], m_ir->CreateFNeg(ab[1])); + const auto le_bit = m_ir->CreateShl(ZExt(nle, GetType()), 31); + const auto ge_bit = m_ir->CreateShl(ZExt(nge, GetType()), 30); + const auto result = m_ir->CreateOr(le_bit, ge_bit); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, m_ir->getFalse(), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPEQFP(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vf, op.va, op.vb); + const auto result = m_ir->CreateFCmpOEQ(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPEQUB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + const auto result = m_ir->CreateICmpEQ(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPEQUH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); 
+ const auto result = m_ir->CreateICmpEQ(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPEQUW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto result = m_ir->CreateICmpEQ(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPGEFP(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vf, op.va, op.vb); + const auto result = m_ir->CreateFCmpOGE(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPGTFP(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vf, op.va, op.vb); + const auto result = m_ir->CreateFCmpOGT(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPGTSB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + const auto result = m_ir->CreateICmpSGT(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPGTSH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + const auto result = m_ir->CreateICmpSGT(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPGTSW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto result = m_ir->CreateICmpSGT(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPGTUB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + 
const auto result = m_ir->CreateICmpUGT(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPGTUH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + const auto result = m_ir->CreateICmpUGT(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +void PPUTranslator::VCMPGTUW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto result = m_ir->CreateICmpUGT(ab[0], ab[1]); + SetVr(op.vd, result); + if (op.oe) SetCrField(6, IsOnes(result), m_ir->getFalse(), IsZero(result), m_ir->getFalse()); +} + +#define FP_SAT_OP(fcmp, value) m_ir->CreateSelect(fcmp, cast(cast(fcmp)->getOperand(1)), value) + +void PPUTranslator::VCTSXS(ppu_opcode_t op) +{ + const auto b = GetVr(op.vb, VrType::vf); + const auto scaled = Scale(b, op.vuimm); + const auto is_nan = m_ir->CreateFCmpUNO(b, ConstantVector::getSplat(4, ConstantFP::get(GetType(), 0.0))); // NaN -> 0.0 + const auto sat_l = m_ir->CreateFCmpOLT(scaled, ConstantVector::getSplat(4, ConstantFP::get(GetType(), -std::pow(2, 31)))); // TODO ??? 
+ const auto sat_h = m_ir->CreateFCmpOGE(scaled, ConstantVector::getSplat(4, ConstantFP::get(GetType(), std::pow(2, 31)))); + const auto converted = m_ir->CreateFPToSI(FP_SAT_OP(sat_l, FP_SAT_OP(is_nan, scaled)), GetType()); + SetVr(op.vd, m_ir->CreateSelect(sat_h, ConstantVector::getSplat(4, m_ir->getInt32(0x7fffffff)), converted)); + SetSat(IsNotZero(m_ir->CreateOr(sat_l, sat_h))); +} + +void PPUTranslator::VCTUXS(ppu_opcode_t op) +{ + const auto b = GetVr(op.vb, VrType::vf); + const auto scaled = Scale(b, op.vuimm); + const auto is_nan = m_ir->CreateFCmpUNO(b, ConstantVector::getSplat(4, ConstantFP::get(GetType(), 0.0))); // NaN -> 0.0 + const auto sat_l = m_ir->CreateFCmpOLT(scaled, ConstantVector::getSplat(4, ConstantFP::get(GetType(), 0.0))); + const auto sat_h = m_ir->CreateFCmpOGE(scaled, ConstantVector::getSplat(4, ConstantFP::get(GetType(), std::pow(2, 32)))); // TODO ??? + const auto converted = m_ir->CreateFPToUI(FP_SAT_OP(sat_l, FP_SAT_OP(is_nan, scaled)), GetType()); + SetVr(op.vd, m_ir->CreateSelect(sat_h, ConstantVector::getSplat(4, m_ir->getInt32(0xffffffff)), converted)); + SetSat(IsNotZero(m_ir->CreateOr(sat_l, sat_h))); +} + +void PPUTranslator::VEXPTEFP(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), m_pure_attr, "__vexptefp", GetVr(op.vb, VrType::vf))); +} + +void PPUTranslator::VLOGEFP(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), m_pure_attr, "__vlogefp", GetVr(op.vb, VrType::vf))); +} + +void PPUTranslator::VMADDFP(ppu_opcode_t op) +{ + const auto acb = GetVrs(VrType::vf, op.va, op.vc, op.vb); + SetVr(op.vd, m_ir->CreateFAdd(m_ir->CreateFMul(acb[0], acb[1]), acb[2])); + //SetVr(op.vd, Call(GetType(), "llvm.fmuladd.v4f32", acb[0], acb[1], acb[2])); +} + +void PPUTranslator::VMAXFP(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vf, op.va, op.vb); + SetVr(op.vd, m_ir->CreateSelect(m_ir->CreateFCmpOGT(ab[0], ab[1]), ab[0], ab[1])); +} + +void PPUTranslator::VMAXSB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, 
op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_SLT, ab[1]).first); +} + +void PPUTranslator::VMAXSH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_SLT, ab[1]).first); +} + +void PPUTranslator::VMAXSW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_SLT, ab[1]).first); +} + +void PPUTranslator::VMAXUB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_ULT, ab[1]).first); +} + +void PPUTranslator::VMAXUH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_ULT, ab[1]).first); +} + +void PPUTranslator::VMAXUW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_ULT, ab[1]).first); +} + +void PPUTranslator::VMHADDSHS(ppu_opcode_t op) +{ + const auto abc = SExt(GetVrs(VrType::vi16, op.va, op.vb, op.vc)); + const auto result = m_ir->CreateAdd(m_ir->CreateAShr(m_ir->CreateMul(abc[0], abc[1]), 15), abc[2]); + const auto saturated = SaturateSigned(result, -0x8000, 0x7fff); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VMHRADDSHS(ppu_opcode_t op) +{ + const auto abc = SExt(GetVrs(VrType::vi16, op.va, op.vb, op.vc)); + const auto result = m_ir->CreateAdd(m_ir->CreateAShr(m_ir->CreateAdd(m_ir->CreateMul(abc[0], abc[1]), ConstantVector::getSplat(8, m_ir->getInt32(0x4000))), 15), abc[2]); + const auto saturated = SaturateSigned(result, -0x8000, 0x7fff); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VMINFP(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vf, op.va, op.vb); + SetVr(op.vd, m_ir->CreateSelect(m_ir->CreateFCmpOLT(ab[0], ab[1]), ab[0], ab[1])); +} + +void PPUTranslator::VMINSB(ppu_opcode_t op) +{ + const auto ab = 
GetVrs(VrType::vi8, op.va, op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_SGT, ab[1]).first); +} + +void PPUTranslator::VMINSH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_SGT, ab[1]).first); +} + +void PPUTranslator::VMINSW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_SGT, ab[1]).first); +} + +void PPUTranslator::VMINUB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_UGT, ab[1]).first); +} + +void PPUTranslator::VMINUH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_UGT, ab[1]).first); +} + +void PPUTranslator::VMINUW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, Saturate(ab[0], CmpInst::ICMP_UGT, ab[1]).first); +} + +void PPUTranslator::VMLADDUHM(ppu_opcode_t op) +{ + const auto abc = GetVrs(VrType::vi16, op.va, op.vb, op.vc); + SetVr(op.vd, m_ir->CreateAdd(m_ir->CreateMul(abc[0], abc[1]), abc[2])); +} + +void PPUTranslator::VMRGHB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + SetVr(op.vd, Shuffle(ab[0], ab[1], { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 })); +} + +void PPUTranslator::VMRGHH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, Shuffle(ab[0], ab[1], { 0, 8, 1, 9, 2, 10, 3, 11 })); +} + +void PPUTranslator::VMRGHW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, Shuffle(ab[0], ab[1], { 0, 4, 1, 5 })); +} + +void PPUTranslator::VMRGLB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + SetVr(op.vd, Shuffle(ab[0], ab[1], { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 })); +} + +void PPUTranslator::VMRGLH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, 
op.va, op.vb); + SetVr(op.vd, Shuffle(ab[0], ab[1], { 4, 12, 5, 13, 6, 14, 7, 15 })); +} + +void PPUTranslator::VMRGLW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, Shuffle(ab[0], ab[1], { 2, 6, 3, 7 })); +} + +void PPUTranslator::VMSUMMBM(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + const auto a = SExt(ab[0], GetType()); + const auto b = ZExt(ab[1], GetType()); + const auto p = m_ir->CreateMul(a, b); + const auto c = GetVr(op.vc, VrType::vi32); + const auto e0 = Shuffle(p, nullptr, { 0, 4, 8, 12 }); + const auto e1 = Shuffle(p, nullptr, { 1, 5, 9, 13 }); + const auto e2 = Shuffle(p, nullptr, { 2, 6, 10, 14 }); + const auto e3 = Shuffle(p, nullptr, { 3, 7, 11, 15 }); + SetVr(op.vd, Add({ c, e0, e1, e2, e3 })); +} + +void PPUTranslator::VMSUMSHM(ppu_opcode_t op) +{ + const auto ab = SExt(GetVrs(VrType::vi16, op.va, op.vb)); + const auto p = m_ir->CreateMul(ab[0], ab[1]); + const auto c = GetVr(op.vc, VrType::vi32); + const auto e0 = Shuffle(p, nullptr, { 0, 2, 4, 6 }); + const auto e1 = Shuffle(p, nullptr, { 1, 3, 5, 7 }); + SetVr(op.vd, Add({ c, e0, e1 })); +} + +void PPUTranslator::VMSUMSHS(ppu_opcode_t op) +{ + // TODO (very rare) + /**/ return_ VEC3OP(VMSUMSHS); + //const auto a = GetVr(op.va, VrType::vi16); + //const auto b = GetVr(op.vb, VrType::vi16); + //const auto c = GetVr(op.vc, VrType::vi32); + //SetVr(op.vd, Call(GetType(), m_pure_attr, "__vmsumshs", a, b, c)); + //SetSat(Call(GetType(), m_pure_attr, "__vmsumshs_get_sat", a, b, c)); +} + +void PPUTranslator::VMSUMUBM(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi8, op.va, op.vb), GetType()); + const auto p = m_ir->CreateMul(ab[0], ab[1]); + const auto c = GetVr(op.vc, VrType::vi32); + const auto e0 = Shuffle(p, nullptr, { 0, 4, 8, 12 }); + const auto e1 = Shuffle(p, nullptr, { 1, 5, 9, 13 }); + const auto e2 = Shuffle(p, nullptr, { 2, 6, 10, 14 }); + const auto e3 = Shuffle(p, nullptr, { 3, 7, 11, 15 }); + 
SetVr(op.vd, Add({ c, e0, e1, e2, e3 })); +} + +void PPUTranslator::VMSUMUHM(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi16, op.va, op.vb)); + const auto p = m_ir->CreateMul(ab[0], ab[1]); + const auto c = GetVr(op.vc, VrType::vi32); + const auto e0 = Shuffle(p, nullptr, { 0, 2, 4, 6 }); + const auto e1 = Shuffle(p, nullptr, { 1, 3, 5, 7 }); + SetVr(op.vd, Add({ c, e0, e1 })); +} + +void PPUTranslator::VMSUMUHS(ppu_opcode_t op) +{ + // TODO (very rare) + /**/ return_ VEC3OP(VMSUMUHS); + //const auto a = GetVr(op.va, VrType::vi16); + //const auto b = GetVr(op.vb, VrType::vi16); + //const auto c = GetVr(op.vc, VrType::vi32); + //SetVr(op.vd, Call(GetType(), m_pure_attr, "__vmsumuhs", a, b, c)); + //SetSat(Call(GetType(), m_pure_attr, "__vmsumuhs_get_sat", a, b, c)); +} + +void PPUTranslator::VMULESB(ppu_opcode_t op) +{ + // TODO + const auto ab = SExt(Shuffle(GetVrs(VrType::vi8, op.va, op.vb), {}, { 0, 2, 4, 6, 8, 10, 12, 14 })); + SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); +} + +void PPUTranslator::VMULESH(ppu_opcode_t op) +{ + // TODO + const auto ab = SExt(Shuffle(GetVrs(VrType::vi16, op.va, op.vb), {}, { 0, 2, 4, 6 })); + SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); +} + +void PPUTranslator::VMULEUB(ppu_opcode_t op) +{ + // TODO + const auto ab = ZExt(Shuffle(GetVrs(VrType::vi8, op.va, op.vb), {}, { 0, 2, 4, 6, 8, 10, 12, 14 })); + SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); +} + +void PPUTranslator::VMULEUH(ppu_opcode_t op) +{ + // TODO + const auto ab = ZExt(Shuffle(GetVrs(VrType::vi16, op.va, op.vb), {}, { 0, 2, 4, 6 })); + SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); +} + +void PPUTranslator::VMULOSB(ppu_opcode_t op) +{ + // TODO + const auto ab = SExt(Shuffle(GetVrs(VrType::vi8, op.va, op.vb), {}, { 1, 3, 5, 7, 9, 11, 13, 15 })); + SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); +} + +void PPUTranslator::VMULOSH(ppu_opcode_t op) +{ + // TODO + const auto ab = SExt(Shuffle(GetVrs(VrType::vi16, op.va, op.vb), {}, { 1, 3, 5, 7 })); + 
SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); +} + +void PPUTranslator::VMULOUB(ppu_opcode_t op) +{ + // TODO + const auto ab = ZExt(Shuffle(GetVrs(VrType::vi8, op.va, op.vb), {}, { 1, 3, 5, 7, 9, 11, 13, 15 })); + SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); +} + +void PPUTranslator::VMULOUH(ppu_opcode_t op) +{ + // TODO + const auto ab = ZExt(Shuffle(GetVrs(VrType::vi16, op.va, op.vb), {}, { 1, 3, 5, 7 })); + SetVr(op.vd, m_ir->CreateMul(ab[0], ab[1])); +} + +void PPUTranslator::VNMSUBFP(ppu_opcode_t op) +{ + const auto acb = GetVrs(VrType::vf, op.va, op.vc, op.vb); + SetVr(op.vd, m_ir->CreateFSub(acb[2], m_ir->CreateFMul(acb[0], acb[1]))); + //SetVr(op.vd, m_ir->CreateFNeg(Call(GetType(), "llvm.fmuladd.v4f32", acb[0], acb[1], m_ir->CreateFNeg(acb[2])))); +} + +void PPUTranslator::VNOR(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateNot(m_ir->CreateOr(ab[0], ab[1]))); +} + +void PPUTranslator::VOR(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateOr(ab[0], ab[1])); +} + +void PPUTranslator::VPERM(ppu_opcode_t op) +{ + const auto abc = GetVrs(VrType::vi8, op.va, op.vb, op.vc); + SetVr(op.vd, Call(GetType(), m_pure_attr, "__vperm", abc[0], abc[1], abc[2])); +} + +void PPUTranslator::VPKPX(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto px = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7 }); + const auto e1 = m_ir->CreateLShr(m_ir->CreateAnd(px, 0x01f80000), 9); + const auto e2 = m_ir->CreateLShr(m_ir->CreateAnd(px, 0xf800), 6); + const auto e3 = m_ir->CreateLShr(m_ir->CreateAnd(px, 0xf8), 3); + SetVr(op.vd, m_ir->CreateOr(m_ir->CreateOr(e1, e2), e3)); +} + +void PPUTranslator::VPKSHSS(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }); + const auto saturated = SaturateSigned(src, -0x80, 0x7f); + 
SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VPKSHUS(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }); + const auto saturated = SaturateSigned(src, 0, 0xff); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VPKSWSS(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7 }); + const auto saturated = SaturateSigned(src, -0x8000, 0x7fff); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VPKSWUS(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7 }); + const auto saturated = SaturateSigned(src, 0, 0xffff); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VPKUHUM(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }); + SetVr(op.vd, src); // Truncate +} + +void PPUTranslator::VPKUHUS(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }); + const auto saturated = Saturate(src, ICmpInst::ICMP_UGT, m_ir->getInt16(0xff)); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VPKUWUM(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 3, 4, 5, 6, 7 }); + SetVr(op.vd, src); // Truncate +} + +void PPUTranslator::VPKUWUS(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto src = Shuffle(ab[0], ab[1], { 0, 1, 2, 
3, 4, 5, 6, 7 }); + const auto saturated = Saturate(src, ICmpInst::ICMP_UGT, m_ir->getInt32(0xffff)); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VREFP(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), m_pure_attr, "__vrefp", GetVr(op.vb, VrType::vf))); +} + +void PPUTranslator::VRFIM(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), "llvm.floor.v4f32", GetVr(op.vb, VrType::vf))); +} + +void PPUTranslator::VRFIN(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), "llvm.nearbyint.v4f32", GetVr(op.vb, VrType::vf))); +} + +void PPUTranslator::VRFIP(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), "llvm.ceil.v4f32", GetVr(op.vb, VrType::vf))); +} + +void PPUTranslator::VRFIZ(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), "llvm.trunc.v4f32", GetVr(op.vb, VrType::vf))); +} + +void PPUTranslator::VRLB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + SetVr(op.vd, RotateLeft(ab[0], ab[1])); +} + +void PPUTranslator::VRLH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, RotateLeft(ab[0], ab[1])); +} + +void PPUTranslator::VRLW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, RotateLeft(ab[0], ab[1])); +} + +void PPUTranslator::VRSQRTEFP(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), m_pure_attr, "__vrsqrtefp", GetVr(op.vb, VrType::vf))); +} + +void PPUTranslator::VSEL(ppu_opcode_t op) +{ + const auto abc = GetVrs(VrType::vi32, op.va, op.vb, op.vc); + SetVr(op.vd, m_ir->CreateOr(m_ir->CreateAnd(abc[1], abc[2]), m_ir->CreateAnd(abc[0], m_ir->CreateNot(abc[2])))); +} + +void PPUTranslator::VSL(ppu_opcode_t op) +{ + // TODO (very rare) + SetVr(op.vd, m_ir->CreateShl(GetVr(op.va, VrType::i128), m_ir->CreateAnd(GetVr(op.vb, VrType::i128), 7))); +} + +void PPUTranslator::VSLB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + SetVr(op.vd, m_ir->CreateShl(ab[0], m_ir->CreateAnd(ab[1], 7))); 
+} + +void PPUTranslator::VSLDOI(ppu_opcode_t op) +{ + if (op.vsh == 0) + { + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, ab[0]); + } + else if ((op.vsh % 4) == 0) + { + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto s = op.vsh / 4; + SetVr(op.vd, Shuffle(ab[0], ab[1], { s, s + 1, s + 2, s + 3 })); + } + else if ((op.vsh % 2) == 0) + { + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + const auto s = op.vsh / 2; + SetVr(op.vd, Shuffle(ab[0], ab[1], { s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7 })); + } + else + { + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + const auto s = op.vsh; + SetVr(op.vd, Shuffle(ab[0], ab[1], { s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7, s + 8, s + 9, s + 10, s + 11, s + 12, s + 13, s + 14, s + 15 })); + } +} + +void PPUTranslator::VSLH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, m_ir->CreateShl(ab[0], m_ir->CreateAnd(ab[1], 15))); +} + +void PPUTranslator::VSLO(ppu_opcode_t op) +{ + // TODO (rare) + SetVr(op.vd, m_ir->CreateShl(GetVr(op.va, VrType::i128), m_ir->CreateShl(m_ir->CreateAnd(GetVr(op.vb, VrType::i128), 15), 3))); +} + +void PPUTranslator::VSLW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateShl(ab[0], m_ir->CreateAnd(ab[1], 31))); +} + +void PPUTranslator::VSPLTB(ppu_opcode_t op) +{ + const u32 ui = op.vuimm & 0xf; + SetVr(op.vd, Shuffle(GetVr(op.vb, VrType::vi8), nullptr, { ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui })); +} + +void PPUTranslator::VSPLTH(ppu_opcode_t op) +{ + const u32 ui = op.vuimm & 0x7; + SetVr(op.vd, Shuffle(GetVr(op.vb, VrType::vi16), nullptr, { ui, ui, ui, ui, ui, ui, ui, ui })); +} + +void PPUTranslator::VSPLTISB(ppu_opcode_t op) +{ + SetVr(op.vd, ConstantVector::getSplat(16, m_ir->getInt8(op.vsimm))); +} + +void PPUTranslator::VSPLTISH(ppu_opcode_t op) +{ + SetVr(op.vd, ConstantVector::getSplat(8, 
m_ir->getInt16(op.vsimm))); +} + +void PPUTranslator::VSPLTISW(ppu_opcode_t op) +{ + SetVr(op.vd, ConstantVector::getSplat(4, m_ir->getInt32(op.vsimm))); +} + +void PPUTranslator::VSPLTW(ppu_opcode_t op) +{ + const u32 ui = op.vuimm & 0x3; + SetVr(op.vd, Shuffle(GetVr(op.vb, VrType::vi32), nullptr, { ui, ui, ui, ui })); +} + +void PPUTranslator::VSR(ppu_opcode_t op) +{ + // TODO (very rare) + SetVr(op.vd, m_ir->CreateLShr(GetVr(op.va, VrType::i128), m_ir->CreateAnd(GetVr(op.vb, VrType::i128), 7))); +} + +void PPUTranslator::VSRAB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + SetVr(op.vd, m_ir->CreateAShr(ab[0], m_ir->CreateAnd(ab[1], 7))); +} + +void PPUTranslator::VSRAH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, m_ir->CreateAShr(ab[0], m_ir->CreateAnd(ab[1], 15))); +} + +void PPUTranslator::VSRAW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateAShr(ab[0], m_ir->CreateAnd(ab[1], 31))); +} + +void PPUTranslator::VSRB(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + SetVr(op.vd, m_ir->CreateLShr(ab[0], m_ir->CreateAnd(ab[1], 7))); +} + +void PPUTranslator::VSRH(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, m_ir->CreateLShr(ab[0], m_ir->CreateAnd(ab[1], 15))); +} + +void PPUTranslator::VSRO(ppu_opcode_t op) +{ + // TODO (very rare) + SetVr(op.vd, m_ir->CreateLShr(GetVr(op.va, VrType::i128), m_ir->CreateShl(m_ir->CreateAnd(GetVr(op.vb, VrType::i128), 15), 3))); +} + +void PPUTranslator::VSRW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateLShr(ab[0], m_ir->CreateAnd(ab[1], 31))); +} + +void PPUTranslator::VSUBCUW(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, ZExt(m_ir->CreateICmpUGE(ab[0], ab[1]), GetType())); +} + +void PPUTranslator::VSUBFP(ppu_opcode_t op) +{ + const auto ab = 
GetVrs(VrType::vf, op.va, op.vb); + SetVr(op.vd, m_ir->CreateFSub(ab[0], ab[1])); +} + +void PPUTranslator::VSUBSBS(ppu_opcode_t op) +{ + const auto ab = SExt(GetVrs(VrType::vi8, op.va, op.vb)); + const auto result = m_ir->CreateSub(ab[0], ab[1]); + const auto saturated = SaturateSigned(result, -0x80, 0x7f); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VSUBSHS(ppu_opcode_t op) +{ + const auto ab = SExt(GetVrs(VrType::vi16, op.va, op.vb)); + const auto result = m_ir->CreateSub(ab[0], ab[1]); + const auto saturated = SaturateSigned(result, -0x8000, 0x7fff); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VSUBSWS(ppu_opcode_t op) +{ + const auto ab = SExt(GetVrs(VrType::vi32, op.va, op.vb)); + const auto result = m_ir->CreateSub(ab[0], ab[1]); + const auto saturated = SaturateSigned(result, -0x80000000ll, 0x7fffffff); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VSUBUBM(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi8, op.va, op.vb); + SetVr(op.vd, m_ir->CreateSub(ab[0], ab[1])); +} + +void PPUTranslator::VSUBUBS(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi8, op.va, op.vb)); + const auto result = m_ir->CreateSub(ab[0], ab[1]); + const auto saturated = Saturate(result, ICmpInst::ICMP_SLT, m_ir->getInt16(0)); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VSUBUHM(ppu_opcode_t op) +{ + const auto ab = GetVrs(VrType::vi16, op.va, op.vb); + SetVr(op.vd, m_ir->CreateSub(ab[0], ab[1])); +} + +void PPUTranslator::VSUBUHS(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi16, op.va, op.vb)); + const auto result = m_ir->CreateSub(ab[0], ab[1]); + const auto saturated = Saturate(result, ICmpInst::ICMP_SLT, m_ir->getInt32(0)); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VSUBUWM(ppu_opcode_t 
op) +{ + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateSub(ab[0], ab[1])); +} + +void PPUTranslator::VSUBUWS(ppu_opcode_t op) +{ + const auto ab = ZExt(GetVrs(VrType::vi32, op.va, op.vb)); + const auto result = m_ir->CreateSub(ab[0], ab[1]); + const auto saturated = Saturate(result, ICmpInst::ICMP_SLT, m_ir->getInt64(0)); + SetVr(op.vd, saturated.first); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VSUMSWS(ppu_opcode_t op) +{ + // TODO (rare) + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto a = SExt(ab[0]); + const auto b = SExt(m_ir->CreateExtractElement(ab[1], m_ir->getInt32(m_is_be ? 3 : 0))); + const auto e0 = m_ir->CreateExtractElement(a, m_ir->getInt32(0)); + const auto e1 = m_ir->CreateExtractElement(a, m_ir->getInt32(1)); + const auto e2 = m_ir->CreateExtractElement(a, m_ir->getInt32(2)); + const auto e3 = m_ir->CreateExtractElement(a, m_ir->getInt32(3)); + const auto saturated = SaturateSigned(Add({ b, e0, e1, e2, e3 }), -0x80000000ll, 0x7fffffff); + SetVr(op.vd, ZExt(m_ir->CreateAnd(saturated.first, 0xffffffff))); + SetSat(saturated.second); +} + +void PPUTranslator::VSUM2SWS(ppu_opcode_t op) +{ + // TODO (rare) + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + const auto b = SExt(Shuffle(ab[1], nullptr, { 1, 3 })); + const auto a = SExt(ab[0]); + const auto e0 = Shuffle(a, nullptr, { 0, 2 }); + const auto e1 = Shuffle(a, nullptr, { 1, 3 }); + const auto saturated = SaturateSigned(Add({ b, e0, e1 }), -0x80000000ll, 0x7fffffff); + SetVr(op.vd, m_ir->CreateAnd(saturated.first, 0xffffffff)); + SetSat(IsNotZero(saturated.second)); +} + +void PPUTranslator::VSUM4SBS(ppu_opcode_t op) +{ + // TODO (very rare) + /**/ return_ VEC2OP(VSUM4SBS); + //const auto a = GetVr(op.va, VrType::vi8); + //const auto b = GetVr(op.vb, VrType::vi32); + //SetVr(op.vd, Call(GetType(), m_pure_attr, "__vsum4sbs", a, b)); + //SetSat(Call(GetType(), m_pure_attr, "__vsum4sbs_get_sat", a, b)); +} + +void 
PPUTranslator::VSUM4SHS(ppu_opcode_t op) +{ + // TODO (very rare) + /**/ return_ VEC2OP(VSUM4SHS); + //const auto a = GetVr(op.va, VrType::vi16); + //const auto b = GetVr(op.vb, VrType::vi32); + //SetVr(op.vd, Call(GetType(), m_pure_attr, "__vsum4shs", a, b)); + //SetSat(Call(GetType(), m_pure_attr, "__vsum4shs_get_sat", a, b)); +} + +void PPUTranslator::VSUM4UBS(ppu_opcode_t op) +{ + // TODO + const auto a = ZExt(GetVr(op.va, VrType::vi8), GetType()); + const auto b = GetVr(op.vb, VrType::vi32); + const auto e0 = Shuffle(a, nullptr, { 0, 4, 8, 12 }); + const auto e1 = Shuffle(a, nullptr, { 1, 5, 9, 13 }); + const auto e2 = Shuffle(a, nullptr, { 2, 6, 10, 14 }); + const auto e3 = Shuffle(a, nullptr, { 3, 7, 11, 15 }); + const auto r = Add({ b, e0, e1, e2, e3 }); // Summ, (e0+e1+e2+e3) is small + const auto s = m_ir->CreateICmpULT(r, b); // Carry (saturation) + SetVr(op.vd, m_ir->CreateSelect(s, ConstantVector::getSplat(4, m_ir->getInt32(0xffffffff)), r)); + SetSat(IsNotZero(s)); +} + +#define UNPACK_PIXEL_OP(px) m_ir->CreateOr(m_ir->CreateAnd(px, 0xff00001f), m_ir->CreateOr(m_ir->CreateAnd(m_ir->CreateShl(px, 6), 0x1f0000), m_ir->CreateAnd(m_ir->CreateShl(px, 3), 0x1f00))) + +void PPUTranslator::VUPKHPX(ppu_opcode_t op) +{ + const auto px = SExt(Shuffle(GetVr(op.vb, VrType::vi16), nullptr, { 0, 1, 2, 3 })); + SetVr(op.vd, UNPACK_PIXEL_OP(px)); +} + +void PPUTranslator::VUPKHSB(ppu_opcode_t op) +{ + SetVr(op.vd, SExt(Shuffle(GetVr(op.vb, VrType::vi8), nullptr, { 0, 1, 2, 3, 4, 5, 6, 7 }))); +} + +void PPUTranslator::VUPKHSH(ppu_opcode_t op) +{ + SetVr(op.vd, SExt(Shuffle(GetVr(op.vb, VrType::vi16), nullptr, { 0, 1, 2, 3 }))); +} + +void PPUTranslator::VUPKLPX(ppu_opcode_t op) +{ + const auto px = SExt(Shuffle(GetVr(op.vb, VrType::vi16), nullptr, { 4, 5, 6, 7 })); + SetVr(op.vd, UNPACK_PIXEL_OP(px)); +} + +void PPUTranslator::VUPKLSB(ppu_opcode_t op) +{ + SetVr(op.vd, SExt(Shuffle(GetVr(op.vb, VrType::vi8), nullptr, { 8, 9, 10, 11, 12, 13, 14, 15 }))); +} + +void 
PPUTranslator::VUPKLSH(ppu_opcode_t op) +{ + SetVr(op.vd, SExt(Shuffle(GetVr(op.vb, VrType::vi16), nullptr, { 4, 5, 6, 7 }))); +} + +void PPUTranslator::VXOR(ppu_opcode_t op) +{ + if (op.va == op.vb) + { + // Assign zero, break dependencies + SetVr(op.vd, ConstantVector::getSplat(4, m_ir->getInt32(0))); + return; + } + + const auto ab = GetVrs(VrType::vi32, op.va, op.vb); + SetVr(op.vd, m_ir->CreateXor(ab[0], ab[1])); +} + +void PPUTranslator::TDI(ppu_opcode_t op) +{ + UseCondition(CheckTrapCondition(op.bo, GetGpr(op.ra), m_ir->getInt64(op.simm16))); + Trap(m_current_addr); +} + +void PPUTranslator::TWI(ppu_opcode_t op) +{ + UseCondition(CheckTrapCondition(op.bo, GetGpr(op.ra, 32), m_ir->getInt32(op.simm16))); + Trap(m_current_addr); +} + +void PPUTranslator::MULLI(ppu_opcode_t op) +{ + SetGpr(op.rd, m_ir->CreateMul(GetGpr(op.ra), m_ir->getInt64(op.simm16))); +} + +void PPUTranslator::SUBFIC(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto imm = m_ir->getInt64(op.simm16); + const auto result = m_ir->CreateSub(imm, a); + SetGpr(op.rd, result); + SetCarry(m_ir->CreateICmpULE(result, imm)); + //SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", m_ir->CreateNot(a), imm, m_ir->getTrue())); +} + +void PPUTranslator::CMPLI(ppu_opcode_t op) +{ + SetCrFieldUnsignedCmp(op.crfd, GetGpr(op.ra, op.l10 ? 64 : 32), op.l10 ? m_ir->getInt64(op.uimm16) : m_ir->getInt32(op.uimm16)); +} + +void PPUTranslator::CMPI(ppu_opcode_t op) +{ + SetCrFieldSignedCmp(op.crfd, GetGpr(op.ra, op.l10 ? 64 : 32), op.l10 ? 
m_ir->getInt64(op.simm16) : m_ir->getInt32(op.simm16)); +} + +void PPUTranslator::ADDIC(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto imm = m_ir->getInt64(op.simm16); + const auto result = m_ir->CreateAdd(a, imm); + SetGpr(op.rd, result); + SetCarry(m_ir->CreateICmpULT(result, imm)); + //SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, imm, m_ir->getFalse())); + if (op.main & 1) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::ADDI(ppu_opcode_t op) +{ + const auto imm = m_ir->getInt64(op.simm16); + SetGpr(op.rd, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm); +} + +void PPUTranslator::ADDIS(ppu_opcode_t op) +{ + const auto imm = m_ir->getInt64(op.simm16 << 16); + SetGpr(op.rd, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm); +} + +void PPUTranslator::BC(ppu_opcode_t op) +{ + const u64 target = ppu_branch_target(op.aa ? 0 : m_current_addr, op.simm16); + + const auto cond = CheckBranchCondition(op.bo, op.bi); + + if ((target > m_start_addr && target < m_end_addr) || (target == m_start_addr && !op.lk)) + { + // Local branch + + if (op.lk) + { + CompilationError("BCL: local branch"); + Call(GetType(), "__trace", m_ir->getInt64(m_current_addr)); + m_ir->CreateStore(m_ir->getInt64(m_current_addr + 4), m_reg_lr); + } + else if (cond) + { + m_ir->CreateCondBr(cond, GetBasicBlock(target), GetBasicBlock(m_current_addr + 4)); + return; + } + else + { + m_ir->CreateBr(GetBasicBlock(target)); + return; + } + } + + // External branch + UseCondition(cond); + CallFunction(target, !op.lk); +} + +void PPUTranslator::HACK(ppu_opcode_t op) +{ + Call(GetType(), "__hlecall", m_ir->getInt32(op.opcode & 0x3ffffff)); + UndefineVolatileRegisters(); +} + +void PPUTranslator::SC(ppu_opcode_t op) +{ + Call(GetType(), fmt::format(op.lev == 0 ? 
"__syscall" : "__lv%ucall", +op.lev), m_ir->CreateLoad(m_gpr[11])); + UndefineVolatileRegisters(); +} + +void PPUTranslator::B(ppu_opcode_t op) +{ + const u64 target = ppu_branch_target(op.aa ? 0 : m_current_addr, op.ll); + + if ((target > m_start_addr && target < m_end_addr) || (target == m_start_addr && !op.lk)) + { + // Local branch + + if (op.lk) + { + CompilationError("BL: local branch"); + Call(GetType(), "__trace", m_ir->getInt64(m_current_addr)); + m_ir->CreateStore(m_ir->getInt64(m_current_addr + 4), m_reg_lr); + } + else + { + m_ir->CreateBr(GetBasicBlock(target)); + return; + } + } + + // External branch or recursive call + CallFunction(target, !op.lk); +} + +void PPUTranslator::MCRF(ppu_opcode_t op) +{ + const auto le = GetCrb(op.crfs * 4 + 0); + const auto ge = GetCrb(op.crfs * 4 + 1); + const auto eq = GetCrb(op.crfs * 4 + 2); + const auto so = GetCrb(op.crfs * 4 + 3); + SetCrField(op.crfd, le, ge, eq, so); +} + +void PPUTranslator::BCLR(ppu_opcode_t op) +{ + UseCondition(CheckBranchCondition(op.bo, op.bi)); + + if (op.lk) + { + // Sort of indirect call + CallFunction(0, false, m_ir->CreateLoad(m_reg_lr)); + } + else + { + // Simple return + m_ir->CreateRetVoid(); + } +} + +void PPUTranslator::CRNOR(ppu_opcode_t op) +{ + SetCrb(op.crbd, m_ir->CreateNot(m_ir->CreateOr(GetCrb(op.crba), GetCrb(op.crbb)))); +} + +void PPUTranslator::CRANDC(ppu_opcode_t op) +{ + SetCrb(op.crbd, m_ir->CreateAnd(GetCrb(op.crba), m_ir->CreateNot(GetCrb(op.crbb)))); +} + +void PPUTranslator::ISYNC(ppu_opcode_t op) +{ + m_ir->CreateFence(AtomicOrdering::SequentiallyConsistent); +} + +void PPUTranslator::CRXOR(ppu_opcode_t op) +{ + SetCrb(op.crbd, m_ir->CreateXor(GetCrb(op.crba), GetCrb(op.crbb))); +} + +void PPUTranslator::DCBI(ppu_opcode_t op) +{ +} + +void PPUTranslator::CRNAND(ppu_opcode_t op) +{ + SetCrb(op.crbd, m_ir->CreateNot(m_ir->CreateAnd(GetCrb(op.crba), GetCrb(op.crbb)))); +} + +void PPUTranslator::CRAND(ppu_opcode_t op) +{ + SetCrb(op.crbd, 
m_ir->CreateAnd(GetCrb(op.crba), GetCrb(op.crbb))); +} + +void PPUTranslator::CREQV(ppu_opcode_t op) +{ + SetCrb(op.crbd, m_ir->CreateNot(m_ir->CreateXor(GetCrb(op.crba), GetCrb(op.crbb)))); +} + +void PPUTranslator::CRORC(ppu_opcode_t op) +{ + SetCrb(op.crbd, m_ir->CreateOr(GetCrb(op.crba), m_ir->CreateNot(GetCrb(op.crbb)))); +} + +void PPUTranslator::CROR(ppu_opcode_t op) +{ + SetCrb(op.crbd, m_ir->CreateOr(GetCrb(op.crba), GetCrb(op.crbb))); +} + +void PPUTranslator::BCCTR(ppu_opcode_t op) +{ + UseCondition(CheckBranchCondition(op.bo | 0x4, op.bi)); + + const auto jt_addr = m_current_addr + 4; + const auto jt_data = m_bin + 1; + + // Detect a possible jumptable + for (u64 i = 0, addr = jt_addr; addr < m_end_addr; i++, addr += sizeof(u32)) + { + const u64 target = jt_addr + static_cast(jt_data[i]); + + // Check jumptable entry conditions + if (target % 4 || target < m_start_addr || target >= m_end_addr) + { + if (i >= 2) + { + // Fix next instruction address + m_current_addr = addr; + + if (!op.lk) + { + // Get sorted set of possible targets + const std::set cases(jt_data, jt_data + i); + + // Create switch with special default case + const auto _default = BasicBlock::Create(m_context, fmt::format("loc_%llx.def", m_current_addr/* - m_start_addr*/), m_function); + const auto _switch = m_ir->CreateSwitch(m_ir->CreateLoad(m_reg_ctr), _default, ::size32(cases)); + + for (const s32 offset : cases) + { + const u64 target = jt_addr + offset; + _switch->addCase(m_ir->getInt64(target), GetBasicBlock(target)); + } + + m_ir->SetInsertPoint(_default); + Trap(m_current_addr); + return; + } + else + { + CompilationError("BCCTRL with a jt"); + } + } + + break; + } + } + + if (!op.lk) + { + // Indirect branch + m_ir->CreateBr(m_jtr); + } + else + { + // Indirect call + CallFunction(0, false, m_ir->CreateLoad(m_reg_ctr)); + } +} + +void PPUTranslator::RLWIMI(ppu_opcode_t op) +{ + const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32); + Value* result; + + if (op.mb32 <= 
op.me32) + { + if (op.mb32 == 0 && op.me32 == 31) + { + result = RotateLeft(GetGpr(op.rs, 32), op.sh32); + } + else if (op.mb32 == 0 && op.sh32 == 31 - op.me32) + { + result = m_ir->CreateShl(GetGpr(op.rs, 32), op.sh32); + } + else if (op.me32 == 31 && op.sh32 == 32 - op.mb32) + { + result = m_ir->CreateLShr(GetGpr(op.rs, 32), 32 - op.sh32); + } + else if (op.mb32 == 0 && op.sh32 < 31 - op.me32) + { + // INSLWI and other possible mnemonics + result = m_ir->CreateAnd(m_ir->CreateShl(GetGpr(op.rs, 32), op.sh32), mask); + } + else if (op.me32 == 31 && 32 - op.sh32 < op.mb32) + { + // INSRWI and other possible mnemonics + result = m_ir->CreateAnd(m_ir->CreateLShr(GetGpr(op.rs, 32), 32 - op.sh32), mask); + } + else + { + // Generic op + result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs, 32), op.sh32), mask); + } + + // Extend 32-bit op result + result = ZExt(result); + } + else + { + // Full 64-bit op with duplication + result = m_ir->CreateAnd(RotateLeft(DuplicateExt(GetGpr(op.rs, 32)), op.sh32), mask); + } + + if (mask != -1) + { + // Insertion + result = m_ir->CreateOr(result, m_ir->CreateAnd(GetGpr(op.ra), ~mask)); + } + + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::RLWINM(ppu_opcode_t op) +{ + const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32); + Value* result; + + if (op.mb32 <= op.me32) + { + if (op.mb32 == 0 && op.me32 == 31) + { + // ROTLWI, ROTRWI mnemonics + result = RotateLeft(GetGpr(op.rs, 32), op.sh32); + } + else if (op.mb32 == 0 && op.sh32 == 31 - op.me32) + { + // SLWI mnemonic + result = m_ir->CreateShl(GetGpr(op.rs, 32), op.sh32); + } + else if (op.me32 == 31 && op.sh32 == 32 - op.mb32) + { + // SRWI mnemonic + result = m_ir->CreateLShr(GetGpr(op.rs, 32), 32 - op.sh32); + } + else if (op.mb32 == 0 && op.sh32 < 31 - op.me32) + { + // EXTLWI and other possible mnemonics + result = m_ir->CreateAnd(m_ir->CreateShl(GetGpr(op.rs, 32), op.sh32), mask); + } + else if (op.me32 == 31 
&& 32 - op.sh32 < op.mb32) + { + // EXTRWI and other possible mnemonics + result = m_ir->CreateAnd(m_ir->CreateLShr(GetGpr(op.rs, 32), 32 - op.sh32), mask); + } + else + { + // Generic op, including CLRLWI, CLRRWI mnemonics + result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs, 32), op.sh32), mask); + } + + // Extend 32-bit op result + result = ZExt(result); + } + else + { + // Full 64-bit op with duplication + result = m_ir->CreateAnd(RotateLeft(DuplicateExt(GetGpr(op.rs, 32)), op.sh32), mask); + } + + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::RLWNM(ppu_opcode_t op) +{ + const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32); + Value* result; + + if (op.mb32 <= op.me32) + { + if (op.mb32 == 0 && op.me32 == 31) + { + // ROTLW mnemonic + result = RotateLeft(GetGpr(op.rs, 32), GetGpr(op.rb, 32)); + } + else + { + // Generic op + result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs, 32), GetGpr(op.rb, 32)), mask); + } + + // Extend 32-bit op result + result = ZExt(result); + } + else + { + // Full 64-bit op with duplication + result = m_ir->CreateAnd(RotateLeft(DuplicateExt(GetGpr(op.rs, 32)), GetGpr(op.rb)), mask); + } + + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::ORI(ppu_opcode_t op) +{ + SetGpr(op.ra, m_ir->CreateOr(GetGpr(op.rs), op.uimm16)); +} + +void PPUTranslator::ORIS(ppu_opcode_t op) +{ + SetGpr(op.ra, m_ir->CreateOr(GetGpr(op.rs), op.uimm16 << 16)); +} + +void PPUTranslator::XORI(ppu_opcode_t op) +{ + SetGpr(op.ra, m_ir->CreateXor(GetGpr(op.rs), op.uimm16)); +} + +void PPUTranslator::XORIS(ppu_opcode_t op) +{ + SetGpr(op.ra, m_ir->CreateXor(GetGpr(op.rs), op.uimm16 << 16)); +} + +void PPUTranslator::ANDI(ppu_opcode_t op) +{ + const auto result = m_ir->CreateAnd(GetGpr(op.rs), op.uimm16); + SetGpr(op.ra, result); + SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::ANDIS(ppu_opcode_t op) +{ + 
const auto result = m_ir->CreateAnd(GetGpr(op.rs), op.uimm16 << 16); + SetGpr(op.ra, result); + SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::RLDICL(ppu_opcode_t op) +{ + const u32 sh = op.sh64; + const u32 mb = op.mbe64; + const u64 mask = ~0ull >> mb; + Value* result; + + if (64 - sh < mb) + { + // EXTRDI and other possible mnemonics + result = m_ir->CreateAnd(m_ir->CreateLShr(GetGpr(op.rs), 64 - sh), mask); + } + else if (64 - sh == mb) + { + // SRDI mnemonic + result = m_ir->CreateLShr(GetGpr(op.rs), 64 - sh); + } + else + { + // Generic op, including CLRLDI mnemonic + result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), sh), mask); + } + + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::RLDICR(ppu_opcode_t op) +{ + const u32 sh = op.sh64; + const u32 me = op.mbe64; + const u64 mask = ~0ull << (63 - me); + Value* result; + + if (sh < 63 - me) + { + // EXTLDI and other possible mnemonics + result = m_ir->CreateAnd(m_ir->CreateShl(GetGpr(op.rs), sh), mask); + } + else if (sh == 63 - me) + { + // SLDI mnemonic + result = m_ir->CreateShl(GetGpr(op.rs), sh); + } + else + { + // Generic op, including CLRRDI mnemonic + result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), sh), mask); + } + + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::RLDIC(ppu_opcode_t op) +{ + const u32 sh = op.sh64; + const u32 mb = op.mbe64; + const u64 mask = ppu_rotate_mask(mb, 63 - sh); + Value* result; + + if (mb == 0 && sh == 0) + { + result = GetGpr(op.rs); + } + else if (mb <= 63 - sh) + { + // CLRLSLDI and other possible mnemonics + result = m_ir->CreateAnd(m_ir->CreateShl(GetGpr(op.rs), sh), mask); + } + else + { + // Generic op + result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), sh), mask); + } + + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::RLDIMI(ppu_opcode_t 
op) +{ + const u32 sh = op.sh64; + const u32 mb = op.mbe64; + const u64 mask = ppu_rotate_mask(mb, 63 - sh); + Value* result; + + if (mb == 0 && sh == 0) + { + result = GetGpr(op.rs); + } + else if (mb <= 63 - sh) + { + // INSRDI and other possible mnemonics + result = m_ir->CreateAnd(m_ir->CreateShl(GetGpr(op.rs), sh), mask); + } + else + { + // Generic op + result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), sh), mask); + } + + if (mask != -1) + { + // Insertion + result = m_ir->CreateOr(result, m_ir->CreateAnd(GetGpr(op.ra), ~mask)); + } + + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::RLDCL(ppu_opcode_t op) +{ + const u32 mb = op.mbe64; + const u64 mask = ~0ull >> mb; + + const auto result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), GetGpr(op.rb)), mask); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::RLDCR(ppu_opcode_t op) +{ + const u32 me = op.mbe64; + const u64 mask = ~0ull << (63 - me); + + const auto result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), GetGpr(op.rb)), mask); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::CMP(ppu_opcode_t op) +{ + SetCrFieldSignedCmp(op.crfd, GetGpr(op.ra, op.l10 ? 64 : 32), GetGpr(op.rb, op.l10 ? 64 : 32)); +} + +void PPUTranslator::TW(ppu_opcode_t op) +{ + UseCondition(CheckTrapCondition(op.bo, GetGpr(op.ra, 32), GetGpr(op.rb, 32))); + Trap(m_current_addr); +} + +void PPUTranslator::LVSL(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), m_pure_attr, "__lvsl", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); +} + +void PPUTranslator::LVEBX(ppu_opcode_t op) +{ + const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb); + const auto pos = m_ir->CreateXor(m_ir->CreateAnd(addr, 15), m_is_be ? 
0 : 15); + SetVr(op.vd, m_ir->CreateInsertElement(ConstantVector::getSplat(16, m_ir->getInt8(0)), ReadMemory(addr, GetType()), pos)); +} + +void PPUTranslator::SUBFC(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto b = GetGpr(op.rb); + const auto result = m_ir->CreateSub(b, a); + SetGpr(op.rd, result); + SetCarry(m_ir->CreateICmpULE(result, b)); + //SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", m_ir->CreateNot(a), b, m_ir->getTrue())); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__subfc_get_ov", a, b)); +} + +void PPUTranslator::ADDC(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto b = GetGpr(op.rb); + const auto result = m_ir->CreateAdd(a, b); + SetGpr(op.rd, result); + SetCarry(m_ir->CreateICmpULT(result, b)); + //SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, b, m_ir->getFalse())); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__addc_get_ov", a, b)); +} + +void PPUTranslator::MULHDU(ppu_opcode_t op) +{ + const auto a = ZExt(GetGpr(op.ra)); + const auto b = ZExt(GetGpr(op.rb)); + const auto result = Trunc(m_ir->CreateLShr(m_ir->CreateMul(a, b), 64)); + SetGpr(op.rd, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::MULHWU(ppu_opcode_t op) +{ + const auto a = ZExt(GetGpr(op.ra, 32)); + const auto b = ZExt(GetGpr(op.rb, 32)); + SetGpr(op.rd, m_ir->CreateLShr(m_ir->CreateMul(a, b), 32)); + if (op.rc) SetCrField(0, GetUndef(), GetUndef(), GetUndef()); +} + +void PPUTranslator::MFOCRF(ppu_opcode_t op) +{ + if (op.l11) + { + // MFOCRF + + const u64 pos = countLeadingZeros(op.crm, ZB_Width) - 24; + + if (pos >= 8 || 0x80 >> pos != op.crm) + { + CompilationError("MFOCRF: Undefined behaviour"); + SetGpr(op.rd, UndefValue::get(GetType())); + return; + } + } + else + { + // MFCR + } + + Value* result{}; + for (u32 i = 0; i < 8; 
i++) + { + if (!op.l11 || op.crm & (128 >> i)) + { + for (u32 b = i * 4; b < i * 4 + 4; b++) + { + const auto value = m_ir->CreateShl(ZExt(GetCrb(b), GetType()), 31 - b); + result = result ? m_ir->CreateOr(result, value) : value; + } + } + } + + SetGpr(op.rd, result); +} + +void PPUTranslator::LWARX(ppu_opcode_t op) +{ + SetGpr(op.rd, Call(GetType(), "__lwarx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); +} + +void PPUTranslator::LDX(ppu_opcode_t op) +{ + SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType())); +} + +void PPUTranslator::LWZX(ppu_opcode_t op) +{ + SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType())); +} + +void PPUTranslator::SLW(ppu_opcode_t op) +{ + const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x3f); + const auto shift_res = m_ir->CreateShl(GetGpr(op.rs), shift_num); + const auto result = m_ir->CreateAnd(shift_res, 0xffffffff); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::CNTLZW(ppu_opcode_t op) +{ + const auto result = Call(GetType(), "llvm.ctlz.i32", GetGpr(op.rs, 32), m_ir->getFalse()); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt32(0)); +} + +void PPUTranslator::SLD(ppu_opcode_t op) +{ + const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x7f); + const auto shift_arg = GetGpr(op.rs); + const auto result = Trunc(m_ir->CreateShl(ZExt(shift_arg), ZExt(shift_num))); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::AND(ppu_opcode_t op) +{ + const auto result = op.rs == op.rb ? GetGpr(op.rs) : m_ir->CreateAnd(GetGpr(op.rs), GetGpr(op.rb)); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::CMPL(ppu_opcode_t op) +{ + SetCrFieldUnsignedCmp(op.crfd, GetGpr(op.ra, op.l10 ? 
64 : 32), GetGpr(op.rb, op.l10 ? 64 : 32)); +} + +void PPUTranslator::LVSR(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), m_pure_attr, "__lvsr", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); +} + +void PPUTranslator::LVEHX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -2); + const auto pos = m_ir->CreateLShr(m_ir->CreateXor(m_ir->CreateAnd(addr, 15), m_is_be ? 0 : 15), 1); + SetVr(op.vd, m_ir->CreateInsertElement(ConstantVector::getSplat(8, m_ir->getInt16(0)), ReadMemory(addr, GetType(), true, 2), pos)); +} + +void PPUTranslator::SUBF(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto b = GetGpr(op.rb); + const auto result = m_ir->CreateSub(b, a); + SetGpr(op.rd, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__subf_get_ov", a, b)); +} + +void PPUTranslator::LDUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + SetGpr(op.rd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::DCBST(ppu_opcode_t op) +{ +} + +void PPUTranslator::LWZUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + SetGpr(op.rd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::CNTLZD(ppu_opcode_t op) +{ + const auto result = Call(GetType(), "llvm.ctlz.i64", GetGpr(op.rs), m_ir->getFalse()); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::ANDC(ppu_opcode_t op) +{ + const auto result = m_ir->CreateAnd(GetGpr(op.rs), m_ir->CreateNot(GetGpr(op.rb))); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::TD(ppu_opcode_t op) +{ + UseCondition(CheckTrapCondition(op.bo, GetGpr(op.ra), GetGpr(op.rb))); + Trap(m_current_addr); +} + +void 
PPUTranslator::LVEWX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -4); + const auto pos = m_ir->CreateLShr(m_ir->CreateXor(m_ir->CreateAnd(addr, 15), m_is_be ? 0 : 15), 2); + SetVr(op.vd, m_ir->CreateInsertElement(ConstantVector::getSplat(4, m_ir->getInt32(0)), ReadMemory(addr, GetType(), true, 4), pos)); + //Call(GetType(), "__trace", m_ir->getInt64(0)); +} + +void PPUTranslator::MULHD(ppu_opcode_t op) +{ + const auto a = SExt(GetGpr(op.ra)); // i128 + const auto b = SExt(GetGpr(op.rb)); + const auto result = Trunc(m_ir->CreateLShr(m_ir->CreateMul(a, b), 64)); + SetGpr(op.rd, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::MULHW(ppu_opcode_t op) +{ + const auto a = SExt(GetGpr(op.ra, 32)); + const auto b = SExt(GetGpr(op.rb, 32)); + SetGpr(op.rd, m_ir->CreateLShr(m_ir->CreateMul(a, b), 32)); + if (op.rc) SetCrField(0, GetUndef(), GetUndef(), GetUndef()); +} + +void PPUTranslator::LDARX(ppu_opcode_t op) +{ + SetGpr(op.rd, Call(GetType(), "__ldarx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); +} + +void PPUTranslator::DCBF(ppu_opcode_t op) +{ +} + +void PPUTranslator::LBZX(ppu_opcode_t op) +{ + SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType())); +} + +void PPUTranslator::LVX(ppu_opcode_t op) +{ + SetVr(op.vd, ReadMemory(m_ir->CreateAnd(op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -16), GetType(), true, 16)); +} + +void PPUTranslator::NEG(ppu_opcode_t op) +{ + const auto reg = GetGpr(op.ra); + const auto result = m_ir->CreateNeg(reg); + SetGpr(op.rd, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__neg_get_ov", reg)); +} + +void PPUTranslator::LBZUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + SetGpr(op.rd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::NOR(ppu_opcode_t op) +{ + const auto result = m_ir->CreateNot(op.rs == op.rb ? GetGpr(op.rs) : m_ir->CreateOr(GetGpr(op.rs), GetGpr(op.rb))); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::STVEBX(ppu_opcode_t op) +{ + const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb); + WriteMemory(addr, m_ir->CreateExtractElement(GetVr(op.vs, VrType::vi8), m_ir->CreateXor(m_ir->CreateAnd(addr, 15), m_is_be ? 
0 : 15))); +} + +void PPUTranslator::SUBFE(ppu_opcode_t op) +{ + const auto a = m_ir->CreateNot(GetGpr(op.ra)); + const auto b = GetGpr(op.rb); + const auto c = GetCarry(); + const auto result = m_ir->CreateAdd(m_ir->CreateAdd(a, b), ZExt(c, GetType())); + SetGpr(op.rd, result); + SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, b, c)); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__subfe_get_ov", a, b, c)); +} + +void PPUTranslator::ADDE(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto b = GetGpr(op.rb); + const auto c = GetCarry(); + const auto result = m_ir->CreateAdd(m_ir->CreateAdd(a, b), ZExt(c, GetType())); + SetGpr(op.rd, result); + SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, b, c)); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__adde_get_ov", a, b, c)); +} + +void PPUTranslator::MTOCRF(ppu_opcode_t op) +{ + if (op.l11) + { + // MTOCRF + const u64 pos = countLeadingZeros(op.crm, ZB_Width) - 24; + + if (pos >= 8 || 128 >> pos != op.crm) + { + CompilationError("MTOCRF: Undefined behaviour"); + return; + } + } + else + { + // MTCRF + } + + const auto value = GetGpr(op.rs); + + for (u32 i = 0; i < 8; i++) + { + if (op.crm & (128 >> i)) + { + for (u32 bit = i * 4; bit < i * 4 + 4; bit++) + { + SetCrb(bit, Trunc(m_ir->CreateLShr(value, 31 - bit), GetType())); + } + } + } +} + +void PPUTranslator::STDX(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs)); +} + +void PPUTranslator::STWCX(ppu_opcode_t op) +{ + const auto bit = Call(GetType(), "__stwcx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32)); + SetCrField(0, m_ir->getFalse(), m_ir->getFalse(), bit); +} + +void PPUTranslator::STWX(ppu_opcode_t op) +{ + WriteMemory(op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32)); +} + +void PPUTranslator::STVEHX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -2); + WriteMemory(addr, m_ir->CreateExtractElement(GetVr(op.vs, VrType::vi16), m_ir->CreateLShr(m_ir->CreateXor(m_ir->CreateAnd(addr, 15), m_is_be ? 0 : 15), 1)), true, 2); +} + +void PPUTranslator::STDUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + WriteMemory(addr, GetGpr(op.rs)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::STWUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + WriteMemory(addr, GetGpr(op.rs, 32)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::STVEWX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -4); + WriteMemory(addr, m_ir->CreateExtractElement(GetVr(op.vs, VrType::vi32), m_ir->CreateLShr(m_ir->CreateXor(m_ir->CreateAnd(addr, 15), m_is_be ? 
0 : 15), 2)), true, 4); +} + +void PPUTranslator::ADDZE(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto c = GetCarry(); + const auto result = m_ir->CreateAdd(a, ZExt(c, GetType())); + SetGpr(op.rd, result); + SetCarry(m_ir->CreateICmpULT(result, a)); + //SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, m_ir->getInt64(0), c)); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__addze_get_ov", a, c)); +} + +void PPUTranslator::SUBFZE(ppu_opcode_t op) +{ + const auto a = m_ir->CreateNot(GetGpr(op.ra)); + const auto c = GetCarry(); + const auto result = m_ir->CreateAdd(a, ZExt(c, GetType())); + SetGpr(op.rd, result); + SetCarry(m_ir->CreateICmpULT(result, a)); + //SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, m_ir->getInt64(0), c)); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__subfze_get_ov", a, c)); +} + +void PPUTranslator::STDCX(ppu_opcode_t op) +{ + const auto bit = Call(GetType(), "__stdcx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs)); + SetCrField(0, m_ir->getFalse(), m_ir->getFalse(), bit); +} + +void PPUTranslator::STBX(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 8)); +} + +void PPUTranslator::STVX(ppu_opcode_t op) +{ + WriteMemory(m_ir->CreateAnd(op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -16), GetVr(op.vs, VrType::vi32), true, 16); +} + +void PPUTranslator::SUBFME(ppu_opcode_t op) +{ + const auto a = m_ir->CreateNot(GetGpr(op.ra)); + const auto c = GetCarry(); + const auto result = m_ir->CreateSub(a, ZExt(m_ir->CreateNot(c), GetType())); + SetGpr(op.rd, result); + SetCarry(m_ir->CreateOr(c, IsNotZero(a))); + //SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, m_ir->getInt64(-1), c)); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__subfme_get_ov", a, c)); +} + +void PPUTranslator::MULLD(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto b = GetGpr(op.rb); + const auto result = m_ir->CreateMul(a, b); + SetGpr(op.rd, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__mulld_get_ov", a, b)); +} + +void PPUTranslator::ADDME(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto c = GetCarry(); + const auto result = m_ir->CreateSub(a, ZExt(m_ir->CreateNot(c), GetType())); + SetGpr(op.rd, result); + SetCarry(m_ir->CreateOr(c, IsNotZero(a))); + //SetCarry(Call(GetType(), m_pure_attr, "__adde_get_ca", a, m_ir->getInt64(-1), c)); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__addme_get_ov", a, c)); +} + +void PPUTranslator::MULLW(ppu_opcode_t op) +{ + const auto a = SExt(GetGpr(op.ra, 32)); + const auto b = SExt(GetGpr(op.rb, 32)); + const auto result = m_ir->CreateMul(a, b); + SetGpr(op.rd, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__mullw_get_ov", a, b)); +} + +void PPUTranslator::DCBTST(ppu_opcode_t op) +{ +} + +void PPUTranslator::STBUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + WriteMemory(addr, GetGpr(op.rs, 
8)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::ADD(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto b = GetGpr(op.rb); + const auto result = m_ir->CreateAdd(a, b); + SetGpr(op.rd, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(Call(GetType(), m_pure_attr, "__add_get_ov", a, b)); +} + +void PPUTranslator::DCBT(ppu_opcode_t op) +{ +} + +void PPUTranslator::LHZX(ppu_opcode_t op) +{ + SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType())); +} + +void PPUTranslator::EQV(ppu_opcode_t op) +{ + const auto result = m_ir->CreateNot(m_ir->CreateXor(GetGpr(op.rs), GetGpr(op.rb))); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::ECIWX(ppu_opcode_t op) +{ + SetGpr(op.rd, Call(GetType(), "__eciwx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); +} + +void PPUTranslator::LHZUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + SetGpr(op.rd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::XOR(ppu_opcode_t op) +{ + const auto result = op.rs == op.rb ? 
(Value*)m_ir->getInt64(0) : m_ir->CreateXor(GetGpr(op.rs), GetGpr(op.rb)); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::MFSPR(ppu_opcode_t op) +{ + Value* result; + switch (const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5)) + { + case 0x001: // MFXER + result = ZExt(m_ir->CreateLoad(m_xer_count), GetType()); + result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_xer_so), GetType()), 29)); + result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_xer_ov), GetType()), 30)); + result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(m_ir->CreateLoad(m_xer_ca), GetType()), 31)); + break; + case 0x008: // MFLR + result = m_ir->CreateLoad(m_reg_lr); + break; + case 0x009: // MFCTR + result = m_ir->CreateLoad(m_reg_ctr); + break; + case 0x100: + result = ZExt(m_ir->CreateLoad(m_reg_vrsave)); + break; + case 0x10C: // MFTB + result = ZExt(Call(GetType(), m_pure_attr, "__get_tbl")); + break; + case 0x10D: // MFTBU + result = ZExt(Call(GetType(), m_pure_attr, "__get_tbh")); + break; + default: + result = Call(GetType(), fmt::format("__mfspr_%u", n)); + break; + } + + SetGpr(op.rd, result); +} + +void PPUTranslator::LWAX(ppu_opcode_t op) +{ + SetGpr(op.rd, SExt(ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType()))); +} + +void PPUTranslator::DST(ppu_opcode_t op) +{ +} + +void PPUTranslator::LHAX(ppu_opcode_t op) +{ + SetGpr(op.rd, SExt(ReadMemory(op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType()), GetType())); +} + +void PPUTranslator::LVXL(ppu_opcode_t op) +{ + return LVX(op); +} + +void PPUTranslator::MFTB(ppu_opcode_t op) +{ + return MFSPR(op); +} + +void PPUTranslator::LWAUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + SetGpr(op.rd, SExt(ReadMemory(addr, GetType()))); + SetGpr(op.ra, addr); +} + +void PPUTranslator::DSTST(ppu_opcode_t op) +{ +} + +void PPUTranslator::LHAUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + SetGpr(op.rd, SExt(ReadMemory(addr, GetType()), GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::STHX(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 16)); +} + +void PPUTranslator::ORC(ppu_opcode_t op) +{ + const auto result = op.rs == op.rb ? (Value*)m_ir->getInt64(-1) : m_ir->CreateOr(GetGpr(op.rs), m_ir->CreateNot(GetGpr(op.rb))); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::ECOWX(ppu_opcode_t op) +{ + Call(GetType(), "__ecowx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32)); +} + +void PPUTranslator::STHUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + WriteMemory(addr, GetGpr(op.rs, 16)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::OR(ppu_opcode_t op) +{ + const auto result = op.rs == op.rb ? 
GetGpr(op.rs) : m_ir->CreateOr(GetGpr(op.rs), GetGpr(op.rb)); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::DIVDU(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto b = GetGpr(op.rb); + const auto o = IsZero(b); + const auto result = m_ir->CreateUDiv(a, m_ir->CreateSelect(o, m_ir->getInt64(-1), b)); + SetGpr(op.rd, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(o); +} + +void PPUTranslator::DIVWU(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra, 32); + const auto b = GetGpr(op.rb, 32); + const auto o = IsZero(b); + SetGpr(op.rd, m_ir->CreateUDiv(a, m_ir->CreateSelect(o, m_ir->getInt32(0xffffffff), b))); + if (op.rc) SetCrField(0, GetUndef(), GetUndef(), GetUndef()); + if (op.oe) SetOverflow(o); +} + +void PPUTranslator::MTSPR(ppu_opcode_t op) +{ + const auto value = GetGpr(op.rs); + + switch (const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5)) + { + case 0x001: // MTXER + m_ir->CreateStore(Trunc(m_ir->CreateLShr(value, 31), GetType()), m_xer_ca); + m_ir->CreateStore(Trunc(m_ir->CreateLShr(value, 30), GetType()), m_xer_ov); + m_ir->CreateStore(Trunc(m_ir->CreateLShr(value, 29), GetType()), m_xer_so); + m_ir->CreateStore(Trunc(value, GetType()), m_xer_count); + break; + case 0x008: // MTLR + m_ir->CreateStore(value, m_reg_lr); + break; + case 0x009: // MTCTR + m_ir->CreateStore(value, m_reg_ctr); + break; + case 0x100: + m_ir->CreateStore(Trunc(value), m_reg_vrsave); + break; + default: + Call(GetType(), fmt::format("__mtspr_%u", n), value); + break; + } +} + +void PPUTranslator::NAND(ppu_opcode_t op) +{ + const auto result = m_ir->CreateNot(op.rs == op.rb ? 
GetGpr(op.rs) : m_ir->CreateAnd(GetGpr(op.rs), GetGpr(op.rb))); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::STVXL(ppu_opcode_t op) +{ + return STVX(op); +} + +void PPUTranslator::DIVD(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra); + const auto b = GetGpr(op.rb); + const auto o = m_ir->CreateOr(IsZero(b), m_ir->CreateAnd(m_ir->CreateICmpEQ(a, m_ir->getInt64(1ull << 63)), IsOnes(b))); + const auto result = m_ir->CreateSDiv(a, m_ir->CreateSelect(o, m_ir->getInt64(1ull << 63), b)); + SetGpr(op.rd, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); + if (op.oe) SetOverflow(o); +} + +void PPUTranslator::DIVW(ppu_opcode_t op) +{ + const auto a = GetGpr(op.ra, 32); + const auto b = GetGpr(op.rb, 32); + const auto o = m_ir->CreateOr(IsZero(b), m_ir->CreateAnd(m_ir->CreateICmpEQ(a, m_ir->getInt32(1 << 31)), IsOnes(b))); + SetGpr(op.rd, m_ir->CreateSDiv(a, m_ir->CreateSelect(o, m_ir->getInt32(1 << 31), b))); + if (op.rc) SetCrField(0, GetUndef(), GetUndef(), GetUndef()); + if (op.oe) SetOverflow(o); +} + +void PPUTranslator::LVLX(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), "__lvlx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); +} + +void PPUTranslator::LDBRX(ppu_opcode_t op) +{ + SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType(), false)); +} + +void PPUTranslator::LSWX(ppu_opcode_t op) +{ + Call(GetType(), "__lswx", m_ir->getInt32(op.rd), m_ir->CreateLoad(m_xer_count), op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)); +} + +void PPUTranslator::LWBRX(ppu_opcode_t op) +{ + SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType(), false)); +} + +void PPUTranslator::LFSX(ppu_opcode_t op) +{ + SetFpr(op.frd, ReadMemory(op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType())); +} + +void PPUTranslator::SRW(ppu_opcode_t op) +{ + const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x3f); + const auto shift_arg = m_ir->CreateAnd(GetGpr(op.rs), 0xffffffff); + const auto result = m_ir->CreateLShr(shift_arg, shift_num); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::SRD(ppu_opcode_t op) +{ + const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x7f); + const auto shift_arg = GetGpr(op.rs); + const auto result = Trunc(m_ir->CreateLShr(ZExt(shift_arg), ZExt(shift_num))); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::LVRX(ppu_opcode_t op) +{ + SetVr(op.vd, Call(GetType(), "__lvrx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb))); +} + +void PPUTranslator::LSWI(ppu_opcode_t op) +{ + Call(GetType(), "__lswi", m_ir->getInt32(op.rd), m_ir->getInt32(op.rb), op.ra ? GetGpr(op.ra) : m_ir->getInt64(0)); +} + +void PPUTranslator::LFSUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + SetFpr(op.frd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::SYNC(ppu_opcode_t op) +{ + m_ir->CreateFence(AtomicOrdering::SequentiallyConsistent); +} + +void PPUTranslator::LFDX(ppu_opcode_t op) +{ + SetFpr(op.frd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType())); +} + +void PPUTranslator::LFDUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + SetFpr(op.frd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::STVLX(ppu_opcode_t op) +{ + Call(GetType(), "__stvlx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetVr(op.vs, VrType::vi8)); +} + +void PPUTranslator::STDBRX(ppu_opcode_t op) +{ + WriteMemory(op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs), false); +} + +void PPUTranslator::STSWX(ppu_opcode_t op) +{ + Call(GetType(), "__stswx", m_ir->getInt32(op.rs), m_ir->CreateLoad(m_xer_count), op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)); +} + +void PPUTranslator::STWBRX(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32), false); +} + +void PPUTranslator::STFSX(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetFpr(op.frs, 32)); +} + +void PPUTranslator::STVRX(ppu_opcode_t op) +{ + Call(GetType(), "__stvrx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetVr(op.vs, VrType::vi8)); +} + +void PPUTranslator::STFSUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + WriteMemory(addr, GetFpr(op.frs, 32)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::STSWI(ppu_opcode_t op) +{ + Call(GetType(), "__stswi", m_ir->getInt32(op.rd), m_ir->getInt32(op.rb), op.ra ? GetGpr(op.ra) : m_ir->getInt64(0)); +} + +void PPUTranslator::STFDX(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetFpr(op.frs)); +} + +void PPUTranslator::STFDUX(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)); + WriteMemory(addr, GetFpr(op.frs)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::LVLXL(ppu_opcode_t op) +{ + return LVLX(op); +} + +void PPUTranslator::LHBRX(ppu_opcode_t op) +{ + SetGpr(op.rd, ReadMemory(op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType(), false)); +} + +void PPUTranslator::SRAW(ppu_opcode_t op) +{ + const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x3f); + const auto shift_arg = GetGpr(op.rs, 32); + const auto result = m_ir->CreateAShr(SExt(shift_arg), shift_num); + SetGpr(op.ra, result); + //const auto carry_mask = m_ir->CreateNot(m_ir->CreateShl(m_ir->getInt64(-1), shift_num)); + //SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), m_ir->CreateICmpNE(m_ir->CreateAnd(shift_arg, carry_mask), m_ir->getInt64(0)))); + SetCarry(Call(GetType(), m_pure_attr, "__sraw_get_ca", shift_arg, shift_num)); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::SRAD(ppu_opcode_t op) +{ + const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x7f); + const auto shift_arg = GetGpr(op.rs); + const auto result = Trunc(m_ir->CreateAShr(SExt(shift_arg), ZExt(shift_num))); + SetGpr(op.ra, result); + //const auto carry_mask = m_ir->CreateNot(m_ir->CreateShl(m_ir->getInt64(-1), shift_num)); + //SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), m_ir->CreateOr(overshift, m_ir->CreateICmpNE(m_ir->CreateAnd(shift_arg, carry_mask), m_ir->getInt64(0))))); + SetCarry(Call(GetType(), m_pure_attr, "__srad_get_ca", shift_arg, shift_num)); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::LVRXL(ppu_opcode_t op) +{ + return LVRX(op); +} + +void PPUTranslator::DSS(ppu_opcode_t op) +{ +} + +void PPUTranslator::SRAWI(ppu_opcode_t op) +{ + const auto shift_arg = GetGpr(op.rs, 32); + const auto result = m_ir->CreateAShr(SExt(shift_arg), op.sh32); + SetGpr(op.ra, result); + //SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt32(0)), m_ir->CreateICmpNE(m_ir->CreateAnd(shift_arg, ~(~0ull << op.sh32)), m_ir->getInt32(0)))); + SetCarry(Call(GetType(), m_pure_attr, "__sraw_get_ca", shift_arg, m_ir->getInt64(op.sh32))); + if 
(op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::SRADI(ppu_opcode_t op) +{ + const auto shift_arg = GetGpr(op.rs); + const auto result = m_ir->CreateAShr(shift_arg, op.sh64); + SetGpr(op.ra, result); + //SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), m_ir->CreateICmpNE(m_ir->CreateAnd(shift_arg, ~(~0ull << op.sh64)), m_ir->getInt64(0)))); + SetCarry(Call(GetType(), m_pure_attr, "__srad_get_ca", shift_arg, m_ir->getInt64(op.sh64))); // carry must use the same 6-bit shift amount (sh64) as the result; sh32 would drop its top bit + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::EIEIO(ppu_opcode_t op) +{ + // TODO + m_ir->CreateFence(AtomicOrdering::SequentiallyConsistent); +} + +void PPUTranslator::STVLXL(ppu_opcode_t op) +{ + return STVLX(op); +} + +void PPUTranslator::STHBRX(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 16), false); +} + +void PPUTranslator::EXTSH(ppu_opcode_t op) +{ + const auto result = SExt(GetGpr(op.rs, 16), GetType()); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::STVRXL(ppu_opcode_t op) +{ + return STVRX(op); +} + +void PPUTranslator::EXTSB(ppu_opcode_t op) +{ + const auto result = SExt(GetGpr(op.rs, 8), GetType()); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::STFIWX(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetFpr(op.frs, 32, true)); +} + +void PPUTranslator::EXTSW(ppu_opcode_t op) +{ + const auto result = SExt(GetGpr(op.rs, 32)); + SetGpr(op.ra, result); + if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0)); +} + +void PPUTranslator::ICBI(ppu_opcode_t op) +{ +} + +void PPUTranslator::DCBZ(ppu_opcode_t op) +{ + const auto ptr = GetMemory(m_ir->CreateAnd(op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -128), GetType()); + Call(GetType(), "llvm.memset.p0i8.i32", ptr, m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getInt32(16), m_ir->getTrue()); +} + +void PPUTranslator::LWZ(ppu_opcode_t op) +{ + SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)) : m_ir->getInt64(op.simm16), GetType())); +} + +void PPUTranslator::LWZU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)); + SetGpr(op.rd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::LBZ(ppu_opcode_t op) +{ + SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)) : m_ir->getInt64(op.simm16), GetType())); +} + +void PPUTranslator::LBZU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)); + SetGpr(op.rd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::STW(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)) : m_ir->getInt64(op.simm16), GetGpr(op.rs, 32)); +} + +void PPUTranslator::STWU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)); + WriteMemory(addr, GetGpr(op.rs, 32)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::STB(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)) : m_ir->getInt64(op.simm16), GetGpr(op.rs, 8)); +} + +void PPUTranslator::STBU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)); + WriteMemory(addr, GetGpr(op.rs, 8)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::LHZ(ppu_opcode_t op) +{ + SetGpr(op.rd, ReadMemory(op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)) : m_ir->getInt64(op.simm16), GetType())); +} + +void PPUTranslator::LHZU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)); + SetGpr(op.rd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::LHA(ppu_opcode_t op) +{ + SetGpr(op.rd, SExt(ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)) : m_ir->getInt64(op.simm16), GetType()), GetType())); +} + +void PPUTranslator::LHAU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)); + SetGpr(op.rd, SExt(ReadMemory(addr, GetType()), GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::STH(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)) : m_ir->getInt64(op.simm16), GetGpr(op.rs, 16)); +} + +void PPUTranslator::STHU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)); + WriteMemory(addr, GetGpr(op.rs, 16)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::LMW(ppu_opcode_t op) +{ + Call(GetType(), "__trace", m_ir->getInt64(m_current_addr)); + for (u32 i = 0; i < 32 - op.rd; i++) + { + SetGpr(i + op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(m_ir->getInt64(op.simm16 + i * 4), GetGpr(op.ra)) : m_ir->getInt64(op.simm16 + i * 4), GetType())); + } +} + +void PPUTranslator::STMW(ppu_opcode_t op) +{ + Call(GetType(), "__trace", m_ir->getInt64(m_current_addr)); + for (u32 i = 0; i < 32 - op.rs; i++) + { + WriteMemory(op.ra ? m_ir->CreateAdd(m_ir->getInt64(op.simm16 + i * 4), GetGpr(op.ra)) : m_ir->getInt64(op.simm16 + i * 4), GetGpr(i + op.rs, 32)); + } +} + +void PPUTranslator::LFS(ppu_opcode_t op) +{ + SetFpr(op.frd, ReadMemory(op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)) : m_ir->getInt64(op.simm16), GetType())); +} + +void PPUTranslator::LFSU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)); + SetFpr(op.frd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::LFD(ppu_opcode_t op) +{ + SetFpr(op.frd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)) : m_ir->getInt64(op.simm16), GetType())); +} + +void PPUTranslator::LFDU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)); + SetFpr(op.frd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::STFS(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)) : m_ir->getInt64(op.simm16), GetFpr(op.frs, 32)); +} + +void PPUTranslator::STFSU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)); + WriteMemory(addr, GetFpr(op.frs, 32)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::STFD(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)) : m_ir->getInt64(op.simm16), GetFpr(op.frs)); +} + +void PPUTranslator::STFDU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.simm16)); + WriteMemory(addr, GetFpr(op.frs)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::LD(ppu_opcode_t op) +{ + SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.ds << 2)) : m_ir->getInt64(op.ds << 2), GetType())); +} + +void PPUTranslator::LDU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.ds << 2)); + SetGpr(op.rd, ReadMemory(addr, GetType())); + SetGpr(op.ra, addr); +} + +void PPUTranslator::LWA(ppu_opcode_t op) +{ + SetGpr(op.rd, SExt(ReadMemory(op.ra ? 
m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.ds << 2)) : m_ir->getInt64(op.ds << 2), GetType()))); +} + +void PPUTranslator::STD(ppu_opcode_t op) +{ + WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.ds << 2)) : m_ir->getInt64(op.ds << 2), GetGpr(op.rs)); +} + +void PPUTranslator::STDU(ppu_opcode_t op) +{ + const auto addr = m_ir->CreateAdd(GetGpr(op.ra), m_ir->getInt64(op.ds << 2)); + WriteMemory(addr, GetGpr(op.rs)); + SetGpr(op.ra, addr); +} + +void PPUTranslator::FDIVS(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto result = m_ir->CreateFPTrunc(m_ir->CreateFDiv(a, b), GetType()); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fdivs_get_fr", a, b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fdivs_get_fi", a, b)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fdivs_get_ox", a, b)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fdivs_get_ux", a, b)); + //SetFPSCRException(m_fpscr_zx, Call(GetType(), m_pure_attr, "__fdivs_get_zx", a, b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fdivs_get_vxsnan", a, b)); + //SetFPSCRException(m_fpscr_vxidi, Call(GetType(), m_pure_attr, "__fdivs_get_vxidi", a, b)); + //SetFPSCRException(m_fpscr_vxzdz, Call(GetType(), m_pure_attr, "__fdivs_get_vxzdz", a, b)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FSUBS(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto result = m_ir->CreateFPTrunc(m_ir->CreateFSub(a, b), GetType()); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fsubs_get_fr", a, b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fsubs_get_fi", a, b)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fsubs_get_ox", a, b)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fsubs_get_ux", a, b)); + 
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fsubs_get_vxsnan", a, b)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fsubs_get_vxisi", a, b)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FADDS(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto result = m_ir->CreateFPTrunc(m_ir->CreateFAdd(a, b), GetType()); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fadds_get_fr", a, b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fadds_get_fi", a, b)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fadds_get_ox", a, b)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fadds_get_ux", a, b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fadds_get_vxsnan", a, b)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fadds_get_vxisi", a, b)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FSQRTS(ppu_opcode_t op) +{ + const auto b = GetFpr(op.frb); + const auto result = m_ir->CreateFPTrunc(Call(GetType(), "llvm.sqrt.f64", b), GetType()); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fsqrts_get_fr", b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fsqrts_get_fi", b)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fsqrts_get_ox", b)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fsqrts_get_ux", b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fsqrts_get_vxsnan", b)); + //SetFPSCRException(m_fpscr_vxsqrt, Call(GetType(), m_pure_attr, "__fsqrts_get_vxsqrt", b)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FRES(ppu_opcode_t op) +{ + const auto b = GetFpr(op.frb, 32); + const auto result = Call(GetType(), m_pure_attr, "__fre", b); + SetFpr(op.frd, result); + + //m_ir->CreateStore(GetUndef(), m_fpscr_fr); + //m_ir->CreateStore(GetUndef(), m_fpscr_fi); 
+ //m_ir->CreateStore(GetUndef(), m_fpscr_xx); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fres_get_ox", b)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fres_get_ux", b)); + //SetFPSCRException(m_fpscr_zx, Call(GetType(), m_pure_attr, "__fres_get_zx", b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fres_get_vxsnan", b)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FMULS(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto c = GetFpr(op.frc); + const auto result = m_ir->CreateFPTrunc(m_ir->CreateFMul(a, c), GetType()); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmuls_get_fr", a, c)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmuls_get_fi", a, c)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fmuls_get_ox", a, c)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fmuls_get_ux", a, c)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fmuls_get_vxsnan", a, c)); + //SetFPSCRException(m_fpscr_vximz, Call(GetType(), m_pure_attr, "__fmuls_get_vximz", a, c)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FMADDS(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto c = GetFpr(op.frc); + const auto result = m_ir->CreateFPTrunc(m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b), GetType()); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadds_get_fr", a, b, c)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadds_get_fi", a, b, c)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fmadds_get_ox", a, b, c)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fmadds_get_ux", a, b, c)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fmadds_get_vxsnan", a, b, c)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fmadds_get_vxisi", a, 
b, c)); + //SetFPSCRException(m_fpscr_vximz, Call(GetType(), m_pure_attr, "__fmadds_get_vximz", a, b, c)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FMSUBS(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto c = GetFpr(op.frc); + const auto result = m_ir->CreateFPTrunc(m_ir->CreateFSub(m_ir->CreateFMul(a, c), b), GetType()); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadds_get_fr", a, b, c)); // TODO ??? + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadds_get_fi", a, b, c)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fmadds_get_ox", a, b, c)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fmadds_get_ux", a, b, c)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fmadds_get_vxsnan", a, b, c)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fmadds_get_vxisi", a, b, c)); + //SetFPSCRException(m_fpscr_vximz, Call(GetType(), m_pure_attr, "__fmadds_get_vximz", a, b, c)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FNMSUBS(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto c = GetFpr(op.frc); + const auto result = m_ir->CreateFPTrunc(m_ir->CreateFSub(b, m_ir->CreateFMul(a, c)), GetType()); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadds_get_fr", a, b, c)); // TODO ??? 
+ //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadds_get_fi", a, b, c)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fmadds_get_ox", a, b, c)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fmadds_get_ux", a, b, c)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fmadds_get_vxsnan", a, b, c)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fmadds_get_vxisi", a, b, c)); + //SetFPSCRException(m_fpscr_vximz, Call(GetType(), m_pure_attr, "__fmadds_get_vximz", a, b, c)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FNMADDS(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto c = GetFpr(op.frc); + const auto result = m_ir->CreateFPTrunc(m_ir->CreateFNeg(m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b)), GetType()); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadds_get_fr", a, b, c)); // TODO ??? + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadds_get_fi", a, b, c)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fmadds_get_ox", a, b, c)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fmadds_get_ux", a, b, c)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fmadds_get_vxsnan", a, b, c)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fmadds_get_vxisi", a, b, c)); + //SetFPSCRException(m_fpscr_vximz, Call(GetType(), m_pure_attr, "__fmadds_get_vximz", a, b, c)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::MTFSB1(ppu_opcode_t op) +{ + CompilationError("MTFSB1"); + + SetFPSCRBit(op.crbd, m_ir->getTrue(), true); + + if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); +} + +void PPUTranslator::MCRFS(ppu_opcode_t op) +{ + CompilationError("MCRFS"); + + const auto lt = GetFPSCRBit(op.crfs * 4 + 0); + const auto gt = 
GetFPSCRBit(op.crfs * 4 + 1); + const auto eq = GetFPSCRBit(op.crfs * 4 + 2); + const auto un = GetFPSCRBit(op.crfs * 4 + 3); + SetCrField(op.crfd, lt, gt, eq, un); +} + +void PPUTranslator::MTFSB0(ppu_opcode_t op) +{ + CompilationError("MTFSB0"); + + SetFPSCRBit(op.crbd, m_ir->getFalse(), false); + + if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); +} + +void PPUTranslator::MTFSFI(ppu_opcode_t op) +{ + CompilationError("MTFSFI"); + + SetFPSCRBit(op.crfd * 4 + 0, m_ir->getInt1((op.i & 8) != 0), false); + if (op.crfd != 0) SetFPSCRBit(op.crfd * 4 + 1, m_ir->getInt1((op.i & 4) != 0), false); + if (op.crfd != 0) SetFPSCRBit(op.crfd * 4 + 2, m_ir->getInt1((op.i & 2) != 0), false); + SetFPSCRBit(op.crfd * 4 + 3, m_ir->getInt1((op.i & 1) != 0), false); + + if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); +} + +void PPUTranslator::MFFS(ppu_opcode_t op) +{ + CompilationError("MFFS"); + + Value* result = m_ir->getInt64(0); + + for (u32 i = 0; i < 32; i++) + { + if (const auto bit = m_fpscr[i] ? 
m_ir->CreateLoad(m_fpscr[i]) : GetFPSCRBit(i)) + { + result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(bit, GetType()), i ^ 31)); + } + } + + SetFpr(op.frd, result); + + if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); +} + +void PPUTranslator::MTFSF(ppu_opcode_t op) +{ + CompilationError("MTFSF"); + + const auto value = GetFpr(op.frb, 32, true); + + for (u32 i = 0; i < 32; i++) + { + if (i != 1 && i != 2 && (op.flm & (128 >> (i / 4))) != 0) + { + SetFPSCRBit(i, Trunc(m_ir->CreateLShr(value, i ^ 31), GetType()), false); + } + } + + if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); +} + +void PPUTranslator::FCMPU(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto lt = m_ir->CreateFCmpOLT(a, b); + const auto gt = m_ir->CreateFCmpOGT(a, b); + const auto eq = m_ir->CreateFCmpOEQ(a, b); + const auto un = m_ir->CreateFCmpUNO(a, b); + SetCrField(op.crfd, lt, gt, eq, un); + SetFPCC(lt, gt, eq, un); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fcmpu_get_vxsnan", a, b)); +} + +void PPUTranslator::FRSP(ppu_opcode_t op) +{ + const auto b = GetFpr(op.frb); + const auto result = m_ir->CreateFPTrunc(b, GetType()); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__frsp_get_fr", b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__frsp_get_fi", b)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__frsp_get_ox", b)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__frsp_get_ux", b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__frsp_get_vxsnan", b)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FCTIW(ppu_opcode_t op) +{ + const auto b = GetFpr(op.frb); + //const auto sat_l = m_ir->CreateFCmpULT(b, ConstantFP::get(GetType(), 
-std::pow(2, 31))); // TODO ??? + //const auto sat_h = m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType(), std::pow(2, 31))); + //const auto converted = m_ir->CreateFPToSI(FP_SAT_OP(sat_l, b), GetType()); + //SetFpr(op.frd, m_ir->CreateSelect(sat_h, m_ir->getInt64(0x7fffffff), converted)); + SetFpr(op.frd, m_ir->CreateFPToSI(b, GetType())); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fctiw_get_fr", b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fctiw_get_fi", b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fctiw_get_vxsnan", b)); + //SetFPSCRException(m_fpscr_vxcvi, m_ir->CreateOr(sat_l, sat_h)); + //m_ir->CreateStore(GetUndef(), m_fpscr_c); + //SetFPCC(GetUndef(), GetUndef(), GetUndef(), GetUndef(), op.rc != 0); +} + +void PPUTranslator::FCTIWZ(ppu_opcode_t op) +{ + const auto b = GetFpr(op.frb); + SetFpr(op.frd, m_ir->CreateFPToSI(b, GetType())); +} + +void PPUTranslator::FDIV(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto result = m_ir->CreateFDiv(a, b); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fdiv_get_fr", a, b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fdiv_get_fi", a, b)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fdiv_get_ox", a, b)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fdiv_get_ux", a, b)); + //SetFPSCRException(m_fpscr_zx, Call(GetType(), m_pure_attr, "__fdiv_get_zx", a, b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fdiv_get_vxsnan", a, b)); + //SetFPSCRException(m_fpscr_vxidi, Call(GetType(), m_pure_attr, "__fdiv_get_vxidi", a, b)); + //SetFPSCRException(m_fpscr_vxzdz, Call(GetType(), m_pure_attr, "__fdiv_get_vxzdz", a, b)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FSUB(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto result = m_ir->CreateFSub(a, b); + SetFpr(op.frd, result); + 
+ //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fsub_get_fr", a, b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fsub_get_fi", a, b)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fsub_get_ox", a, b)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fsub_get_ux", a, b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fsub_get_vxsnan", a, b)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fsub_get_vxisi", a, b)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FADD(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto result = m_ir->CreateFAdd(a, b); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fadd_get_fr", a, b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fadd_get_fi", a, b)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fadd_get_ox", a, b)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fadd_get_ux", a, b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fadd_get_vxsnan", a, b)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fadd_get_vxisi", a, b)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FSQRT(ppu_opcode_t op) +{ + const auto b = GetFpr(op.frb); + const auto result = Call(GetType(), "llvm.sqrt.f64", b); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fsqrt_get_fr", b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fsqrt_get_fi", b)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fsqrt_get_ox", b)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fsqrt_get_ux", b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fsqrt_get_vxsnan", b)); + //SetFPSCRException(m_fpscr_vxsqrt, Call(GetType(), m_pure_attr, "__fsqrt_get_vxsqrt", b)); + SetFPRF(result, op.rc != 0); +} + +void 
PPUTranslator::FSEL(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto c = GetFpr(op.frc); + SetFpr(op.frd, m_ir->CreateSelect(m_ir->CreateFCmpOGE(a, ConstantFP::get(GetType(), 0.0)), c, b)); + + if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); +} + +void PPUTranslator::FMUL(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto c = GetFpr(op.frc); + const auto result = m_ir->CreateFMul(a, c); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmul_get_fr", a, c)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmul_get_fi", a, c)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fmul_get_ox", a, c)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fmul_get_ux", a, c)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fmul_get_vxsnan", a, c)); + //SetFPSCRException(m_fpscr_vximz, Call(GetType(), m_pure_attr, "__fmul_get_vximz", a, c)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FRSQRTE(ppu_opcode_t op) +{ + const auto b = GetFpr(op.frb, 32); + const auto result = Call(GetType(), m_pure_attr, "__frsqrte", b); + SetFpr(op.frd, result); + + //m_ir->CreateStore(GetUndef(), m_fpscr_fr); + //m_ir->CreateStore(GetUndef(), m_fpscr_fi); + //m_ir->CreateStore(GetUndef(), m_fpscr_xx); + //SetFPSCRException(m_fpscr_zx, Call(GetType(), m_pure_attr, "__frsqrte_get_zx", b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__frsqrte_get_vxsnan", b)); + //SetFPSCRException(m_fpscr_vxsqrt, Call(GetType(), m_pure_attr, "__frsqrte_get_vxsqrt", b)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FMSUB(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto c = GetFpr(op.frc); + const auto result = m_ir->CreateFSub(m_ir->CreateFMul(a, c), b); + SetFpr(op.frd, 
result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadd_get_fr", a, b, c)); // TODO ??? + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadd_get_fi", a, b, c)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fmadd_get_ox", a, b, c)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fmadd_get_ux", a, b, c)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fmadd_get_vxsnan", a, b, c)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fmadd_get_vxisi", a, b, c)); + //SetFPSCRException(m_fpscr_vximz, Call(GetType(), m_pure_attr, "__fmadd_get_vximz", a, b, c)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FMADD(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto c = GetFpr(op.frc); + const auto result = m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadd_get_fr", a, b, c)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadd_get_fi", a, b, c)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fmadd_get_ox", a, b, c)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fmadd_get_ux", a, b, c)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fmadd_get_vxsnan", a, b, c)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fmadd_get_vxisi", a, b, c)); + //SetFPSCRException(m_fpscr_vximz, Call(GetType(), m_pure_attr, "__fmadd_get_vximz", a, b, c)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FNMSUB(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto c = GetFpr(op.frc); + const auto result = m_ir->CreateFSub(b, m_ir->CreateFMul(a, c)); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadd_get_fr", a, b, c)); // TODO ??? 
+ //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadd_get_fi", a, b, c)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fmadd_get_ox", a, b, c)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fmadd_get_ux", a, b, c)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fmadd_get_vxsnan", a, b, c)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fmadd_get_vxisi", a, b, c)); + //SetFPSCRException(m_fpscr_vximz, Call(GetType(), m_pure_attr, "__fmadd_get_vximz", a, b, c)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FNMADD(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto c = GetFpr(op.frc); + const auto result = m_ir->CreateFNeg(m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b)); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fmadd_get_fr", a, b, c)); // TODO ??? + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fmadd_get_fi", a, b, c)); + //SetFPSCRException(m_fpscr_ox, Call(GetType(), m_pure_attr, "__fmadd_get_ox", a, b, c)); + //SetFPSCRException(m_fpscr_ux, Call(GetType(), m_pure_attr, "__fmadd_get_ux", a, b, c)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fmadd_get_vxsnan", a, b, c)); + //SetFPSCRException(m_fpscr_vxisi, Call(GetType(), m_pure_attr, "__fmadd_get_vxisi", a, b, c)); + //SetFPSCRException(m_fpscr_vximz, Call(GetType(), m_pure_attr, "__fmadd_get_vximz", a, b, c)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::FCMPO(ppu_opcode_t op) +{ + const auto a = GetFpr(op.fra); + const auto b = GetFpr(op.frb); + const auto lt = m_ir->CreateFCmpOLT(a, b); + const auto gt = m_ir->CreateFCmpOGT(a, b); + const auto eq = m_ir->CreateFCmpOEQ(a, b); + const auto un = m_ir->CreateFCmpUNO(a, b); + SetCrField(op.crfd, lt, gt, eq, un); + SetFPCC(lt, gt, eq, un); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fcmpo_get_vxsnan", a, b)); + 
//SetFPSCRException(m_fpscr_vxvc, Call(GetType(), m_pure_attr, "__fcmpo_get_vxvc", a, b)); +} + +void PPUTranslator::FNEG(ppu_opcode_t op) +{ + const auto b = GetFpr(op.frb); + SetFpr(op.frd, m_ir->CreateFNeg(b)); + + if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); +} + +void PPUTranslator::FMR(ppu_opcode_t op) +{ + SetFpr(op.frd, GetFpr(op.frb)); + + if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); +} + +void PPUTranslator::FNABS(ppu_opcode_t op) +{ + SetFpr(op.frd, m_ir->CreateFNeg(Call(GetType(), "llvm.fabs.f64", GetFpr(op.frb)))); + + if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); +} + +void PPUTranslator::FABS(ppu_opcode_t op) +{ + SetFpr(op.frd, Call(GetType(), "llvm.fabs.f64", GetFpr(op.frb))); + + if (op.rc) SetCrField(1, m_ir->CreateLoad(m_fpscr_lt), m_ir->CreateLoad(m_fpscr_gt), m_ir->CreateLoad(m_fpscr_eq), m_ir->CreateLoad(m_fpscr_un)); +} + +void PPUTranslator::FCTID(ppu_opcode_t op) +{ + const auto b = GetFpr(op.frb); + //const auto sat_l = m_ir->CreateFCmpULT(b, ConstantFP::get(GetType(), -std::pow(2, 63))); + //const auto sat_h = m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType(), std::pow(2, 63))); + //const auto converted = m_ir->CreateFPToSI(FP_SAT_OP(sat_l, b), GetType()); + //SetFpr(op.frd, m_ir->CreateSelect(sat_h, m_ir->getInt64(0x7fffffffffffffff), converted)); + SetFpr(op.frd, m_ir->CreateFPToSI(b, GetType())); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fctid_get_fr", b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fctid_get_fi", b)); + //SetFPSCRException(m_fpscr_vxsnan, Call(GetType(), m_pure_attr, "__fctid_get_vxsnan", b)); + //SetFPSCRException(m_fpscr_vxcvi, m_ir->CreateOr(sat_l, sat_h)); + //m_ir->CreateStore(GetUndef(), m_fpscr_c); + 
//SetFPCC(GetUndef(), GetUndef(), GetUndef(), GetUndef(), op.rc != 0); +} + +void PPUTranslator::FCTIDZ(ppu_opcode_t op) +{ + const auto b = GetFpr(op.frb); + SetFpr(op.frd, m_ir->CreateFPToSI(b, GetType())); +} + +void PPUTranslator::FCFID(ppu_opcode_t op) +{ + const auto b = GetFpr(op.frb, 64, true); + const auto result = m_ir->CreateSIToFP(b, GetType()); + SetFpr(op.frd, result); + + //SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fcfid_get_fr", b)); + //SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fcfid_get_fi", b)); + SetFPRF(result, op.rc != 0); +} + +void PPUTranslator::UNK(ppu_opcode_t op) +{ + LOG_WARNING(PPU, "0x%08llx: Unknown/illegal opcode 0x%08x", m_current_addr, op.opcode); + m_ir->CreateUnreachable(); +} + + +Value* PPUTranslator::GetGpr(u32 r, u32 num_bits) +{ + m_value_usage[m_gpr[r]]++; + return m_ir->CreateTrunc(m_ir->CreateLoad(m_gpr[r]), m_ir->getIntNTy(num_bits)); +} + +void PPUTranslator::SetGpr(u32 r, Value* value) +{ + m_ir->CreateStore(m_ir->CreateZExt(value, GetType()), m_gpr[r]); + m_value_usage[m_gpr[r]]++; +} + +Value* PPUTranslator::GetFpr(u32 r, u32 bits, bool as_int) +{ + const auto value = m_ir->CreateAlignedLoad(m_fpr[r], 8); + m_value_usage[m_fpr[r]]++; + + if (!as_int && bits == 64) + { + return value; + } + else if (!as_int && bits == 32) + { + return m_ir->CreateFPTrunc(value, GetType()); + } + else + { + return m_ir->CreateTrunc(m_ir->CreateBitCast(value, GetType()), m_ir->getIntNTy(bits)); + } +} + +void PPUTranslator::SetFpr(u32 r, Value* val) +{ + const auto f64_val = + val->getType() == GetType() ? m_ir->CreateBitCast(ZExt(val), GetType()) : + val->getType() == GetType() ? m_ir->CreateBitCast(val, GetType()) : + val->getType() == GetType() ? 
m_ir->CreateFPExt(val, GetType()) : val; + + m_ir->CreateAlignedStore(f64_val, m_fpr[r], 8); + m_value_usage[m_fpr[r]]++; +} + +Value* PPUTranslator::GetVr(u32 vr, VrType type) +{ + const auto value = m_ir->CreateAlignedLoad(m_vr[vr], 16); + m_value_usage[m_vr[vr]]++; + + switch (type) + { + case VrType::vi32: return value; + case VrType::vi8: return m_ir->CreateBitCast(value, GetType()); + case VrType::vi16: return m_ir->CreateBitCast(value, GetType()); + case VrType::vf: return m_ir->CreateBitCast(value, GetType()); + case VrType::i128: return m_ir->CreateBitCast(value, GetType()); + } + + throw std::logic_error("GetVr(): invalid type"); +} + +void PPUTranslator::SetVr(u32 vr, Value* value) +{ + const auto type = value->getType(); + const auto size = type->getPrimitiveSizeInBits(); + + if (type->isVectorTy() && size != 128) + { + if (type->getScalarType()->isIntegerTy(1)) + { + // Sign-extend bool values + value = SExt(value, ScaleType(type, 7 - s32(std::log2(size)))); + } + else if (size == 256 || size == 512) + { + // Truncate big vectors + value = Trunc(value, ScaleType(type, 7 - s32(std::log2(size)))); + } + } + + m_ir->CreateAlignedStore(m_ir->CreateBitCast(value, GetType()), m_vr[vr], 16); + m_value_usage[m_vr[vr]]++; +} + +Value* PPUTranslator::GetCrb(u32 crb) +{ + return m_ir->CreateLoad(m_cr[crb]); +} + +void PPUTranslator::SetCrb(u32 crb, Value* value) +{ + m_ir->CreateStore(value, m_cr[crb]); +} + +void PPUTranslator::SetCrField(u32 group, Value* lt, Value* gt, Value* eq, Value* so) +{ + SetCrb(group * 4 + 0, lt ? lt : GetUndef()); + SetCrb(group * 4 + 1, gt ? gt : GetUndef()); + SetCrb(group * 4 + 2, eq ? eq : GetUndef()); + SetCrb(group * 4 + 3, so ? 
so : m_ir->CreateLoad(m_xer_so)); +} + +void PPUTranslator::SetCrFieldSignedCmp(u32 n, Value* a, Value* b) +{ + const auto lt = m_ir->CreateICmpSLT(a, b); + const auto gt = m_ir->CreateICmpSGT(a, b); + const auto eq = m_ir->CreateICmpEQ(a, b); + SetCrField(n, lt, gt, eq); +} + +void PPUTranslator::SetCrFieldUnsignedCmp(u32 n, Value* a, Value* b) +{ + const auto lt = m_ir->CreateICmpULT(a, b); + const auto gt = m_ir->CreateICmpUGT(a, b); + const auto eq = m_ir->CreateICmpEQ(a, b); + SetCrField(n, lt, gt, eq); +} + +void PPUTranslator::SetFPCC(Value* lt, Value* gt, Value* eq, Value* un, bool set_cr) +{ + m_ir->CreateStore(lt, m_fpscr_lt); + m_ir->CreateStore(gt, m_fpscr_gt); + m_ir->CreateStore(eq, m_fpscr_eq); + m_ir->CreateStore(un, m_fpscr_un); + if (set_cr) SetCrField(1, lt, gt, eq, un); +} + +void PPUTranslator::SetFPRF(Value* value, bool set_cr) +{ + const bool is32 = + value->getType()->isFloatTy() ? true : + value->getType()->isDoubleTy() ? false : + throw std::logic_error("SetFPRF(): invalid value type"); + + //const auto zero = ConstantFP::get(value->getType(), 0.0); + //const auto is_nan = m_ir->CreateFCmpUNO(value, zero); + //const auto is_inf = Call(GetType(), m_pure_attr, is32 ? "__is_inf32" : "__is_inf", value); // TODO + //const auto is_denorm = Call(GetType(), m_pure_attr, is32 ? "__is_denorm32" : "__is_denorm", value); // TODO + //const auto is_neg_zero = Call(GetType(), m_pure_attr, is32 ? 
"__is_neg_zero32" : "__is_neg_zero", value); // TODO + + //const auto cc = m_ir->CreateOr(is_nan, m_ir->CreateOr(is_denorm, is_neg_zero)); + //const auto lt = m_ir->CreateFCmpOLT(value, zero); + //const auto gt = m_ir->CreateFCmpOGT(value, zero); + //const auto eq = m_ir->CreateFCmpOEQ(value, zero); + //const auto un = m_ir->CreateOr(is_nan, is_inf); + //m_ir->CreateStore(cc, m_fpscr_c); + //SetFPCC(lt, gt, eq, un, set_cr); +} + +void PPUTranslator::SetFPSCR_FR(Value* value) +{ + m_ir->CreateStore(value, m_fpscr_fr); +} + +void PPUTranslator::SetFPSCR_FI(Value* value) +{ + m_ir->CreateStore(value, m_fpscr_fi); + SetFPSCRException(m_fpscr_xx, value); +} + +void PPUTranslator::SetFPSCRException(Value* ptr, Value* value) +{ + m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(ptr), value), ptr); + m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_fpscr_fx), value), m_fpscr_fx); +} + +Value* PPUTranslator::GetFPSCRBit(u32 n) +{ + if (n == 1 && m_fpscr[24]) + { + // Floating-Point Enabled Exception Summary (FEX) 24-29 + Value* value = m_ir->CreateLoad(m_fpscr[24]); + for (u32 i = 25; i <= 29; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i])); + return value; + } + + if (n == 2 && m_fpscr[7]) + { + // Floating-Point Invalid Operation Exception Summary (VX) 7-12, 21-23 + Value* value = m_ir->CreateLoad(m_fpscr[7]); + for (u32 i = 8; i <= 12; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i])); + for (u32 i = 21; i <= 23; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i])); + return value; + } + + if (n >= 32 || !m_fpscr[n]) + { + return nullptr; // ??? 
+ } + + // Get bit + const auto value = m_ir->CreateLoad(m_fpscr[n]); + + if (n == 0 || (n >= 3 && n <= 12) || (n >= 21 && n <= 23)) + { + // Clear FX or exception bits + m_ir->CreateStore(m_ir->getFalse(), m_fpscr[n]); + } + + return value; +} + +void PPUTranslator::SetFPSCRBit(u32 n, Value* value, bool update_fx) +{ + if (n >= 32 || !m_fpscr[n]) + { + //CompilationError("SetFPSCRBit(): inaccessible bit " + std::to_string(n)); + return; // ??? + } + + if (update_fx) + { + if ((n >= 3 && n <= 12) || (n >= 21 && n <= 23)) + { + // Update FX bit if necessary + m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_fpscr_fx), value), m_fpscr_fx); + } + } + + //if (n >= 24 && n <= 28) CompilationError("SetFPSCRBit: exception enable bit " + std::to_string(n)); + //if (n == 29) CompilationError("SetFPSCRBit: NI bit"); + //if (n >= 30) CompilationError("SetFPSCRBit: RN bit"); + + // Store the bit + m_ir->CreateStore(value, m_fpscr[n]); +} + +Value* PPUTranslator::GetCarry() +{ + return m_ir->CreateLoad(m_xer_ca); +} + +void PPUTranslator::SetCarry(Value* bit) +{ + m_ir->CreateStore(bit, m_xer_ca); +} + +void PPUTranslator::SetOverflow(Value* bit) +{ + m_ir->CreateStore(bit, m_xer_ov); + m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_xer_so), bit), m_xer_so); +} + +void PPUTranslator::SetSat(Value* bit) +{ + m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_vscr_sat), bit), m_vscr_sat); +} + +Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right) +{ + Value* trap_condition = m_ir->getFalse(); + if (to & 0x10) trap_condition = m_ir->CreateOr(trap_condition, m_ir->CreateICmpSLT(left, right)); + if (to & 0x8) trap_condition = m_ir->CreateOr(trap_condition, m_ir->CreateICmpSGT(left, right)); + if (to & 0x4) trap_condition = m_ir->CreateOr(trap_condition, m_ir->CreateICmpEQ(left, right)); + if (to & 0x2) trap_condition = m_ir->CreateOr(trap_condition, m_ir->CreateICmpULT(left, right)); + if (to & 0x1) trap_condition = m_ir->CreateOr(trap_condition, 
m_ir->CreateICmpUGT(left, right)); + return trap_condition; +} + +Value* PPUTranslator::Trap(u64 addr) +{ + return Call(GetType(), /*AttributeSet::get(m_context, AttributeSet::FunctionIndex, Attribute::NoReturn),*/ "__trap", m_ir->getInt64(addr)); // Emit a call to "__trap" with the address supplied by the caller (previously the parameter was ignored) +} + +Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi) +{ + const bool bo0 = (bo & 0x10) != 0; + const bool bo1 = (bo & 0x08) != 0; + const bool bo2 = (bo & 0x04) != 0; + const bool bo3 = (bo & 0x02) != 0; + + // Decrement counter if necessary + const auto ctr = bo2 ? nullptr : m_ir->CreateSub(m_ir->CreateLoad(m_reg_ctr), m_ir->getInt64(1)); + + // Store counter if necessary + if (ctr) m_ir->CreateStore(ctr, m_reg_ctr); + + // Generate counter condition + const auto use_ctr = bo2 ? nullptr : m_ir->CreateICmp(bo3 ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, ctr, m_ir->getInt64(0)); + + // Generate condition bit access + const auto use_cond = bo0 ? nullptr : bo1 ? GetCrb(bi) : m_ir->CreateNot(GetCrb(bi)); + + if (use_ctr && use_cond) + { + // Combine conditions if necessary + return m_ir->CreateAnd(use_ctr, use_cond); + } + + return use_ctr ? 
use_ctr : use_cond; +} + +bool PPUTranslator::IsStackAddr(Value* addr) +{ + // Analyse various binary ops + if (const auto bin_op = dyn_cast(addr)) + { + if (bin_op->isBinaryOp(Instruction::Add) || bin_op->isBinaryOp(Instruction::And) || bin_op->isBinaryOp(Instruction::Or) || bin_op->isBinaryOp(Instruction::Xor)) + { + return IsStackAddr(bin_op->getOperand(0)) || IsStackAddr(bin_op->getOperand(1)); + } + + if (bin_op->isBinaryOp(Instruction::Sub)) + { + return IsStackAddr(bin_op->getOperand(0)); + } + + // TODO + } + + // Detect load instruction + if (const auto load_op = dyn_cast(addr)) + { + return load_op->getOperand(0) == m_gpr[1]; + } + + return false; +} + +#endif diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h new file mode 100644 index 0000000000..253d73a0cc --- /dev/null +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -0,0 +1,830 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "../rpcs3/Emu/Cell/PPUOpcodes.h" + +#ifdef _MSC_VER +#pragma warning(push, 0) +#endif +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#include "../Utilities/types.h" +#include "../Utilities/StrFmt.h" +#include "../Utilities/Macro.h" +#include "../Utilities/BEType.h" + +template +struct TypeGen +{ + static_assert(!sizeof(T), "GetType<>() error: unknown type"); +}; + +template +struct TypeGen::value>> +{ + static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getVoidTy(context); } +}; + +template +struct TypeGen::value || std::is_same::value>> +{ + static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getInt64Ty(context); } +}; + +template +struct TypeGen::value || std::is_same::value>> +{ + static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getInt32Ty(context); } +}; + +template +struct TypeGen::value || std::is_same::value>> +{ + static llvm::Type* get(llvm::LLVMContext& context) 
{ return llvm::Type::getInt16Ty(context); } +}; + +template +struct TypeGen::value || std::is_same::value || std::is_same::value>> +{ + static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getInt8Ty(context); } +}; + +template<> +struct TypeGen +{ + static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getFloatTy(context); } +}; + +template<> +struct TypeGen +{ + static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getDoubleTy(context); } +}; + +template<> +struct TypeGen +{ + static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getInt1Ty(context); } +}; + +template<> +struct TypeGen +{ + static llvm::Type* get(llvm::LLVMContext& context) { return llvm::Type::getIntNTy(context, 128); } +}; + +// Pointer type +template +struct TypeGen +{ + static llvm::Type* get(llvm::LLVMContext& context) { return TypeGen::get(context)->getPointerTo(); } +}; + +// Vector type +template +struct TypeGen +{ + static llvm::Type* get(llvm::LLVMContext& context) { return llvm::VectorType::get(TypeGen::get(context), N); } +}; + +class PPUTranslator final //: public CPUTranslator +{ + // LLVM context + llvm::LLVMContext& m_context; + + // Module to which all generated code is output to + llvm::Module* const m_module; + + // Base address (TODO) + const u64 m_base_addr; + + // Endianness, affects vector element numbering (TODO) + const bool m_is_be; + + // Attributes for function calls which are "pure" and may be optimized away if their results are unused + const llvm::AttributeSet m_pure_attr; + + // Available functions: types (not set or nullptr for untyped) + std::unordered_map m_func_types; + + // Available functions + std::unordered_map m_func_list; + + // LLVM IR builder + llvm::IRBuilder<>* m_ir; + + // LLVM function + llvm::Function* m_function; + + // LLVM function type (may be null) + llvm::FunctionType* m_function_type; + + // Function range + u64 m_start_addr, m_end_addr, m_current_addr; + + // Basic 
blocks for current function + std::unordered_map m_blocks; + + // Supplementary block info for all functions + std::set m_block_info; + + // JT resolver block + llvm::BasicBlock* m_jtr; + + // Current binary data + be_t* m_bin{}; + + /* Variables */ + + // Explicit register usage counter + std::unordered_map m_value_usage; + + // Memory base + llvm::Value* m_base; + + // Thread context (obtained by __context) + llvm::Value* m_thread; + + // Thread context struct + llvm::StructType* m_thread_type; + + llvm::Value* m_globals[96]{}; + llvm::Value** const m_g_gpr = m_globals + 0; + llvm::Value** const m_g_fpr = m_globals + 32; + llvm::Value** const m_g_vr = m_globals + 64; + + llvm::Value* m_locals[96]{}; + llvm::Value** const m_gpr = m_locals + 0; + llvm::Value** const m_fpr = m_locals + 32; + llvm::Value** const m_vr = m_locals + 64; + + llvm::Value* m_cr[32]{}; + llvm::Value* m_reg_lr; + llvm::Value* m_reg_ctr; // CTR register (counter) + llvm::Value* m_reg_vrsave; + llvm::Value* m_xer_so; // XER.SO bit, summary overflow + llvm::Value* m_xer_ov; // XER.OV bit, overflow flag + llvm::Value* m_xer_ca; // XER.CA bit, carry flag + llvm::Value* m_xer_count; + llvm::Value* m_vscr_nj; // VSCR.NJ bit, non-Java mode + llvm::Value* m_vscr_sat; // VSCR.SAT bit, sticky saturation flag + + llvm::Value* m_fpscr[32]{}; + llvm::Value* m_fpscr_fx; // bit 32 (first) + llvm::Value* m_fpscr_ox; // bit 35 (4th) + llvm::Value* m_fpscr_ux; + llvm::Value* m_fpscr_zx; + llvm::Value* m_fpscr_xx; + llvm::Value* m_fpscr_vxsnan; + llvm::Value* m_fpscr_vxisi; + llvm::Value* m_fpscr_vxidi; + llvm::Value* m_fpscr_vxzdz; + llvm::Value* m_fpscr_vximz; + llvm::Value* m_fpscr_vxvc; + llvm::Value* m_fpscr_fr; + llvm::Value* m_fpscr_fi; + llvm::Value* m_fpscr_c; + llvm::Value* m_fpscr_lt; + llvm::Value* m_fpscr_gt; + llvm::Value* m_fpscr_eq; + llvm::Value* m_fpscr_un; + llvm::Value* m_fpscr_reserved; + llvm::Value* m_fpscr_vxsoft; + llvm::Value* m_fpscr_vxsqrt; + llvm::Value* m_fpscr_vxcvi; + 
llvm::Value* m_fpscr_ve; + llvm::Value* m_fpscr_oe; + llvm::Value* m_fpscr_ue; + llvm::Value* m_fpscr_ze; + llvm::Value* m_fpscr_xe; + llvm::Value* m_fpscr_ni; + llvm::Value* m_fpscr_rnh; // RN high bit + llvm::Value* m_fpscr_rnl; // RN low bit + +public: + + // Change integer size for integer or integer vector type (by 2^degree) + llvm::Type* ScaleType(llvm::Type*, s32 pow2 = 0); + + // Extend arg to double width with its copy + llvm::Value* DuplicateExt(llvm::Value* arg); + + // Rotate arg left by n (n must be < bitwidth) + llvm::Value* RotateLeft(llvm::Value* arg, u64 n); + + // Rotate arg left by n (n will be masked) + llvm::Value* RotateLeft(llvm::Value* arg, llvm::Value* n); + + // Emit function call + void CallFunction(u64 target, bool tail, llvm::Value* indirect = nullptr); + + // Set some registers to undef (after function call) + void UndefineVolatileRegisters(); + + // Get the basic block for the specified address + llvm::BasicBlock* GetBasicBlock(u64 addr); + + // Load GPR + llvm::Value* GetGpr(u32 r, u32 num_bits = 64); + + // Set GPR + void SetGpr(u32 r, llvm::Value* value); + + // Get FPR + llvm::Value* GetFpr(u32 r, u32 bits = 64, bool as_int = false); + + // Set FPR + void SetFpr(u32 r, llvm::Value* val); + + // Vector register type + enum class VrType + { + vi8, // i8 vector + vi16, // i16 vector + vi32, // i32 vector + vf, // f32 vector + i128, // Solid 128-bit integer + }; + + // Load VR + llvm::Value* GetVr(u32 vr, VrType); + + // Load VRs + template + std::array GetVrs(VrType type, Vrs... regs) + { + static_assert(sizeof...(Vrs), "Empty VR list"); + return{ GetVr(regs, type)... 
}; + } + + // Set VR to the specified value + void SetVr(u32 vr, llvm::Value*); + + // Bitcast to scalar integer value + llvm::Value* Solid(llvm::Value*); + + // Compare value with zero constant of appropriate size + llvm::Value* IsZero(llvm::Value*); llvm::Value* IsNotZero(llvm::Value*); + + // Compare value with all-ones constant of appropriate size + llvm::Value* IsOnes(llvm::Value*); llvm::Value* IsNotOnes(llvm::Value*); + + // Broadcast specified value + llvm::Value* Broadcast(llvm::Value* value, u32 count); + + // Saturate scalar or vector given the comparison operand and the extreme value to compare with (second result is the comparison result) + std::pair Saturate(llvm::Value* value, llvm::CmpInst::Predicate inst, llvm::Value* extreme); + + // Saturate signed value (second result is the disjunction of comparison results) + std::pair SaturateSigned(llvm::Value* value, u64 min, u64 max); + + // Multiply FP value or vector by the pow(2, scale) + llvm::Value* Scale(llvm::Value* value, s32 scale); + + // Create shuffle instruction with constant args + llvm::Value* Shuffle(llvm::Value* left, llvm::Value* right, std::initializer_list indices); + + template + std::array Shuffle(std::array left, std::array right, std::initializer_list indices) + { + for (std::size_t i = 0; i < N; i++) left[i] = Shuffle(left[i], right[i], indices); + return left; + } + + // Create sign extension (with double size if type is nullptr) + llvm::Value* SExt(llvm::Value* value, llvm::Type* = nullptr); + + template + std::array SExt(std::array values, llvm::Type* type = nullptr) + { + for (std::size_t i = 0; i < N; i++) values[i] = SExt(values[i], type); + return values; + } + + // Create zero extension (with double size if type is nullptr) + llvm::Value* ZExt(llvm::Value*, llvm::Type* = nullptr); + + template + std::array ZExt(std::array values, llvm::Type* type = nullptr) + { + for (std::size_t i = 0; i < N; i++) values[i] = ZExt(values[i], type); + return values; + } + + // Add multiple 
elements + llvm::Value* Add(std::initializer_list); + + // Create truncation (with half size if type is nullptr) + llvm::Value* Trunc(llvm::Value*, llvm::Type* = nullptr); + + // Get specified CR bit + llvm::Value* GetCrb(u32 crb); + + // Set specified CR bit + void SetCrb(u32 crb, llvm::Value* value); + + // Set CR field, if `so` value (5th arg) is nullptr, loaded from XER.SO + void SetCrField(u32 group, llvm::Value* lt, llvm::Value* gt, llvm::Value* eq, llvm::Value* so = nullptr); + + // Set CR field based on signed comparison + void SetCrFieldSignedCmp(u32 n, llvm::Value* a, llvm::Value* b); + + // Set CR field based on unsigned comparison + void SetCrFieldUnsignedCmp(u32 n, llvm::Value* a, llvm::Value* b); + + // Set FPSCR CC fields provided, optionally updating CR1 + void SetFPCC(llvm::Value* lt, llvm::Value* gt, llvm::Value* eq, llvm::Value* un, bool set_cr = false); + + // Update FPRF fields for the value, optionally updating CR1 + void SetFPRF(llvm::Value* value, bool set_cr); + + // Update FR bit + void SetFPSCR_FR(llvm::Value* value); + + // Update FI bit (and set XX exception) + void SetFPSCR_FI(llvm::Value* value); + + // Update sticky FPSCR exception bit, update FPSCR.FX + void SetFPSCRException(llvm::Value* ptr, llvm::Value* value); + + // Get FPSCR bit (exception bits are cleared) + llvm::Value* GetFPSCRBit(u32 n); + + // Set FPSCR bit + void SetFPSCRBit(u32 n, llvm::Value*, bool update_fx); + + // Get XER.CA bit + llvm::Value* GetCarry(); + + // Set XER.CA bit + void SetCarry(llvm::Value*); + + // Set XER.OV bit, and update XER.SO bit (|=) + void SetOverflow(llvm::Value*); + + // Update sticky VSCR.SAT bit (|=) + void SetSat(llvm::Value*); + + // Check condition for trap instructions + llvm::Value* CheckTrapCondition(u32 to, llvm::Value* left, llvm::Value* right); + + // Emit trap + llvm::Value* Trap(u64 addr); + + // Check condition for branch instructions + llvm::Value* CheckBranchCondition(u32 bo, u32 bi); + + // Branch to next instruction if 
condition failed, never branch on nullptr + void UseCondition(llvm::Value* = nullptr); + + // Check whether the address is stack + bool IsStackAddr(llvm::Value* addr); + + // Get memory pointer + llvm::Value* GetMemory(llvm::Value* addr, llvm::Type* type); + + // Read from memory + llvm::Value* ReadMemory(llvm::Value* addr, llvm::Type* type, bool is_be = true, u32 align = 1); + + // Write to memory + void WriteMemory(llvm::Value* addr, llvm::Value* value, bool is_be = true, u32 align = 1); + + // Convert a C++ type to an LLVM type + template + llvm::Type* GetType() + { + return TypeGen::get(m_context); + } + + // Get an undefined value with specified type + template + llvm::Value* GetUndef() + { + return llvm::UndefValue::get(GetType()); + } + + // Call a function with attribute list + template + llvm::Value* Call(llvm::Type* ret, llvm::AttributeSet attr, llvm::StringRef name, Args... args) + { + // Call the function + return m_ir->CreateCall(m_module->getOrInsertFunction(name, llvm::FunctionType::get(ret, {args->getType()...}, false), attr), {args...}); + } + + // Call a function + template + llvm::Value* Call(llvm::Type* ret, llvm::StringRef name, Args... 
args) + { + return Call(ret, llvm::AttributeSet{}, name, args...); + } + + // Handle compilation errors + void CompilationError(const std::string& error); + + PPUTranslator(llvm::LLVMContext& context, llvm::Module* module, u64 base, u64 entry); + ~PPUTranslator(); + + // Add function + void AddFunction(u64 addr, llvm::Function* func, llvm::FunctionType* type = nullptr); + + // Add block entry hint (not essential) + void AddBlockInfo(u64 addr); + + // Parses PPU opcodes and translate them into LLVM IR + llvm::Function* TranslateToIR(u64 start_addr, u64 end_addr, be_t* bin, void(*custom)(PPUTranslator*) = nullptr); + + void MFVSCR(ppu_opcode_t op); + void MTVSCR(ppu_opcode_t op); + void VADDCUW(ppu_opcode_t op); + void VADDFP(ppu_opcode_t op); + void VADDSBS(ppu_opcode_t op); + void VADDSHS(ppu_opcode_t op); + void VADDSWS(ppu_opcode_t op); + void VADDUBM(ppu_opcode_t op); + void VADDUBS(ppu_opcode_t op); + void VADDUHM(ppu_opcode_t op); + void VADDUHS(ppu_opcode_t op); + void VADDUWM(ppu_opcode_t op); + void VADDUWS(ppu_opcode_t op); + void VAND(ppu_opcode_t op); + void VANDC(ppu_opcode_t op); + void VAVGSB(ppu_opcode_t op); + void VAVGSH(ppu_opcode_t op); + void VAVGSW(ppu_opcode_t op); + void VAVGUB(ppu_opcode_t op); + void VAVGUH(ppu_opcode_t op); + void VAVGUW(ppu_opcode_t op); + void VCFSX(ppu_opcode_t op); + void VCFUX(ppu_opcode_t op); + void VCMPBFP(ppu_opcode_t op); + void VCMPEQFP(ppu_opcode_t op); + void VCMPEQUB(ppu_opcode_t op); + void VCMPEQUH(ppu_opcode_t op); + void VCMPEQUW(ppu_opcode_t op); + void VCMPGEFP(ppu_opcode_t op); + void VCMPGTFP(ppu_opcode_t op); + void VCMPGTSB(ppu_opcode_t op); + void VCMPGTSH(ppu_opcode_t op); + void VCMPGTSW(ppu_opcode_t op); + void VCMPGTUB(ppu_opcode_t op); + void VCMPGTUH(ppu_opcode_t op); + void VCMPGTUW(ppu_opcode_t op); + void VCTSXS(ppu_opcode_t op); + void VCTUXS(ppu_opcode_t op); + void VEXPTEFP(ppu_opcode_t op); + void VLOGEFP(ppu_opcode_t op); + void VMADDFP(ppu_opcode_t op); + void VMAXFP(ppu_opcode_t 
op); + void VMAXSB(ppu_opcode_t op); + void VMAXSH(ppu_opcode_t op); + void VMAXSW(ppu_opcode_t op); + void VMAXUB(ppu_opcode_t op); + void VMAXUH(ppu_opcode_t op); + void VMAXUW(ppu_opcode_t op); + void VMHADDSHS(ppu_opcode_t op); + void VMHRADDSHS(ppu_opcode_t op); + void VMINFP(ppu_opcode_t op); + void VMINSB(ppu_opcode_t op); + void VMINSH(ppu_opcode_t op); + void VMINSW(ppu_opcode_t op); + void VMINUB(ppu_opcode_t op); + void VMINUH(ppu_opcode_t op); + void VMINUW(ppu_opcode_t op); + void VMLADDUHM(ppu_opcode_t op); + void VMRGHB(ppu_opcode_t op); + void VMRGHH(ppu_opcode_t op); + void VMRGHW(ppu_opcode_t op); + void VMRGLB(ppu_opcode_t op); + void VMRGLH(ppu_opcode_t op); + void VMRGLW(ppu_opcode_t op); + void VMSUMMBM(ppu_opcode_t op); + void VMSUMSHM(ppu_opcode_t op); + void VMSUMSHS(ppu_opcode_t op); + void VMSUMUBM(ppu_opcode_t op); + void VMSUMUHM(ppu_opcode_t op); + void VMSUMUHS(ppu_opcode_t op); + void VMULESB(ppu_opcode_t op); + void VMULESH(ppu_opcode_t op); + void VMULEUB(ppu_opcode_t op); + void VMULEUH(ppu_opcode_t op); + void VMULOSB(ppu_opcode_t op); + void VMULOSH(ppu_opcode_t op); + void VMULOUB(ppu_opcode_t op); + void VMULOUH(ppu_opcode_t op); + void VNMSUBFP(ppu_opcode_t op); + void VNOR(ppu_opcode_t op); + void VOR(ppu_opcode_t op); + void VPERM(ppu_opcode_t op); + void VPKPX(ppu_opcode_t op); + void VPKSHSS(ppu_opcode_t op); + void VPKSHUS(ppu_opcode_t op); + void VPKSWSS(ppu_opcode_t op); + void VPKSWUS(ppu_opcode_t op); + void VPKUHUM(ppu_opcode_t op); + void VPKUHUS(ppu_opcode_t op); + void VPKUWUM(ppu_opcode_t op); + void VPKUWUS(ppu_opcode_t op); + void VREFP(ppu_opcode_t op); + void VRFIM(ppu_opcode_t op); + void VRFIN(ppu_opcode_t op); + void VRFIP(ppu_opcode_t op); + void VRFIZ(ppu_opcode_t op); + void VRLB(ppu_opcode_t op); + void VRLH(ppu_opcode_t op); + void VRLW(ppu_opcode_t op); + void VRSQRTEFP(ppu_opcode_t op); + void VSEL(ppu_opcode_t op); + void VSL(ppu_opcode_t op); + void VSLB(ppu_opcode_t op); + void 
VSLDOI(ppu_opcode_t op); + void VSLH(ppu_opcode_t op); + void VSLO(ppu_opcode_t op); + void VSLW(ppu_opcode_t op); + void VSPLTB(ppu_opcode_t op); + void VSPLTH(ppu_opcode_t op); + void VSPLTISB(ppu_opcode_t op); + void VSPLTISH(ppu_opcode_t op); + void VSPLTISW(ppu_opcode_t op); + void VSPLTW(ppu_opcode_t op); + void VSR(ppu_opcode_t op); + void VSRAB(ppu_opcode_t op); + void VSRAH(ppu_opcode_t op); + void VSRAW(ppu_opcode_t op); + void VSRB(ppu_opcode_t op); + void VSRH(ppu_opcode_t op); + void VSRO(ppu_opcode_t op); + void VSRW(ppu_opcode_t op); + void VSUBCUW(ppu_opcode_t op); + void VSUBFP(ppu_opcode_t op); + void VSUBSBS(ppu_opcode_t op); + void VSUBSHS(ppu_opcode_t op); + void VSUBSWS(ppu_opcode_t op); + void VSUBUBM(ppu_opcode_t op); + void VSUBUBS(ppu_opcode_t op); + void VSUBUHM(ppu_opcode_t op); + void VSUBUHS(ppu_opcode_t op); + void VSUBUWM(ppu_opcode_t op); + void VSUBUWS(ppu_opcode_t op); + void VSUMSWS(ppu_opcode_t op); + void VSUM2SWS(ppu_opcode_t op); + void VSUM4SBS(ppu_opcode_t op); + void VSUM4SHS(ppu_opcode_t op); + void VSUM4UBS(ppu_opcode_t op); + void VUPKHPX(ppu_opcode_t op); + void VUPKHSB(ppu_opcode_t op); + void VUPKHSH(ppu_opcode_t op); + void VUPKLPX(ppu_opcode_t op); + void VUPKLSB(ppu_opcode_t op); + void VUPKLSH(ppu_opcode_t op); + void VXOR(ppu_opcode_t op); + + void TDI(ppu_opcode_t op); + void TWI(ppu_opcode_t op); + void MULLI(ppu_opcode_t op); + void SUBFIC(ppu_opcode_t op); + void CMPLI(ppu_opcode_t op); + void CMPI(ppu_opcode_t op); + void ADDIC(ppu_opcode_t op); + void ADDI(ppu_opcode_t op); + void ADDIS(ppu_opcode_t op); + void BC(ppu_opcode_t op); + void HACK(ppu_opcode_t op); + void SC(ppu_opcode_t op); + void B(ppu_opcode_t op); + void MCRF(ppu_opcode_t op); + void BCLR(ppu_opcode_t op); + void CRNOR(ppu_opcode_t op); + void CRANDC(ppu_opcode_t op); + void ISYNC(ppu_opcode_t op); + void CRXOR(ppu_opcode_t op); + void CRNAND(ppu_opcode_t op); + void CRAND(ppu_opcode_t op); + void CREQV(ppu_opcode_t op); + void 
CRORC(ppu_opcode_t op); + void CROR(ppu_opcode_t op); + void BCCTR(ppu_opcode_t op); + void RLWIMI(ppu_opcode_t op); + void RLWINM(ppu_opcode_t op); + void RLWNM(ppu_opcode_t op); + void ORI(ppu_opcode_t op); + void ORIS(ppu_opcode_t op); + void XORI(ppu_opcode_t op); + void XORIS(ppu_opcode_t op); + void ANDI(ppu_opcode_t op); + void ANDIS(ppu_opcode_t op); + void RLDICL(ppu_opcode_t op); + void RLDICR(ppu_opcode_t op); + void RLDIC(ppu_opcode_t op); + void RLDIMI(ppu_opcode_t op); + void RLDCL(ppu_opcode_t op); + void RLDCR(ppu_opcode_t op); + void CMP(ppu_opcode_t op); + void TW(ppu_opcode_t op); + void LVSL(ppu_opcode_t op); + void LVEBX(ppu_opcode_t op); + void SUBFC(ppu_opcode_t op); + void MULHDU(ppu_opcode_t op); + void ADDC(ppu_opcode_t op); + void MULHWU(ppu_opcode_t op); + void MFOCRF(ppu_opcode_t op); + void LWARX(ppu_opcode_t op); + void LDX(ppu_opcode_t op); + void LWZX(ppu_opcode_t op); + void SLW(ppu_opcode_t op); + void CNTLZW(ppu_opcode_t op); + void SLD(ppu_opcode_t op); + void AND(ppu_opcode_t op); + void CMPL(ppu_opcode_t op); + void LVSR(ppu_opcode_t op); + void LVEHX(ppu_opcode_t op); + void SUBF(ppu_opcode_t op); + void LDUX(ppu_opcode_t op); + void DCBST(ppu_opcode_t op); + void LWZUX(ppu_opcode_t op); + void CNTLZD(ppu_opcode_t op); + void ANDC(ppu_opcode_t op); + void TD(ppu_opcode_t op); + void LVEWX(ppu_opcode_t op); + void MULHD(ppu_opcode_t op); + void MULHW(ppu_opcode_t op); + void LDARX(ppu_opcode_t op); + void DCBF(ppu_opcode_t op); + void LBZX(ppu_opcode_t op); + void LVX(ppu_opcode_t op); + void NEG(ppu_opcode_t op); + void LBZUX(ppu_opcode_t op); + void NOR(ppu_opcode_t op); + void STVEBX(ppu_opcode_t op); + void SUBFE(ppu_opcode_t op); + void ADDE(ppu_opcode_t op); + void MTOCRF(ppu_opcode_t op); + void STDX(ppu_opcode_t op); + void STWCX(ppu_opcode_t op); + void STWX(ppu_opcode_t op); + void STVEHX(ppu_opcode_t op); + void STDUX(ppu_opcode_t op); + void STWUX(ppu_opcode_t op); + void STVEWX(ppu_opcode_t op); + void 
SUBFZE(ppu_opcode_t op); + void ADDZE(ppu_opcode_t op); + void STDCX(ppu_opcode_t op); + void STBX(ppu_opcode_t op); + void STVX(ppu_opcode_t op); + void MULLD(ppu_opcode_t op); + void SUBFME(ppu_opcode_t op); + void ADDME(ppu_opcode_t op); + void MULLW(ppu_opcode_t op); + void DCBTST(ppu_opcode_t op); + void STBUX(ppu_opcode_t op); + void ADD(ppu_opcode_t op); + void DCBT(ppu_opcode_t op); + void LHZX(ppu_opcode_t op); + void EQV(ppu_opcode_t op); + void ECIWX(ppu_opcode_t op); + void LHZUX(ppu_opcode_t op); + void XOR(ppu_opcode_t op); + void MFSPR(ppu_opcode_t op); + void LWAX(ppu_opcode_t op); + void DST(ppu_opcode_t op); + void LHAX(ppu_opcode_t op); + void LVXL(ppu_opcode_t op); + void MFTB(ppu_opcode_t op); + void LWAUX(ppu_opcode_t op); + void DSTST(ppu_opcode_t op); + void LHAUX(ppu_opcode_t op); + void STHX(ppu_opcode_t op); + void ORC(ppu_opcode_t op); + void ECOWX(ppu_opcode_t op); + void STHUX(ppu_opcode_t op); + void OR(ppu_opcode_t op); + void DIVDU(ppu_opcode_t op); + void DIVWU(ppu_opcode_t op); + void MTSPR(ppu_opcode_t op); + void DCBI(ppu_opcode_t op); + void NAND(ppu_opcode_t op); + void STVXL(ppu_opcode_t op); + void DIVD(ppu_opcode_t op); + void DIVW(ppu_opcode_t op); + void LVLX(ppu_opcode_t op); + void LDBRX(ppu_opcode_t op); + void LSWX(ppu_opcode_t op); + void LWBRX(ppu_opcode_t op); + void LFSX(ppu_opcode_t op); + void SRW(ppu_opcode_t op); + void SRD(ppu_opcode_t op); + void LVRX(ppu_opcode_t op); + void LSWI(ppu_opcode_t op); + void LFSUX(ppu_opcode_t op); + void SYNC(ppu_opcode_t op); + void LFDX(ppu_opcode_t op); + void LFDUX(ppu_opcode_t op); + void STVLX(ppu_opcode_t op); + void STDBRX(ppu_opcode_t op); + void STSWX(ppu_opcode_t op); + void STWBRX(ppu_opcode_t op); + void STFSX(ppu_opcode_t op); + void STVRX(ppu_opcode_t op); + void STFSUX(ppu_opcode_t op); + void STSWI(ppu_opcode_t op); + void STFDX(ppu_opcode_t op); + void STFDUX(ppu_opcode_t op); + void LVLXL(ppu_opcode_t op); + void LHBRX(ppu_opcode_t op); + void 
SRAW(ppu_opcode_t op); + void SRAD(ppu_opcode_t op); + void LVRXL(ppu_opcode_t op); + void DSS(ppu_opcode_t op); + void SRAWI(ppu_opcode_t op); + void SRADI(ppu_opcode_t op); + void EIEIO(ppu_opcode_t op); + void STVLXL(ppu_opcode_t op); + void STHBRX(ppu_opcode_t op); + void EXTSH(ppu_opcode_t op); + void STVRXL(ppu_opcode_t op); + void EXTSB(ppu_opcode_t op); + void STFIWX(ppu_opcode_t op); + void EXTSW(ppu_opcode_t op); + void ICBI(ppu_opcode_t op); + void DCBZ(ppu_opcode_t op); + void LWZ(ppu_opcode_t op); + void LWZU(ppu_opcode_t op); + void LBZ(ppu_opcode_t op); + void LBZU(ppu_opcode_t op); + void STW(ppu_opcode_t op); + void STWU(ppu_opcode_t op); + void STB(ppu_opcode_t op); + void STBU(ppu_opcode_t op); + void LHZ(ppu_opcode_t op); + void LHZU(ppu_opcode_t op); + void LHA(ppu_opcode_t op); + void LHAU(ppu_opcode_t op); + void STH(ppu_opcode_t op); + void STHU(ppu_opcode_t op); + void LMW(ppu_opcode_t op); + void STMW(ppu_opcode_t op); + void LFS(ppu_opcode_t op); + void LFSU(ppu_opcode_t op); + void LFD(ppu_opcode_t op); + void LFDU(ppu_opcode_t op); + void STFS(ppu_opcode_t op); + void STFSU(ppu_opcode_t op); + void STFD(ppu_opcode_t op); + void STFDU(ppu_opcode_t op); + void LD(ppu_opcode_t op); + void LDU(ppu_opcode_t op); + void LWA(ppu_opcode_t op); + void STD(ppu_opcode_t op); + void STDU(ppu_opcode_t op); + + void FDIVS(ppu_opcode_t op); + void FSUBS(ppu_opcode_t op); + void FADDS(ppu_opcode_t op); + void FSQRTS(ppu_opcode_t op); + void FRES(ppu_opcode_t op); + void FMULS(ppu_opcode_t op); + void FMADDS(ppu_opcode_t op); + void FMSUBS(ppu_opcode_t op); + void FNMSUBS(ppu_opcode_t op); + void FNMADDS(ppu_opcode_t op); + void MTFSB1(ppu_opcode_t op); + void MCRFS(ppu_opcode_t op); + void MTFSB0(ppu_opcode_t op); + void MTFSFI(ppu_opcode_t op); + void MFFS(ppu_opcode_t op); + void MTFSF(ppu_opcode_t op); + void FCMPU(ppu_opcode_t op); + void FRSP(ppu_opcode_t op); + void FCTIW(ppu_opcode_t op); + void FCTIWZ(ppu_opcode_t op); + void FDIV(ppu_opcode_t 
op); + void FSUB(ppu_opcode_t op); + void FADD(ppu_opcode_t op); + void FSQRT(ppu_opcode_t op); + void FSEL(ppu_opcode_t op); + void FMUL(ppu_opcode_t op); + void FRSQRTE(ppu_opcode_t op); + void FMSUB(ppu_opcode_t op); + void FMADD(ppu_opcode_t op); + void FNMSUB(ppu_opcode_t op); + void FNMADD(ppu_opcode_t op); + void FCMPO(ppu_opcode_t op); + void FNEG(ppu_opcode_t op); + void FMR(ppu_opcode_t op); + void FNABS(ppu_opcode_t op); + void FABS(ppu_opcode_t op); + void FCTID(ppu_opcode_t op); + void FCTIDZ(ppu_opcode_t op); + void FCFID(ppu_opcode_t op); + + void UNK(ppu_opcode_t op); +}; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index b339abedf5..a38fc75b86 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -192,6 +192,12 @@ void SPUThread::cpu_task() return custom_task(*this); } + if (g_cfg_spu_decoder.get() == spu_decoder_type::asmjit) + { + if (!spu_db) spu_db = fxm::get_always(); + return spu_recompiler_base::enter(*this); + } + g_tls_log_prefix = [] { const auto cpu = static_cast(get_current_cpu_thread()); @@ -199,12 +205,6 @@ void SPUThread::cpu_task() return fmt::format("%s [0x%05x]", cpu->get_name(), cpu->pc); }; - if (g_cfg_spu_decoder.get() == spu_decoder_type::asmjit) - { - if (!spu_db) spu_db = fxm::get_always(); - return spu_recompiler_base::enter(*this); - } - // Select opcode table const auto& table = *( g_cfg_spu_decoder.get() == spu_decoder_type::precise ? 
&s_spu_interpreter_precise.get_table() : diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 7a1788939b..0c4ae498c0 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -56,11 +56,7 @@ LOG_CHANNEL(sys_vm); extern std::string ppu_get_syscall_name(u64 code); -static void null_func(PPUThread& ppu) -{ - LOG_TODO(HLE, "Unimplemented syscall %s -> CELL_OK", ppu_get_syscall_name(ppu.GPR[11])); - ppu.GPR[3] = 0; -} +static constexpr ppu_function_t null_func = nullptr; // UNS = Unused // ROOT = Root @@ -920,23 +916,28 @@ extern void ppu_execute_syscall(PPUThread& ppu, u64 code) } // If autopause occures, check_status() will hold the thread till unpaused. - if (debug::autopause::pause_syscall(code) && ppu.check_status()) throw cpu_state::ret; + if (debug::autopause::pause_syscall(code) && ppu.check_status()) + { + throw cpu_state::ret; + } const auto previous_function = ppu.last_function; // TODO: use gsl::finally or something try { - g_ppu_syscall_table[code](ppu); - } - catch (EmulationStopped) - { - LOG_WARNING(PPU, "Syscall '%s' (%llu) aborted", ppu_get_syscall_name(code), code); - ppu.last_function = previous_function; - throw; + if (auto func = g_ppu_syscall_table[code]) + { + func(ppu); + } + else + { + LOG_TODO(HLE, "Unimplemented syscall %s -> CELL_OK", ppu_get_syscall_name(code)); + ppu.GPR[3] = 0; + } } catch (...) { - LOG_ERROR(PPU, "Syscall '%s' (%llu) aborted", ppu_get_syscall_name(code), code); + logs::PPU.format(Emu.IsStopped() ? 
logs::level::warning : logs::level::error, "Syscall '%s' (%llu) aborted", ppu_get_syscall_name(code), code); ppu.last_function = previous_function; throw; } diff --git a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp index d269c71245..f191aa2dba 100644 --- a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp @@ -13,9 +13,7 @@ logs::channel sys_ppu_thread("sys_ppu_thread", logs::level::notice); void _sys_ppu_thread_exit(PPUThread& ppu, u64 errorcode) { - sys_ppu_thread.trace("_sys_ppu_thread_exit(errorcode=0x%llx)", errorcode); - - LV2_LOCK; + sys_ppu_thread.warning("_sys_ppu_thread_exit(errorcode=0x%llx)", errorcode); // TODO: Should we really unlock mutexes? @@ -29,13 +27,17 @@ void _sys_ppu_thread_exit(PPUThread& ppu, u64 errorcode) // } //} - ppu.state += cpu_state::exit; - //ppu.handle_interrupt(); - - // Delete detached thread - if (!ppu.is_joinable) { - idm::remove(ppu.id); + LV2_LOCK; + + ppu.state += cpu_state::exit; + //ppu.handle_interrupt(); + + // Delete detached thread + if (!ppu.is_joinable) + { + idm::remove(ppu.id); + } } // Throw if this syscall was not called directly by the SC instruction (hack) diff --git a/rpcs3/Emu/Cell/lv2/sys_prx.h b/rpcs3/Emu/Cell/lv2/sys_prx.h index 7893e7918a..f9dc8578c8 100644 --- a/rpcs3/Emu/Cell/lv2/sys_prx.h +++ b/rpcs3/Emu/Cell/lv2/sys_prx.h @@ -79,6 +79,7 @@ struct lv2_prx_t bool is_started = false; std::unordered_map specials; + std::vector> func; vm::ptr argv)> start = vm::null; vm::ptr argv)> stop = vm::null; diff --git a/rpcs3/Emu/Memory/vm_ptr.h b/rpcs3/Emu/Memory/vm_ptr.h index f5cff7fc3b..c2e96b46b5 100644 --- a/rpcs3/Emu/Memory/vm_ptr.h +++ b/rpcs3/Emu/Memory/vm_ptr.h @@ -178,7 +178,7 @@ namespace vm _ptr_base operator --(int) { - _ptr_base result = m_addr; + _ptr_base result = *this; m_addr = vm::cast(m_addr, HERE) - SIZE_32(T); return result; } diff --git a/rpcs3/Emu/PSP2/ARMv7Module.cpp b/rpcs3/Emu/PSP2/ARMv7Module.cpp index 
69d4802e77..bf61a8fec4 100644 --- a/rpcs3/Emu/PSP2/ARMv7Module.cpp +++ b/rpcs3/Emu/PSP2/ARMv7Module.cpp @@ -91,15 +91,9 @@ extern void arm_execute_function(ARMv7Thread& cpu, u32 index) { func(cpu); } - catch (EmulationStopped) - { - LOG_WARNING(ARMv7, "Function '%s' aborted", cpu.last_function); - cpu.last_function = previous_function; - throw; - } catch (...) { - LOG_ERROR(ARMv7, "Function '%s' aborted", cpu.last_function); + logs::ARMv7.format(Emu.IsStopped() ? logs::level::warning : logs::level::error, "Function '%s' aborted", cpu.last_function); cpu.last_function = previous_function; throw; } diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 4db13bc6fd..98ba74fb37 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -395,7 +395,10 @@ void Emulator::Stop() idm::select([](u32, cpu_thread& cpu) { cpu.state += cpu_state::dbg_global_stop; - cpu->lock_notify(); + cpu->lock(); + cpu->set_exception(std::make_exception_ptr(EmulationStopped())); + cpu->unlock(); + cpu->notify(); }); } diff --git a/rpcs3/Gui/SettingsDialog.cpp b/rpcs3/Gui/SettingsDialog.cpp index 95226fcf50..294b086415 100644 --- a/rpcs3/Gui/SettingsDialog.cpp +++ b/rpcs3/Gui/SettingsDialog.cpp @@ -340,7 +340,6 @@ SettingsDialog::SettingsDialog(wxWindow* parent) radiobox_pad_helper ppu_decoder_modes({ "Core", "PPU Decoder" }); rbox_ppu_decoder = new wxRadioBox(p_core, wxID_ANY, "PPU Decoder", wxDefaultPosition, wxSize(-1, -1), ppu_decoder_modes, 1); pads.emplace_back(std::make_unique(std::move(ppu_decoder_modes), rbox_ppu_decoder)); - rbox_ppu_decoder->Enable(2, false); // TODO radiobox_pad_helper spu_decoder_modes({ "Core", "SPU Decoder" }); rbox_spu_decoder = new wxRadioBox(p_core, wxID_ANY, "SPU Decoder", wxDefaultPosition, wxSize(-1, -1), spu_decoder_modes, 1); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 9976034e61..e676f88c2b 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -114,6 +114,12 @@ + + NotUsing + + + NotUsing + @@ -425,6 +431,8 
@@ + + @@ -664,4 +672,4 @@ - \ No newline at end of file + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 8dbc1cf8dd..6cb640edc1 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -641,6 +641,12 @@ Emu\Cell + + Emu\Cell + + + Emu\CPU + Emu\Cell\Modules @@ -1399,6 +1405,12 @@ Emu\Cell + + Emu\Cell + + + Emu\CPU + Header Files diff --git a/rpcs3/rpcs3.cpp b/rpcs3/rpcs3.cpp index 260f612a2c..e645e2d82c 100644 --- a/rpcs3/rpcs3.cpp +++ b/rpcs3/rpcs3.cpp @@ -34,11 +34,10 @@ #include "Emu/Audio/Null/NullAudioThread.h" #include "Emu/Audio/AL/OpenALThread.h" #ifdef _MSC_VER -#include "Emu/RSX/VK/VKGSRender.h" #include "Emu/RSX/D3D12/D3D12GSRender.h" #endif - #ifdef _WIN32 +#include "Emu/RSX/VK/VKGSRender.h" #include "Emu/Audio/XAudio2/XAudio2Thread.h" #include #endif @@ -97,6 +96,8 @@ cfg::map_entry()>> g_cfg_gs_render(cfg:: { "OpenGL", PURE_EXPR(std::make_shared()) }, #ifdef _MSC_VER { "D3D12", PURE_EXPR(std::make_shared()) }, +#endif +#ifdef _WIN32 { "Vulkan", PURE_EXPR(std::make_shared()) }, #endif }); diff --git a/rpcs3/stdafx.h b/rpcs3/stdafx.h index 95857b0a30..4517df7b61 100644 --- a/rpcs3/stdafx.h +++ b/rpcs3/stdafx.h @@ -8,9 +8,9 @@ #define NOMINMAX -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif +//#ifndef __STDC_CONSTANT_MACROS +//#define __STDC_CONSTANT_MACROS +//#endif #if defined(MSVC_CRT_MEMLEAK_DETECTION) && defined(_DEBUG) && !defined(DBG_NEW) #define DBG_NEW new ( _NORMAL_BLOCK , __FILE__ , __LINE__ ) diff --git a/rpcs3_llvm.props b/rpcs3_llvm.props index cd5c07532a..6dde5d8d9d 100644 --- a/rpcs3_llvm.props +++ b/rpcs3_llvm.props @@ -10,7 +10,7 @@ ..\llvm_build\Debug\lib ..\llvm_build\Release\lib - 
LLVMMCJIT.lib;LLVMRuntimeDyld.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMipa.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;LLVMMCDisassembler.lib + LLVMProfileData.lib;LLVMDebugInfoCodeView.lib;LLVMInstrumentation.lib;LLVMMCJIT.lib;LLVMRuntimeDyld.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;LLVMMCDisassembler.lib;LLVMipo.lib