diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 19a725702c..39a9d01bf0 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -8,7 +8,7 @@ #define IS_LE_MACHINE // only draft -union u128 +union v128 { u64 _u64[2]; s64 _s64[2]; @@ -171,22 +171,22 @@ union u128 } _bit; - static u128 from64(u64 _0, u64 _1 = 0) + static v128 from64(u64 _0, u64 _1 = 0) { - u128 ret; + v128 ret; ret._u64[0] = _0; ret._u64[1] = _1; return ret; } - static u128 from64r(u64 _1, u64 _0 = 0) + static v128 from64r(u64 _1, u64 _0 = 0) { return from64(_0, _1); } - static u128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0) + static v128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0) { - u128 ret; + v128 ret; ret._u32[0] = _0; ret._u32[1] = _1; ret._u32[2] = _2; @@ -194,141 +194,141 @@ union u128 return ret; } - static u128 from32r(u32 _3, u32 _2 = 0, u32 _1 = 0, u32 _0 = 0) + static v128 from32r(u32 _3, u32 _2 = 0, u32 _1 = 0, u32 _0 = 0) { return from32(_0, _1, _2, _3); } - static u128 from32p(u32 value) + static v128 from32p(u32 value) { - u128 ret; + v128 ret; ret.vi = _mm_set1_epi32(static_cast(value)); return ret; } - static u128 from16p(u16 value) + static v128 from16p(u16 value) { - u128 ret; + v128 ret; ret.vi = _mm_set1_epi16(static_cast(value)); return ret; } - static u128 from8p(u8 value) + static v128 from8p(u8 value) { - u128 ret; + v128 ret; ret.vi = _mm_set1_epi8(static_cast(value)); return ret; } - static u128 fromBit(u32 bit) + static v128 fromBit(u32 bit) { - u128 ret = {}; + v128 ret = {}; ret._bit[bit] = true; return ret; } - static u128 fromV(__m128i value) + static v128 fromV(__m128i value) { - u128 ret; + v128 ret; ret.vi = value; return ret; } - static u128 fromF(__m128 value) + static v128 fromF(__m128 value) { - u128 ret; + v128 ret; ret.vf = value; return ret; } - static u128 fromD(__m128d value) + static v128 fromD(__m128d value) { - u128 ret; + v128 ret; ret.vd = value; return ret; } - static force_inline u128 add8(const u128& left, const u128& 
right) + static force_inline v128 add8(const v128& left, const v128& right) { return fromV(_mm_add_epi8(left.vi, right.vi)); } - static force_inline u128 add16(const u128& left, const u128& right) + static force_inline v128 add16(const v128& left, const v128& right) { return fromV(_mm_add_epi16(left.vi, right.vi)); } - static force_inline u128 add32(const u128& left, const u128& right) + static force_inline v128 add32(const v128& left, const v128& right) { return fromV(_mm_add_epi32(left.vi, right.vi)); } - static force_inline u128 addfs(const u128& left, const u128& right) + static force_inline v128 addfs(const v128& left, const v128& right) { return fromF(_mm_add_ps(left.vf, right.vf)); } - static force_inline u128 addfd(const u128& left, const u128& right) + static force_inline v128 addfd(const v128& left, const v128& right) { return fromD(_mm_add_pd(left.vd, right.vd)); } - static force_inline u128 sub8(const u128& left, const u128& right) + static force_inline v128 sub8(const v128& left, const v128& right) { return fromV(_mm_sub_epi8(left.vi, right.vi)); } - static force_inline u128 sub16(const u128& left, const u128& right) + static force_inline v128 sub16(const v128& left, const v128& right) { return fromV(_mm_sub_epi16(left.vi, right.vi)); } - static force_inline u128 sub32(const u128& left, const u128& right) + static force_inline v128 sub32(const v128& left, const v128& right) { return fromV(_mm_sub_epi32(left.vi, right.vi)); } - static force_inline u128 subfs(const u128& left, const u128& right) + static force_inline v128 subfs(const v128& left, const v128& right) { return fromF(_mm_sub_ps(left.vf, right.vf)); } - static force_inline u128 subfd(const u128& left, const u128& right) + static force_inline v128 subfd(const v128& left, const v128& right) { return fromD(_mm_sub_pd(left.vd, right.vd)); } - static force_inline u128 maxu8(const u128& left, const u128& right) + static force_inline v128 maxu8(const v128& left, const v128& right) { return 
fromV(_mm_max_epu8(left.vi, right.vi)); } - static force_inline u128 minu8(const u128& left, const u128& right) + static force_inline v128 minu8(const v128& left, const v128& right) { return fromV(_mm_min_epu8(left.vi, right.vi)); } - static force_inline u128 eq8(const u128& left, const u128& right) + static force_inline v128 eq8(const v128& left, const v128& right) { return fromV(_mm_cmpeq_epi8(left.vi, right.vi)); } - static force_inline u128 eq16(const u128& left, const u128& right) + static force_inline v128 eq16(const v128& left, const v128& right) { return fromV(_mm_cmpeq_epi16(left.vi, right.vi)); } - static force_inline u128 eq32(const u128& left, const u128& right) + static force_inline v128 eq32(const v128& left, const v128& right) { return fromV(_mm_cmpeq_epi32(left.vi, right.vi)); } - bool operator == (const u128& right) const + bool operator == (const v128& right) const { return (_u64[0] == right._u64[0]) && (_u64[1] == right._u64[1]); } - bool operator != (const u128& right) const + bool operator != (const v128& right) const { return (_u64[0] != right._u64[0]) || (_u64[1] != right._u64[1]); } @@ -344,7 +344,7 @@ union u128 } // result = (~left) & (right) - static force_inline u128 andnot(const u128& left, const u128& right) + static force_inline v128 andnot(const v128& left, const v128& right) { return fromV(_mm_andnot_si128(left.vi, right.vi)); } @@ -358,46 +358,46 @@ union u128 std::string to_xyzw() const; - static force_inline u128 byteswap(const u128 val) + static force_inline v128 byteswap(const v128 val) { return fromV(_mm_shuffle_epi8(val.vi, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))); } }; -CHECK_SIZE_ALIGN(u128, 16, 16); +CHECK_SIZE_ALIGN(v128, 16, 16); -inline u128 operator |(const u128& left, const u128& right) +inline v128 operator |(const v128& left, const v128& right) { - return u128::fromV(_mm_or_si128(left.vi, right.vi)); + return v128::fromV(_mm_or_si128(left.vi, right.vi)); } -inline u128 operator &(const 
u128& left, const u128& right) +inline v128 operator &(const v128& left, const v128& right) { - return u128::fromV(_mm_and_si128(left.vi, right.vi)); + return v128::fromV(_mm_and_si128(left.vi, right.vi)); } -inline u128 operator ^(const u128& left, const u128& right) +inline v128 operator ^(const v128& left, const v128& right) { - return u128::fromV(_mm_xor_si128(left.vi, right.vi)); + return v128::fromV(_mm_xor_si128(left.vi, right.vi)); } -inline u128 operator ~(const u128& other) +inline v128 operator ~(const v128& other) { - return u128::from64(~other._u64[0], ~other._u64[1]); + return v128::from64(~other._u64[0], ~other._u64[1]); } -static force_inline u128 sync_val_compare_and_swap(volatile u128* dest, u128 comp, u128 exch) +static force_inline v128 sync_val_compare_and_swap(volatile v128* dest, v128 comp, v128 exch) { #if !defined(_MSC_VER) auto res = __sync_val_compare_and_swap((volatile __int128_t*)dest, (__int128_t&)comp, (__int128_t&)exch); - return (u128&)res; + return (v128&)res; #else _InterlockedCompareExchange128((volatile long long*)dest, exch._u64[1], exch._u64[0], (long long*)&comp); return comp; #endif } -static force_inline bool sync_bool_compare_and_swap(volatile u128* dest, u128 comp, u128 exch) +static force_inline bool sync_bool_compare_and_swap(volatile v128* dest, v128 comp, v128 exch) { #if !defined(_MSC_VER) return __sync_bool_compare_and_swap((volatile __int128_t*)dest, (__int128_t&)comp, (__int128_t&)exch); @@ -406,38 +406,38 @@ static force_inline bool sync_bool_compare_and_swap(volatile u128* dest, u128 co #endif } -static force_inline u128 sync_lock_test_and_set(volatile u128* dest, u128 value) +static force_inline v128 sync_lock_test_and_set(volatile v128* dest, v128 value) { while (true) { - const u128 old = *(u128*)dest; + const v128 old = *(v128*)dest; if (sync_bool_compare_and_swap(dest, old, value)) return old; } } -static force_inline u128 sync_fetch_and_or(volatile u128* dest, u128 value) +static force_inline v128 
sync_fetch_and_or(volatile v128* dest, v128 value) { while (true) { - const u128 old = *(u128*)dest; + const v128 old = *(v128*)dest; if (sync_bool_compare_and_swap(dest, old, value | old)) return old; } } -static force_inline u128 sync_fetch_and_and(volatile u128* dest, u128 value) +static force_inline v128 sync_fetch_and_and(volatile v128* dest, v128 value) { while (true) { - const u128 old = *(u128*)dest; + const v128 old = *(v128*)dest; if (sync_bool_compare_and_swap(dest, old, value & old)) return old; } } -static force_inline u128 sync_fetch_and_xor(volatile u128* dest, u128 value) +static force_inline v128 sync_fetch_and_xor(volatile v128* dest, v128 value) { while (true) { - const u128 old = *(u128*)dest; + const v128 old = *(v128*)dest; if (sync_bool_compare_and_swap(dest, old, value ^ old)) return old; } } @@ -488,14 +488,14 @@ template struct se_t template struct se_t { - static force_inline u128 to(const T& src) + static force_inline v128 to(const T& src) { - return u128::byteswap((u128&)src); + return v128::byteswap((v128&)src); } - static force_inline T from(const u128& src) + static force_inline T from(const v128& src) { - const u128 res = u128::byteswap(src); + const v128 res = v128::byteswap(src); return (T&)res; } }; @@ -553,7 +553,7 @@ template struct be_storage template struct be_storage { - using type = u128; + using type = v128; }; template using be_storage_t = typename be_storage::type; @@ -602,7 +602,7 @@ public: #endif static_assert(!std::is_class::value, "be_t<> error: invalid type (class or structure)"); - static_assert(!std::is_union::value || std::is_same::value, "be_t<> error: invalid type (union)"); + static_assert(!std::is_union::value || std::is_same::value, "be_t<> error: invalid type (union)"); static_assert(!std::is_pointer::value, "be_t<> error: invalid type (pointer)"); static_assert(!std::is_reference::value, "be_t<> error: invalid type (reference)"); static_assert(!std::is_array::value, "be_t<> error: invalid type (array)"); 
@@ -748,7 +748,7 @@ template struct is_be_t : public std::integral_constant< // to_be_t helper struct template struct to_be { - using type = std::conditional_t::value || std::is_enum::value || std::is_same::value, be_t, T>; + using type = std::conditional_t::value || std::is_enum::value || std::is_same::value, be_t, T>; }; // be_t if possible, T otherwise @@ -781,7 +781,7 @@ public: type m_data; // don't access directly static_assert(!std::is_class::value, "le_t<> error: invalid type (class or structure)"); - static_assert(!std::is_union::value || std::is_same::value, "le_t<> error: invalid type (union)"); + static_assert(!std::is_union::value || std::is_same::value, "le_t<> error: invalid type (union)"); static_assert(!std::is_pointer::value, "le_t<> error: invalid type (pointer)"); static_assert(!std::is_reference::value, "le_t<> error: invalid type (reference)"); static_assert(!std::is_array::value, "le_t<> error: invalid type (array)"); @@ -863,7 +863,7 @@ template struct is_le_t : public std::integral_constant< template struct to_le { - using type = std::conditional_t::value || std::is_enum::value || std::is_same::value, le_t, T>; + using type = std::conditional_t::value || std::is_enum::value || std::is_same::value, le_t, T>; }; // le_t if possible, T otherwise diff --git a/Utilities/StrFmt.cpp b/Utilities/StrFmt.cpp index 3c4182a854..fefc881637 100644 --- a/Utilities/StrFmt.cpp +++ b/Utilities/StrFmt.cpp @@ -5,12 +5,12 @@ #include #pragma warning(pop) -std::string u128::to_hex() const +std::string v128::to_hex() const { return fmt::format("%016llx%016llx", _u64[1], _u64[0]); } -std::string u128::to_xyzw() const +std::string v128::to_xyzw() const { return fmt::Format("x: %g y: %g z: %g w: %g", _f[3], _f[2], _f[1], _f[0]); } diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index 67ad785449..75b01dbbf7 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -492,7 +492,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& 
out_reg, siz typedef CONTEXT x64_context; #define X64REG(context, reg) (&(&(context)->Rax)[reg]) -#define XMMREG(context, reg) (reinterpret_cast(&(&(context)->Xmm0)[reg])) +#define XMMREG(context, reg) (reinterpret_cast(&(&(context)->Xmm0)[reg])) #define EFLAGS(context) ((context)->EFlags) #else @@ -502,7 +502,7 @@ typedef ucontext_t x64_context; #ifdef __APPLE__ #define X64REG(context, reg) (darwin_x64reg(context, reg)) -#define XMMREG(context, reg) (reinterpret_cast(&(context)->uc_mcontext->__fs.__fpu_xmm0.__xmm_reg[reg])) +#define XMMREG(context, reg) (reinterpret_cast(&(context)->uc_mcontext->__fs.__fpu_xmm0.__xmm_reg[reg])) #define EFLAGS(context) ((context)->uc_mcontext->__ss.__rflags) uint64_t* darwin_x64reg(x64_context *context, int reg) @@ -560,7 +560,7 @@ static const reg_table_t reg_table[17] = }; #define X64REG(context, reg) (&(context)->uc_mcontext.gregs[reg_table[reg]]) -#define XMMREG(context, reg) (reinterpret_cast(&(context)->uc_mcontext.fpregs->_xmm[reg])) +#define XMMREG(context, reg) (reinterpret_cast(&(context)->uc_mcontext.fpregs->_xmm[reg])) #define EFLAGS(context) ((context)->uc_mcontext.gregs[REG_EFL]) #endif // __APPLE__ diff --git a/rpcs3/Emu/ARMv7/PSVFuncList.h b/rpcs3/Emu/ARMv7/PSVFuncList.h index 13bf6af807..7613e7c391 100644 --- a/rpcs3/Emu/ARMv7/PSVFuncList.h +++ b/rpcs3/Emu/ARMv7/PSVFuncList.h @@ -136,7 +136,7 @@ namespace psv_func_detail struct bind_arg { static_assert(v_count <= 0, "TODO: Unsupported argument type (vector)"); - static_assert(std::is_same, u128>::value, "Invalid function argument type for ARG_VECTOR"); + static_assert(std::is_same, v128>::value, "Invalid function argument type for ARG_VECTOR"); force_inline static T get_arg(ARMv7Context& context) { @@ -294,7 +294,7 @@ namespace psv_func_detail //template //struct bind_result //{ - // static_assert(std::is_same, u128>::value, "Invalid function result type for ARG_VECTOR"); + // static_assert(std::is_same, v128>::value, "Invalid function result type for ARG_VECTOR"); 
// static force_inline void put_result(ARMv7Context& context, const T& result) // { @@ -307,7 +307,7 @@ namespace psv_func_detail static_assert(!std::is_pointer::value, "Invalid function result type (pointer)"); static_assert(!std::is_reference::value, "Invalid function result type (reference)"); static const bool is_float = std::is_floating_point::value; - static const bool is_vector = std::is_same, u128>::value; + static const bool is_vector = std::is_same, v128>::value; static const arg_class value = is_float ? ARG_FLOAT : (is_vector ? ARG_VECTOR : ARG_GENERAL); }; @@ -316,7 +316,7 @@ namespace psv_func_detail { // TODO: check calculations static const bool is_float = std::is_floating_point::value; - static const bool is_vector = std::is_same, u128>::value; + static const bool is_vector = std::is_same, v128>::value; static const bool is_context = std::is_same::value; static const bool is_variadic = std::is_same, armv7_va_args_t>::value; static const bool is_general = !is_float && !is_vector && !is_context && !is_variadic; diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 21b45365b5..d2e2d15524 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -91,7 +91,7 @@ void ppu_interpreter::VADDCUW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDFP(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = u128::addfs(CPU.VPR[op.va], CPU.VPR[op.vb]); + CPU.VPR[op.vd] = v128::addfs(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VADDSBS(PPUThread& CPU, ppu_opcode_t op) @@ -108,7 +108,7 @@ void ppu_interpreter::VADDSWS(PPUThread& CPU, ppu_opcode_t op) { const auto a = CPU.VPR[op.va]; const auto b = CPU.VPR[op.vb]; - const auto s = u128::add32(a, b); // a + b + const auto s = v128::add32(a, b); // a + b const auto m = (a ^ s) & (b ^ s); // overflow bit const auto x = _mm_srai_epi32(m.vi, 31); // saturation mask const auto y = _mm_srai_epi32(_mm_and_si128(s.vi, m.vi), 31); // positive 
saturation mask @@ -117,7 +117,7 @@ void ppu_interpreter::VADDSWS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDUBM(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = u128::add8(CPU.VPR[op.va], CPU.VPR[op.vb]); + CPU.VPR[op.vd] = v128::add8(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op) @@ -127,7 +127,7 @@ void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDUHM(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = u128::add16(CPU.VPR[op.va], CPU.VPR[op.vb]); + CPU.VPR[op.vd] = v128::add16(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op) @@ -137,7 +137,7 @@ void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDUWM(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = u128::add32(CPU.VPR[op.va], CPU.VPR[op.vb]); + CPU.VPR[op.vd] = v128::add32(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VADDUWS(PPUThread& CPU, ppu_opcode_t op) @@ -160,30 +160,30 @@ void ppu_interpreter::VANDC(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VAVGSB(PPUThread& CPU, ppu_opcode_t op) { const auto a = CPU.VPR[op.va]; - const auto b = u128::add8(CPU.VPR[op.vb], u128::from8p(1)); // add 1 - const auto summ = u128::add8(a, b) & u128::from8p(0xfe); - const auto sign = u128::from8p(0x80); - const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq8(b, sign)) & sign; // calculate msb + const auto b = v128::add8(CPU.VPR[op.vb], v128::from8p(1)); // add 1 + const auto summ = v128::add8(a, b) & v128::from8p(0xfe); + const auto sign = v128::from8p(0x80); + const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ v128::eq8(b, sign)) & sign; // calculate msb CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi64(summ.vi, 1)); } void ppu_interpreter::VAVGSH(PPUThread& CPU, ppu_opcode_t op) { const auto a = CPU.VPR[op.va]; - const auto b = u128::add16(CPU.VPR[op.vb], 
u128::from16p(1)); // add 1 - const auto summ = u128::add16(a, b); - const auto sign = u128::from16p(0x8000); - const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq16(b, sign)) & sign; // calculate msb + const auto b = v128::add16(CPU.VPR[op.vb], v128::from16p(1)); // add 1 + const auto summ = v128::add16(a, b); + const auto sign = v128::from16p(0x8000); + const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ v128::eq16(b, sign)) & sign; // calculate msb CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi16(summ.vi, 1)); } void ppu_interpreter::VAVGSW(PPUThread& CPU, ppu_opcode_t op) { const auto a = CPU.VPR[op.va]; - const auto b = u128::add32(CPU.VPR[op.vb], u128::from32p(1)); // add 1 - const auto summ = u128::add32(a, b); - const auto sign = u128::from32p(0x80000000); - const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq32(b, sign)) & sign; // calculate msb + const auto b = v128::add32(CPU.VPR[op.vb], v128::from32p(1)); // add 1 + const auto summ = v128::add32(a, b); + const auto sign = v128::from32p(0x80000000); + const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ v128::eq32(b, sign)) & sign; // calculate msb CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi32(summ.vi, 1)); } @@ -201,7 +201,7 @@ void ppu_interpreter::VAVGUW(PPUThread& CPU, ppu_opcode_t op) { const auto a = CPU.VPR[op.va]; const auto b = CPU.VPR[op.vb]; - const auto summ = u128::add32(u128::add32(a, b), u128::from32p(1)); + const auto summ = v128::add32(v128::add32(a, b), v128::from32p(1)); const auto carry = _mm_xor_si128(_mm_slli_epi32(sse_cmpgt_epu32(summ.vi, a.vi), 31), _mm_set1_epi32(0x80000000)); CPU.VPR[op.vd].vi = _mm_or_si128(carry, _mm_srli_epi32(summ.vi, 1)); } @@ -248,7 +248,7 @@ void ppu_interpreter::VCMPEQFP_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPEQUB(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = u128::eq8(CPU.VPR[op.va], CPU.VPR[op.vb]); + CPU.VPR[op.vd] = v128::eq8(CPU.VPR[op.va], 
CPU.VPR[op.vb]); } void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op) @@ -260,7 +260,7 @@ void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPEQUH(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = u128::eq16(CPU.VPR[op.va], CPU.VPR[op.vb]); + CPU.VPR[op.vd] = v128::eq16(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op) @@ -272,7 +272,7 @@ void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPEQUW(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = u128::eq32(CPU.VPR[op.va], CPU.VPR[op.vb]); + CPU.VPR[op.vd] = v128::eq32(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op) @@ -727,8 +727,8 @@ void ppu_interpreter::VPERM(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VPKPX(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; + v128 VA = CPU.VPR[op.va]; + v128 VB = CPU.VPR[op.vb]; for (uint h = 0; h < 4; h++) { u16 bb7 = VB._u8[15 - (h * 4 + 0)] & 0x1; @@ -764,8 +764,8 @@ void ppu_interpreter::VPKSWUS(PPUThread& CPU, ppu_opcode_t op) { //CPU.VPR[op.vd].vi = _mm_packus_epi32(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; + v128 VA = CPU.VPR[op.va]; + v128 VB = CPU.VPR[op.vb]; for (uint h = 0; h < 4; h++) { s32 result = VA._s32[h]; @@ -798,8 +798,8 @@ void ppu_interpreter::VPKSWUS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VPKUHUM(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; + v128 VA = CPU.VPR[op.va]; + v128 VB = CPU.VPR[op.vb]; for (uint b = 0; b < 8; b++) { CPU.VPR[op.vd]._u8[b + 8] = VA._u8[b * 2]; @@ -809,8 +809,8 @@ void ppu_interpreter::VPKUHUM(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VPKUHUS(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; + v128 VA = CPU.VPR[op.va]; + 
v128 VB = CPU.VPR[op.vb]; for (uint b = 0; b < 8; b++) { u16 result = VA._u16[b]; @@ -835,8 +835,8 @@ void ppu_interpreter::VPKUHUS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VPKUWUM(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; + v128 VA = CPU.VPR[op.va]; + v128 VB = CPU.VPR[op.vb]; for (uint h = 0; h < 4; h++) { CPU.VPR[op.vd]._u16[h + 4] = VA._u16[h * 2]; @@ -846,8 +846,8 @@ void ppu_interpreter::VPKUWUM(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VPKUWUS(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; + v128 VA = CPU.VPR[op.va]; + v128 VB = CPU.VPR[op.vb]; for (uint h = 0; h < 4; h++) { u32 result = VA._u32[h]; @@ -949,7 +949,7 @@ void ppu_interpreter::VSEL(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSL(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; + v128 VA = CPU.VPR[op.va]; u8 sh = CPU.VPR[op.vb]._u8[0] & 0x7; CPU.VPR[op.vd]._u8[0] = VA._u8[0] << sh; @@ -989,7 +989,7 @@ void ppu_interpreter::VSLH(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSLO(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; + v128 VA = CPU.VPR[op.va]; u8 nShift = (CPU.VPR[op.vb]._u8[0] >> 3) & 0xf; CPU.VPR[op.vd].clear(); @@ -1068,7 +1068,7 @@ void ppu_interpreter::VSPLTW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSR(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; + v128 VA = CPU.VPR[op.va]; u8 sh = CPU.VPR[op.vb]._u8[0] & 0x7; CPU.VPR[op.vd]._u8[15] = VA._u8[15] >> sh; @@ -1120,7 +1120,7 @@ void ppu_interpreter::VSRH(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSRO(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; + v128 VA = CPU.VPR[op.va]; u8 nShift = (CPU.VPR[op.vb]._u8[0] >> 3) & 0xf; CPU.VPR[op.vd].clear(); @@ -1149,7 +1149,7 @@ void ppu_interpreter::VSUBCUW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSUBFP(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = 
u128::subfs(CPU.VPR[op.va], CPU.VPR[op.vb]); + CPU.VPR[op.vd] = v128::subfs(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VSUBSBS(PPUThread& CPU, ppu_opcode_t op) @@ -1183,7 +1183,7 @@ void ppu_interpreter::VSUBSWS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSUBUBM(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = u128::sub8(CPU.VPR[op.va], CPU.VPR[op.vb]); + CPU.VPR[op.vd] = v128::sub8(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op) @@ -1193,7 +1193,7 @@ void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSUBUHM(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = u128::sub16(CPU.VPR[op.va], CPU.VPR[op.vb]); + CPU.VPR[op.vd] = v128::sub16(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op) @@ -1203,7 +1203,7 @@ void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSUBUWM(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = u128::sub32(CPU.VPR[op.va], CPU.VPR[op.vb]); + CPU.VPR[op.vd] = v128::sub32(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VSUBUWS(PPUThread& CPU, ppu_opcode_t op) @@ -1334,7 +1334,7 @@ void ppu_interpreter::VSUM4UBS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VUPKHPX(PPUThread& CPU, ppu_opcode_t op) { - u128 VB = CPU.VPR[op.vb]; + v128 VB = CPU.VPR[op.vb]; for (uint w = 0; w < 4; w++) { CPU.VPR[op.vd]._s8[w * 4 + 3] = VB._s8[8 + w * 2 + 1] >> 7; // signed shift sign extends @@ -1346,7 +1346,7 @@ void ppu_interpreter::VUPKHPX(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VUPKHSB(PPUThread& CPU, ppu_opcode_t op) { - u128 VB = CPU.VPR[op.vb]; + v128 VB = CPU.VPR[op.vb]; for (uint h = 0; h < 8; h++) { CPU.VPR[op.vd]._s16[h] = VB._s8[8 + h]; @@ -1355,7 +1355,7 @@ void ppu_interpreter::VUPKHSB(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VUPKHSH(PPUThread& CPU, ppu_opcode_t op) { - u128 VB = CPU.VPR[op.vb]; + v128 VB = 
CPU.VPR[op.vb]; for (uint w = 0; w < 4; w++) { CPU.VPR[op.vd]._s32[w] = VB._s16[4 + w]; @@ -1364,7 +1364,7 @@ void ppu_interpreter::VUPKHSH(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VUPKLPX(PPUThread& CPU, ppu_opcode_t op) { - u128 VB = CPU.VPR[op.vb]; + v128 VB = CPU.VPR[op.vb]; for (uint w = 0; w < 4; w++) { CPU.VPR[op.vd]._s8[w * 4 + 3] = VB._s8[w * 2 + 1] >> 7; // signed shift sign extends @@ -1376,7 +1376,7 @@ void ppu_interpreter::VUPKLPX(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VUPKLSB(PPUThread& CPU, ppu_opcode_t op) { - u128 VB = CPU.VPR[op.vb]; + v128 VB = CPU.VPR[op.vb]; for (uint h = 0; h < 8; h++) { CPU.VPR[op.vd]._s16[h] = VB._s8[h]; @@ -1385,7 +1385,7 @@ void ppu_interpreter::VUPKLSB(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VUPKLSH(PPUThread& CPU, ppu_opcode_t op) { - u128 VB = CPU.VPR[op.vb]; + v128 VB = CPU.VPR[op.vb]; for (uint w = 0; w < 4; w++) { CPU.VPR[op.vd]._s32[w] = VB._s16[w]; diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index c9c4ea7482..7e9ebc0419 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -1055,8 +1055,8 @@ private: } void VMRGHB(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint h = 0; h < 8; h++) { CPU.VPR[vd]._u8[15 - h*2] = VA._u8[15 - h]; @@ -1065,8 +1065,8 @@ private: } void VMRGHH(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint w = 0; w < 4; w++) { CPU.VPR[vd]._u16[7 - w*2] = VA._u16[7 - w]; @@ -1075,8 +1075,8 @@ private: } void VMRGHW(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint d = 0; d < 2; d++) { CPU.VPR[vd]._u32[3 - d*2] = VA._u32[3 - d]; @@ -1085,8 +1085,8 @@ private: } void VMRGLB(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; - u128 VB = 
CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint h = 0; h < 8; h++) { CPU.VPR[vd]._u8[15 - h*2] = VA._u8[7 - h]; @@ -1095,8 +1095,8 @@ private: } void VMRGLH(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint w = 0; w < 4; w++) { CPU.VPR[vd]._u16[7 - w*2] = VA._u16[3 - w]; @@ -1105,8 +1105,8 @@ private: } void VMRGLW(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint d = 0; d < 2; d++) { CPU.VPR[vd]._u32[3 - d*2] = VA._u32[1 - d]; @@ -1339,8 +1339,8 @@ private: } void VPKPX(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint h = 0; h < 4; h++) { u16 bb7 = VB._u8[15 - (h*4 + 0)] & 0x1; @@ -1358,8 +1358,8 @@ private: } void VPKSHSS(u32 vd, u32 va, u32 vb) //nf { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint b = 0; b < 8; b++) { s16 result = VA._s16[b]; @@ -1395,8 +1395,8 @@ private: } void VPKSHUS(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint b = 0; b < 8; b++) { s16 result = VA._s16[b]; @@ -1432,8 +1432,8 @@ private: } void VPKSWSS(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint h = 0; h < 4; h++) { s32 result = VA._s32[h]; @@ -1469,8 +1469,8 @@ private: } void VPKSWUS(u32 vd, u32 va, u32 vb) //nf { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint h = 0; h < 4; h++) { s32 result = VA._s32[h]; @@ -1506,8 +1506,8 @@ private: } void VPKUHUM(u32 vd, u32 va, u32 vb) //nf { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint b = 0; b < 8; b++) { 
CPU.VPR[vd]._u8[b+8] = VA._u8[b*2]; @@ -1516,8 +1516,8 @@ private: } void VPKUHUS(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint b = 0; b < 8; b++) { u16 result = VA._u16[b]; @@ -1543,8 +1543,8 @@ private: } void VPKUWUM(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint h = 0; h < 4; h++) { CPU.VPR[vd]._u16[h+4] = VA._u16[h*2]; @@ -1553,8 +1553,8 @@ private: } void VPKUWUS(u32 vd, u32 va, u32 vb) //nf { - u128 VA = CPU.VPR[va]; - u128 VB = CPU.VPR[vb]; + v128 VA = CPU.VPR[va]; + v128 VB = CPU.VPR[vb]; for (uint h = 0; h < 4; h++) { u32 result = VA._u32[h]; @@ -1684,7 +1684,7 @@ private: } void VSL(u32 vd, u32 va, u32 vb) //nf { - u128 VA = CPU.VPR[va]; + v128 VA = CPU.VPR[va]; u8 sh = CPU.VPR[vb]._u8[0] & 0x7; CPU.VPR[vd]._u8[0] = VA._u8[0] << sh; @@ -1720,7 +1720,7 @@ private: } void VSLO(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; + v128 VA = CPU.VPR[va]; u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf; CPU.VPR[vd].clear(); @@ -1791,7 +1791,7 @@ private: } void VSR(u32 vd, u32 va, u32 vb) //nf { - u128 VA = CPU.VPR[va]; + v128 VA = CPU.VPR[va]; u8 sh = CPU.VPR[vb]._u8[0] & 0x7; CPU.VPR[vd]._u8[15] = VA._u8[15] >> sh; @@ -1837,7 +1837,7 @@ private: } void VSRO(u32 vd, u32 va, u32 vb) { - u128 VA = CPU.VPR[va]; + v128 VA = CPU.VPR[va]; u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf; CPU.VPR[vd].clear(); @@ -2121,7 +2121,7 @@ private: } void VUPKHPX(u32 vd, u32 vb) { - u128 VB = CPU.VPR[vb]; + v128 VB = CPU.VPR[vb]; for (uint w = 0; w < 4; w++) { CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[8 + w*2 + 1] >> 7; // signed shift sign extends @@ -2132,7 +2132,7 @@ private: } void VUPKHSB(u32 vd, u32 vb) { - u128 VB = CPU.VPR[vb]; + v128 VB = CPU.VPR[vb]; for (uint h = 0; h < 8; h++) { CPU.VPR[vd]._s16[h] = VB._s8[8 + h]; @@ -2140,7 +2140,7 @@ private: } void VUPKHSH(u32 vd, u32 vb) { - u128 VB = CPU.VPR[vb]; + 
v128 VB = CPU.VPR[vb]; for (uint w = 0; w < 4; w++) { CPU.VPR[vd]._s32[w] = VB._s16[4 + w]; @@ -2148,7 +2148,7 @@ private: } void VUPKLPX(u32 vd, u32 vb) { - u128 VB = CPU.VPR[vb]; + v128 VB = CPU.VPR[vb]; for (uint w = 0; w < 4; w++) { CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[w*2 + 1] >> 7; // signed shift sign extends @@ -2159,7 +2159,7 @@ private: } void VUPKLSB(u32 vd, u32 vb) //nf { - u128 VB = CPU.VPR[vb]; + v128 VB = CPU.VPR[vb]; for (uint h = 0; h < 8; h++) { CPU.VPR[vd]._s16[h] = VB._s8[h]; @@ -2167,7 +2167,7 @@ private: } void VUPKLSH(u32 vd, u32 vb) { - u128 VB = CPU.VPR[vb]; + v128 VB = CPU.VPR[vb]; for (uint w = 0; w < 4; w++) { CPU.VPR[vd]._s32[w] = VB._s16[w]; diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerCore.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerCore.cpp index c1632617ad..4fa2e45d07 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerCore.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerCore.cpp @@ -2121,7 +2121,7 @@ void Compiler::TW(u32 to, u32 ra, u32 rb) { } void Compiler::LVSL(u32 vd, u32 ra, u32 rb) { - static const u128 s_lvsl_values[] = { + static const v128 s_lvsl_values[] = { { 0x08090A0B0C0D0E0F, 0x0001020304050607 }, { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, @@ -2350,7 +2350,7 @@ void Compiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) { } void Compiler::LVSR(u32 vd, u32 ra, u32 rb) { - static const u128 s_lvsr_values[] = { + static const v128 s_lvsr_values[] = { { 0x18191A1B1C1D1E1F, 0x1011121314151617 }, { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, { 0x161718191A1B1C1D, 0x0E0F101112131415 }, diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index 39e015ab1a..c4b0916029 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -51,7 +51,7 @@ struct ppu_recompiler_llvm::PPUState { u64 GPR[32]; /// Vector purpose registers - u128 VPR[32]; + v128 VPR[32]; /// Condition register CRhdr CR; diff --git 
a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index bd0a98160d..e32d430205 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -462,7 +462,7 @@ public: PPCdouble FPR[32]{}; //Floating Point Register FPSCRhdr FPSCR{}; //Floating Point Status and Control Register u64 GPR[32]{}; //General-Purpose Register - u128 VPR[32]{}; + v128 VPR[32]{}; u32 vpcr = 0; CRhdr CR{}; //Condition Register diff --git a/rpcs3/Emu/Cell/SPUContext.h b/rpcs3/Emu/Cell/SPUContext.h index 2088497285..2fa42dd9da 100644 --- a/rpcs3/Emu/Cell/SPUContext.h +++ b/rpcs3/Emu/Cell/SPUContext.h @@ -2,5 +2,5 @@ struct SPUContext { - u128 gpr[128]; + v128 gpr[128]; }; diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 31ff7f37c1..6cebe2def5 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -69,17 +69,17 @@ void spu_interpreter::MFSPR(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::RDCH(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::from32r(CPU.get_ch_value(op.ra)); + CPU.GPR[op.rt] = v128::from32r(CPU.get_ch_value(op.ra)); } void spu_interpreter::RCHCNT(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::from32r(CPU.get_ch_count(op.ra)); + CPU.GPR[op.rt] = v128::from32r(CPU.get_ch_count(op.ra)); } void spu_interpreter::SF(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]); + CPU.GPR[op.rt] = v128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]); } void spu_interpreter::OR(SPUThread& CPU, spu_opcode_t op) @@ -94,7 +94,7 @@ void spu_interpreter::BG(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::SFH(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::sub16(CPU.GPR[op.rb], CPU.GPR[op.ra]); + CPU.GPR[op.rt] = v128::sub16(CPU.GPR[op.rb], CPU.GPR[op.ra]); } void spu_interpreter::NOR(SPUThread& CPU, spu_opcode_t op) @@ -106,7 +106,7 @@ void spu_interpreter::ABSDB(SPUThread& CPU, spu_opcode_t op) { const auto a = 
CPU.GPR[op.ra]; const auto b = CPU.GPR[op.rb]; - CPU.GPR[op.rt] = u128::sub8(u128::maxu8(a, b), u128::minu8(a, b)); + CPU.GPR[op.rt] = v128::sub8(v128::maxu8(a, b), v128::minu8(a, b)); } void spu_interpreter::ROT(SPUThread& CPU, spu_opcode_t op) @@ -249,7 +249,7 @@ void spu_interpreter::SHLHI(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::A(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]); + CPU.GPR[op.rt] = v128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::AND(SPUThread& CPU, spu_opcode_t op) @@ -266,7 +266,7 @@ void spu_interpreter::CG(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::AH(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::add16(CPU.GPR[op.ra], CPU.GPR[op.rb]); + CPU.GPR[op.rt] = v128::add16(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::NAND(SPUThread& CPU, spu_opcode_t op) @@ -343,7 +343,7 @@ void spu_interpreter::BI(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::BISL(SPUThread& CPU, spu_opcode_t op) { const u32 target = SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0); - CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4); + CPU.GPR[op.rt] = v128::from32r(CPU.PC + 4); CPU.PC = target - 4; set_interrupt_status(CPU, op); } @@ -364,17 +364,17 @@ void spu_interpreter::HBR(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::GB(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0)), 7))); + CPU.GPR[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0)), 7))); } void spu_interpreter::GBH(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 
4, 2, 0)), 7))); + CPU.GPR[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0)), 7))); } void spu_interpreter::GBB(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(CPU.GPR[op.ra].vi, 7))); + CPU.GPR[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(CPU.GPR[op.ra].vi, 7))); } void spu_interpreter::FSM(SPUThread& CPU, spu_opcode_t op) @@ -426,28 +426,28 @@ void spu_interpreter::SHLQBYBI(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::CBX(SPUThread& CPU, spu_opcode_t op) { const s32 t = ~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xf; - CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); CPU.GPR[op.rt]._u8[t] = 0x03; } void spu_interpreter::CHX(SPUThread& CPU, spu_opcode_t op) { const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1; - CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); CPU.GPR[op.rt]._u16[t] = 0x0203; } void spu_interpreter::CWX(SPUThread& CPU, spu_opcode_t op) { const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2; - CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); CPU.GPR[op.rt]._u32[t] = 0x00010203; } void spu_interpreter::CDX(SPUThread& CPU, spu_opcode_t op) { const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3; - CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull; } @@ -489,34 +489,34 @@ void spu_interpreter::SHLQBY(SPUThread& CPU, 
spu_opcode_t op) void spu_interpreter::ORX(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::from32r(CPU.GPR[op.ra]._u32[0] | CPU.GPR[op.ra]._u32[1] | CPU.GPR[op.ra]._u32[2] | CPU.GPR[op.ra]._u32[3]); + CPU.GPR[op.rt] = v128::from32r(CPU.GPR[op.ra]._u32[0] | CPU.GPR[op.ra]._u32[1] | CPU.GPR[op.ra]._u32[2] | CPU.GPR[op.ra]._u32[3]); } void spu_interpreter::CBD(SPUThread& CPU, spu_opcode_t op) { const s32 t = ~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xf; - CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); CPU.GPR[op.rt]._u8[t] = 0x03; } void spu_interpreter::CHD(SPUThread& CPU, spu_opcode_t op) { const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1; - CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); CPU.GPR[op.rt]._u16[t] = 0x0203; } void spu_interpreter::CWD(SPUThread& CPU, spu_opcode_t op) { const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2; - CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); CPU.GPR[op.rt]._u32[t] = 0x00010203; } void spu_interpreter::CDD(SPUThread& CPU, spu_opcode_t op) { const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3; - CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull; } @@ -640,7 +640,7 @@ void spu_interpreter::CLGT(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::ANDC(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::andnot(CPU.GPR[op.rb], CPU.GPR[op.ra]); + CPU.GPR[op.rt] = v128::andnot(CPU.GPR[op.rb], CPU.GPR[op.ra]); } void spu_interpreter::FCGT(SPUThread& CPU, spu_opcode_t op) @@ -655,12 +655,12 @@ void 
spu_interpreter::DFCGT(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::FA(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::addfs(CPU.GPR[op.ra], CPU.GPR[op.rb]); + CPU.GPR[op.rt] = v128::addfs(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::FS(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::subfs(CPU.GPR[op.ra], CPU.GPR[op.rb]); + CPU.GPR[op.rt] = v128::subfs(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::FM(SPUThread& CPU, spu_opcode_t op) @@ -691,12 +691,12 @@ void spu_interpreter::DFCMGT(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::DFA(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::addfd(CPU.GPR[op.ra], CPU.GPR[op.rb]); + CPU.GPR[op.rt] = v128::addfd(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::DFS(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::subfd(CPU.GPR[op.ra], CPU.GPR[op.rb]); + CPU.GPR[op.rt] = v128::subfd(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::DFM(SPUThread& CPU, spu_opcode_t op) @@ -751,12 +751,12 @@ void spu_interpreter::MPYHHU(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::ADDX(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::add32(u128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]), CPU.GPR[op.rt] & u128::from32p(1)); + CPU.GPR[op.rt] = v128::add32(v128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]), CPU.GPR[op.rt] & v128::from32p(1)); } void spu_interpreter::SFX(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = u128::sub32(u128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]), u128::andnot(CPU.GPR[op.rt], u128::from32p(1))); + CPU.GPR[op.rt] = v128::sub32(v128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]), v128::andnot(CPU.GPR[op.rt], v128::from32p(1))); } void spu_interpreter::CGX(SPUThread& CPU, spu_opcode_t op) @@ -976,7 +976,7 @@ void spu_interpreter::LQA(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::BRASL(SPUThread& CPU, spu_opcode_t op) { const u32 target = SPUOpcodes::branchTarget(0, op.i16); - CPU.GPR[op.rt] = 
u128::from32r(CPU.PC + 4); + CPU.GPR[op.rt] = v128::from32r(CPU.PC + 4); CPU.PC = target - 4; } @@ -993,7 +993,7 @@ void spu_interpreter::FSMBI(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::BRSL(SPUThread& CPU, spu_opcode_t op) { const u32 target = SPUOpcodes::branchTarget(CPU.PC, op.i16); - CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4); + CPU.GPR[op.rt] = v128::from32r(CPU.PC + 4); CPU.PC = target - 4; } @@ -1197,7 +1197,7 @@ void spu_interpreter::ILA(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::SELB(SPUThread& CPU, spu_opcode_t op) { // rt <> rc - CPU.GPR[op.rc] = (CPU.GPR[op.rt] & CPU.GPR[op.rb]) | u128::andnot(CPU.GPR[op.rt], CPU.GPR[op.ra]); + CPU.GPR[op.rc] = (CPU.GPR[op.rt] & CPU.GPR[op.rb]) | v128::andnot(CPU.GPR[op.rt], CPU.GPR[op.ra]); } void spu_interpreter::SHUFB(SPUThread& CPU, spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index ab5c430f58..b6fdfedb68 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -116,11 +116,11 @@ private: } void RDCH(u32 rt, u32 ra) { - CPU.GPR[rt] = u128::from32r(CPU.get_ch_value(ra)); + CPU.GPR[rt] = v128::from32r(CPU.get_ch_value(ra)); } void RCHCNT(u32 rt, u32 ra) { - CPU.GPR[rt] = u128::from32r(CPU.get_ch_count(ra)); + CPU.GPR[rt] = v128::from32r(CPU.get_ch_count(ra)); } void SF(u32 rt, u32 ra, u32 rb) { @@ -424,7 +424,7 @@ private: void BISL(u32 intr, u32 rt, u32 ra) { u32 target = branchTarget(CPU.GPR[ra]._u32[3], 0); - CPU.GPR[rt] = u128::from32r(CPU.PC + 4); + CPU.GPR[rt] = v128::from32r(CPU.PC + 4); LOG5_OPCODE("branch (0x%x)", target); CPU.PC = target - 4; @@ -539,14 +539,14 @@ private: void ROTQBYBI(u32 rt, u32 ra, u32 rb) { const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0xf; - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf]; } void ROTQMBYBI(u32 rt, u32 ra, u32 rb) { const int s = (0 - (CPU.GPR[rb]._u32[3] >> 3)) & 0x1f; - const 
u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt].clear(); for (int b = 0; b < 16 - s; b++) CPU.GPR[rt]._u8[b] = temp._u8[b + s]; @@ -554,7 +554,7 @@ private: void SHLQBYBI(u32 rt, u32 ra, u32 rb) { const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0x1f; - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt].clear(); for (int b = s; b < 16; b++) CPU.GPR[rt]._u8[b] = temp._u8[b - s]; @@ -620,7 +620,7 @@ private: const int t = CPU.GPR[rb]._u32[3] & 0x7; if (t) // not an optimization, it fixes shifts { - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] << t) | (temp._u32[3] >> (32 - t)); CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t)); CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t)); @@ -636,7 +636,7 @@ private: const int t = (0 - CPU.GPR[rb]._u32[3]) & 0x7; if (t) // not an optimization, it fixes shifts { - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] >> t) | (temp._u32[1] << (32 - t)); CPU.GPR[rt]._u32[1] = (temp._u32[1] >> t) | (temp._u32[2] << (32 - t)); CPU.GPR[rt]._u32[2] = (temp._u32[2] >> t) | (temp._u32[3] << (32 - t)); @@ -652,7 +652,7 @@ private: const int t = CPU.GPR[rb]._u32[3] & 0x7; if (t) // not an optimization, it fixes shifts { - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] << t); CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t)); CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t)); @@ -666,14 +666,14 @@ private: void ROTQBY(u32 rt, u32 ra, u32 rb) { const int s = CPU.GPR[rb]._u32[3] & 0xf; - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; for (int b = 0; b < 16; ++b) CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf]; } void ROTQMBY(u32 rt, u32 ra, u32 rb) { const int s = (0 - CPU.GPR[rb]._u32[3]) & 0x1f; - const u128 temp = CPU.GPR[ra]; + const v128 temp = 
CPU.GPR[ra]; CPU.GPR[rt].clear(); for (int b = 0; b < 16 - s; b++) CPU.GPR[rt]._u8[b] = temp._u8[b + s]; @@ -681,7 +681,7 @@ private: void SHLQBY(u32 rt, u32 ra, u32 rb) { const int s = CPU.GPR[rb]._u32[3] & 0x1f; - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt].clear(); for (int b = s; b < 16; b++) CPU.GPR[rt]._u8[b] = temp._u8[b - s]; @@ -753,7 +753,7 @@ private: const int s = i7 & 0x7; if (s) // not an optimization, it fixes shifts { - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] << s) | (temp._u32[3] >> (32 - s)); CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s)); CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s)); @@ -769,7 +769,7 @@ private: const int s = (0 - i7) & 0x7; if (s) // not an optimization, it fixes shifts { - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] >> s) | (temp._u32[1] << (32 - s)); CPU.GPR[rt]._u32[1] = (temp._u32[1] >> s) | (temp._u32[2] << (32 - s)); CPU.GPR[rt]._u32[2] = (temp._u32[2] >> s) | (temp._u32[3] << (32 - s)); @@ -785,7 +785,7 @@ private: const int s = i7 & 0x7; if (s) // not an optimization, it fixes shifts { - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] << s); CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s)); CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s)); @@ -799,14 +799,14 @@ private: void ROTQBYI(u32 rt, u32 ra, s32 i7) { const int s = i7 & 0xf; - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf]; } void ROTQMBYI(u32 rt, u32 ra, s32 i7) { const int s = (0 - i7) & 0x1f; - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt].clear(); for (int b = 0; b < 16 - s; b++) CPU.GPR[rt]._u8[b] = temp._u8[b + s]; @@ -814,7 +814,7 @@ private: void 
SHLQBYI(u32 rt, u32 ra, s32 i7) { const int s = i7 & 0x1f; - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt].clear(); for (int b = s; b < 16; b++) CPU.GPR[rt]._u8[b] = temp._u8[b - s]; @@ -849,8 +849,8 @@ private: } void SUMB(u32 rt, u32 ra, u32 rb) { - const u128 _a = CPU.GPR[ra]; - const u128 _b = CPU.GPR[rb]; + const v128 _a = CPU.GPR[ra]; + const v128 _b = CPU.GPR[rb]; for (int w = 0; w < 4; w++) { CPU.GPR[rt]._u16[w*2] = _a._u8[w*4] + _a._u8[w*4 + 1] + _a._u8[w*4 + 2] + _a._u8[w*4 + 3]; @@ -890,7 +890,7 @@ private: } void CNTB(u32 rt, u32 ra) { - const u128 temp = CPU.GPR[ra]; + const v128 temp = CPU.GPR[ra]; CPU.GPR[rt].clear(); for (int b = 0; b < 16; b++) for (int i = 0; i < 8; i++) @@ -1621,7 +1621,7 @@ private: void BRASL(u32 rt, s32 i16) { u32 target = branchTarget(0, i16); - CPU.GPR[rt] = u128::from32r(CPU.PC + 4); + CPU.GPR[rt] = v128::from32r(CPU.PC + 4); LOG5_OPCODE("branch (0x%x)", target); CPU.PC = target - 4; } @@ -1650,7 +1650,7 @@ private: void BRSL(u32 rt, s32 i16) { u32 target = branchTarget(CPU.PC, i16); - CPU.GPR[rt] = u128::from32r(CPU.PC + 4); + CPU.GPR[rt] = v128::from32r(CPU.PC + 4); LOG5_OPCODE("branch (0x%x)", target); CPU.PC = target - 4; } @@ -1873,8 +1873,8 @@ private: } void SHUFB(u32 rt, u32 ra, u32 rb, u32 rc) { - const u128 _a = CPU.GPR[ra]; - const u128 _b = CPU.GPR[rb]; + const v128 _a = CPU.GPR[ra]; + const v128 _b = CPU.GPR[rb]; for (int i = 0; i < 16; i++) { u8 b = CPU.GPR[rc]._u8[i]; diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 3780519975..66b1d22372 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -34,7 +34,7 @@ public: std::array entry = {}; - std::vector<u128> imm_table; + std::vector<v128> imm_table; SPURecompilerCore(SPUThread& cpu); @@ -105,7 +105,7 @@ public: void XmmInvalidate(const s8 reg); void XmmFinalize(const XmmLink& var, s8 reg = -1); void XmmRelease(); - asmjit::X86Mem XmmConst(u128 data); + asmjit::X86Mem XmmConst(v128
data); private: diff --git a/rpcs3/Emu/Cell/SPURecompilerCore.cpp b/rpcs3/Emu/Cell/SPURecompilerCore.cpp index 15f2a2c400..c430d1b058 100644 --- a/rpcs3/Emu/Cell/SPURecompilerCore.cpp +++ b/rpcs3/Emu/Cell/SPURecompilerCore.cpp @@ -486,7 +486,7 @@ void SPURecompiler::XmmRelease() } } -X86Mem SPURecompiler::XmmConst(u128 data) +X86Mem SPURecompiler::XmmConst(v128 data) { s32 shift = 0; @@ -494,12 +494,12 @@ X86Mem SPURecompiler::XmmConst(u128 data) { if (rec.imm_table[shift] == data) { - return oword_ptr(*imm_var, shift * sizeof(u128)); + return oword_ptr(*imm_var, shift * sizeof(v128)); } } rec.imm_table.push_back(data); - return oword_ptr(*imm_var, shift * sizeof(u128)); + return oword_ptr(*imm_var, shift * sizeof(v128)); } @@ -553,7 +553,7 @@ void SPURecompiler::RDCH(u32 rt, u32 ra) { c.mov(cpu_dword(PC), CPU.PC); WRAPPER_BEGIN(rt, ra, zz); - CPU->GPR[rt] = u128::from32r(CPU->get_ch_value(ra)); + CPU->GPR[rt] = v128::from32r(CPU->get_ch_value(ra)); WRAPPER_END(rt, ra, 0); // TODO } @@ -562,7 +562,7 @@ void SPURecompiler::RCHCNT(u32 rt, u32 ra) { c.mov(cpu_dword(PC), CPU.PC); WRAPPER_BEGIN(rt, ra, zz); - CPU->GPR[rt] = u128::from32r(CPU->get_ch_count(ra)); + CPU->GPR[rt] = v128::from32r(CPU->get_ch_count(ra)); WRAPPER_END(rt, ra, 0); // TODO } @@ -603,7 +603,7 @@ void SPURecompiler::BG(u32 rt, u32 ra, u32 rb) // compare if-greater-than const XmmLink& va = XmmGet(ra, rt); const XmmLink& vi = XmmAlloc(); - c.movdqa(vi.get(), XmmConst(u128::from32p(0x80000000))); + c.movdqa(vi.get(), XmmConst(v128::from32p(0x80000000))); c.pxor(va.get(), vi.get()); if (const XmmLink* vb = XmmRead(rb)) { @@ -614,7 +614,7 @@ void SPURecompiler::BG(u32 rt, u32 ra, u32 rb) c.pxor(vi.get(), cpu_xmm(GPR[rb])); } c.pcmpgtd(va.get(), vi.get()); - c.paddd(va.get(), XmmConst(u128::from32p(1))); + c.paddd(va.get(), XmmConst(v128::from32p(1))); XmmFinalize(va, rt); XmmFinalize(vi); LOG_OPCODE(); @@ -650,7 +650,7 @@ void SPURecompiler::NOR(u32 rt, u32 ra, u32 rb) c.por(va.get(), cpu_xmm(GPR[rb])); 
} } - c.pxor(va.get(), XmmConst(u128::from32p(0xffffffff))); + c.pxor(va.get(), XmmConst(v128::from32p(0xffffffff))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -901,7 +901,7 @@ void SPURecompiler::CG(u32 rt, u32 ra, u32 rb) const XmmLink& va = XmmGet(ra, rt); const XmmLink& vb = XmmGet(rb); const XmmLink& vi = XmmAlloc(); - c.movdqa(vi.get(), XmmConst(u128::from32p(0x80000000))); + c.movdqa(vi.get(), XmmConst(v128::from32p(0x80000000))); c.paddd(vb.get(), va.get()); c.pxor(va.get(), vi.get()); c.pxor(vb.get(), vi.get()); @@ -940,7 +940,7 @@ void SPURecompiler::NAND(u32 rt, u32 ra, u32 rb) { c.pand(va.get(), cpu_xmm(GPR[rb])); } - c.pxor(va.get(), XmmConst(u128::from32p(0xffffffff))); + c.pxor(va.get(), XmmConst(v128::from32p(0xffffffff))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -1178,7 +1178,7 @@ void SPURecompiler::HBR(u32 p, u32 ro, u32 ra) void SPURecompiler::GB(u32 rt, u32 ra) { const XmmLink& va = XmmGet(ra, rt); - c.pshufb(va.get(), XmmConst(u128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0)))); + c.pshufb(va.get(), XmmConst(v128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0)))); c.psllq(va.get(), 7); c.pmovmskb(*addr, va.get()); c.pxor(va.get(), va.get()); @@ -1190,7 +1190,7 @@ void SPURecompiler::GB(u32 rt, u32 ra) void SPURecompiler::GBH(u32 rt, u32 ra) { const XmmLink& va = XmmGet(ra, rt); - c.pshufb(va.get(), XmmConst(u128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0)))); + c.pshufb(va.get(), XmmConst(v128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0)))); c.psllq(va.get(), 7); c.pmovmskb(*addr, va.get()); c.pxor(va.get(), va.get()); @@ -1254,7 +1254,7 @@ void SPURecompiler::FREST(u32 rt, u32 ra) void SPURecompiler::FRSQEST(u32 rt, u32 ra) { const XmmLink& va = XmmGet(ra, rt); - c.andps(va.get(), XmmConst(u128::from32p(0x7fffffff))); // abs + c.andps(va.get(), XmmConst(v128::from32p(0x7fffffff))); // abs c.rsqrtps(va.get(), 
va.get()); XmmFinalize(va, rt); LOG_OPCODE(); @@ -1343,7 +1343,7 @@ void SPURecompiler::CBX(u32 rt, u32 ra, u32 rb) c.not_(*addr); c.and_(*addr, 0xf); const XmmLink& vr = XmmAlloc(rt); - c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); + c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); XmmFinalize(vr, rt); XmmInvalidate(rt); c.mov(byte_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x03); @@ -1368,7 +1368,7 @@ void SPURecompiler::CHX(u32 rt, u32 ra, u32 rb) c.not_(*addr); c.and_(*addr, 0xe); const XmmLink& vr = XmmAlloc(rt); - c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); + c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); XmmFinalize(vr, rt); XmmInvalidate(rt); c.mov(word_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x0203); @@ -1393,7 +1393,7 @@ void SPURecompiler::CWX(u32 rt, u32 ra, u32 rb) c.not_(*addr); c.and_(*addr, 0xc); const XmmLink& vr = XmmAlloc(rt); - c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); + c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); XmmFinalize(vr, rt); XmmInvalidate(rt); c.mov(dword_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x00010203); @@ -1419,10 +1419,10 @@ void SPURecompiler::CDX(u32 rt, u32 ra, u32 rb) const XmmLink& vr = XmmAlloc(rt); Label p1(c), p2(c); c.jnz(p1); - c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f)))); + c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f)))); c.jmp(p2); c.bind(p1); - c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x00010203, 0x04050607)))); + c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 
0x00010203, 0x04050607)))); c.bind(p2); XmmFinalize(vr, rt); LOG_OPCODE(); @@ -1527,7 +1527,7 @@ void SPURecompiler::CBD(u32 rt, u32 ra, s32 i7) { // assuming that SP % 16 is always zero const XmmLink& vr = XmmAlloc(rt); - u128 value = u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)); + v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)); value.u8r[i7 & 0xf] = 0x03; c.movdqa(vr.get(), XmmConst(value)); XmmFinalize(vr, rt); @@ -1539,7 +1539,7 @@ void SPURecompiler::CBD(u32 rt, u32 ra, s32 i7) c.not_(*addr); c.and_(*addr, 0xf); const XmmLink& vr = XmmAlloc(rt); - c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); + c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); XmmFinalize(vr, rt); XmmInvalidate(rt); c.mov(byte_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x03); @@ -1553,7 +1553,7 @@ void SPURecompiler::CHD(u32 rt, u32 ra, s32 i7) { // assuming that SP % 16 is always zero const XmmLink& vr = XmmAlloc(rt); - u128 value = u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)); + v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)); value.u16r[(i7 >> 1) & 0x7] = 0x0203; c.movdqa(vr.get(), XmmConst(value)); XmmFinalize(vr, rt); @@ -1565,7 +1565,7 @@ void SPURecompiler::CHD(u32 rt, u32 ra, s32 i7) c.not_(*addr); c.and_(*addr, 0xe); const XmmLink& vr = XmmAlloc(rt); - c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); + c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); XmmFinalize(vr, rt); XmmInvalidate(rt); c.mov(word_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x0203); @@ -1579,7 +1579,7 @@ void SPURecompiler::CWD(u32 rt, u32 ra, s32 i7) { // assuming that SP % 16 is always zero const XmmLink& vr = XmmAlloc(rt); - u128 value = 
u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)); + v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)); value.u32r[(i7 >> 2) & 0x3] = 0x00010203; c.movdqa(vr.get(), XmmConst(value)); XmmFinalize(vr, rt); @@ -1591,7 +1591,7 @@ void SPURecompiler::CWD(u32 rt, u32 ra, s32 i7) c.not_(*addr); c.and_(*addr, 0xc); const XmmLink& vr = XmmAlloc(rt); - c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); + c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)))); XmmFinalize(vr, rt); XmmInvalidate(rt); c.mov(dword_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x00010203); @@ -1605,7 +1605,7 @@ void SPURecompiler::CDD(u32 rt, u32 ra, s32 i7) { // assuming that SP % 16 is always zero const XmmLink& vr = XmmAlloc(rt); - u128 value = u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)); + v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)); value.u64r[(i7 >> 3) & 0x1] = 0x0001020304050607ull; c.movdqa(vr.get(), XmmConst(value)); XmmFinalize(vr, rt); @@ -1618,10 +1618,10 @@ void SPURecompiler::CDD(u32 rt, u32 ra, s32 i7) const XmmLink& vr = XmmAlloc(rt); Label p1(c), p2(c); c.jnz(p1); - c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f)))); + c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f)))); c.jmp(p2); c.bind(p1); - c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x00010203, 0x04050607)))); + c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x00010203, 0x04050607)))); c.bind(p2); XmmFinalize(vr, rt); } @@ -1746,7 +1746,7 @@ void SPURecompiler::CGTH(u32 rt, u32 ra, u32 rb) void SPURecompiler::EQV(u32 rt, u32 ra, u32 rb) { const XmmLink& vb = XmmGet(rb, rt); - c.pxor(vb.get(), 
XmmConst(u128::from32p(0xffffffff))); + c.pxor(vb.get(), XmmConst(v128::from32p(0xffffffff))); if (const XmmLink* va = XmmRead(ra)) { c.pxor(vb.get(), va->read()); @@ -1779,11 +1779,11 @@ void SPURecompiler::SUMB(u32 rt, u32 ra, u32 rb) const XmmLink& va = XmmGet(ra, rt); const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb); const XmmLink& vi = XmmAlloc(); - c.movdqa(vi.get(), XmmConst(u128::from8p(1))); + c.movdqa(vi.get(), XmmConst(v128::from8p(1))); c.pmaddubsw(va.get(), vi.get()); c.pmaddubsw(vb.get(), vi.get()); c.phaddw(va.get(), vb.get()); - c.pshufb(va.get(), XmmConst(u128::fromV(_mm_set_epi8(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0)))); + c.pshufb(va.get(), XmmConst(v128::fromV(_mm_set_epi8(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0)))); XmmFinalize(va, rt); XmmFinalize(vb); XmmFinalize(vi); @@ -1842,12 +1842,12 @@ void SPURecompiler::CNTB(u32 rt, u32 ra) const XmmLink& v1 = XmmCopy(va); const XmmLink& vm = XmmAlloc(); c.psrlq(v1.get(), 4); - c.movdqa(vm.get(), XmmConst(u128::from8p(0xf))); + c.movdqa(vm.get(), XmmConst(v128::from8p(0xf))); c.pand(va.get(), vm.get()); c.pand(v1.get(), vm.get()); - c.movdqa(vm.get(), XmmConst(u128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0)))); + c.movdqa(vm.get(), XmmConst(v128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0)))); c.pshufb(vm.get(), va.get()); - c.movdqa(va.get(), XmmConst(u128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0)))); + c.movdqa(va.get(), XmmConst(v128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0)))); c.pshufb(va.get(), v1.get()); c.paddb(va.get(), vm.get()); XmmFinalize(va, rt); @@ -1870,7 +1870,7 @@ void SPURecompiler::CLGT(u32 rt, u32 ra, u32 rb) // compare if-greater-than const XmmLink& va = XmmGet(ra, rt); const XmmLink& vi = XmmAlloc(); - c.movdqa(vi.get(), XmmConst(u128::from32p(0x80000000))); + c.movdqa(vi.get(), XmmConst(v128::from32p(0x80000000))); c.pxor(va.get(), vi.get()); 
if (const XmmLink* vb = XmmRead(rb)) { @@ -1973,7 +1973,7 @@ void SPURecompiler::CLGTH(u32 rt, u32 ra, u32 rb) // compare if-greater-than const XmmLink& va = XmmGet(ra, rt); const XmmLink& vi = XmmAlloc(); - c.movdqa(vi.get(), XmmConst(u128::from16p(0x8000))); + c.movdqa(vi.get(), XmmConst(v128::from16p(0x8000))); c.pxor(va.get(), vi.get()); if (const XmmLink* vb = XmmRead(rb)) { @@ -1992,7 +1992,7 @@ void SPURecompiler::CLGTH(u32 rt, u32 ra, u32 rb) void SPURecompiler::ORC(u32 rt, u32 ra, u32 rb) { const XmmLink& vb = XmmGet(rb, rt); - c.pxor(vb.get(), XmmConst(u128::from32p(0xffffffff))); + c.pxor(vb.get(), XmmConst(v128::from32p(0xffffffff))); if (const XmmLink* va = XmmRead(ra)) { c.por(vb.get(), va->read()); @@ -2010,7 +2010,7 @@ void SPURecompiler::FCMGT(u32 rt, u32 ra, u32 rb) // reverted less-than const XmmLink& vb = XmmGet(rb, rt); const XmmLink& vi = XmmAlloc(); - c.movaps(vi.get(), XmmConst(u128::from32p(0x7fffffff))); + c.movaps(vi.get(), XmmConst(v128::from32p(0x7fffffff))); c.andps(vb.get(), vi.get()); // abs if (const XmmLink* va = XmmRead(ra)) { @@ -2081,7 +2081,7 @@ void SPURecompiler::CLGTB(u32 rt, u32 ra, u32 rb) // compare if-greater-than const XmmLink& va = XmmGet(ra, rt); const XmmLink& vi = XmmAlloc(); - c.movdqa(vi.get(), XmmConst(u128::from8p(0x80))); + c.movdqa(vi.get(), XmmConst(v128::from8p(0x80))); c.pxor(va.get(), vi.get()); if (const XmmLink* vb = XmmRead(rb)) { @@ -2177,7 +2177,7 @@ void SPURecompiler::MPYHHU(u32 rt, u32 ra, u32 rb) const XmmLink& va2 = XmmCopy(va); c.pmulhuw(va.get(), vb.get()); c.pmullw(va2.get(), vb.get()); - c.pand(va.get(), XmmConst(u128::from32p(0xffff0000))); + c.pand(va.get(), XmmConst(v128::from32p(0xffff0000))); c.psrld(va2.get(), 16); c.por(va.get(), va2.get()); XmmFinalize(va, rt); @@ -2189,7 +2189,7 @@ void SPURecompiler::MPYHHU(u32 rt, u32 ra, u32 rb) void SPURecompiler::ADDX(u32 rt, u32 ra, u32 rb) { const XmmLink& vt = XmmGet(rt); - c.pand(vt.get(), XmmConst(u128::from32p(1))); + c.pand(vt.get(), 
XmmConst(v128::from32p(1))); c.paddd(vt.get(), cpu_xmm(GPR[ra])); c.paddd(vt.get(), cpu_xmm(GPR[rb])); XmmFinalize(vt, rt); @@ -2200,7 +2200,7 @@ void SPURecompiler::SFX(u32 rt, u32 ra, u32 rb) { const XmmLink& vt = XmmGet(rt); const XmmLink& vb = XmmGet(rb, rt); - c.pandn(vt.get(), XmmConst(u128::from32p(1))); + c.pandn(vt.get(), XmmConst(v128::from32p(1))); c.psubd(vb.get(), cpu_xmm(GPR[ra])); c.psubd(vb.get(), vt.get()); XmmFinalize(vb, rt); @@ -2252,7 +2252,7 @@ void SPURecompiler::MPYHHAU(u32 rt, u32 ra, u32 rb) const XmmLink& va2 = XmmCopy(va); c.pmulhuw(va.get(), vb.get()); c.pmullw(va2.get(), vb.get()); - c.pand(va.get(), XmmConst(u128::from32p(0xffff0000))); + c.pand(va.get(), XmmConst(v128::from32p(0xffff0000))); c.psrld(va2.get(), 16); c.paddd(vt.get(), va.get()); c.paddd(vt.get(), va2.get()); @@ -2327,7 +2327,7 @@ void SPURecompiler::MPY(u32 rt, u32 ra, u32 rb) const XmmLink& va = XmmGet(ra, rt); const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb); const XmmLink& vi = XmmAlloc(); - c.movdqa(vi.get(), XmmConst(u128::from32p(0xffff))); + c.movdqa(vi.get(), XmmConst(v128::from32p(0xffff))); c.pand(va.get(), vi.get()); c.pand(vb.get(), vi.get()); c.pmaddwd(va.get(), vb.get()); @@ -2392,7 +2392,7 @@ void SPURecompiler::FCMEQ(u32 rt, u32 ra, u32 rb) { const XmmLink& vb = XmmGet(rb, rt); const XmmLink& vi = XmmAlloc(); - c.movaps(vi.get(), XmmConst(u128::from32p(0x7fffffff))); + c.movaps(vi.get(), XmmConst(v128::from32p(0x7fffffff))); c.andps(vb.get(), vi.get()); // abs if (const XmmLink* va = XmmRead(ra)) { @@ -2421,7 +2421,7 @@ void SPURecompiler::MPYU(u32 rt, u32 ra, u32 rb) c.pmulhuw(va.get(), vb.get()); c.pmullw(va2.get(), vb.get()); c.pslld(va.get(), 16); - c.pand(va2.get(), XmmConst(u128::from32p(0xffff))); + c.pand(va2.get(), XmmConst(v128::from32p(0xffff))); c.por(va.get(), va2.get()); XmmFinalize(va, rt); XmmFinalize(vb); @@ -2468,10 +2468,10 @@ void SPURecompiler::CFLTS(u32 rt, u32 ra, s32 i8) const XmmLink& va = XmmGet(ra, rt); if (i8 != 173) 
{ - c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast(173 - (i8 & 0xff))))))); // scale + c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast(173 - (i8 & 0xff))))))); // scale } const XmmLink& vi = XmmAlloc(); - c.movaps(vi.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(31))))); + c.movaps(vi.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(31))))); c.cmpps(vi.get(), va.get(), 2); c.cvttps2dq(va.get(), va.get()); // convert to ints with truncation c.pxor(va.get(), vi.get()); // fix result saturation (0x80000000 -> 0x7fffffff) @@ -2485,18 +2485,18 @@ void SPURecompiler::CFLTU(u32 rt, u32 ra, s32 i8) const XmmLink& va = XmmGet(ra, rt); if (i8 != 173) { - c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast(173 - (i8 & 0xff))))))); // scale + c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast(173 - (i8 & 0xff))))))); // scale } c.maxps(va.get(), XmmConst({})); // saturate const XmmLink& vs = XmmCopy(va); // copy scaled value const XmmLink& vs2 = XmmCopy(va); const XmmLink& vs3 = XmmAlloc(); - c.movaps(vs3.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(31))))); + c.movaps(vs3.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(31))))); c.subps(vs2.get(), vs3.get()); c.cmpps(vs3.get(), vs.get(), 2); c.andps(vs2.get(), vs3.get()); c.cvttps2dq(va.get(), va.get()); - c.cmpps(vs.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(32)))), 5); + c.cmpps(vs.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(32)))), 5); c.cvttps2dq(vs2.get(), vs2.get()); c.por(va.get(), vs.get()); c.por(va.get(), vs2.get()); @@ -2513,7 +2513,7 @@ void SPURecompiler::CSFLT(u32 rt, u32 ra, s32 i8) c.cvtdq2ps(va.get(), va.get()); // convert to floats if (i8 != 155) { - c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast((i8 & 0xff) - 155)))))); // scale + c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast((i8 & 0xff) - 155)))))); // scale } XmmFinalize(va, rt); LOG_OPCODE(); @@ -2523,14 +2523,14 @@ void 
SPURecompiler::CUFLT(u32 rt, u32 ra, s32 i8) { const XmmLink& va = XmmGet(ra, rt); const XmmLink& v1 = XmmCopy(va); - c.pand(va.get(), XmmConst(u128::from32p(0x7fffffff))); + c.pand(va.get(), XmmConst(v128::from32p(0x7fffffff))); c.cvtdq2ps(va.get(), va.get()); // convert to floats c.psrad(v1.get(), 31); // generate mask from sign bit - c.andps(v1.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(31))))); // generate correction component + c.andps(v1.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(31))))); // generate correction component c.addps(va.get(), v1.get()); // add correction component if (i8 != 155) { - c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast((i8 & 0xff) - 155)))))); // scale + c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast((i8 & 0xff) - 155)))))); // scale } XmmFinalize(va, rt); XmmFinalize(v1); @@ -2746,7 +2746,7 @@ void SPURecompiler::IL(u32 rt, s32 i16) } else { - c.movdqa(vr.get(), XmmConst(u128::from32p(i16))); + c.movdqa(vr.get(), XmmConst(v128::from32p(i16))); } XmmFinalize(vr, rt); LOG_OPCODE(); @@ -2755,7 +2755,7 @@ void SPURecompiler::IL(u32 rt, s32 i16) void SPURecompiler::ILHU(u32 rt, s32 i16) { const XmmLink& vr = XmmAlloc(rt); - c.movdqa(vr.get(), XmmConst(u128::from32p(i16 << 16))); + c.movdqa(vr.get(), XmmConst(v128::from32p(i16 << 16))); XmmFinalize(vr, rt); LOG_OPCODE(); } @@ -2763,7 +2763,7 @@ void SPURecompiler::ILHU(u32 rt, s32 i16) void SPURecompiler::ILH(u32 rt, s32 i16) { const XmmLink& vr = XmmAlloc(rt); - c.movdqa(vr.get(), XmmConst(u128::from32p(i16))); + c.movdqa(vr.get(), XmmConst(v128::from32p(i16))); XmmFinalize(vr, rt); LOG_OPCODE(); } @@ -2771,7 +2771,7 @@ void SPURecompiler::ILH(u32 rt, s32 i16) void SPURecompiler::IOHL(u32 rt, s32 i16) { const XmmLink& vt = XmmGet(rt, rt); - c.por(vt.get(), XmmConst(u128::from32p(i16 & 0xffff))); + c.por(vt.get(), XmmConst(v128::from32p(i16 & 0xffff))); XmmFinalize(vt, rt); LOG_OPCODE(); } @@ -2798,7 +2798,7 @@ void SPURecompiler::ORI(u32 rt, 
u32 ra, s32 i10) else { const XmmLink& va = XmmGet(ra, rt); - c.por(va.get(), XmmConst(u128::from32p(i10))); + c.por(va.get(), XmmConst(v128::from32p(i10))); XmmFinalize(va, rt); } LOG_OPCODE(); @@ -2807,7 +2807,7 @@ void SPURecompiler::ORI(u32 rt, u32 ra, s32 i10) void SPURecompiler::ORHI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra, rt); - c.por(va.get(), XmmConst(u128::from16p(i10))); + c.por(va.get(), XmmConst(v128::from16p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -2815,7 +2815,7 @@ void SPURecompiler::ORHI(u32 rt, u32 ra, s32 i10) void SPURecompiler::ORBI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra, rt); - c.por(va.get(), XmmConst(u128::from8p(i10))); + c.por(va.get(), XmmConst(v128::from8p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -2841,7 +2841,7 @@ void SPURecompiler::SFI(u32 rt, u32 ra, s32 i10) else { const XmmLink& vr = XmmAlloc(rt); - c.movdqa(vr.get(), XmmConst(u128::from32p(i10))); + c.movdqa(vr.get(), XmmConst(v128::from32p(i10))); c.psubd(vr.get(), cpu_xmm(GPR[ra])); XmmFinalize(vr, rt); } @@ -2869,7 +2869,7 @@ void SPURecompiler::SFHI(u32 rt, u32 ra, s32 i10) else { const XmmLink& vr = XmmAlloc(rt); - c.movdqa(vr.get(), XmmConst(u128::from16p(i10))); + c.movdqa(vr.get(), XmmConst(v128::from16p(i10))); c.psubw(vr.get(), cpu_xmm(GPR[ra])); XmmFinalize(vr, rt); } @@ -2879,7 +2879,7 @@ void SPURecompiler::SFHI(u32 rt, u32 ra, s32 i10) void SPURecompiler::ANDI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra, rt); - c.pand(va.get(), XmmConst(u128::from32p(i10))); + c.pand(va.get(), XmmConst(v128::from32p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -2887,7 +2887,7 @@ void SPURecompiler::ANDI(u32 rt, u32 ra, s32 i10) void SPURecompiler::ANDHI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra, rt); - c.pand(va.get(), XmmConst(u128::from16p(i10))); + c.pand(va.get(), XmmConst(v128::from16p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -2895,7 +2895,7 @@ void SPURecompiler::ANDHI(u32 rt, u32 ra, s32 
i10) void SPURecompiler::ANDBI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra, rt); - c.pand(va.get(), XmmConst(u128::from8p(i10))); + c.pand(va.get(), XmmConst(v128::from8p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -2904,7 +2904,7 @@ void SPURecompiler::AI(u32 rt, u32 ra, s32 i10) { // add const XmmLink& va = XmmGet(ra, rt); - c.paddd(va.get(), XmmConst(u128::from32p(i10))); + c.paddd(va.get(), XmmConst(v128::from32p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -2913,7 +2913,7 @@ void SPURecompiler::AHI(u32 rt, u32 ra, s32 i10) { // add const XmmLink& va = XmmGet(ra, rt); - c.paddw(va.get(), XmmConst(u128::from16p(i10))); + c.paddw(va.get(), XmmConst(v128::from16p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -2965,7 +2965,7 @@ void SPURecompiler::LQD(u32 rt, s32 i10, u32 ra) // i10 is shifted left by 4 whi void SPURecompiler::XORI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.pxor(va.get(), XmmConst(u128::from32p(i10))); + c.pxor(va.get(), XmmConst(v128::from32p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -2973,7 +2973,7 @@ void SPURecompiler::XORI(u32 rt, u32 ra, s32 i10) void SPURecompiler::XORHI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.pxor(va.get(), XmmConst(u128::from16p(i10))); + c.pxor(va.get(), XmmConst(v128::from16p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -2981,7 +2981,7 @@ void SPURecompiler::XORHI(u32 rt, u32 ra, s32 i10) void SPURecompiler::XORBI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.pxor(va.get(), XmmConst(u128::from8p(i10))); + c.pxor(va.get(), XmmConst(v128::from8p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -2989,7 +2989,7 @@ void SPURecompiler::XORBI(u32 rt, u32 ra, s32 i10) void SPURecompiler::CGTI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.pcmpgtd(va.get(), XmmConst(u128::from32p(i10))); + c.pcmpgtd(va.get(), XmmConst(v128::from32p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -2997,7 +2997,7 @@ void SPURecompiler::CGTI(u32 
rt, u32 ra, s32 i10) void SPURecompiler::CGTHI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.pcmpgtw(va.get(), XmmConst(u128::from16p(i10))); + c.pcmpgtw(va.get(), XmmConst(v128::from16p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -3005,7 +3005,7 @@ void SPURecompiler::CGTHI(u32 rt, u32 ra, s32 i10) void SPURecompiler::CGTBI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.pcmpgtb(va.get(), XmmConst(u128::from8p(i10))); + c.pcmpgtb(va.get(), XmmConst(v128::from8p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -3025,8 +3025,8 @@ void SPURecompiler::HGTI(u32 rt, u32 ra, s32 i10) void SPURecompiler::CLGTI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.pxor(va.get(), XmmConst(u128::from32p(0x80000000))); - c.pcmpgtd(va.get(), XmmConst(u128::from32p((u32)i10 - 0x80000000))); + c.pxor(va.get(), XmmConst(v128::from32p(0x80000000))); + c.pcmpgtd(va.get(), XmmConst(v128::from32p((u32)i10 - 0x80000000))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -3034,8 +3034,8 @@ void SPURecompiler::CLGTI(u32 rt, u32 ra, s32 i10) void SPURecompiler::CLGTHI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.pxor(va.get(), XmmConst(u128::from16p(0x8000))); - c.pcmpgtw(va.get(), XmmConst(u128::from16p((u16)i10 - 0x8000))); + c.pxor(va.get(), XmmConst(v128::from16p(0x8000))); + c.pcmpgtw(va.get(), XmmConst(v128::from16p((u16)i10 - 0x8000))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -3043,8 +3043,8 @@ void SPURecompiler::CLGTHI(u32 rt, u32 ra, s32 i10) void SPURecompiler::CLGTBI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.psubb(va.get(), XmmConst(u128::from8p(0x80))); - c.pcmpgtb(va.get(), XmmConst(u128::from8p((s8)i10 - 0x80))); + c.psubb(va.get(), XmmConst(v128::from8p(0x80))); + c.pcmpgtb(va.get(), XmmConst(v128::from8p((s8)i10 - 0x80))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -3064,7 +3064,7 @@ void SPURecompiler::HLGTI(u32 rt, u32 ra, s32 i10) void SPURecompiler::MPYI(u32 rt, u32 ra, s32 i10) { const XmmLink& 
va = XmmGet(ra, rt); - c.pmaddwd(va.get(), XmmConst(u128::from32p(i10 & 0xffff))); + c.pmaddwd(va.get(), XmmConst(v128::from32p(i10 & 0xffff))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -3074,7 +3074,7 @@ void SPURecompiler::MPYUI(u32 rt, u32 ra, s32 i10) const XmmLink& va = XmmGet(ra, rt); const XmmLink& vi = XmmAlloc(); const XmmLink& va2 = XmmCopy(va); - c.movdqa(vi.get(), XmmConst(u128::from32p(i10 & 0xffff))); + c.movdqa(vi.get(), XmmConst(v128::from32p(i10 & 0xffff))); c.pmulhuw(va.get(), vi.get()); c.pmullw(va2.get(), vi.get()); c.pslld(va.get(), 16); @@ -3088,7 +3088,7 @@ void SPURecompiler::MPYUI(u32 rt, u32 ra, s32 i10) void SPURecompiler::CEQI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.pcmpeqd(va.get(), XmmConst(u128::from32p(i10))); + c.pcmpeqd(va.get(), XmmConst(v128::from32p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -3096,7 +3096,7 @@ void SPURecompiler::CEQI(u32 rt, u32 ra, s32 i10) void SPURecompiler::CEQHI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.pcmpeqw(va.get(), XmmConst(u128::from16p(i10))); + c.pcmpeqw(va.get(), XmmConst(v128::from16p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -3104,7 +3104,7 @@ void SPURecompiler::CEQHI(u32 rt, u32 ra, s32 i10) void SPURecompiler::CEQBI(u32 rt, u32 ra, s32 i10) { const XmmLink& va = XmmGet(ra); - c.pcmpeqb(va.get(), XmmConst(u128::from8p(i10))); + c.pcmpeqb(va.get(), XmmConst(v128::from8p(i10))); XmmFinalize(va, rt); LOG_OPCODE(); } @@ -3141,7 +3141,7 @@ void SPURecompiler::ILA(u32 rt, u32 i18) } else { - c.movdqa(vr.get(), XmmConst(u128::from32p(i18 & 0x3ffff))); + c.movdqa(vr.get(), XmmConst(v128::from32p(i18 & 0x3ffff))); } XmmFinalize(vr, rt); LOG_OPCODE(); @@ -3168,11 +3168,11 @@ void SPURecompiler::SHUFB(u32 rt, u32 ra, u32 rb, u32 rc) const XmmLink& v4 = XmmAlloc(); const XmmLink& vFF = XmmAlloc(rt); // generate specific values: - c.movdqa(v1.get(), XmmConst(u128::from8p(0xe0))); // v1 = 11100000 - c.movdqa(v3.get(), XmmConst(u128::from8p(0x80))); // 
v3 = 10000000 + c.movdqa(v1.get(), XmmConst(v128::from8p(0xe0))); // v1 = 11100000 + c.movdqa(v3.get(), XmmConst(v128::from8p(0x80))); // v3 = 10000000 c.pand(v2.get(), v1.get()); // filter mask v2 = mask & 11100000 c.movdqa(vFF.get(), v2.get()); // and copy vFF = mask & 11100000 - c.movdqa(v4.get(), XmmConst(u128::from8p(0xc0))); // v4 = 11000000 + c.movdqa(v4.get(), XmmConst(v128::from8p(0xc0))); // v4 = 11000000 c.pcmpeqb(vFF.get(), v4.get()); // gen 0xff vFF = (mask & 11100000 == 11000000) ? 0xff : 0 c.movdqa(v4.get(), v2.get()); // copy again v4 = mask & 11100000 c.pand(v4.get(), v3.get()); // filter mask v4 = mask & 10000000 @@ -3182,13 +3182,13 @@ void SPURecompiler::SHUFB(u32 rt, u32 ra, u32 rb, u32 rc) c.por(vFF.get(), v2.get()); // merge 0xff, 0x80 vFF = (mask & 11100000 == 11000000) ? 0xff : (mask & 11100000 == 11100000) ? 0x80 : 0 c.pandn(v1.get(), v0.get()); // filter mask v1 = mask & 00011111 // select bytes from [rb]: - c.movdqa(v2.get(), XmmConst(u128::from8p(0x0f))); // v2 = 00001111 - c.pxor(v1.get(), XmmConst(u128::from8p(0x10))); // v1 = (mask & 00011111) ^ 00010000 + c.movdqa(v2.get(), XmmConst(v128::from8p(0x0f))); // v2 = 00001111 + c.pxor(v1.get(), XmmConst(v128::from8p(0x10))); // v1 = (mask & 00011111) ^ 00010000 c.psubb(v2.get(), v1.get()); // v2 = 00001111 - ((mask & 00011111) ^ 00010000) c.movdqa(v1.get(), cpu_xmm(GPR[rb])); // v1 = rb c.pshufb(v1.get(), v2.get()); // v1 = select(rb, 00001111 - ((mask & 00011111) ^ 00010000)) // select bytes from [ra]: - c.pxor(v2.get(), XmmConst(u128::from8p(0xf0))); // v2 = (00001111 - ((mask & 00011111) ^ 00010000)) ^ 11110000 + c.pxor(v2.get(), XmmConst(v128::from8p(0xf0))); // v2 = (00001111 - ((mask & 00011111) ^ 00010000)) ^ 11110000 c.movdqa(v3.get(), cpu_xmm(GPR[ra])); // v3 = ra c.pshufb(v3.get(), v2.get()); // v3 = select(ra, (00001111 - ((mask & 00011111) ^ 00010000)) ^ 11110000) c.por(v1.get(), v3.get()); // v1 = select(rb, 00001111 - ((mask & 00011111) ^ 00010000)) | (v3) @@ -3208,7 
+3208,7 @@ void SPURecompiler::MPYA(u32 rt, u32 ra, u32 rb, u32 rc) const XmmLink& va = XmmGet(ra, rt); const XmmLink& vb = XmmGet(rb); const XmmLink& vi = XmmAlloc(); - c.movdqa(vi.get(), XmmConst(u128::from32p(0xffff))); + c.movdqa(vi.get(), XmmConst(v128::from32p(0xffff))); c.pand(va.get(), vi.get()); c.pand(vb.get(), vi.get()); c.pmaddwd(va.get(), vb.get()); diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index dd849954a7..825a0aa040 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -360,13 +360,13 @@ struct spu_int_ctrl_t struct g_spu_imm_table_t { - u128 fsmb[65536]; // table for FSMB, FSMBI instructions - u128 fsmh[256]; // table for FSMH instruction - u128 fsm[16]; // table for FSM instruction + v128 fsmb[65536]; // table for FSMB, FSMBI instructions + v128 fsmh[256]; // table for FSMH instruction + v128 fsm[16]; // table for FSM instruction - u128 sldq_pshufb[32]; // table for SHLQBYBI, SHLQBY, SHLQBYI instructions - u128 srdq_pshufb[32]; // table for ROTQMBYBI, ROTQMBY, ROTQMBYI instructions - u128 rldq_pshufb[16]; // table for ROTQBYBI, ROTQBY, ROTQBYI instructions + v128 sldq_pshufb[32]; // table for SHLQBYBI, SHLQBY, SHLQBYI instructions + v128 srdq_pshufb[32]; // table for ROTQMBYBI, ROTQMBY, ROTQMBYI instructions + v128 rldq_pshufb[16]; // table for ROTQBYBI, ROTQBY, ROTQBYI instructions class scale_table_t { @@ -475,7 +475,7 @@ public: memset(this, 0, sizeof(*this)); } //slice -> 0 - 1 (double-precision slice index) - //NOTE: slices follow u128 indexing, i.e. slice 0 is RIGHT end of register! + //NOTE: slices follow v128 indexing, i.e. slice 0 is RIGHT end of register! 
//roundTo -> FPSCR_RN_* void setSliceRounding(u8 slice, u8 roundTo) { @@ -523,7 +523,7 @@ public: } // Write the FPSCR - void Write(const u128 & r) + void Write(const v128 & r) { _u32[3] = r._u32[3] & 0x00000F07; _u32[2] = r._u32[2] & 0x00003F07; @@ -532,7 +532,7 @@ public: } // Read the FPSCR - void Read(u128 & r) + void Read(v128 & r) { r._u32[3] = _u32[3]; r._u32[2] = _u32[2]; @@ -544,7 +544,7 @@ public: class SPUThread : public CPUThread { public: - u128 GPR[128]; // General-Purpose Registers + v128 GPR[128]; // General-Purpose Registers SPU_FPSCR FPSCR; std::unordered_map> m_addr_to_hle_function_map; @@ -643,18 +643,18 @@ public: u16 read16(u32 lsa) const { return vm::ps3::read16(lsa + offset); } u32 read32(u32 lsa) const { return vm::ps3::read32(lsa + offset); } u64 read64(u32 lsa) const { return vm::ps3::read64(lsa + offset); } - u128 read128(u32 lsa) const { return vm::ps3::read128(lsa + offset); } + v128 read128(u32 lsa) const { return vm::ps3::read128(lsa + offset); } void write8(u32 lsa, u8 data) const { vm::write8(lsa + offset, data); } void write16(u32 lsa, u16 data) const { vm::ps3::write16(lsa + offset, data); } void write32(u32 lsa, u32 data) const { vm::ps3::write32(lsa + offset, data); } void write64(u32 lsa, u64 data) const { vm::ps3::write64(lsa + offset, data); } - void write128(u32 lsa, u128 data) const { vm::ps3::write128(lsa + offset, data); } + void write128(u32 lsa, v128 data) const { vm::ps3::write128(lsa + offset, data); } void write16(u32 lsa, be_t data) const { vm::ps3::write16(lsa + offset, data); } void write32(u32 lsa, be_t data) const { vm::ps3::write32(lsa + offset, data); } void write64(u32 lsa, be_t data) const { vm::ps3::write64(lsa + offset, data); } - void write128(u32 lsa, be_t data) const { vm::ps3::write128(lsa + offset, data); } + void write128(u32 lsa, be_t data) const { vm::ps3::write128(lsa + offset, data); } void RegisterHleFunction(u32 addr, std::function function) { diff --git a/rpcs3/Emu/Memory/atomic.h 
b/rpcs3/Emu/Memory/atomic.h index 181398d0f6..7e2634e908 100644 --- a/rpcs3/Emu/Memory/atomic.h +++ b/rpcs3/Emu/Memory/atomic.h @@ -27,7 +27,7 @@ template struct _to_atomic_subtype template struct _to_atomic_subtype { - using type = u128; + using type = v128; }; template using atomic_subtype_t = typename _to_atomic_subtype::type; @@ -127,7 +127,7 @@ private: data = value; } - force_inline static void write_relaxed(volatile u128& data, const u128& value) + force_inline static void write_relaxed(volatile v128& data, const v128& value) { sync_lock_test_and_set(&data, value); } @@ -137,9 +137,9 @@ private: return data; } - force_inline static u128 read_relaxed(const volatile u128& value) + force_inline static v128 read_relaxed(const volatile v128& value) { - return sync_val_compare_and_swap(const_cast(&value), {}, {}); + return sync_val_compare_and_swap(const_cast(&value), {}, {}); } public: diff --git a/rpcs3/Emu/Memory/vm.h b/rpcs3/Emu/Memory/vm.h index 7b4fd197ed..f2ee27ae74 100644 --- a/rpcs3/Emu/Memory/vm.h +++ b/rpcs3/Emu/Memory/vm.h @@ -339,14 +339,14 @@ namespace vm get_ref>(addr) = value; } - inline const be_t& read128(u32 addr) + inline const be_t& read128(u32 addr) { - return get_ref>(addr); + return get_ref>(addr); } - inline void write128(u32 addr, be_t value) + inline void write128(u32 addr, be_t value) { - get_ref>(addr) = value; + get_ref>(addr) = value; } } @@ -384,14 +384,14 @@ namespace vm get_ref>(addr) = value; } - inline const le_t& read128(u32 addr) + inline const le_t& read128(u32 addr) { - return get_ref>(addr); + return get_ref>(addr); } - inline void write128(u32 addr, le_t value) + inline void write128(u32 addr, le_t value) { - get_ref>(addr) = value; + get_ref>(addr) = value; } } diff --git a/rpcs3/Emu/SysCalls/CB_FUNC.h b/rpcs3/Emu/SysCalls/CB_FUNC.h index 9ded6a6b9b..9c362d5776 100644 --- a/rpcs3/Emu/SysCalls/CB_FUNC.h +++ b/rpcs3/Emu/SysCalls/CB_FUNC.h @@ -47,7 +47,7 @@ namespace cb_detail template struct _func_arg { - 
static_assert(std::is_same, u128>::value, "Invalid callback argument type for ARG_VECTOR"); + static_assert(std::is_same, v128>::value, "Invalid callback argument type for ARG_VECTOR"); force_inline static void set_value(PPUThread& CPU, const T& arg) { @@ -91,7 +91,7 @@ namespace cb_detail force_inline static bool _bind_func_args(PPUThread& CPU, T1 arg1, T... args) { const bool is_float = std::is_floating_point::value; - const bool is_vector = std::is_same, u128>::value; + const bool is_vector = std::is_same, v128>::value; const bool is_context = std::is_same::value; const bool is_general = !is_float && !is_vector && !is_context; @@ -138,7 +138,7 @@ namespace cb_detail template struct _func_res { - static_assert(std::is_same, u128>::value, "Invalid callback result type for ARG_VECTOR"); + static_assert(std::is_same, v128>::value, "Invalid callback result type for ARG_VECTOR"); force_inline static T get_value(const PPUThread& CPU) { @@ -156,7 +156,7 @@ namespace cb_detail static_assert(!std::is_pointer::value, "Invalid callback result type (pointer)"); static_assert(!std::is_reference::value, "Invalid callback result type (reference)"); const bool is_float = std::is_floating_point::value; - const bool is_vector = std::is_same, u128>::value; + const bool is_vector = std::is_same, v128>::value; const _func_arg_type t = is_float ? ARG_FLOAT : (is_vector ? ARG_VECTOR : ARG_GENERAL); return _func_res::get_value(CPU); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 619663c259..9de9d27184 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -3536,7 +3536,7 @@ s32 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: alloc_ls_blocks = size > 0x3D400 ? 
0x7A : ((size - 0x400) >> 11); if (ls_pattern) { - u128 ls_pattern_128 = u128::from64r(ls_pattern->_u64[0], ls_pattern->_u64[1]); + v128 ls_pattern_128 = v128::from64r(ls_pattern->_u64[0], ls_pattern->_u64[1]); u32 ls_blocks = 0; for (auto i = 0; i < 128; i++) { @@ -3551,8 +3551,8 @@ s32 spursCreateTask(vm::ptr taskset, vm::ptr task_id, vm: return CELL_SPURS_TASK_ERROR_INVAL; } - u128 _0 = u128::from32(0); - if ((ls_pattern_128 & u128::from32r(0xFC000000)) != _0) + v128 _0 = v128::from32(0); + if ((ls_pattern_128 & v128::from32r(0xFC000000)) != _0) { // Prevent save/restore to SPURS management area return CELL_SPURS_TASK_ERROR_INVAL; @@ -3666,7 +3666,7 @@ s32 _cellSpursSendSignal(PPUThread& ppu, vm::ptr taskset, u32 return CELL_SPURS_TASK_ERROR_INVAL; } - be_t _0(u128::from32(0)); + be_t _0(v128::from32(0)); bool disabled = taskset->enabled.value()._bit[taskId]; auto invalid = (taskset->ready & taskset->pending_ready) != _0 || (taskset->running & taskset->waiting) != _0 || disabled || ((taskset->running | taskset->ready | taskset->pending_ready | taskset->waiting | taskset->signalled) & ~taskset->enabled) != _0; @@ -3676,7 +3676,7 @@ s32 _cellSpursSendSignal(PPUThread& ppu, vm::ptr taskset, u32 return CELL_SPURS_TASK_ERROR_SRCH; } - auto shouldSignal = ((taskset->waiting & ~taskset->signalled) & be_t(u128::fromBit(taskId))) != _0 ? true : false; + auto shouldSignal = ((taskset->waiting & ~taskset->signalled) & be_t(v128::fromBit(taskId))) != _0 ? 
true : false; auto signalled = taskset->signalled.value(); signalled._bit[taskId] = true; taskset->signalled = signalled; diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index f4cc4174d6..168bbedd5c 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -746,12 +746,12 @@ struct set_alignment(128) CellSpursTaskset CHECK_SIZE(TaskInfo, 48); - be_t running; // 0x00 - be_t ready; // 0x10 - be_t pending_ready; // 0x20 - be_t enabled; // 0x30 - be_t signalled; // 0x40 - be_t waiting; // 0x50 + be_t running; // 0x00 + be_t ready; // 0x10 + be_t pending_ready; // 0x20 + be_t enabled; // 0x30 + be_t signalled; // 0x40 + be_t waiting; // 0x50 vm::bptr spurs; // 0x60 be_t args; // 0x68 u8 enable_clear_ls; // 0x70 @@ -806,7 +806,7 @@ struct set_alignment(128) CellSpursTaskset2 u32 event_flag_id1; // 0x1898 u32 event_flag_id2; // 0x189C u8 unk3[0x1980 - 0x18A0]; // 0x18A0 - be_t task_exit_code[128]; // 0x1980 + be_t task_exit_code[128]; // 0x1980 u8 unk4[0x2900 - 0x2180]; // 0x2180 }; @@ -894,10 +894,10 @@ struct SpursTasksetContext u8 x27D8[0x2840 - 0x27D8]; // 0x27D8 u8 moduleId[16]; // 0x2840 u8 stackArea[0x2C80 - 0x2850]; // 0x2850 - be_t savedContextLr; // 0x2C80 - be_t savedContextSp; // 0x2C90 - be_t savedContextR80ToR127[48]; // 0x2CA0 - be_t savedContextFpscr; // 0x2FA0 + be_t savedContextLr; // 0x2C80 + be_t savedContextSp; // 0x2C90 + be_t savedContextR80ToR127[48]; // 0x2CA0 + be_t savedContextFpscr; // 0x2FA0 be_t savedWriteTagGroupQueryMask; // 0x2FB0 be_t savedSpuWriteEventMask; // 0x2FB4 be_t tasksetMgmtAddr; // 0x2FB8 diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index c02b06e2d9..a2ae426665 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -1162,7 +1162,7 @@ void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) { auto taskset = 
vm::get_ptr(spu.offset + 0x2700); spu.GPR[2].clear(); - spu.GPR[3] = u128::from64r(taskArgs._u64[0], taskArgs._u64[1]); + spu.GPR[3] = v128::from64r(taskArgs._u64[0], taskArgs._u64[1]); spu.GPR[4]._u64[1] = taskset->args; spu.GPR[4]._u64[0] = taskset->spurs.addr(); for (auto i = 5; i < 128; i++) { @@ -1183,7 +1183,7 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * auto taskset = ctxt->taskset.priv_ptr(); // Verify taskset state is valid - be_t _0(u128::from32(0)); + be_t _0(v128::from32(0)); if ((taskset->waiting & taskset->running) != _0 || (taskset->ready & taskset->pending_ready) != _0 || ((taskset->running | taskset->ready | taskset->pending_ready | taskset->signalled | taskset->waiting) & ~taskset->enabled) != _0) { assert(!"Invalid taskset state"); @@ -1199,13 +1199,13 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * } } - u128 readyButNotRunning; + v128 readyButNotRunning; u8 selectedTaskId; - u128 running = taskset->running.value(); - u128 waiting = taskset->waiting.value(); - u128 enabled = taskset->enabled.value(); - u128 signalled = (taskset->signalled & (taskset->ready | taskset->pending_ready)); - u128 ready = (taskset->signalled | taskset->ready | taskset->pending_ready); + v128 running = taskset->running.value(); + v128 waiting = taskset->waiting.value(); + v128 enabled = taskset->enabled.value(); + v128 signalled = (taskset->signalled & (taskset->ready | taskset->pending_ready)); + v128 ready = (taskset->signalled | taskset->ready | taskset->pending_ready); switch (request) { case SPURS_TASKSET_REQUEST_POLL_SIGNAL: @@ -1235,7 +1235,7 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * case SPURS_TASKSET_REQUEST_POLL: readyButNotRunning = ready & ~running; if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK) { - readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->wkl_flag_wait_task)); + readyButNotRunning = readyButNotRunning & 
~(v128::fromBit(taskset->wkl_flag_wait_task)); } rc = readyButNotRunning != _0 ? 1 : 0; @@ -1260,7 +1260,7 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * case SPURS_TASKSET_REQUEST_SELECT_TASK: readyButNotRunning = ready & ~running; if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK) { - readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->wkl_flag_wait_task)); + readyButNotRunning = readyButNotRunning & ~(v128::fromBit(taskset->wkl_flag_wait_task)); } // Select a task from the readyButNotRunning set to run. Start from the task after the last scheduled task to ensure fairness. @@ -1402,7 +1402,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F; u32 lsBlocks = 0; - u128 ls_pattern = u128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]); + v128 ls_pattern = v128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]); for (auto i = 0; i < 128; i++) { if (ls_pattern._bit[i]) { lsBlocks++; @@ -1421,7 +1421,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { } // Get the processor context - u128 r; + v128 r; spu.FPSCR.Read(r); ctxt->savedContextFpscr = r; ctxt->savedSpuWriteEventMask = spu.get_ch_value(SPU_RdEventMask); @@ -1486,7 +1486,7 @@ void spursTasksetDispatch(SPUThread & spu) { //spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); - ctxt->savedContextLr = u128::from32r(entryPoint); + ctxt->savedContextLr = v128::from32r(entryPoint); ctxt->guidAddr = lowestLoadAddr; ctxt->tasksetMgmtAddr = 0x2700; ctxt->x2FC0 = 0; @@ -1516,8 +1516,8 @@ void spursTasksetDispatch(SPUThread & spu) { } // If the entire LS is saved then there is no need to load the ELF as it will be be saved in the context save area as well - u128 ls_pattern = u128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]); - if (ls_pattern != u128::from64r(0x03FFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull)) { + v128 
ls_pattern = v128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]); + if (ls_pattern != v128::from64r(0x03FFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull)) { // Load the ELF u32 entryPoint; if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf.addr(), true) != CELL_OK) { diff --git a/rpcs3/Emu/SysCalls/SC_FUNC.h b/rpcs3/Emu/SysCalls/SC_FUNC.h index 489257aa41..b23398e56f 100644 --- a/rpcs3/Emu/SysCalls/SC_FUNC.h +++ b/rpcs3/Emu/SysCalls/SC_FUNC.h @@ -53,7 +53,7 @@ namespace ppu_func_detail template struct bind_arg { - static_assert(std::is_same, u128>::value, "Invalid function argument type for ARG_VECTOR"); + static_assert(std::is_same, v128>::value, "Invalid function argument type for ARG_VECTOR"); static force_inline T get_arg(PPUThread& CPU) { @@ -124,7 +124,7 @@ namespace ppu_func_detail template struct bind_result { - static_assert(std::is_same, u128>::value, "Invalid function result type for ARG_VECTOR"); + static_assert(std::is_same, v128>::value, "Invalid function result type for ARG_VECTOR"); static force_inline void put_result(PPUThread& CPU, const T& result) { @@ -176,7 +176,7 @@ namespace ppu_func_detail // TODO: check calculations const bool is_float = std::is_floating_point::value; - const bool is_vector = std::is_same, u128>::value; + const bool is_vector = std::is_same, v128>::value; const bool is_context = std::is_same::value; const bool is_variadic = std::is_same, ppu_va_args_t>::value; const bool is_general = !is_float && !is_vector && !is_context && !is_variadic; @@ -201,7 +201,7 @@ namespace ppu_func_detail static_assert(!std::is_pointer::value, "Invalid function result type (pointer)"); static_assert(!std::is_reference::value, "Invalid function result type (reference)"); static const bool is_float = std::is_floating_point::value; - static const bool is_vector = std::is_same, u128>::value; + static const bool is_vector = std::is_same, v128>::value; static const arg_class value = is_float ? ARG_FLOAT : (is_vector ? 
ARG_VECTOR : ARG_GENERAL); }; diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp index 3cd8a81e84..bc0e4e08f6 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp @@ -318,10 +318,10 @@ s32 sys_spu_thread_group_start(u32 id) t->PC = image->entry_point; t->run(); - t->GPR[3] = u128::from64(0, args.arg1); - t->GPR[4] = u128::from64(0, args.arg2); - t->GPR[5] = u128::from64(0, args.arg3); - t->GPR[6] = u128::from64(0, args.arg4); + t->GPR[3] = v128::from64(0, args.arg1); + t->GPR[4] = v128::from64(0, args.arg2); + t->GPR[5] = v128::from64(0, args.arg3); + t->GPR[6] = v128::from64(0, args.arg4); t->status.exchange(SPU_STATUS_RUNNING); }