mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-25 12:12:50 +01:00
u128 renamed to v128
Since it's vector union type
This commit is contained in:
parent
f8afee3325
commit
6f3c50eba2
@ -8,7 +8,7 @@
|
||||
|
||||
#define IS_LE_MACHINE // only draft
|
||||
|
||||
union u128
|
||||
union v128
|
||||
{
|
||||
u64 _u64[2];
|
||||
s64 _s64[2];
|
||||
@ -171,22 +171,22 @@ union u128
|
||||
|
||||
} _bit;
|
||||
|
||||
static u128 from64(u64 _0, u64 _1 = 0)
|
||||
static v128 from64(u64 _0, u64 _1 = 0)
|
||||
{
|
||||
u128 ret;
|
||||
v128 ret;
|
||||
ret._u64[0] = _0;
|
||||
ret._u64[1] = _1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u128 from64r(u64 _1, u64 _0 = 0)
|
||||
static v128 from64r(u64 _1, u64 _0 = 0)
|
||||
{
|
||||
return from64(_0, _1);
|
||||
}
|
||||
|
||||
static u128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0)
|
||||
static v128 from32(u32 _0, u32 _1 = 0, u32 _2 = 0, u32 _3 = 0)
|
||||
{
|
||||
u128 ret;
|
||||
v128 ret;
|
||||
ret._u32[0] = _0;
|
||||
ret._u32[1] = _1;
|
||||
ret._u32[2] = _2;
|
||||
@ -194,141 +194,141 @@ union u128
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u128 from32r(u32 _3, u32 _2 = 0, u32 _1 = 0, u32 _0 = 0)
|
||||
static v128 from32r(u32 _3, u32 _2 = 0, u32 _1 = 0, u32 _0 = 0)
|
||||
{
|
||||
return from32(_0, _1, _2, _3);
|
||||
}
|
||||
|
||||
static u128 from32p(u32 value)
|
||||
static v128 from32p(u32 value)
|
||||
{
|
||||
u128 ret;
|
||||
v128 ret;
|
||||
ret.vi = _mm_set1_epi32(static_cast<s32>(value));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u128 from16p(u16 value)
|
||||
static v128 from16p(u16 value)
|
||||
{
|
||||
u128 ret;
|
||||
v128 ret;
|
||||
ret.vi = _mm_set1_epi16(static_cast<s16>(value));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u128 from8p(u8 value)
|
||||
static v128 from8p(u8 value)
|
||||
{
|
||||
u128 ret;
|
||||
v128 ret;
|
||||
ret.vi = _mm_set1_epi8(static_cast<s8>(value));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u128 fromBit(u32 bit)
|
||||
static v128 fromBit(u32 bit)
|
||||
{
|
||||
u128 ret = {};
|
||||
v128 ret = {};
|
||||
ret._bit[bit] = true;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u128 fromV(__m128i value)
|
||||
static v128 fromV(__m128i value)
|
||||
{
|
||||
u128 ret;
|
||||
v128 ret;
|
||||
ret.vi = value;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u128 fromF(__m128 value)
|
||||
static v128 fromF(__m128 value)
|
||||
{
|
||||
u128 ret;
|
||||
v128 ret;
|
||||
ret.vf = value;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u128 fromD(__m128d value)
|
||||
static v128 fromD(__m128d value)
|
||||
{
|
||||
u128 ret;
|
||||
v128 ret;
|
||||
ret.vd = value;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static force_inline u128 add8(const u128& left, const u128& right)
|
||||
static force_inline v128 add8(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_add_epi8(left.vi, right.vi));
|
||||
}
|
||||
|
||||
static force_inline u128 add16(const u128& left, const u128& right)
|
||||
static force_inline v128 add16(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_add_epi16(left.vi, right.vi));
|
||||
}
|
||||
|
||||
static force_inline u128 add32(const u128& left, const u128& right)
|
||||
static force_inline v128 add32(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_add_epi32(left.vi, right.vi));
|
||||
}
|
||||
|
||||
static force_inline u128 addfs(const u128& left, const u128& right)
|
||||
static force_inline v128 addfs(const v128& left, const v128& right)
|
||||
{
|
||||
return fromF(_mm_add_ps(left.vf, right.vf));
|
||||
}
|
||||
|
||||
static force_inline u128 addfd(const u128& left, const u128& right)
|
||||
static force_inline v128 addfd(const v128& left, const v128& right)
|
||||
{
|
||||
return fromD(_mm_add_pd(left.vd, right.vd));
|
||||
}
|
||||
|
||||
static force_inline u128 sub8(const u128& left, const u128& right)
|
||||
static force_inline v128 sub8(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_sub_epi8(left.vi, right.vi));
|
||||
}
|
||||
|
||||
static force_inline u128 sub16(const u128& left, const u128& right)
|
||||
static force_inline v128 sub16(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_sub_epi16(left.vi, right.vi));
|
||||
}
|
||||
|
||||
static force_inline u128 sub32(const u128& left, const u128& right)
|
||||
static force_inline v128 sub32(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_sub_epi32(left.vi, right.vi));
|
||||
}
|
||||
|
||||
static force_inline u128 subfs(const u128& left, const u128& right)
|
||||
static force_inline v128 subfs(const v128& left, const v128& right)
|
||||
{
|
||||
return fromF(_mm_sub_ps(left.vf, right.vf));
|
||||
}
|
||||
|
||||
static force_inline u128 subfd(const u128& left, const u128& right)
|
||||
static force_inline v128 subfd(const v128& left, const v128& right)
|
||||
{
|
||||
return fromD(_mm_sub_pd(left.vd, right.vd));
|
||||
}
|
||||
|
||||
static force_inline u128 maxu8(const u128& left, const u128& right)
|
||||
static force_inline v128 maxu8(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_max_epu8(left.vi, right.vi));
|
||||
}
|
||||
|
||||
static force_inline u128 minu8(const u128& left, const u128& right)
|
||||
static force_inline v128 minu8(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_min_epu8(left.vi, right.vi));
|
||||
}
|
||||
|
||||
static force_inline u128 eq8(const u128& left, const u128& right)
|
||||
static force_inline v128 eq8(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_cmpeq_epi8(left.vi, right.vi));
|
||||
}
|
||||
|
||||
static force_inline u128 eq16(const u128& left, const u128& right)
|
||||
static force_inline v128 eq16(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_cmpeq_epi16(left.vi, right.vi));
|
||||
}
|
||||
|
||||
static force_inline u128 eq32(const u128& left, const u128& right)
|
||||
static force_inline v128 eq32(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_cmpeq_epi32(left.vi, right.vi));
|
||||
}
|
||||
|
||||
bool operator == (const u128& right) const
|
||||
bool operator == (const v128& right) const
|
||||
{
|
||||
return (_u64[0] == right._u64[0]) && (_u64[1] == right._u64[1]);
|
||||
}
|
||||
|
||||
bool operator != (const u128& right) const
|
||||
bool operator != (const v128& right) const
|
||||
{
|
||||
return (_u64[0] != right._u64[0]) || (_u64[1] != right._u64[1]);
|
||||
}
|
||||
@ -344,7 +344,7 @@ union u128
|
||||
}
|
||||
|
||||
// result = (~left) & (right)
|
||||
static force_inline u128 andnot(const u128& left, const u128& right)
|
||||
static force_inline v128 andnot(const v128& left, const v128& right)
|
||||
{
|
||||
return fromV(_mm_andnot_si128(left.vi, right.vi));
|
||||
}
|
||||
@ -358,46 +358,46 @@ union u128
|
||||
|
||||
std::string to_xyzw() const;
|
||||
|
||||
static force_inline u128 byteswap(const u128 val)
|
||||
static force_inline v128 byteswap(const v128 val)
|
||||
{
|
||||
return fromV(_mm_shuffle_epi8(val.vi, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)));
|
||||
}
|
||||
};
|
||||
|
||||
CHECK_SIZE_ALIGN(u128, 16, 16);
|
||||
CHECK_SIZE_ALIGN(v128, 16, 16);
|
||||
|
||||
inline u128 operator |(const u128& left, const u128& right)
|
||||
inline v128 operator |(const v128& left, const v128& right)
|
||||
{
|
||||
return u128::fromV(_mm_or_si128(left.vi, right.vi));
|
||||
return v128::fromV(_mm_or_si128(left.vi, right.vi));
|
||||
}
|
||||
|
||||
inline u128 operator &(const u128& left, const u128& right)
|
||||
inline v128 operator &(const v128& left, const v128& right)
|
||||
{
|
||||
return u128::fromV(_mm_and_si128(left.vi, right.vi));
|
||||
return v128::fromV(_mm_and_si128(left.vi, right.vi));
|
||||
}
|
||||
|
||||
inline u128 operator ^(const u128& left, const u128& right)
|
||||
inline v128 operator ^(const v128& left, const v128& right)
|
||||
{
|
||||
return u128::fromV(_mm_xor_si128(left.vi, right.vi));
|
||||
return v128::fromV(_mm_xor_si128(left.vi, right.vi));
|
||||
}
|
||||
|
||||
inline u128 operator ~(const u128& other)
|
||||
inline v128 operator ~(const v128& other)
|
||||
{
|
||||
return u128::from64(~other._u64[0], ~other._u64[1]);
|
||||
return v128::from64(~other._u64[0], ~other._u64[1]);
|
||||
}
|
||||
|
||||
static force_inline u128 sync_val_compare_and_swap(volatile u128* dest, u128 comp, u128 exch)
|
||||
static force_inline v128 sync_val_compare_and_swap(volatile v128* dest, v128 comp, v128 exch)
|
||||
{
|
||||
#if !defined(_MSC_VER)
|
||||
auto res = __sync_val_compare_and_swap((volatile __int128_t*)dest, (__int128_t&)comp, (__int128_t&)exch);
|
||||
return (u128&)res;
|
||||
return (v128&)res;
|
||||
#else
|
||||
_InterlockedCompareExchange128((volatile long long*)dest, exch._u64[1], exch._u64[0], (long long*)&comp);
|
||||
return comp;
|
||||
#endif
|
||||
}
|
||||
|
||||
static force_inline bool sync_bool_compare_and_swap(volatile u128* dest, u128 comp, u128 exch)
|
||||
static force_inline bool sync_bool_compare_and_swap(volatile v128* dest, v128 comp, v128 exch)
|
||||
{
|
||||
#if !defined(_MSC_VER)
|
||||
return __sync_bool_compare_and_swap((volatile __int128_t*)dest, (__int128_t&)comp, (__int128_t&)exch);
|
||||
@ -406,38 +406,38 @@ static force_inline bool sync_bool_compare_and_swap(volatile u128* dest, u128 co
|
||||
#endif
|
||||
}
|
||||
|
||||
static force_inline u128 sync_lock_test_and_set(volatile u128* dest, u128 value)
|
||||
static force_inline v128 sync_lock_test_and_set(volatile v128* dest, v128 value)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
const u128 old = *(u128*)dest;
|
||||
const v128 old = *(v128*)dest;
|
||||
if (sync_bool_compare_and_swap(dest, old, value)) return old;
|
||||
}
|
||||
}
|
||||
|
||||
static force_inline u128 sync_fetch_and_or(volatile u128* dest, u128 value)
|
||||
static force_inline v128 sync_fetch_and_or(volatile v128* dest, v128 value)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
const u128 old = *(u128*)dest;
|
||||
const v128 old = *(v128*)dest;
|
||||
if (sync_bool_compare_and_swap(dest, old, value | old)) return old;
|
||||
}
|
||||
}
|
||||
|
||||
static force_inline u128 sync_fetch_and_and(volatile u128* dest, u128 value)
|
||||
static force_inline v128 sync_fetch_and_and(volatile v128* dest, v128 value)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
const u128 old = *(u128*)dest;
|
||||
const v128 old = *(v128*)dest;
|
||||
if (sync_bool_compare_and_swap(dest, old, value & old)) return old;
|
||||
}
|
||||
}
|
||||
|
||||
static force_inline u128 sync_fetch_and_xor(volatile u128* dest, u128 value)
|
||||
static force_inline v128 sync_fetch_and_xor(volatile v128* dest, v128 value)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
const u128 old = *(u128*)dest;
|
||||
const v128 old = *(v128*)dest;
|
||||
if (sync_bool_compare_and_swap(dest, old, value ^ old)) return old;
|
||||
}
|
||||
}
|
||||
@ -488,14 +488,14 @@ template<typename T> struct se_t<T, 8>
|
||||
|
||||
template<typename T> struct se_t<T, 16>
|
||||
{
|
||||
static force_inline u128 to(const T& src)
|
||||
static force_inline v128 to(const T& src)
|
||||
{
|
||||
return u128::byteswap((u128&)src);
|
||||
return v128::byteswap((v128&)src);
|
||||
}
|
||||
|
||||
static force_inline T from(const u128& src)
|
||||
static force_inline T from(const v128& src)
|
||||
{
|
||||
const u128 res = u128::byteswap(src);
|
||||
const v128 res = v128::byteswap(src);
|
||||
return (T&)res;
|
||||
}
|
||||
};
|
||||
@ -553,7 +553,7 @@ template<typename T> struct be_storage<T, 8>
|
||||
|
||||
template<typename T> struct be_storage<T, 16>
|
||||
{
|
||||
using type = u128;
|
||||
using type = v128;
|
||||
};
|
||||
|
||||
template<typename T> using be_storage_t = typename be_storage<T>::type;
|
||||
@ -602,7 +602,7 @@ public:
|
||||
#endif
|
||||
|
||||
static_assert(!std::is_class<type>::value, "be_t<> error: invalid type (class or structure)");
|
||||
static_assert(!std::is_union<type>::value || std::is_same<type, u128>::value, "be_t<> error: invalid type (union)");
|
||||
static_assert(!std::is_union<type>::value || std::is_same<type, v128>::value, "be_t<> error: invalid type (union)");
|
||||
static_assert(!std::is_pointer<type>::value, "be_t<> error: invalid type (pointer)");
|
||||
static_assert(!std::is_reference<type>::value, "be_t<> error: invalid type (reference)");
|
||||
static_assert(!std::is_array<type>::value, "be_t<> error: invalid type (array)");
|
||||
@ -748,7 +748,7 @@ template<typename T> struct is_be_t<volatile T> : public std::integral_constant<
|
||||
// to_be_t helper struct
|
||||
template<typename T> struct to_be
|
||||
{
|
||||
using type = std::conditional_t<std::is_arithmetic<T>::value || std::is_enum<T>::value || std::is_same<T, u128>::value, be_t<T>, T>;
|
||||
using type = std::conditional_t<std::is_arithmetic<T>::value || std::is_enum<T>::value || std::is_same<T, v128>::value, be_t<T>, T>;
|
||||
};
|
||||
|
||||
// be_t<T> if possible, T otherwise
|
||||
@ -781,7 +781,7 @@ public:
|
||||
type m_data; // don't access directly
|
||||
|
||||
static_assert(!std::is_class<type>::value, "le_t<> error: invalid type (class or structure)");
|
||||
static_assert(!std::is_union<type>::value || std::is_same<type, u128>::value, "le_t<> error: invalid type (union)");
|
||||
static_assert(!std::is_union<type>::value || std::is_same<type, v128>::value, "le_t<> error: invalid type (union)");
|
||||
static_assert(!std::is_pointer<type>::value, "le_t<> error: invalid type (pointer)");
|
||||
static_assert(!std::is_reference<type>::value, "le_t<> error: invalid type (reference)");
|
||||
static_assert(!std::is_array<type>::value, "le_t<> error: invalid type (array)");
|
||||
@ -863,7 +863,7 @@ template<typename T> struct is_le_t<volatile T> : public std::integral_constant<
|
||||
|
||||
template<typename T> struct to_le
|
||||
{
|
||||
using type = std::conditional_t<std::is_arithmetic<T>::value || std::is_enum<T>::value || std::is_same<T, u128>::value, le_t<T>, T>;
|
||||
using type = std::conditional_t<std::is_arithmetic<T>::value || std::is_enum<T>::value || std::is_same<T, v128>::value, le_t<T>, T>;
|
||||
};
|
||||
|
||||
// le_t<T> if possible, T otherwise
|
||||
|
@ -5,12 +5,12 @@
|
||||
#include <wx/string.h>
|
||||
#pragma warning(pop)
|
||||
|
||||
std::string u128::to_hex() const
|
||||
std::string v128::to_hex() const
|
||||
{
|
||||
return fmt::format("%016llx%016llx", _u64[1], _u64[0]);
|
||||
}
|
||||
|
||||
std::string u128::to_xyzw() const
|
||||
std::string v128::to_xyzw() const
|
||||
{
|
||||
return fmt::Format("x: %g y: %g z: %g w: %g", _f[3], _f[2], _f[1], _f[0]);
|
||||
}
|
||||
|
@ -492,7 +492,7 @@ void decode_x64_reg_op(const u8* code, x64_op_t& out_op, x64_reg_t& out_reg, siz
|
||||
typedef CONTEXT x64_context;
|
||||
|
||||
#define X64REG(context, reg) (&(&(context)->Rax)[reg])
|
||||
#define XMMREG(context, reg) (reinterpret_cast<u128*>(&(&(context)->Xmm0)[reg]))
|
||||
#define XMMREG(context, reg) (reinterpret_cast<v128*>(&(&(context)->Xmm0)[reg]))
|
||||
#define EFLAGS(context) ((context)->EFlags)
|
||||
|
||||
#else
|
||||
@ -502,7 +502,7 @@ typedef ucontext_t x64_context;
|
||||
#ifdef __APPLE__
|
||||
|
||||
#define X64REG(context, reg) (darwin_x64reg(context, reg))
|
||||
#define XMMREG(context, reg) (reinterpret_cast<u128*>(&(context)->uc_mcontext->__fs.__fpu_xmm0.__xmm_reg[reg]))
|
||||
#define XMMREG(context, reg) (reinterpret_cast<v128*>(&(context)->uc_mcontext->__fs.__fpu_xmm0.__xmm_reg[reg]))
|
||||
#define EFLAGS(context) ((context)->uc_mcontext->__ss.__rflags)
|
||||
|
||||
uint64_t* darwin_x64reg(x64_context *context, int reg)
|
||||
@ -560,7 +560,7 @@ static const reg_table_t reg_table[17] =
|
||||
};
|
||||
|
||||
#define X64REG(context, reg) (&(context)->uc_mcontext.gregs[reg_table[reg]])
|
||||
#define XMMREG(context, reg) (reinterpret_cast<u128*>(&(context)->uc_mcontext.fpregs->_xmm[reg]))
|
||||
#define XMMREG(context, reg) (reinterpret_cast<v128*>(&(context)->uc_mcontext.fpregs->_xmm[reg]))
|
||||
#define EFLAGS(context) ((context)->uc_mcontext.gregs[REG_EFL])
|
||||
|
||||
#endif // __APPLE__
|
||||
|
@ -136,7 +136,7 @@ namespace psv_func_detail
|
||||
struct bind_arg<T, ARG_VECTOR, g_count, f_count, v_count>
|
||||
{
|
||||
static_assert(v_count <= 0, "TODO: Unsupported argument type (vector)");
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid function argument type for ARG_VECTOR");
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid function argument type for ARG_VECTOR");
|
||||
|
||||
force_inline static T get_arg(ARMv7Context& context)
|
||||
{
|
||||
@ -294,7 +294,7 @@ namespace psv_func_detail
|
||||
//template<typename T>
|
||||
//struct bind_result<T, ARG_VECTOR>
|
||||
//{
|
||||
// static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid function result type for ARG_VECTOR");
|
||||
// static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid function result type for ARG_VECTOR");
|
||||
|
||||
// static force_inline void put_result(ARMv7Context& context, const T& result)
|
||||
// {
|
||||
@ -307,7 +307,7 @@ namespace psv_func_detail
|
||||
static_assert(!std::is_pointer<RT>::value, "Invalid function result type (pointer)");
|
||||
static_assert(!std::is_reference<RT>::value, "Invalid function result type (reference)");
|
||||
static const bool is_float = std::is_floating_point<RT>::value;
|
||||
static const bool is_vector = std::is_same<std::remove_cv_t<RT>, u128>::value;
|
||||
static const bool is_vector = std::is_same<std::remove_cv_t<RT>, v128>::value;
|
||||
static const arg_class value = is_float ? ARG_FLOAT : (is_vector ? ARG_VECTOR : ARG_GENERAL);
|
||||
};
|
||||
|
||||
@ -316,7 +316,7 @@ namespace psv_func_detail
|
||||
{
|
||||
// TODO: check calculations
|
||||
static const bool is_float = std::is_floating_point<T>::value;
|
||||
static const bool is_vector = std::is_same<std::remove_cv_t<T>, u128>::value;
|
||||
static const bool is_vector = std::is_same<std::remove_cv_t<T>, v128>::value;
|
||||
static const bool is_context = std::is_same<T, ARMv7Context&>::value;
|
||||
static const bool is_variadic = std::is_same<std::remove_cv_t<T>, armv7_va_args_t>::value;
|
||||
static const bool is_general = !is_float && !is_vector && !is_context && !is_variadic;
|
||||
|
@ -91,7 +91,7 @@ void ppu_interpreter::VADDCUW(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VADDFP(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
CPU.VPR[op.vd] = u128::addfs(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
CPU.VPR[op.vd] = v128::addfs(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
}
|
||||
|
||||
void ppu_interpreter::VADDSBS(PPUThread& CPU, ppu_opcode_t op)
|
||||
@ -108,7 +108,7 @@ void ppu_interpreter::VADDSWS(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
const auto a = CPU.VPR[op.va];
|
||||
const auto b = CPU.VPR[op.vb];
|
||||
const auto s = u128::add32(a, b); // a + b
|
||||
const auto s = v128::add32(a, b); // a + b
|
||||
const auto m = (a ^ s) & (b ^ s); // overflow bit
|
||||
const auto x = _mm_srai_epi32(m.vi, 31); // saturation mask
|
||||
const auto y = _mm_srai_epi32(_mm_and_si128(s.vi, m.vi), 31); // positive saturation mask
|
||||
@ -117,7 +117,7 @@ void ppu_interpreter::VADDSWS(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VADDUBM(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
CPU.VPR[op.vd] = u128::add8(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
CPU.VPR[op.vd] = v128::add8(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
}
|
||||
|
||||
void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op)
|
||||
@ -127,7 +127,7 @@ void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VADDUHM(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
CPU.VPR[op.vd] = u128::add16(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
CPU.VPR[op.vd] = v128::add16(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
}
|
||||
|
||||
void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op)
|
||||
@ -137,7 +137,7 @@ void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VADDUWM(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
CPU.VPR[op.vd] = u128::add32(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
CPU.VPR[op.vd] = v128::add32(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
}
|
||||
|
||||
void ppu_interpreter::VADDUWS(PPUThread& CPU, ppu_opcode_t op)
|
||||
@ -160,30 +160,30 @@ void ppu_interpreter::VANDC(PPUThread& CPU, ppu_opcode_t op)
|
||||
void ppu_interpreter::VAVGSB(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
const auto a = CPU.VPR[op.va];
|
||||
const auto b = u128::add8(CPU.VPR[op.vb], u128::from8p(1)); // add 1
|
||||
const auto summ = u128::add8(a, b) & u128::from8p(0xfe);
|
||||
const auto sign = u128::from8p(0x80);
|
||||
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq8(b, sign)) & sign; // calculate msb
|
||||
const auto b = v128::add8(CPU.VPR[op.vb], v128::from8p(1)); // add 1
|
||||
const auto summ = v128::add8(a, b) & v128::from8p(0xfe);
|
||||
const auto sign = v128::from8p(0x80);
|
||||
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ v128::eq8(b, sign)) & sign; // calculate msb
|
||||
CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi64(summ.vi, 1));
|
||||
}
|
||||
|
||||
void ppu_interpreter::VAVGSH(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
const auto a = CPU.VPR[op.va];
|
||||
const auto b = u128::add16(CPU.VPR[op.vb], u128::from16p(1)); // add 1
|
||||
const auto summ = u128::add16(a, b);
|
||||
const auto sign = u128::from16p(0x8000);
|
||||
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq16(b, sign)) & sign; // calculate msb
|
||||
const auto b = v128::add16(CPU.VPR[op.vb], v128::from16p(1)); // add 1
|
||||
const auto summ = v128::add16(a, b);
|
||||
const auto sign = v128::from16p(0x8000);
|
||||
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ v128::eq16(b, sign)) & sign; // calculate msb
|
||||
CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi16(summ.vi, 1));
|
||||
}
|
||||
|
||||
void ppu_interpreter::VAVGSW(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
const auto a = CPU.VPR[op.va];
|
||||
const auto b = u128::add32(CPU.VPR[op.vb], u128::from32p(1)); // add 1
|
||||
const auto summ = u128::add32(a, b);
|
||||
const auto sign = u128::from32p(0x80000000);
|
||||
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq32(b, sign)) & sign; // calculate msb
|
||||
const auto b = v128::add32(CPU.VPR[op.vb], v128::from32p(1)); // add 1
|
||||
const auto summ = v128::add32(a, b);
|
||||
const auto sign = v128::from32p(0x80000000);
|
||||
const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ v128::eq32(b, sign)) & sign; // calculate msb
|
||||
CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi32(summ.vi, 1));
|
||||
}
|
||||
|
||||
@ -201,7 +201,7 @@ void ppu_interpreter::VAVGUW(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
const auto a = CPU.VPR[op.va];
|
||||
const auto b = CPU.VPR[op.vb];
|
||||
const auto summ = u128::add32(u128::add32(a, b), u128::from32p(1));
|
||||
const auto summ = v128::add32(v128::add32(a, b), v128::from32p(1));
|
||||
const auto carry = _mm_xor_si128(_mm_slli_epi32(sse_cmpgt_epu32(summ.vi, a.vi), 31), _mm_set1_epi32(0x80000000));
|
||||
CPU.VPR[op.vd].vi = _mm_or_si128(carry, _mm_srli_epi32(summ.vi, 1));
|
||||
}
|
||||
@ -248,7 +248,7 @@ void ppu_interpreter::VCMPEQFP_(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VCMPEQUB(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
CPU.VPR[op.vd] = u128::eq8(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
CPU.VPR[op.vd] = v128::eq8(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
}
|
||||
|
||||
void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op)
|
||||
@ -260,7 +260,7 @@ void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VCMPEQUH(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
CPU.VPR[op.vd] = u128::eq16(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
CPU.VPR[op.vd] = v128::eq16(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
}
|
||||
|
||||
void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op)
|
||||
@ -272,7 +272,7 @@ void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VCMPEQUW(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
CPU.VPR[op.vd] = u128::eq32(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
CPU.VPR[op.vd] = v128::eq32(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
}
|
||||
|
||||
void ppu_interpreter::VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op)
|
||||
@ -727,8 +727,8 @@ void ppu_interpreter::VPERM(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VPKPX(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VA = CPU.VPR[op.va];
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VA = CPU.VPR[op.va];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
u16 bb7 = VB._u8[15 - (h * 4 + 0)] & 0x1;
|
||||
@ -764,8 +764,8 @@ void ppu_interpreter::VPKSWUS(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
//CPU.VPR[op.vd].vi = _mm_packus_epi32(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi);
|
||||
|
||||
u128 VA = CPU.VPR[op.va];
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VA = CPU.VPR[op.va];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
s32 result = VA._s32[h];
|
||||
@ -798,8 +798,8 @@ void ppu_interpreter::VPKSWUS(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VPKUHUM(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VA = CPU.VPR[op.va];
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VA = CPU.VPR[op.va];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint b = 0; b < 8; b++)
|
||||
{
|
||||
CPU.VPR[op.vd]._u8[b + 8] = VA._u8[b * 2];
|
||||
@ -809,8 +809,8 @@ void ppu_interpreter::VPKUHUM(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VPKUHUS(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VA = CPU.VPR[op.va];
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VA = CPU.VPR[op.va];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint b = 0; b < 8; b++)
|
||||
{
|
||||
u16 result = VA._u16[b];
|
||||
@ -835,8 +835,8 @@ void ppu_interpreter::VPKUHUS(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VPKUWUM(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VA = CPU.VPR[op.va];
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VA = CPU.VPR[op.va];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
CPU.VPR[op.vd]._u16[h + 4] = VA._u16[h * 2];
|
||||
@ -846,8 +846,8 @@ void ppu_interpreter::VPKUWUM(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VPKUWUS(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VA = CPU.VPR[op.va];
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VA = CPU.VPR[op.va];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
u32 result = VA._u32[h];
|
||||
@ -949,7 +949,7 @@ void ppu_interpreter::VSEL(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VSL(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VA = CPU.VPR[op.va];
|
||||
v128 VA = CPU.VPR[op.va];
|
||||
u8 sh = CPU.VPR[op.vb]._u8[0] & 0x7;
|
||||
|
||||
CPU.VPR[op.vd]._u8[0] = VA._u8[0] << sh;
|
||||
@ -989,7 +989,7 @@ void ppu_interpreter::VSLH(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VSLO(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VA = CPU.VPR[op.va];
|
||||
v128 VA = CPU.VPR[op.va];
|
||||
u8 nShift = (CPU.VPR[op.vb]._u8[0] >> 3) & 0xf;
|
||||
|
||||
CPU.VPR[op.vd].clear();
|
||||
@ -1068,7 +1068,7 @@ void ppu_interpreter::VSPLTW(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VSR(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VA = CPU.VPR[op.va];
|
||||
v128 VA = CPU.VPR[op.va];
|
||||
u8 sh = CPU.VPR[op.vb]._u8[0] & 0x7;
|
||||
|
||||
CPU.VPR[op.vd]._u8[15] = VA._u8[15] >> sh;
|
||||
@ -1120,7 +1120,7 @@ void ppu_interpreter::VSRH(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VSRO(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VA = CPU.VPR[op.va];
|
||||
v128 VA = CPU.VPR[op.va];
|
||||
u8 nShift = (CPU.VPR[op.vb]._u8[0] >> 3) & 0xf;
|
||||
|
||||
CPU.VPR[op.vd].clear();
|
||||
@ -1149,7 +1149,7 @@ void ppu_interpreter::VSUBCUW(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VSUBFP(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
CPU.VPR[op.vd] = u128::subfs(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
CPU.VPR[op.vd] = v128::subfs(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
}
|
||||
|
||||
void ppu_interpreter::VSUBSBS(PPUThread& CPU, ppu_opcode_t op)
|
||||
@ -1183,7 +1183,7 @@ void ppu_interpreter::VSUBSWS(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VSUBUBM(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
CPU.VPR[op.vd] = u128::sub8(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
CPU.VPR[op.vd] = v128::sub8(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
}
|
||||
|
||||
void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op)
|
||||
@ -1193,7 +1193,7 @@ void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VSUBUHM(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
CPU.VPR[op.vd] = u128::sub16(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
CPU.VPR[op.vd] = v128::sub16(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
}
|
||||
|
||||
void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op)
|
||||
@ -1203,7 +1203,7 @@ void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VSUBUWM(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
CPU.VPR[op.vd] = u128::sub32(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
CPU.VPR[op.vd] = v128::sub32(CPU.VPR[op.va], CPU.VPR[op.vb]);
|
||||
}
|
||||
|
||||
void ppu_interpreter::VSUBUWS(PPUThread& CPU, ppu_opcode_t op)
|
||||
@ -1334,7 +1334,7 @@ void ppu_interpreter::VSUM4UBS(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VUPKHPX(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[op.vd]._s8[w * 4 + 3] = VB._s8[8 + w * 2 + 1] >> 7; // signed shift sign extends
|
||||
@ -1346,7 +1346,7 @@ void ppu_interpreter::VUPKHPX(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VUPKHSB(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[op.vd]._s16[h] = VB._s8[8 + h];
|
||||
@ -1355,7 +1355,7 @@ void ppu_interpreter::VUPKHSB(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VUPKHSH(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[op.vd]._s32[w] = VB._s16[4 + w];
|
||||
@ -1364,7 +1364,7 @@ void ppu_interpreter::VUPKHSH(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VUPKLPX(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[op.vd]._s8[w * 4 + 3] = VB._s8[w * 2 + 1] >> 7; // signed shift sign extends
|
||||
@ -1376,7 +1376,7 @@ void ppu_interpreter::VUPKLPX(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VUPKLSB(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[op.vd]._s16[h] = VB._s8[h];
|
||||
@ -1385,7 +1385,7 @@ void ppu_interpreter::VUPKLSB(PPUThread& CPU, ppu_opcode_t op)
|
||||
|
||||
void ppu_interpreter::VUPKLSH(PPUThread& CPU, ppu_opcode_t op)
|
||||
{
|
||||
u128 VB = CPU.VPR[op.vb];
|
||||
v128 VB = CPU.VPR[op.vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[op.vd]._s32[w] = VB._s16[w];
|
||||
|
@ -1055,8 +1055,8 @@ private:
|
||||
}
|
||||
void VMRGHB(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[vd]._u8[15 - h*2] = VA._u8[15 - h];
|
||||
@ -1065,8 +1065,8 @@ private:
|
||||
}
|
||||
void VMRGHH(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._u16[7 - w*2] = VA._u16[7 - w];
|
||||
@ -1075,8 +1075,8 @@ private:
|
||||
}
|
||||
void VMRGHW(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint d = 0; d < 2; d++)
|
||||
{
|
||||
CPU.VPR[vd]._u32[3 - d*2] = VA._u32[3 - d];
|
||||
@ -1085,8 +1085,8 @@ private:
|
||||
}
|
||||
void VMRGLB(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[vd]._u8[15 - h*2] = VA._u8[7 - h];
|
||||
@ -1095,8 +1095,8 @@ private:
|
||||
}
|
||||
void VMRGLH(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._u16[7 - w*2] = VA._u16[3 - w];
|
||||
@ -1105,8 +1105,8 @@ private:
|
||||
}
|
||||
void VMRGLW(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint d = 0; d < 2; d++)
|
||||
{
|
||||
CPU.VPR[vd]._u32[3 - d*2] = VA._u32[1 - d];
|
||||
@ -1339,8 +1339,8 @@ private:
|
||||
}
|
||||
void VPKPX(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
u16 bb7 = VB._u8[15 - (h*4 + 0)] & 0x1;
|
||||
@ -1358,8 +1358,8 @@ private:
|
||||
}
|
||||
void VPKSHSS(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint b = 0; b < 8; b++)
|
||||
{
|
||||
s16 result = VA._s16[b];
|
||||
@ -1395,8 +1395,8 @@ private:
|
||||
}
|
||||
void VPKSHUS(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint b = 0; b < 8; b++)
|
||||
{
|
||||
s16 result = VA._s16[b];
|
||||
@ -1432,8 +1432,8 @@ private:
|
||||
}
|
||||
void VPKSWSS(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
s32 result = VA._s32[h];
|
||||
@ -1469,8 +1469,8 @@ private:
|
||||
}
|
||||
void VPKSWUS(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
s32 result = VA._s32[h];
|
||||
@ -1506,8 +1506,8 @@ private:
|
||||
}
|
||||
void VPKUHUM(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint b = 0; b < 8; b++)
|
||||
{
|
||||
CPU.VPR[vd]._u8[b+8] = VA._u8[b*2];
|
||||
@ -1516,8 +1516,8 @@ private:
|
||||
}
|
||||
void VPKUHUS(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint b = 0; b < 8; b++)
|
||||
{
|
||||
u16 result = VA._u16[b];
|
||||
@ -1543,8 +1543,8 @@ private:
|
||||
}
|
||||
void VPKUWUM(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
CPU.VPR[vd]._u16[h+4] = VA._u16[h*2];
|
||||
@ -1553,8 +1553,8 @@ private:
|
||||
}
|
||||
void VPKUWUS(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VA = CPU.VPR[va];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
u32 result = VA._u32[h];
|
||||
@ -1684,7 +1684,7 @@ private:
|
||||
}
|
||||
void VSL(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
v128 VA = CPU.VPR[va];
|
||||
u8 sh = CPU.VPR[vb]._u8[0] & 0x7;
|
||||
|
||||
CPU.VPR[vd]._u8[0] = VA._u8[0] << sh;
|
||||
@ -1720,7 +1720,7 @@ private:
|
||||
}
|
||||
void VSLO(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
v128 VA = CPU.VPR[va];
|
||||
u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf;
|
||||
|
||||
CPU.VPR[vd].clear();
|
||||
@ -1791,7 +1791,7 @@ private:
|
||||
}
|
||||
void VSR(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
v128 VA = CPU.VPR[va];
|
||||
u8 sh = CPU.VPR[vb]._u8[0] & 0x7;
|
||||
|
||||
CPU.VPR[vd]._u8[15] = VA._u8[15] >> sh;
|
||||
@ -1837,7 +1837,7 @@ private:
|
||||
}
|
||||
void VSRO(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
v128 VA = CPU.VPR[va];
|
||||
u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf;
|
||||
|
||||
CPU.VPR[vd].clear();
|
||||
@ -2121,7 +2121,7 @@ private:
|
||||
}
|
||||
void VUPKHPX(u32 vd, u32 vb)
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[8 + w*2 + 1] >> 7; // signed shift sign extends
|
||||
@ -2132,7 +2132,7 @@ private:
|
||||
}
|
||||
void VUPKHSB(u32 vd, u32 vb)
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[vd]._s16[h] = VB._s8[8 + h];
|
||||
@ -2140,7 +2140,7 @@ private:
|
||||
}
|
||||
void VUPKHSH(u32 vd, u32 vb)
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._s32[w] = VB._s16[4 + w];
|
||||
@ -2148,7 +2148,7 @@ private:
|
||||
}
|
||||
void VUPKLPX(u32 vd, u32 vb)
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[w*2 + 1] >> 7; // signed shift sign extends
|
||||
@ -2159,7 +2159,7 @@ private:
|
||||
}
|
||||
void VUPKLSB(u32 vd, u32 vb) //nf
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[vd]._s16[h] = VB._s8[h];
|
||||
@ -2167,7 +2167,7 @@ private:
|
||||
}
|
||||
void VUPKLSH(u32 vd, u32 vb)
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
v128 VB = CPU.VPR[vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._s32[w] = VB._s16[w];
|
||||
|
@ -2121,7 +2121,7 @@ void Compiler::TW(u32 to, u32 ra, u32 rb) {
|
||||
}
|
||||
|
||||
void Compiler::LVSL(u32 vd, u32 ra, u32 rb) {
|
||||
static const u128 s_lvsl_values[] = {
|
||||
static const v128 s_lvsl_values[] = {
|
||||
{ 0x08090A0B0C0D0E0F, 0x0001020304050607 },
|
||||
{ 0x090A0B0C0D0E0F10, 0x0102030405060708 },
|
||||
{ 0x0A0B0C0D0E0F1011, 0x0203040506070809 },
|
||||
@ -2350,7 +2350,7 @@ void Compiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) {
|
||||
}
|
||||
|
||||
void Compiler::LVSR(u32 vd, u32 ra, u32 rb) {
|
||||
static const u128 s_lvsr_values[] = {
|
||||
static const v128 s_lvsr_values[] = {
|
||||
{ 0x18191A1B1C1D1E1F, 0x1011121314151617 },
|
||||
{ 0x1718191A1B1C1D1E, 0x0F10111213141516 },
|
||||
{ 0x161718191A1B1C1D, 0x0E0F101112131415 },
|
||||
|
@ -51,7 +51,7 @@ struct ppu_recompiler_llvm::PPUState {
|
||||
u64 GPR[32];
|
||||
|
||||
/// Vector purpose registers
|
||||
u128 VPR[32];
|
||||
v128 VPR[32];
|
||||
|
||||
/// Condition register
|
||||
CRhdr CR;
|
||||
|
@ -462,7 +462,7 @@ public:
|
||||
PPCdouble FPR[32]{}; //Floating Point Register
|
||||
FPSCRhdr FPSCR{}; //Floating Point Status and Control Register
|
||||
u64 GPR[32]{}; //General-Purpose Register
|
||||
u128 VPR[32]{};
|
||||
v128 VPR[32]{};
|
||||
u32 vpcr = 0;
|
||||
|
||||
CRhdr CR{}; //Condition Register
|
||||
|
@ -2,5 +2,5 @@
|
||||
|
||||
struct SPUContext
|
||||
{
|
||||
u128 gpr[128];
|
||||
v128 gpr[128];
|
||||
};
|
||||
|
@ -69,17 +69,17 @@ void spu_interpreter::MFSPR(SPUThread& CPU, spu_opcode_t op)
|
||||
|
||||
void spu_interpreter::RDCH(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::from32r(CPU.get_ch_value(op.ra));
|
||||
CPU.GPR[op.rt] = v128::from32r(CPU.get_ch_value(op.ra));
|
||||
}
|
||||
|
||||
void spu_interpreter::RCHCNT(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::from32r(CPU.get_ch_count(op.ra));
|
||||
CPU.GPR[op.rt] = v128::from32r(CPU.get_ch_count(op.ra));
|
||||
}
|
||||
|
||||
void spu_interpreter::SF(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]);
|
||||
CPU.GPR[op.rt] = v128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]);
|
||||
}
|
||||
|
||||
void spu_interpreter::OR(SPUThread& CPU, spu_opcode_t op)
|
||||
@ -94,7 +94,7 @@ void spu_interpreter::BG(SPUThread& CPU, spu_opcode_t op)
|
||||
|
||||
void spu_interpreter::SFH(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::sub16(CPU.GPR[op.rb], CPU.GPR[op.ra]);
|
||||
CPU.GPR[op.rt] = v128::sub16(CPU.GPR[op.rb], CPU.GPR[op.ra]);
|
||||
}
|
||||
|
||||
void spu_interpreter::NOR(SPUThread& CPU, spu_opcode_t op)
|
||||
@ -106,7 +106,7 @@ void spu_interpreter::ABSDB(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const auto a = CPU.GPR[op.ra];
|
||||
const auto b = CPU.GPR[op.rb];
|
||||
CPU.GPR[op.rt] = u128::sub8(u128::maxu8(a, b), u128::minu8(a, b));
|
||||
CPU.GPR[op.rt] = v128::sub8(v128::maxu8(a, b), v128::minu8(a, b));
|
||||
}
|
||||
|
||||
void spu_interpreter::ROT(SPUThread& CPU, spu_opcode_t op)
|
||||
@ -249,7 +249,7 @@ void spu_interpreter::SHLHI(SPUThread& CPU, spu_opcode_t op)
|
||||
|
||||
void spu_interpreter::A(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
CPU.GPR[op.rt] = v128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
}
|
||||
|
||||
void spu_interpreter::AND(SPUThread& CPU, spu_opcode_t op)
|
||||
@ -266,7 +266,7 @@ void spu_interpreter::CG(SPUThread& CPU, spu_opcode_t op)
|
||||
|
||||
void spu_interpreter::AH(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::add16(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
CPU.GPR[op.rt] = v128::add16(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
}
|
||||
|
||||
void spu_interpreter::NAND(SPUThread& CPU, spu_opcode_t op)
|
||||
@ -343,7 +343,7 @@ void spu_interpreter::BI(SPUThread& CPU, spu_opcode_t op)
|
||||
void spu_interpreter::BISL(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const u32 target = SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0);
|
||||
CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4);
|
||||
CPU.GPR[op.rt] = v128::from32r(CPU.PC + 4);
|
||||
CPU.PC = target - 4;
|
||||
set_interrupt_status(CPU, op);
|
||||
}
|
||||
@ -364,17 +364,17 @@ void spu_interpreter::HBR(SPUThread& CPU, spu_opcode_t op)
|
||||
|
||||
void spu_interpreter::GB(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0)), 7)));
|
||||
CPU.GPR[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0)), 7)));
|
||||
}
|
||||
|
||||
void spu_interpreter::GBH(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0)), 7)));
|
||||
CPU.GPR[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0)), 7)));
|
||||
}
|
||||
|
||||
void spu_interpreter::GBB(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(CPU.GPR[op.ra].vi, 7)));
|
||||
CPU.GPR[op.rt] = v128::from32r(_mm_movemask_epi8(_mm_slli_epi64(CPU.GPR[op.ra].vi, 7)));
|
||||
}
|
||||
|
||||
void spu_interpreter::FSM(SPUThread& CPU, spu_opcode_t op)
|
||||
@ -426,28 +426,28 @@ void spu_interpreter::SHLQBYBI(SPUThread& CPU, spu_opcode_t op)
|
||||
void spu_interpreter::CBX(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const s32 t = ~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xf;
|
||||
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt]._u8[t] = 0x03;
|
||||
}
|
||||
|
||||
void spu_interpreter::CHX(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1;
|
||||
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt]._u16[t] = 0x0203;
|
||||
}
|
||||
|
||||
void spu_interpreter::CWX(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2;
|
||||
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt]._u32[t] = 0x00010203;
|
||||
}
|
||||
|
||||
void spu_interpreter::CDX(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3;
|
||||
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull;
|
||||
}
|
||||
|
||||
@ -489,34 +489,34 @@ void spu_interpreter::SHLQBY(SPUThread& CPU, spu_opcode_t op)
|
||||
|
||||
void spu_interpreter::ORX(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::from32r(CPU.GPR[op.ra]._u32[0] | CPU.GPR[op.ra]._u32[1] | CPU.GPR[op.ra]._u32[2] | CPU.GPR[op.ra]._u32[3]);
|
||||
CPU.GPR[op.rt] = v128::from32r(CPU.GPR[op.ra]._u32[0] | CPU.GPR[op.ra]._u32[1] | CPU.GPR[op.ra]._u32[2] | CPU.GPR[op.ra]._u32[3]);
|
||||
}
|
||||
|
||||
void spu_interpreter::CBD(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const s32 t = ~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xf;
|
||||
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt]._u8[t] = 0x03;
|
||||
}
|
||||
|
||||
void spu_interpreter::CHD(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1;
|
||||
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt]._u16[t] = 0x0203;
|
||||
}
|
||||
|
||||
void spu_interpreter::CWD(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2;
|
||||
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt]._u32[t] = 0x00010203;
|
||||
}
|
||||
|
||||
void spu_interpreter::CDD(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3;
|
||||
CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt] = v128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull);
|
||||
CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull;
|
||||
}
|
||||
|
||||
@ -640,7 +640,7 @@ void spu_interpreter::CLGT(SPUThread& CPU, spu_opcode_t op)
|
||||
|
||||
void spu_interpreter::ANDC(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::andnot(CPU.GPR[op.rb], CPU.GPR[op.ra]);
|
||||
CPU.GPR[op.rt] = v128::andnot(CPU.GPR[op.rb], CPU.GPR[op.ra]);
|
||||
}
|
||||
|
||||
void spu_interpreter::FCGT(SPUThread& CPU, spu_opcode_t op)
|
||||
@ -655,12 +655,12 @@ void spu_interpreter::DFCGT(SPUThread& CPU, spu_opcode_t op)
|
||||
|
||||
void spu_interpreter::FA(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::addfs(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
CPU.GPR[op.rt] = v128::addfs(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
}
|
||||
|
||||
void spu_interpreter::FS(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::subfs(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
CPU.GPR[op.rt] = v128::subfs(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
}
|
||||
|
||||
void spu_interpreter::FM(SPUThread& CPU, spu_opcode_t op)
|
||||
@ -691,12 +691,12 @@ void spu_interpreter::DFCMGT(SPUThread& CPU, spu_opcode_t op)
|
||||
|
||||
void spu_interpreter::DFA(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::addfd(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
CPU.GPR[op.rt] = v128::addfd(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
}
|
||||
|
||||
void spu_interpreter::DFS(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::subfd(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
CPU.GPR[op.rt] = v128::subfd(CPU.GPR[op.ra], CPU.GPR[op.rb]);
|
||||
}
|
||||
|
||||
void spu_interpreter::DFM(SPUThread& CPU, spu_opcode_t op)
|
||||
@ -751,12 +751,12 @@ void spu_interpreter::MPYHHU(SPUThread& CPU, spu_opcode_t op)
|
||||
|
||||
void spu_interpreter::ADDX(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::add32(u128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]), CPU.GPR[op.rt] & u128::from32p(1));
|
||||
CPU.GPR[op.rt] = v128::add32(v128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]), CPU.GPR[op.rt] & v128::from32p(1));
|
||||
}
|
||||
|
||||
void spu_interpreter::SFX(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
CPU.GPR[op.rt] = u128::sub32(u128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]), u128::andnot(CPU.GPR[op.rt], u128::from32p(1)));
|
||||
CPU.GPR[op.rt] = v128::sub32(v128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]), v128::andnot(CPU.GPR[op.rt], v128::from32p(1)));
|
||||
}
|
||||
|
||||
void spu_interpreter::CGX(SPUThread& CPU, spu_opcode_t op)
|
||||
@ -976,7 +976,7 @@ void spu_interpreter::LQA(SPUThread& CPU, spu_opcode_t op)
|
||||
void spu_interpreter::BRASL(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const u32 target = SPUOpcodes::branchTarget(0, op.i16);
|
||||
CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4);
|
||||
CPU.GPR[op.rt] = v128::from32r(CPU.PC + 4);
|
||||
CPU.PC = target - 4;
|
||||
}
|
||||
|
||||
@ -993,7 +993,7 @@ void spu_interpreter::FSMBI(SPUThread& CPU, spu_opcode_t op)
|
||||
void spu_interpreter::BRSL(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
const u32 target = SPUOpcodes::branchTarget(CPU.PC, op.i16);
|
||||
CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4);
|
||||
CPU.GPR[op.rt] = v128::from32r(CPU.PC + 4);
|
||||
CPU.PC = target - 4;
|
||||
}
|
||||
|
||||
@ -1197,7 +1197,7 @@ void spu_interpreter::ILA(SPUThread& CPU, spu_opcode_t op)
|
||||
void spu_interpreter::SELB(SPUThread& CPU, spu_opcode_t op)
|
||||
{
|
||||
// rt <> rc
|
||||
CPU.GPR[op.rc] = (CPU.GPR[op.rt] & CPU.GPR[op.rb]) | u128::andnot(CPU.GPR[op.rt], CPU.GPR[op.ra]);
|
||||
CPU.GPR[op.rc] = (CPU.GPR[op.rt] & CPU.GPR[op.rb]) | v128::andnot(CPU.GPR[op.rt], CPU.GPR[op.ra]);
|
||||
}
|
||||
|
||||
void spu_interpreter::SHUFB(SPUThread& CPU, spu_opcode_t op)
|
||||
|
@ -116,11 +116,11 @@ private:
|
||||
}
|
||||
void RDCH(u32 rt, u32 ra)
|
||||
{
|
||||
CPU.GPR[rt] = u128::from32r(CPU.get_ch_value(ra));
|
||||
CPU.GPR[rt] = v128::from32r(CPU.get_ch_value(ra));
|
||||
}
|
||||
void RCHCNT(u32 rt, u32 ra)
|
||||
{
|
||||
CPU.GPR[rt] = u128::from32r(CPU.get_ch_count(ra));
|
||||
CPU.GPR[rt] = v128::from32r(CPU.get_ch_count(ra));
|
||||
}
|
||||
void SF(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
@ -424,7 +424,7 @@ private:
|
||||
void BISL(u32 intr, u32 rt, u32 ra)
|
||||
{
|
||||
u32 target = branchTarget(CPU.GPR[ra]._u32[3], 0);
|
||||
CPU.GPR[rt] = u128::from32r(CPU.PC + 4);
|
||||
CPU.GPR[rt] = v128::from32r(CPU.PC + 4);
|
||||
LOG5_OPCODE("branch (0x%x)", target);
|
||||
CPU.PC = target - 4;
|
||||
|
||||
@ -539,14 +539,14 @@ private:
|
||||
void ROTQBYBI(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0xf;
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
for (int b = 0; b < 16; b++)
|
||||
CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf];
|
||||
}
|
||||
void ROTQMBYBI(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const int s = (0 - (CPU.GPR[rb]._u32[3] >> 3)) & 0x1f;
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt].clear();
|
||||
for (int b = 0; b < 16 - s; b++)
|
||||
CPU.GPR[rt]._u8[b] = temp._u8[b + s];
|
||||
@ -554,7 +554,7 @@ private:
|
||||
void SHLQBYBI(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0x1f;
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt].clear();
|
||||
for (int b = s; b < 16; b++)
|
||||
CPU.GPR[rt]._u8[b] = temp._u8[b - s];
|
||||
@ -620,7 +620,7 @@ private:
|
||||
const int t = CPU.GPR[rb]._u32[3] & 0x7;
|
||||
if (t) // not an optimization, it fixes shifts
|
||||
{
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt]._u32[0] = (temp._u32[0] << t) | (temp._u32[3] >> (32 - t));
|
||||
CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t));
|
||||
CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t));
|
||||
@ -636,7 +636,7 @@ private:
|
||||
const int t = (0 - CPU.GPR[rb]._u32[3]) & 0x7;
|
||||
if (t) // not an optimization, it fixes shifts
|
||||
{
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt]._u32[0] = (temp._u32[0] >> t) | (temp._u32[1] << (32 - t));
|
||||
CPU.GPR[rt]._u32[1] = (temp._u32[1] >> t) | (temp._u32[2] << (32 - t));
|
||||
CPU.GPR[rt]._u32[2] = (temp._u32[2] >> t) | (temp._u32[3] << (32 - t));
|
||||
@ -652,7 +652,7 @@ private:
|
||||
const int t = CPU.GPR[rb]._u32[3] & 0x7;
|
||||
if (t) // not an optimization, it fixes shifts
|
||||
{
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt]._u32[0] = (temp._u32[0] << t);
|
||||
CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t));
|
||||
CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t));
|
||||
@ -666,14 +666,14 @@ private:
|
||||
void ROTQBY(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const int s = CPU.GPR[rb]._u32[3] & 0xf;
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
for (int b = 0; b < 16; ++b)
|
||||
CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf];
|
||||
}
|
||||
void ROTQMBY(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const int s = (0 - CPU.GPR[rb]._u32[3]) & 0x1f;
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt].clear();
|
||||
for (int b = 0; b < 16 - s; b++)
|
||||
CPU.GPR[rt]._u8[b] = temp._u8[b + s];
|
||||
@ -681,7 +681,7 @@ private:
|
||||
void SHLQBY(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const int s = CPU.GPR[rb]._u32[3] & 0x1f;
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt].clear();
|
||||
for (int b = s; b < 16; b++)
|
||||
CPU.GPR[rt]._u8[b] = temp._u8[b - s];
|
||||
@ -753,7 +753,7 @@ private:
|
||||
const int s = i7 & 0x7;
|
||||
if (s) // not an optimization, it fixes shifts
|
||||
{
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt]._u32[0] = (temp._u32[0] << s) | (temp._u32[3] >> (32 - s));
|
||||
CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s));
|
||||
CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s));
|
||||
@ -769,7 +769,7 @@ private:
|
||||
const int s = (0 - i7) & 0x7;
|
||||
if (s) // not an optimization, it fixes shifts
|
||||
{
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt]._u32[0] = (temp._u32[0] >> s) | (temp._u32[1] << (32 - s));
|
||||
CPU.GPR[rt]._u32[1] = (temp._u32[1] >> s) | (temp._u32[2] << (32 - s));
|
||||
CPU.GPR[rt]._u32[2] = (temp._u32[2] >> s) | (temp._u32[3] << (32 - s));
|
||||
@ -785,7 +785,7 @@ private:
|
||||
const int s = i7 & 0x7;
|
||||
if (s) // not an optimization, it fixes shifts
|
||||
{
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt]._u32[0] = (temp._u32[0] << s);
|
||||
CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s));
|
||||
CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s));
|
||||
@ -799,14 +799,14 @@ private:
|
||||
void ROTQBYI(u32 rt, u32 ra, s32 i7)
|
||||
{
|
||||
const int s = i7 & 0xf;
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
for (int b = 0; b < 16; b++)
|
||||
CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf];
|
||||
}
|
||||
void ROTQMBYI(u32 rt, u32 ra, s32 i7)
|
||||
{
|
||||
const int s = (0 - i7) & 0x1f;
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt].clear();
|
||||
for (int b = 0; b < 16 - s; b++)
|
||||
CPU.GPR[rt]._u8[b] = temp._u8[b + s];
|
||||
@ -814,7 +814,7 @@ private:
|
||||
void SHLQBYI(u32 rt, u32 ra, s32 i7)
|
||||
{
|
||||
const int s = i7 & 0x1f;
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt].clear();
|
||||
for (int b = s; b < 16; b++)
|
||||
CPU.GPR[rt]._u8[b] = temp._u8[b - s];
|
||||
@ -849,8 +849,8 @@ private:
|
||||
}
|
||||
void SUMB(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const u128 _a = CPU.GPR[ra];
|
||||
const u128 _b = CPU.GPR[rb];
|
||||
const v128 _a = CPU.GPR[ra];
|
||||
const v128 _b = CPU.GPR[rb];
|
||||
for (int w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.GPR[rt]._u16[w*2] = _a._u8[w*4] + _a._u8[w*4 + 1] + _a._u8[w*4 + 2] + _a._u8[w*4 + 3];
|
||||
@ -890,7 +890,7 @@ private:
|
||||
}
|
||||
void CNTB(u32 rt, u32 ra)
|
||||
{
|
||||
const u128 temp = CPU.GPR[ra];
|
||||
const v128 temp = CPU.GPR[ra];
|
||||
CPU.GPR[rt].clear();
|
||||
for (int b = 0; b < 16; b++)
|
||||
for (int i = 0; i < 8; i++)
|
||||
@ -1621,7 +1621,7 @@ private:
|
||||
void BRASL(u32 rt, s32 i16)
|
||||
{
|
||||
u32 target = branchTarget(0, i16);
|
||||
CPU.GPR[rt] = u128::from32r(CPU.PC + 4);
|
||||
CPU.GPR[rt] = v128::from32r(CPU.PC + 4);
|
||||
LOG5_OPCODE("branch (0x%x)", target);
|
||||
CPU.PC = target - 4;
|
||||
}
|
||||
@ -1650,7 +1650,7 @@ private:
|
||||
void BRSL(u32 rt, s32 i16)
|
||||
{
|
||||
u32 target = branchTarget(CPU.PC, i16);
|
||||
CPU.GPR[rt] = u128::from32r(CPU.PC + 4);
|
||||
CPU.GPR[rt] = v128::from32r(CPU.PC + 4);
|
||||
LOG5_OPCODE("branch (0x%x)", target);
|
||||
CPU.PC = target - 4;
|
||||
}
|
||||
@ -1873,8 +1873,8 @@ private:
|
||||
}
|
||||
void SHUFB(u32 rt, u32 ra, u32 rb, u32 rc)
|
||||
{
|
||||
const u128 _a = CPU.GPR[ra];
|
||||
const u128 _b = CPU.GPR[rb];
|
||||
const v128 _a = CPU.GPR[ra];
|
||||
const v128 _b = CPU.GPR[rb];
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
u8 b = CPU.GPR[rc]._u8[i];
|
||||
|
@ -34,7 +34,7 @@ public:
|
||||
|
||||
std::array<SPURecEntry, 0x10000> entry = {};
|
||||
|
||||
std::vector<u128> imm_table;
|
||||
std::vector<v128> imm_table;
|
||||
|
||||
SPURecompilerCore(SPUThread& cpu);
|
||||
|
||||
@ -105,7 +105,7 @@ public:
|
||||
void XmmInvalidate(const s8 reg);
|
||||
void XmmFinalize(const XmmLink& var, s8 reg = -1);
|
||||
void XmmRelease();
|
||||
asmjit::X86Mem XmmConst(u128 data);
|
||||
asmjit::X86Mem XmmConst(v128 data);
|
||||
|
||||
private:
|
||||
|
||||
|
@ -486,7 +486,7 @@ void SPURecompiler::XmmRelease()
|
||||
}
|
||||
}
|
||||
|
||||
X86Mem SPURecompiler::XmmConst(u128 data)
|
||||
X86Mem SPURecompiler::XmmConst(v128 data)
|
||||
{
|
||||
s32 shift = 0;
|
||||
|
||||
@ -494,12 +494,12 @@ X86Mem SPURecompiler::XmmConst(u128 data)
|
||||
{
|
||||
if (rec.imm_table[shift] == data)
|
||||
{
|
||||
return oword_ptr(*imm_var, shift * sizeof(u128));
|
||||
return oword_ptr(*imm_var, shift * sizeof(v128));
|
||||
}
|
||||
}
|
||||
|
||||
rec.imm_table.push_back(data);
|
||||
return oword_ptr(*imm_var, shift * sizeof(u128));
|
||||
return oword_ptr(*imm_var, shift * sizeof(v128));
|
||||
}
|
||||
|
||||
|
||||
@ -553,7 +553,7 @@ void SPURecompiler::RDCH(u32 rt, u32 ra)
|
||||
{
|
||||
c.mov(cpu_dword(PC), CPU.PC);
|
||||
WRAPPER_BEGIN(rt, ra, zz);
|
||||
CPU->GPR[rt] = u128::from32r(CPU->get_ch_value(ra));
|
||||
CPU->GPR[rt] = v128::from32r(CPU->get_ch_value(ra));
|
||||
WRAPPER_END(rt, ra, 0);
|
||||
// TODO
|
||||
}
|
||||
@ -562,7 +562,7 @@ void SPURecompiler::RCHCNT(u32 rt, u32 ra)
|
||||
{
|
||||
c.mov(cpu_dword(PC), CPU.PC);
|
||||
WRAPPER_BEGIN(rt, ra, zz);
|
||||
CPU->GPR[rt] = u128::from32r(CPU->get_ch_count(ra));
|
||||
CPU->GPR[rt] = v128::from32r(CPU->get_ch_count(ra));
|
||||
WRAPPER_END(rt, ra, 0);
|
||||
// TODO
|
||||
}
|
||||
@ -603,7 +603,7 @@ void SPURecompiler::BG(u32 rt, u32 ra, u32 rb)
|
||||
// compare if-greater-than
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
c.movdqa(vi.get(), XmmConst(u128::from32p(0x80000000)));
|
||||
c.movdqa(vi.get(), XmmConst(v128::from32p(0x80000000)));
|
||||
c.pxor(va.get(), vi.get());
|
||||
if (const XmmLink* vb = XmmRead(rb))
|
||||
{
|
||||
@ -614,7 +614,7 @@ void SPURecompiler::BG(u32 rt, u32 ra, u32 rb)
|
||||
c.pxor(vi.get(), cpu_xmm(GPR[rb]));
|
||||
}
|
||||
c.pcmpgtd(va.get(), vi.get());
|
||||
c.paddd(va.get(), XmmConst(u128::from32p(1)));
|
||||
c.paddd(va.get(), XmmConst(v128::from32p(1)));
|
||||
XmmFinalize(va, rt);
|
||||
XmmFinalize(vi);
|
||||
LOG_OPCODE();
|
||||
@ -650,7 +650,7 @@ void SPURecompiler::NOR(u32 rt, u32 ra, u32 rb)
|
||||
c.por(va.get(), cpu_xmm(GPR[rb]));
|
||||
}
|
||||
}
|
||||
c.pxor(va.get(), XmmConst(u128::from32p(0xffffffff)));
|
||||
c.pxor(va.get(), XmmConst(v128::from32p(0xffffffff)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -901,7 +901,7 @@ void SPURecompiler::CG(u32 rt, u32 ra, u32 rb)
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
const XmmLink& vb = XmmGet(rb);
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
c.movdqa(vi.get(), XmmConst(u128::from32p(0x80000000)));
|
||||
c.movdqa(vi.get(), XmmConst(v128::from32p(0x80000000)));
|
||||
c.paddd(vb.get(), va.get());
|
||||
c.pxor(va.get(), vi.get());
|
||||
c.pxor(vb.get(), vi.get());
|
||||
@ -940,7 +940,7 @@ void SPURecompiler::NAND(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
c.pand(va.get(), cpu_xmm(GPR[rb]));
|
||||
}
|
||||
c.pxor(va.get(), XmmConst(u128::from32p(0xffffffff)));
|
||||
c.pxor(va.get(), XmmConst(v128::from32p(0xffffffff)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -1178,7 +1178,7 @@ void SPURecompiler::HBR(u32 p, u32 ro, u32 ra)
|
||||
void SPURecompiler::GB(u32 rt, u32 ra)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.pshufb(va.get(), XmmConst(u128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0))));
|
||||
c.pshufb(va.get(), XmmConst(v128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0))));
|
||||
c.psllq(va.get(), 7);
|
||||
c.pmovmskb(*addr, va.get());
|
||||
c.pxor(va.get(), va.get());
|
||||
@ -1190,7 +1190,7 @@ void SPURecompiler::GB(u32 rt, u32 ra)
|
||||
void SPURecompiler::GBH(u32 rt, u32 ra)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.pshufb(va.get(), XmmConst(u128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0))));
|
||||
c.pshufb(va.get(), XmmConst(v128::fromV(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0))));
|
||||
c.psllq(va.get(), 7);
|
||||
c.pmovmskb(*addr, va.get());
|
||||
c.pxor(va.get(), va.get());
|
||||
@ -1254,7 +1254,7 @@ void SPURecompiler::FREST(u32 rt, u32 ra)
|
||||
void SPURecompiler::FRSQEST(u32 rt, u32 ra)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.andps(va.get(), XmmConst(u128::from32p(0x7fffffff))); // abs
|
||||
c.andps(va.get(), XmmConst(v128::from32p(0x7fffffff))); // abs
|
||||
c.rsqrtps(va.get(), va.get());
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
@ -1343,7 +1343,7 @@ void SPURecompiler::CBX(u32 rt, u32 ra, u32 rb)
|
||||
c.not_(*addr);
|
||||
c.and_(*addr, 0xf);
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
XmmFinalize(vr, rt);
|
||||
XmmInvalidate(rt);
|
||||
c.mov(byte_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x03);
|
||||
@ -1368,7 +1368,7 @@ void SPURecompiler::CHX(u32 rt, u32 ra, u32 rb)
|
||||
c.not_(*addr);
|
||||
c.and_(*addr, 0xe);
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
XmmFinalize(vr, rt);
|
||||
XmmInvalidate(rt);
|
||||
c.mov(word_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x0203);
|
||||
@ -1393,7 +1393,7 @@ void SPURecompiler::CWX(u32 rt, u32 ra, u32 rb)
|
||||
c.not_(*addr);
|
||||
c.and_(*addr, 0xc);
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
XmmFinalize(vr, rt);
|
||||
XmmInvalidate(rt);
|
||||
c.mov(dword_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x00010203);
|
||||
@ -1419,10 +1419,10 @@ void SPURecompiler::CDX(u32 rt, u32 ra, u32 rb)
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
Label p1(c), p2(c);
|
||||
c.jnz(p1);
|
||||
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f))));
|
||||
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f))));
|
||||
c.jmp(p2);
|
||||
c.bind(p1);
|
||||
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x00010203, 0x04050607))));
|
||||
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x00010203, 0x04050607))));
|
||||
c.bind(p2);
|
||||
XmmFinalize(vr, rt);
|
||||
LOG_OPCODE();
|
||||
@ -1527,7 +1527,7 @@ void SPURecompiler::CBD(u32 rt, u32 ra, s32 i7)
|
||||
{
|
||||
// assuming that SP % 16 is always zero
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
u128 value = u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
|
||||
v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
|
||||
value.u8r[i7 & 0xf] = 0x03;
|
||||
c.movdqa(vr.get(), XmmConst(value));
|
||||
XmmFinalize(vr, rt);
|
||||
@ -1539,7 +1539,7 @@ void SPURecompiler::CBD(u32 rt, u32 ra, s32 i7)
|
||||
c.not_(*addr);
|
||||
c.and_(*addr, 0xf);
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
XmmFinalize(vr, rt);
|
||||
XmmInvalidate(rt);
|
||||
c.mov(byte_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x03);
|
||||
@ -1553,7 +1553,7 @@ void SPURecompiler::CHD(u32 rt, u32 ra, s32 i7)
|
||||
{
|
||||
// assuming that SP % 16 is always zero
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
u128 value = u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
|
||||
v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
|
||||
value.u16r[(i7 >> 1) & 0x7] = 0x0203;
|
||||
c.movdqa(vr.get(), XmmConst(value));
|
||||
XmmFinalize(vr, rt);
|
||||
@ -1565,7 +1565,7 @@ void SPURecompiler::CHD(u32 rt, u32 ra, s32 i7)
|
||||
c.not_(*addr);
|
||||
c.and_(*addr, 0xe);
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
XmmFinalize(vr, rt);
|
||||
XmmInvalidate(rt);
|
||||
c.mov(word_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x0203);
|
||||
@ -1579,7 +1579,7 @@ void SPURecompiler::CWD(u32 rt, u32 ra, s32 i7)
|
||||
{
|
||||
// assuming that SP % 16 is always zero
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
u128 value = u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
|
||||
v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
|
||||
value.u32r[(i7 >> 2) & 0x3] = 0x00010203;
|
||||
c.movdqa(vr.get(), XmmConst(value));
|
||||
XmmFinalize(vr, rt);
|
||||
@ -1591,7 +1591,7 @@ void SPURecompiler::CWD(u32 rt, u32 ra, s32 i7)
|
||||
c.not_(*addr);
|
||||
c.and_(*addr, 0xc);
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))));
|
||||
XmmFinalize(vr, rt);
|
||||
XmmInvalidate(rt);
|
||||
c.mov(dword_ptr(*cpu_var, *addr, 0, cpu_offset(GPR[rt])), 0x00010203);
|
||||
@ -1605,7 +1605,7 @@ void SPURecompiler::CDD(u32 rt, u32 ra, s32 i7)
|
||||
{
|
||||
// assuming that SP % 16 is always zero
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
u128 value = u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
|
||||
v128 value = v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f));
|
||||
value.u64r[(i7 >> 3) & 0x1] = 0x0001020304050607ull;
|
||||
c.movdqa(vr.get(), XmmConst(value));
|
||||
XmmFinalize(vr, rt);
|
||||
@ -1618,10 +1618,10 @@ void SPURecompiler::CDD(u32 rt, u32 ra, s32 i7)
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
Label p1(c), p2(c);
|
||||
c.jnz(p1);
|
||||
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f))));
|
||||
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x00010203, 0x04050607, 0x18191a1b, 0x1c1d1e1f))));
|
||||
c.jmp(p2);
|
||||
c.bind(p1);
|
||||
c.movdqa(vr.get(), XmmConst(u128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x00010203, 0x04050607))));
|
||||
c.movdqa(vr.get(), XmmConst(v128::fromV(_mm_set_epi32(0x10111213, 0x14151617, 0x00010203, 0x04050607))));
|
||||
c.bind(p2);
|
||||
XmmFinalize(vr, rt);
|
||||
}
|
||||
@ -1746,7 +1746,7 @@ void SPURecompiler::CGTH(u32 rt, u32 ra, u32 rb)
|
||||
void SPURecompiler::EQV(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const XmmLink& vb = XmmGet(rb, rt);
|
||||
c.pxor(vb.get(), XmmConst(u128::from32p(0xffffffff)));
|
||||
c.pxor(vb.get(), XmmConst(v128::from32p(0xffffffff)));
|
||||
if (const XmmLink* va = XmmRead(ra))
|
||||
{
|
||||
c.pxor(vb.get(), va->read());
|
||||
@ -1779,11 +1779,11 @@ void SPURecompiler::SUMB(u32 rt, u32 ra, u32 rb)
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb);
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
c.movdqa(vi.get(), XmmConst(u128::from8p(1)));
|
||||
c.movdqa(vi.get(), XmmConst(v128::from8p(1)));
|
||||
c.pmaddubsw(va.get(), vi.get());
|
||||
c.pmaddubsw(vb.get(), vi.get());
|
||||
c.phaddw(va.get(), vb.get());
|
||||
c.pshufb(va.get(), XmmConst(u128::fromV(_mm_set_epi8(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0))));
|
||||
c.pshufb(va.get(), XmmConst(v128::fromV(_mm_set_epi8(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0))));
|
||||
XmmFinalize(va, rt);
|
||||
XmmFinalize(vb);
|
||||
XmmFinalize(vi);
|
||||
@ -1842,12 +1842,12 @@ void SPURecompiler::CNTB(u32 rt, u32 ra)
|
||||
const XmmLink& v1 = XmmCopy(va);
|
||||
const XmmLink& vm = XmmAlloc();
|
||||
c.psrlq(v1.get(), 4);
|
||||
c.movdqa(vm.get(), XmmConst(u128::from8p(0xf)));
|
||||
c.movdqa(vm.get(), XmmConst(v128::from8p(0xf)));
|
||||
c.pand(va.get(), vm.get());
|
||||
c.pand(v1.get(), vm.get());
|
||||
c.movdqa(vm.get(), XmmConst(u128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0))));
|
||||
c.movdqa(vm.get(), XmmConst(v128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0))));
|
||||
c.pshufb(vm.get(), va.get());
|
||||
c.movdqa(va.get(), XmmConst(u128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0))));
|
||||
c.movdqa(va.get(), XmmConst(v128::fromV(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0))));
|
||||
c.pshufb(va.get(), v1.get());
|
||||
c.paddb(va.get(), vm.get());
|
||||
XmmFinalize(va, rt);
|
||||
@ -1870,7 +1870,7 @@ void SPURecompiler::CLGT(u32 rt, u32 ra, u32 rb)
|
||||
// compare if-greater-than
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
c.movdqa(vi.get(), XmmConst(u128::from32p(0x80000000)));
|
||||
c.movdqa(vi.get(), XmmConst(v128::from32p(0x80000000)));
|
||||
c.pxor(va.get(), vi.get());
|
||||
if (const XmmLink* vb = XmmRead(rb))
|
||||
{
|
||||
@ -1973,7 +1973,7 @@ void SPURecompiler::CLGTH(u32 rt, u32 ra, u32 rb)
|
||||
// compare if-greater-than
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
c.movdqa(vi.get(), XmmConst(u128::from16p(0x8000)));
|
||||
c.movdqa(vi.get(), XmmConst(v128::from16p(0x8000)));
|
||||
c.pxor(va.get(), vi.get());
|
||||
if (const XmmLink* vb = XmmRead(rb))
|
||||
{
|
||||
@ -1992,7 +1992,7 @@ void SPURecompiler::CLGTH(u32 rt, u32 ra, u32 rb)
|
||||
void SPURecompiler::ORC(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const XmmLink& vb = XmmGet(rb, rt);
|
||||
c.pxor(vb.get(), XmmConst(u128::from32p(0xffffffff)));
|
||||
c.pxor(vb.get(), XmmConst(v128::from32p(0xffffffff)));
|
||||
if (const XmmLink* va = XmmRead(ra))
|
||||
{
|
||||
c.por(vb.get(), va->read());
|
||||
@ -2010,7 +2010,7 @@ void SPURecompiler::FCMGT(u32 rt, u32 ra, u32 rb)
|
||||
// reverted less-than
|
||||
const XmmLink& vb = XmmGet(rb, rt);
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
c.movaps(vi.get(), XmmConst(u128::from32p(0x7fffffff)));
|
||||
c.movaps(vi.get(), XmmConst(v128::from32p(0x7fffffff)));
|
||||
c.andps(vb.get(), vi.get()); // abs
|
||||
if (const XmmLink* va = XmmRead(ra))
|
||||
{
|
||||
@ -2081,7 +2081,7 @@ void SPURecompiler::CLGTB(u32 rt, u32 ra, u32 rb)
|
||||
// compare if-greater-than
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
c.movdqa(vi.get(), XmmConst(u128::from8p(0x80)));
|
||||
c.movdqa(vi.get(), XmmConst(v128::from8p(0x80)));
|
||||
c.pxor(va.get(), vi.get());
|
||||
if (const XmmLink* vb = XmmRead(rb))
|
||||
{
|
||||
@ -2177,7 +2177,7 @@ void SPURecompiler::MPYHHU(u32 rt, u32 ra, u32 rb)
|
||||
const XmmLink& va2 = XmmCopy(va);
|
||||
c.pmulhuw(va.get(), vb.get());
|
||||
c.pmullw(va2.get(), vb.get());
|
||||
c.pand(va.get(), XmmConst(u128::from32p(0xffff0000)));
|
||||
c.pand(va.get(), XmmConst(v128::from32p(0xffff0000)));
|
||||
c.psrld(va2.get(), 16);
|
||||
c.por(va.get(), va2.get());
|
||||
XmmFinalize(va, rt);
|
||||
@ -2189,7 +2189,7 @@ void SPURecompiler::MPYHHU(u32 rt, u32 ra, u32 rb)
|
||||
void SPURecompiler::ADDX(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const XmmLink& vt = XmmGet(rt);
|
||||
c.pand(vt.get(), XmmConst(u128::from32p(1)));
|
||||
c.pand(vt.get(), XmmConst(v128::from32p(1)));
|
||||
c.paddd(vt.get(), cpu_xmm(GPR[ra]));
|
||||
c.paddd(vt.get(), cpu_xmm(GPR[rb]));
|
||||
XmmFinalize(vt, rt);
|
||||
@ -2200,7 +2200,7 @@ void SPURecompiler::SFX(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const XmmLink& vt = XmmGet(rt);
|
||||
const XmmLink& vb = XmmGet(rb, rt);
|
||||
c.pandn(vt.get(), XmmConst(u128::from32p(1)));
|
||||
c.pandn(vt.get(), XmmConst(v128::from32p(1)));
|
||||
c.psubd(vb.get(), cpu_xmm(GPR[ra]));
|
||||
c.psubd(vb.get(), vt.get());
|
||||
XmmFinalize(vb, rt);
|
||||
@ -2252,7 +2252,7 @@ void SPURecompiler::MPYHHAU(u32 rt, u32 ra, u32 rb)
|
||||
const XmmLink& va2 = XmmCopy(va);
|
||||
c.pmulhuw(va.get(), vb.get());
|
||||
c.pmullw(va2.get(), vb.get());
|
||||
c.pand(va.get(), XmmConst(u128::from32p(0xffff0000)));
|
||||
c.pand(va.get(), XmmConst(v128::from32p(0xffff0000)));
|
||||
c.psrld(va2.get(), 16);
|
||||
c.paddd(vt.get(), va.get());
|
||||
c.paddd(vt.get(), va2.get());
|
||||
@ -2327,7 +2327,7 @@ void SPURecompiler::MPY(u32 rt, u32 ra, u32 rb)
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb);
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
c.movdqa(vi.get(), XmmConst(u128::from32p(0xffff)));
|
||||
c.movdqa(vi.get(), XmmConst(v128::from32p(0xffff)));
|
||||
c.pand(va.get(), vi.get());
|
||||
c.pand(vb.get(), vi.get());
|
||||
c.pmaddwd(va.get(), vb.get());
|
||||
@ -2392,7 +2392,7 @@ void SPURecompiler::FCMEQ(u32 rt, u32 ra, u32 rb)
|
||||
{
|
||||
const XmmLink& vb = XmmGet(rb, rt);
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
c.movaps(vi.get(), XmmConst(u128::from32p(0x7fffffff)));
|
||||
c.movaps(vi.get(), XmmConst(v128::from32p(0x7fffffff)));
|
||||
c.andps(vb.get(), vi.get()); // abs
|
||||
if (const XmmLink* va = XmmRead(ra))
|
||||
{
|
||||
@ -2421,7 +2421,7 @@ void SPURecompiler::MPYU(u32 rt, u32 ra, u32 rb)
|
||||
c.pmulhuw(va.get(), vb.get());
|
||||
c.pmullw(va2.get(), vb.get());
|
||||
c.pslld(va.get(), 16);
|
||||
c.pand(va2.get(), XmmConst(u128::from32p(0xffff)));
|
||||
c.pand(va2.get(), XmmConst(v128::from32p(0xffff)));
|
||||
c.por(va.get(), va2.get());
|
||||
XmmFinalize(va, rt);
|
||||
XmmFinalize(vb);
|
||||
@ -2468,10 +2468,10 @@ void SPURecompiler::CFLTS(u32 rt, u32 ra, s32 i8)
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
if (i8 != 173)
|
||||
{
|
||||
c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast<float>(173 - (i8 & 0xff))))))); // scale
|
||||
c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast<float>(173 - (i8 & 0xff))))))); // scale
|
||||
}
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
c.movaps(vi.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(31)))));
|
||||
c.movaps(vi.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(31)))));
|
||||
c.cmpps(vi.get(), va.get(), 2);
|
||||
c.cvttps2dq(va.get(), va.get()); // convert to ints with truncation
|
||||
c.pxor(va.get(), vi.get()); // fix result saturation (0x80000000 -> 0x7fffffff)
|
||||
@ -2485,18 +2485,18 @@ void SPURecompiler::CFLTU(u32 rt, u32 ra, s32 i8)
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
if (i8 != 173)
|
||||
{
|
||||
c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast<float>(173 - (i8 & 0xff))))))); // scale
|
||||
c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast<float>(173 - (i8 & 0xff))))))); // scale
|
||||
}
|
||||
c.maxps(va.get(), XmmConst({})); // saturate
|
||||
const XmmLink& vs = XmmCopy(va); // copy scaled value
|
||||
const XmmLink& vs2 = XmmCopy(va);
|
||||
const XmmLink& vs3 = XmmAlloc();
|
||||
c.movaps(vs3.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(31)))));
|
||||
c.movaps(vs3.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(31)))));
|
||||
c.subps(vs2.get(), vs3.get());
|
||||
c.cmpps(vs3.get(), vs.get(), 2);
|
||||
c.andps(vs2.get(), vs3.get());
|
||||
c.cvttps2dq(va.get(), va.get());
|
||||
c.cmpps(vs.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(32)))), 5);
|
||||
c.cmpps(vs.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(32)))), 5);
|
||||
c.cvttps2dq(vs2.get(), vs2.get());
|
||||
c.por(va.get(), vs.get());
|
||||
c.por(va.get(), vs2.get());
|
||||
@ -2513,7 +2513,7 @@ void SPURecompiler::CSFLT(u32 rt, u32 ra, s32 i8)
|
||||
c.cvtdq2ps(va.get(), va.get()); // convert to floats
|
||||
if (i8 != 155)
|
||||
{
|
||||
c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast<float>((i8 & 0xff) - 155)))))); // scale
|
||||
c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast<float>((i8 & 0xff) - 155)))))); // scale
|
||||
}
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
@ -2523,14 +2523,14 @@ void SPURecompiler::CUFLT(u32 rt, u32 ra, s32 i8)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
const XmmLink& v1 = XmmCopy(va);
|
||||
c.pand(va.get(), XmmConst(u128::from32p(0x7fffffff)));
|
||||
c.pand(va.get(), XmmConst(v128::from32p(0x7fffffff)));
|
||||
c.cvtdq2ps(va.get(), va.get()); // convert to floats
|
||||
c.psrad(v1.get(), 31); // generate mask from sign bit
|
||||
c.andps(v1.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(31))))); // generate correction component
|
||||
c.andps(v1.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(31))))); // generate correction component
|
||||
c.addps(va.get(), v1.get()); // add correction component
|
||||
if (i8 != 155)
|
||||
{
|
||||
c.mulps(va.get(), XmmConst(u128::fromF(_mm_set1_ps(exp2f(static_cast<float>((i8 & 0xff) - 155)))))); // scale
|
||||
c.mulps(va.get(), XmmConst(v128::fromF(_mm_set1_ps(exp2f(static_cast<float>((i8 & 0xff) - 155)))))); // scale
|
||||
}
|
||||
XmmFinalize(va, rt);
|
||||
XmmFinalize(v1);
|
||||
@ -2746,7 +2746,7 @@ void SPURecompiler::IL(u32 rt, s32 i16)
|
||||
}
|
||||
else
|
||||
{
|
||||
c.movdqa(vr.get(), XmmConst(u128::from32p(i16)));
|
||||
c.movdqa(vr.get(), XmmConst(v128::from32p(i16)));
|
||||
}
|
||||
XmmFinalize(vr, rt);
|
||||
LOG_OPCODE();
|
||||
@ -2755,7 +2755,7 @@ void SPURecompiler::IL(u32 rt, s32 i16)
|
||||
// ILHU: loads a constant into the XMM link allocated for rt. v128::from32p replicates
// its argument into all four 32-bit lanes, so the low 16 bits of i16 end up in the
// upper halfword of every lane and the lower halfword is zero.
// NOTE(review): i16 is a signed s32; `i16 << 16` on a negative value is not
// well-defined before C++20 — confirm the range the decoder passes in.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::ILHU(u32 rt, s32 i16)
|
||||
{
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
c.movdqa(vr.get(), XmmConst(u128::from32p(i16 << 16)));
|
||||
c.movdqa(vr.get(), XmmConst(v128::from32p(i16 << 16)));
|
||||
XmmFinalize(vr, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2763,7 +2763,7 @@ void SPURecompiler::ILHU(u32 rt, s32 i16)
|
||||
// ILH: loads a constant into the XMM link allocated for rt. The constant is built with
// v128::from32p(i16), i.e. i16 (converted to u32) replicated into each 32-bit lane.
// NOTE(review): the other halfword-immediate handlers visible in this diff (ORHI,
// ANDHI, XORHI, CGTHI, CEQHI) build their constants with from16p. Confirm that from32p
// is intended here, or that the caller already supplies i16 duplicated in both halves.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::ILH(u32 rt, s32 i16)
|
||||
{
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
c.movdqa(vr.get(), XmmConst(u128::from32p(i16)));
|
||||
c.movdqa(vr.get(), XmmConst(v128::from32p(i16)));
|
||||
XmmFinalize(vr, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2771,7 +2771,7 @@ void SPURecompiler::ILH(u32 rt, s32 i16)
|
||||
// IOHL: ORs an immediate into rt in place (rt is both the source and the target of
// XmmGet). The constant is (i16 & 0xffff) replicated into every 32-bit lane, so only
// the lower halfword of each lane can change; the upper halfword is ORed with zero.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::IOHL(u32 rt, s32 i16)
|
||||
{
|
||||
const XmmLink& vt = XmmGet(rt, rt);
|
||||
c.por(vt.get(), XmmConst(u128::from32p(i16 & 0xffff)));
|
||||
c.por(vt.get(), XmmConst(v128::from32p(i16 & 0xffff)));
|
||||
XmmFinalize(vt, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2798,7 +2798,7 @@ void SPURecompiler::ORI(u32 rt, u32 ra, s32 i10)
|
||||
else
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.por(va.get(), XmmConst(u128::from32p(i10)));
|
||||
c.por(va.get(), XmmConst(v128::from32p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
}
|
||||
LOG_OPCODE();
|
||||
@ -2807,7 +2807,7 @@ void SPURecompiler::ORI(u32 rt, u32 ra, s32 i10)
|
||||
// ORHI: bitwise OR of register ra with an immediate, result finalized into rt.
// v128::from16p takes a u16, so i10 is truncated to its low 16 bits and that value is
// replicated into all eight 16-bit lanes of the constant operand.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::ORHI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.por(va.get(), XmmConst(u128::from16p(i10)));
|
||||
c.por(va.get(), XmmConst(v128::from16p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2815,7 +2815,7 @@ void SPURecompiler::ORHI(u32 rt, u32 ra, s32 i10)
|
||||
// ORBI: bitwise OR of register ra with an immediate, result finalized into rt.
// v128::from8p takes a u8, so i10 is truncated to its low 8 bits and that byte is
// replicated into all sixteen byte lanes of the constant operand.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::ORBI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.por(va.get(), XmmConst(u128::from8p(i10)));
|
||||
c.por(va.get(), XmmConst(v128::from8p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2841,7 +2841,7 @@ void SPURecompiler::SFI(u32 rt, u32 ra, s32 i10)
|
||||
else
|
||||
{
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
c.movdqa(vr.get(), XmmConst(u128::from32p(i10)));
|
||||
c.movdqa(vr.get(), XmmConst(v128::from32p(i10)));
|
||||
c.psubd(vr.get(), cpu_xmm(GPR[ra]));
|
||||
XmmFinalize(vr, rt);
|
||||
}
|
||||
@ -2869,7 +2869,7 @@ void SPURecompiler::SFHI(u32 rt, u32 ra, s32 i10)
|
||||
else
|
||||
{
|
||||
const XmmLink& vr = XmmAlloc(rt);
|
||||
c.movdqa(vr.get(), XmmConst(u128::from16p(i10)));
|
||||
c.movdqa(vr.get(), XmmConst(v128::from16p(i10)));
|
||||
c.psubw(vr.get(), cpu_xmm(GPR[ra]));
|
||||
XmmFinalize(vr, rt);
|
||||
}
|
||||
@ -2879,7 +2879,7 @@ void SPURecompiler::SFHI(u32 rt, u32 ra, s32 i10)
|
||||
// ANDI: bitwise AND of register ra with an immediate, result finalized into rt.
// The constant operand is i10 (converted to u32, so a negative i10 keeps its
// sign-extended bit pattern) replicated into all four 32-bit lanes by v128::from32p.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::ANDI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.pand(va.get(), XmmConst(u128::from32p(i10)));
|
||||
c.pand(va.get(), XmmConst(v128::from32p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2887,7 +2887,7 @@ void SPURecompiler::ANDI(u32 rt, u32 ra, s32 i10)
|
||||
// ANDHI: bitwise AND of register ra with an immediate, result finalized into rt.
// v128::from16p takes a u16, so i10 is truncated to its low 16 bits and replicated
// into all eight 16-bit lanes of the constant operand.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::ANDHI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.pand(va.get(), XmmConst(u128::from16p(i10)));
|
||||
c.pand(va.get(), XmmConst(v128::from16p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2895,7 +2895,7 @@ void SPURecompiler::ANDHI(u32 rt, u32 ra, s32 i10)
|
||||
// ANDBI: bitwise AND of register ra with an immediate, result finalized into rt.
// v128::from8p takes a u8, so i10 is truncated to its low 8 bits and replicated into
// all sixteen byte lanes of the constant operand.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::ANDBI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.pand(va.get(), XmmConst(u128::from8p(i10)));
|
||||
c.pand(va.get(), XmmConst(v128::from8p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2904,7 +2904,7 @@ void SPURecompiler::AI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
// add
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.paddd(va.get(), XmmConst(u128::from32p(i10)));
|
||||
c.paddd(va.get(), XmmConst(v128::from32p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2913,7 +2913,7 @@ void SPURecompiler::AHI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
// add
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.paddw(va.get(), XmmConst(u128::from16p(i10)));
|
||||
c.paddw(va.get(), XmmConst(v128::from16p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2965,7 +2965,7 @@ void SPURecompiler::LQD(u32 rt, s32 i10, u32 ra) // i10 is shifted left by 4 whi
|
||||
// XORI: bitwise XOR of register ra with an immediate, result finalized into rt.
// The constant operand is i10 (converted to u32) replicated into all four 32-bit
// lanes by v128::from32p.
// Note that XmmGet is called with ra only here, unlike ANDI/ORHI which also pass rt;
// the difference between the two overloads is not visible in this diff.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::XORI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.pxor(va.get(), XmmConst(u128::from32p(i10)));
|
||||
c.pxor(va.get(), XmmConst(v128::from32p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2973,7 +2973,7 @@ void SPURecompiler::XORI(u32 rt, u32 ra, s32 i10)
|
||||
// XORHI: bitwise XOR of register ra with an immediate, result finalized into rt.
// v128::from16p takes a u16, so i10 is truncated to its low 16 bits and replicated
// into all eight 16-bit lanes of the constant operand.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::XORHI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.pxor(va.get(), XmmConst(u128::from16p(i10)));
|
||||
c.pxor(va.get(), XmmConst(v128::from16p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2981,7 +2981,7 @@ void SPURecompiler::XORHI(u32 rt, u32 ra, s32 i10)
|
||||
// XORBI: bitwise XOR of register ra with an immediate, result finalized into rt.
// v128::from8p takes a u8, so i10 is truncated to its low 8 bits and replicated into
// all sixteen byte lanes of the constant operand.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::XORBI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.pxor(va.get(), XmmConst(u128::from8p(i10)));
|
||||
c.pxor(va.get(), XmmConst(v128::from8p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2989,7 +2989,7 @@ void SPURecompiler::XORBI(u32 rt, u32 ra, s32 i10)
|
||||
// CGTI: signed greater-than compare of register ra against an immediate, result
// finalized into rt. pcmpgtd compares 32-bit lanes as signed integers and leaves each
// lane all-ones where va > constant, zero otherwise. The constant is i10 replicated
// into all four 32-bit lanes by v128::from32p.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::CGTI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.pcmpgtd(va.get(), XmmConst(u128::from32p(i10)));
|
||||
c.pcmpgtd(va.get(), XmmConst(v128::from32p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -2997,7 +2997,7 @@ void SPURecompiler::CGTI(u32 rt, u32 ra, s32 i10)
|
||||
// CGTHI: signed greater-than compare of register ra against an immediate, result
// finalized into rt. pcmpgtw compares 16-bit lanes as signed integers and leaves each
// lane all-ones where va > constant, zero otherwise. The constant is the low 16 bits
// of i10 replicated into all eight 16-bit lanes by v128::from16p.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::CGTHI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.pcmpgtw(va.get(), XmmConst(u128::from16p(i10)));
|
||||
c.pcmpgtw(va.get(), XmmConst(v128::from16p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -3005,7 +3005,7 @@ void SPURecompiler::CGTHI(u32 rt, u32 ra, s32 i10)
|
||||
// CGTBI: signed greater-than compare of register ra against an immediate, result
// finalized into rt. pcmpgtb compares byte lanes as signed integers and leaves each
// lane all-ones where va > constant, zero otherwise. The constant is the low 8 bits
// of i10 replicated into all sixteen byte lanes by v128::from8p.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::CGTBI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.pcmpgtb(va.get(), XmmConst(u128::from8p(i10)));
|
||||
c.pcmpgtb(va.get(), XmmConst(v128::from8p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -3025,8 +3025,8 @@ void SPURecompiler::HGTI(u32 rt, u32 ra, s32 i10)
|
||||
// CLGTI: unsigned ("logical") greater-than compare of register ra against an
// immediate, result finalized into rt. SSE only offers a signed 32-bit compare
// (pcmpgtd), so both operands are biased by 0x80000000 first: va has its sign bit
// flipped with pxor, and the immediate is pre-biased at recompile time via
// (u32)i10 - 0x80000000. Comparing the biased values as signed gives the unsigned
// ordering of the originals.
// Diff rendering: the two u128:: statements are the pre-rename lines and the two
// v128:: statements are their replacements; the function has one pxor and one pcmpgtd.
void SPURecompiler::CLGTI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.pxor(va.get(), XmmConst(u128::from32p(0x80000000)));
|
||||
c.pcmpgtd(va.get(), XmmConst(u128::from32p((u32)i10 - 0x80000000)));
|
||||
c.pxor(va.get(), XmmConst(v128::from32p(0x80000000)));
|
||||
c.pcmpgtd(va.get(), XmmConst(v128::from32p((u32)i10 - 0x80000000)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -3034,8 +3034,8 @@ void SPURecompiler::CLGTI(u32 rt, u32 ra, s32 i10)
|
||||
// CLGTHI: unsigned ("logical") greater-than compare of register ra against an
// immediate on 16-bit lanes, result finalized into rt. pcmpgtw is a signed compare,
// so both operands are biased by 0x8000: va has the top bit of each halfword flipped
// with pxor, and the immediate is pre-biased via (u16)i10 - 0x8000 (the int result is
// truncated back to 16 bits by from16p's u16 parameter).
// Diff rendering: the two u128:: statements are the pre-rename lines and the two
// v128:: statements are their replacements; the function has one pxor and one pcmpgtw.
void SPURecompiler::CLGTHI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.pxor(va.get(), XmmConst(u128::from16p(0x8000)));
|
||||
c.pcmpgtw(va.get(), XmmConst(u128::from16p((u16)i10 - 0x8000)));
|
||||
c.pxor(va.get(), XmmConst(v128::from16p(0x8000)));
|
||||
c.pcmpgtw(va.get(), XmmConst(v128::from16p((u16)i10 - 0x8000)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -3043,8 +3043,8 @@ void SPURecompiler::CLGTHI(u32 rt, u32 ra, s32 i10)
|
||||
// CLGTBI: unsigned ("logical") greater-than compare of register ra against an
// immediate on byte lanes, result finalized into rt. pcmpgtb is a signed compare, so
// both operands are biased by 0x80. Unlike CLGTI/CLGTHI, the register is biased with
// psubb rather than pxor; on a byte lane, subtracting 0x80 modulo 256 flips only the
// top bit, so the effect is the same. The immediate is pre-biased via
// (s8)i10 - 0x80 and truncated to a byte by from8p's u8 parameter.
// Diff rendering: the two u128:: statements are the pre-rename lines and the two
// v128:: statements are their replacements; the function has one psubb and one pcmpgtb.
void SPURecompiler::CLGTBI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.psubb(va.get(), XmmConst(u128::from8p(0x80)));
|
||||
c.pcmpgtb(va.get(), XmmConst(u128::from8p((s8)i10 - 0x80)));
|
||||
c.psubb(va.get(), XmmConst(v128::from8p(0x80)));
|
||||
c.pcmpgtb(va.get(), XmmConst(v128::from8p((s8)i10 - 0x80)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -3064,7 +3064,7 @@ void SPURecompiler::HLGTI(u32 rt, u32 ra, s32 i10)
|
||||
// MPYI: multiplies register ra by an immediate, result finalized into rt.
// The constant puts (i10 & 0xffff) in the lower halfword of every 32-bit lane and
// zero in the upper halfword. pmaddwd multiplies corresponding signed 16-bit elements
// and sums each adjacent pair into a 32-bit lane, so the upper-halfword product is
// always zero and each result lane is (signed low halfword of va) * (signed i10).
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::MPYI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
c.pmaddwd(va.get(), XmmConst(u128::from32p(i10 & 0xffff)));
|
||||
c.pmaddwd(va.get(), XmmConst(v128::from32p(i10 & 0xffff)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -3074,7 +3074,7 @@ void SPURecompiler::MPYUI(u32 rt, u32 ra, s32 i10)
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
const XmmLink& va2 = XmmCopy(va);
|
||||
c.movdqa(vi.get(), XmmConst(u128::from32p(i10 & 0xffff)));
|
||||
c.movdqa(vi.get(), XmmConst(v128::from32p(i10 & 0xffff)));
|
||||
c.pmulhuw(va.get(), vi.get());
|
||||
c.pmullw(va2.get(), vi.get());
|
||||
c.pslld(va.get(), 16);
|
||||
@ -3088,7 +3088,7 @@ void SPURecompiler::MPYUI(u32 rt, u32 ra, s32 i10)
|
||||
// CEQI: equality compare of register ra against an immediate, result finalized into
// rt. pcmpeqd compares 32-bit lanes and leaves each lane all-ones where equal, zero
// otherwise. The constant is i10 replicated into all four 32-bit lanes by
// v128::from32p.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::CEQI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.pcmpeqd(va.get(), XmmConst(u128::from32p(i10)));
|
||||
c.pcmpeqd(va.get(), XmmConst(v128::from32p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -3096,7 +3096,7 @@ void SPURecompiler::CEQI(u32 rt, u32 ra, s32 i10)
|
||||
// CEQHI: equality compare of register ra against an immediate, result finalized into
// rt. pcmpeqw compares 16-bit lanes and leaves each lane all-ones where equal, zero
// otherwise. The constant is the low 16 bits of i10 replicated into all eight 16-bit
// lanes by v128::from16p.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::CEQHI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.pcmpeqw(va.get(), XmmConst(u128::from16p(i10)));
|
||||
c.pcmpeqw(va.get(), XmmConst(v128::from16p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -3104,7 +3104,7 @@ void SPURecompiler::CEQHI(u32 rt, u32 ra, s32 i10)
|
||||
// CEQBI: equality compare of register ra against an immediate, result finalized into
// rt. pcmpeqb compares byte lanes and leaves each lane all-ones where equal, zero
// otherwise. The constant is the low 8 bits of i10 replicated into all sixteen byte
// lanes by v128::from8p.
// Diff rendering: the u128:: statement is the pre-rename line and the v128:: statement
// is its replacement; they represent one statement, not two.
void SPURecompiler::CEQBI(u32 rt, u32 ra, s32 i10)
|
||||
{
|
||||
const XmmLink& va = XmmGet(ra);
|
||||
c.pcmpeqb(va.get(), XmmConst(u128::from8p(i10)));
|
||||
c.pcmpeqb(va.get(), XmmConst(v128::from8p(i10)));
|
||||
XmmFinalize(va, rt);
|
||||
LOG_OPCODE();
|
||||
}
|
||||
@ -3141,7 +3141,7 @@ void SPURecompiler::ILA(u32 rt, u32 i18)
|
||||
}
|
||||
else
|
||||
{
|
||||
c.movdqa(vr.get(), XmmConst(u128::from32p(i18 & 0x3ffff)));
|
||||
c.movdqa(vr.get(), XmmConst(v128::from32p(i18 & 0x3ffff)));
|
||||
}
|
||||
XmmFinalize(vr, rt);
|
||||
LOG_OPCODE();
|
||||
@ -3168,11 +3168,11 @@ void SPURecompiler::SHUFB(u32 rt, u32 ra, u32 rb, u32 rc)
|
||||
const XmmLink& v4 = XmmAlloc();
|
||||
const XmmLink& vFF = XmmAlloc(rt);
|
||||
// generate specific values:
|
||||
c.movdqa(v1.get(), XmmConst(u128::from8p(0xe0))); // v1 = 11100000
|
||||
c.movdqa(v3.get(), XmmConst(u128::from8p(0x80))); // v3 = 10000000
|
||||
c.movdqa(v1.get(), XmmConst(v128::from8p(0xe0))); // v1 = 11100000
|
||||
c.movdqa(v3.get(), XmmConst(v128::from8p(0x80))); // v3 = 10000000
|
||||
c.pand(v2.get(), v1.get()); // filter mask v2 = mask & 11100000
|
||||
c.movdqa(vFF.get(), v2.get()); // and copy vFF = mask & 11100000
|
||||
c.movdqa(v4.get(), XmmConst(u128::from8p(0xc0))); // v4 = 11000000
|
||||
c.movdqa(v4.get(), XmmConst(v128::from8p(0xc0))); // v4 = 11000000
|
||||
c.pcmpeqb(vFF.get(), v4.get()); // gen 0xff vFF = (mask & 11100000 == 11000000) ? 0xff : 0
|
||||
c.movdqa(v4.get(), v2.get()); // copy again v4 = mask & 11100000
|
||||
c.pand(v4.get(), v3.get()); // filter mask v4 = mask & 10000000
|
||||
@ -3182,13 +3182,13 @@ void SPURecompiler::SHUFB(u32 rt, u32 ra, u32 rb, u32 rc)
|
||||
c.por(vFF.get(), v2.get()); // merge 0xff, 0x80 vFF = (mask & 11100000 == 11000000) ? 0xff : (mask & 11100000 == 11100000) ? 0x80 : 0
|
||||
c.pandn(v1.get(), v0.get()); // filter mask v1 = mask & 00011111
|
||||
// select bytes from [rb]:
|
||||
c.movdqa(v2.get(), XmmConst(u128::from8p(0x0f))); // v2 = 00001111
|
||||
c.pxor(v1.get(), XmmConst(u128::from8p(0x10))); // v1 = (mask & 00011111) ^ 00010000
|
||||
c.movdqa(v2.get(), XmmConst(v128::from8p(0x0f))); // v2 = 00001111
|
||||
c.pxor(v1.get(), XmmConst(v128::from8p(0x10))); // v1 = (mask & 00011111) ^ 00010000
|
||||
c.psubb(v2.get(), v1.get()); // v2 = 00001111 - ((mask & 00011111) ^ 00010000)
|
||||
c.movdqa(v1.get(), cpu_xmm(GPR[rb])); // v1 = rb
|
||||
c.pshufb(v1.get(), v2.get()); // v1 = select(rb, 00001111 - ((mask & 00011111) ^ 00010000))
|
||||
// select bytes from [ra]:
|
||||
c.pxor(v2.get(), XmmConst(u128::from8p(0xf0))); // v2 = (00001111 - ((mask & 00011111) ^ 00010000)) ^ 11110000
|
||||
c.pxor(v2.get(), XmmConst(v128::from8p(0xf0))); // v2 = (00001111 - ((mask & 00011111) ^ 00010000)) ^ 11110000
|
||||
c.movdqa(v3.get(), cpu_xmm(GPR[ra])); // v3 = ra
|
||||
c.pshufb(v3.get(), v2.get()); // v3 = select(ra, (00001111 - ((mask & 00011111) ^ 00010000)) ^ 11110000)
|
||||
c.por(v1.get(), v3.get()); // v1 = select(rb, 00001111 - ((mask & 00011111) ^ 00010000)) | (v3)
|
||||
@ -3208,7 +3208,7 @@ void SPURecompiler::MPYA(u32 rt, u32 ra, u32 rb, u32 rc)
|
||||
const XmmLink& va = XmmGet(ra, rt);
|
||||
const XmmLink& vb = XmmGet(rb);
|
||||
const XmmLink& vi = XmmAlloc();
|
||||
c.movdqa(vi.get(), XmmConst(u128::from32p(0xffff)));
|
||||
c.movdqa(vi.get(), XmmConst(v128::from32p(0xffff)));
|
||||
c.pand(va.get(), vi.get());
|
||||
c.pand(vb.get(), vi.get());
|
||||
c.pmaddwd(va.get(), vb.get());
|
||||
|
@ -360,13 +360,13 @@ struct spu_int_ctrl_t
|
||||
|
||||
struct g_spu_imm_table_t
|
||||
{
|
||||
u128 fsmb[65536]; // table for FSMB, FSMBI instructions
|
||||
u128 fsmh[256]; // table for FSMH instruction
|
||||
u128 fsm[16]; // table for FSM instruction
|
||||
v128 fsmb[65536]; // table for FSMB, FSMBI instructions
|
||||
v128 fsmh[256]; // table for FSMH instruction
|
||||
v128 fsm[16]; // table for FSM instruction
|
||||
|
||||
u128 sldq_pshufb[32]; // table for SHLQBYBI, SHLQBY, SHLQBYI instructions
|
||||
u128 srdq_pshufb[32]; // table for ROTQMBYBI, ROTQMBY, ROTQMBYI instructions
|
||||
u128 rldq_pshufb[16]; // table for ROTQBYBI, ROTQBY, ROTQBYI instructions
|
||||
v128 sldq_pshufb[32]; // table for SHLQBYBI, SHLQBY, SHLQBYI instructions
|
||||
v128 srdq_pshufb[32]; // table for ROTQMBYBI, ROTQMBY, ROTQMBYI instructions
|
||||
v128 rldq_pshufb[16]; // table for ROTQBYBI, ROTQBY, ROTQBYI instructions
|
||||
|
||||
class scale_table_t
|
||||
{
|
||||
@ -475,7 +475,7 @@ public:
|
||||
memset(this, 0, sizeof(*this));
|
||||
}
|
||||
//slice -> 0 - 1 (double-precision slice index)
|
||||
//NOTE: slices follow u128 indexing, i.e. slice 0 is RIGHT end of register!
|
||||
//NOTE: slices follow v128 indexing, i.e. slice 0 is RIGHT end of register!
|
||||
//roundTo -> FPSCR_RN_*
|
||||
void setSliceRounding(u8 slice, u8 roundTo)
|
||||
{
|
||||
@ -523,7 +523,7 @@ public:
|
||||
}
|
||||
|
||||
// Write the FPSCR
|
||||
void Write(const u128 & r)
|
||||
void Write(const v128 & r)
|
||||
{
|
||||
_u32[3] = r._u32[3] & 0x00000F07;
|
||||
_u32[2] = r._u32[2] & 0x00003F07;
|
||||
@ -532,7 +532,7 @@ public:
|
||||
}
|
||||
|
||||
// Read the FPSCR
|
||||
void Read(u128 & r)
|
||||
void Read(v128 & r)
|
||||
{
|
||||
r._u32[3] = _u32[3];
|
||||
r._u32[2] = _u32[2];
|
||||
@ -544,7 +544,7 @@ public:
|
||||
class SPUThread : public CPUThread
|
||||
{
|
||||
public:
|
||||
u128 GPR[128]; // General-Purpose Registers
|
||||
v128 GPR[128]; // General-Purpose Registers
|
||||
SPU_FPSCR FPSCR;
|
||||
|
||||
std::unordered_map<u32, std::function<bool(SPUThread& SPU)>> m_addr_to_hle_function_map;
|
||||
@ -643,18 +643,18 @@ public:
|
||||
u16 read16(u32 lsa) const { return vm::ps3::read16(lsa + offset); }
|
||||
u32 read32(u32 lsa) const { return vm::ps3::read32(lsa + offset); }
|
||||
u64 read64(u32 lsa) const { return vm::ps3::read64(lsa + offset); }
|
||||
u128 read128(u32 lsa) const { return vm::ps3::read128(lsa + offset); }
|
||||
v128 read128(u32 lsa) const { return vm::ps3::read128(lsa + offset); }
|
||||
|
||||
void write8(u32 lsa, u8 data) const { vm::write8(lsa + offset, data); }
|
||||
void write16(u32 lsa, u16 data) const { vm::ps3::write16(lsa + offset, data); }
|
||||
void write32(u32 lsa, u32 data) const { vm::ps3::write32(lsa + offset, data); }
|
||||
void write64(u32 lsa, u64 data) const { vm::ps3::write64(lsa + offset, data); }
|
||||
void write128(u32 lsa, u128 data) const { vm::ps3::write128(lsa + offset, data); }
|
||||
void write128(u32 lsa, v128 data) const { vm::ps3::write128(lsa + offset, data); }
|
||||
|
||||
void write16(u32 lsa, be_t<u16> data) const { vm::ps3::write16(lsa + offset, data); }
|
||||
void write32(u32 lsa, be_t<u32> data) const { vm::ps3::write32(lsa + offset, data); }
|
||||
void write64(u32 lsa, be_t<u64> data) const { vm::ps3::write64(lsa + offset, data); }
|
||||
void write128(u32 lsa, be_t<u128> data) const { vm::ps3::write128(lsa + offset, data); }
|
||||
void write128(u32 lsa, be_t<v128> data) const { vm::ps3::write128(lsa + offset, data); }
|
||||
|
||||
void RegisterHleFunction(u32 addr, std::function<bool(SPUThread & SPU)> function)
|
||||
{
|
||||
|
@ -27,7 +27,7 @@ template<typename T> struct _to_atomic_subtype<T, 8>
|
||||
|
||||
template<typename T> struct _to_atomic_subtype<T, 16>
|
||||
{
|
||||
using type = u128;
|
||||
using type = v128;
|
||||
};
|
||||
|
||||
template<typename T> using atomic_subtype_t = typename _to_atomic_subtype<T>::type;
|
||||
@ -127,7 +127,7 @@ private:
|
||||
data = value;
|
||||
}
|
||||
|
||||
force_inline static void write_relaxed(volatile u128& data, const u128& value)
|
||||
force_inline static void write_relaxed(volatile v128& data, const v128& value)
|
||||
{
|
||||
sync_lock_test_and_set(&data, value);
|
||||
}
|
||||
@ -137,9 +137,9 @@ private:
|
||||
return data;
|
||||
}
|
||||
|
||||
force_inline static u128 read_relaxed(const volatile u128& value)
|
||||
force_inline static v128 read_relaxed(const volatile v128& value)
|
||||
{
|
||||
return sync_val_compare_and_swap(const_cast<volatile u128*>(&value), {}, {});
|
||||
return sync_val_compare_and_swap(const_cast<volatile v128*>(&value), {}, {});
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -339,14 +339,14 @@ namespace vm
|
||||
get_ref<be_t<u64>>(addr) = value;
|
||||
}
|
||||
|
||||
inline const be_t<u128>& read128(u32 addr)
|
||||
inline const be_t<v128>& read128(u32 addr)
|
||||
{
|
||||
return get_ref<const be_t<u128>>(addr);
|
||||
return get_ref<const be_t<v128>>(addr);
|
||||
}
|
||||
|
||||
inline void write128(u32 addr, be_t<u128> value)
|
||||
inline void write128(u32 addr, be_t<v128> value)
|
||||
{
|
||||
get_ref<be_t<u128>>(addr) = value;
|
||||
get_ref<be_t<v128>>(addr) = value;
|
||||
}
|
||||
}
|
||||
|
||||
@ -384,14 +384,14 @@ namespace vm
|
||||
get_ref<le_t<u64>>(addr) = value;
|
||||
}
|
||||
|
||||
inline const le_t<u128>& read128(u32 addr)
|
||||
inline const le_t<v128>& read128(u32 addr)
|
||||
{
|
||||
return get_ref<const le_t<u128>>(addr);
|
||||
return get_ref<const le_t<v128>>(addr);
|
||||
}
|
||||
|
||||
inline void write128(u32 addr, le_t<u128> value)
|
||||
inline void write128(u32 addr, le_t<v128> value)
|
||||
{
|
||||
get_ref<le_t<u128>>(addr) = value;
|
||||
get_ref<le_t<v128>>(addr) = value;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -47,7 +47,7 @@ namespace cb_detail
|
||||
template<typename T, int g_count, int f_count, int v_count>
|
||||
struct _func_arg<T, ARG_VECTOR, g_count, f_count, v_count>
|
||||
{
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid callback argument type for ARG_VECTOR");
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid callback argument type for ARG_VECTOR");
|
||||
|
||||
force_inline static void set_value(PPUThread& CPU, const T& arg)
|
||||
{
|
||||
@ -91,7 +91,7 @@ namespace cb_detail
|
||||
force_inline static bool _bind_func_args(PPUThread& CPU, T1 arg1, T... args)
|
||||
{
|
||||
const bool is_float = std::is_floating_point<T1>::value;
|
||||
const bool is_vector = std::is_same<std::remove_cv_t<T1>, u128>::value;
|
||||
const bool is_vector = std::is_same<std::remove_cv_t<T1>, v128>::value;
|
||||
const bool is_context = std::is_same<T1, PPUThread&>::value;
|
||||
const bool is_general = !is_float && !is_vector && !is_context;
|
||||
|
||||
@ -138,7 +138,7 @@ namespace cb_detail
|
||||
template<typename T>
|
||||
struct _func_res<T, ARG_VECTOR>
|
||||
{
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid callback result type for ARG_VECTOR");
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid callback result type for ARG_VECTOR");
|
||||
|
||||
force_inline static T get_value(const PPUThread& CPU)
|
||||
{
|
||||
@ -156,7 +156,7 @@ namespace cb_detail
|
||||
static_assert(!std::is_pointer<RT>::value, "Invalid callback result type (pointer)");
|
||||
static_assert(!std::is_reference<RT>::value, "Invalid callback result type (reference)");
|
||||
const bool is_float = std::is_floating_point<RT>::value;
|
||||
const bool is_vector = std::is_same<std::remove_cv_t<RT>, u128>::value;
|
||||
const bool is_vector = std::is_same<std::remove_cv_t<RT>, v128>::value;
|
||||
const _func_arg_type t = is_float ? ARG_FLOAT : (is_vector ? ARG_VECTOR : ARG_GENERAL);
|
||||
|
||||
return _func_res<RT, t>::get_value(CPU);
|
||||
|
@ -3536,7 +3536,7 @@ s32 spursCreateTask(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id, vm:
|
||||
alloc_ls_blocks = size > 0x3D400 ? 0x7A : ((size - 0x400) >> 11);
|
||||
if (ls_pattern)
|
||||
{
|
||||
u128 ls_pattern_128 = u128::from64r(ls_pattern->_u64[0], ls_pattern->_u64[1]);
|
||||
v128 ls_pattern_128 = v128::from64r(ls_pattern->_u64[0], ls_pattern->_u64[1]);
|
||||
u32 ls_blocks = 0;
|
||||
for (auto i = 0; i < 128; i++)
|
||||
{
|
||||
@ -3551,8 +3551,8 @@ s32 spursCreateTask(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id, vm:
|
||||
return CELL_SPURS_TASK_ERROR_INVAL;
|
||||
}
|
||||
|
||||
u128 _0 = u128::from32(0);
|
||||
if ((ls_pattern_128 & u128::from32r(0xFC000000)) != _0)
|
||||
v128 _0 = v128::from32(0);
|
||||
if ((ls_pattern_128 & v128::from32r(0xFC000000)) != _0)
|
||||
{
|
||||
// Prevent save/restore to SPURS management area
|
||||
return CELL_SPURS_TASK_ERROR_INVAL;
|
||||
@ -3666,7 +3666,7 @@ s32 _cellSpursSendSignal(PPUThread& ppu, vm::ptr<CellSpursTaskset> taskset, u32
|
||||
return CELL_SPURS_TASK_ERROR_INVAL;
|
||||
}
|
||||
|
||||
be_t<u128> _0(u128::from32(0));
|
||||
be_t<v128> _0(v128::from32(0));
|
||||
bool disabled = taskset->enabled.value()._bit[taskId];
|
||||
auto invalid = (taskset->ready & taskset->pending_ready) != _0 || (taskset->running & taskset->waiting) != _0 || disabled ||
|
||||
((taskset->running | taskset->ready | taskset->pending_ready | taskset->waiting | taskset->signalled) & ~taskset->enabled) != _0;
|
||||
@ -3676,7 +3676,7 @@ s32 _cellSpursSendSignal(PPUThread& ppu, vm::ptr<CellSpursTaskset> taskset, u32
|
||||
return CELL_SPURS_TASK_ERROR_SRCH;
|
||||
}
|
||||
|
||||
auto shouldSignal = ((taskset->waiting & ~taskset->signalled) & be_t<u128>(u128::fromBit(taskId))) != _0 ? true : false;
|
||||
auto shouldSignal = ((taskset->waiting & ~taskset->signalled) & be_t<v128>(v128::fromBit(taskId))) != _0 ? true : false;
|
||||
auto signalled = taskset->signalled.value();
|
||||
signalled._bit[taskId] = true;
|
||||
taskset->signalled = signalled;
|
||||
|
@ -746,12 +746,12 @@ struct set_alignment(128) CellSpursTaskset
|
||||
|
||||
CHECK_SIZE(TaskInfo, 48);
|
||||
|
||||
be_t<u128> running; // 0x00
|
||||
be_t<u128> ready; // 0x10
|
||||
be_t<u128> pending_ready; // 0x20
|
||||
be_t<u128> enabled; // 0x30
|
||||
be_t<u128> signalled; // 0x40
|
||||
be_t<u128> waiting; // 0x50
|
||||
be_t<v128> running; // 0x00
|
||||
be_t<v128> ready; // 0x10
|
||||
be_t<v128> pending_ready; // 0x20
|
||||
be_t<v128> enabled; // 0x30
|
||||
be_t<v128> signalled; // 0x40
|
||||
be_t<v128> waiting; // 0x50
|
||||
vm::bptr<CellSpurs, u64> spurs; // 0x60
|
||||
be_t<u64> args; // 0x68
|
||||
u8 enable_clear_ls; // 0x70
|
||||
@ -806,7 +806,7 @@ struct set_alignment(128) CellSpursTaskset2
|
||||
u32 event_flag_id1; // 0x1898
|
||||
u32 event_flag_id2; // 0x189C
|
||||
u8 unk3[0x1980 - 0x18A0]; // 0x18A0
|
||||
be_t<u128> task_exit_code[128]; // 0x1980
|
||||
be_t<v128> task_exit_code[128]; // 0x1980
|
||||
u8 unk4[0x2900 - 0x2180]; // 0x2180
|
||||
};
|
||||
|
||||
@ -894,10 +894,10 @@ struct SpursTasksetContext
|
||||
u8 x27D8[0x2840 - 0x27D8]; // 0x27D8
|
||||
u8 moduleId[16]; // 0x2840
|
||||
u8 stackArea[0x2C80 - 0x2850]; // 0x2850
|
||||
be_t<u128> savedContextLr; // 0x2C80
|
||||
be_t<u128> savedContextSp; // 0x2C90
|
||||
be_t<u128> savedContextR80ToR127[48]; // 0x2CA0
|
||||
be_t<u128> savedContextFpscr; // 0x2FA0
|
||||
be_t<v128> savedContextLr; // 0x2C80
|
||||
be_t<v128> savedContextSp; // 0x2C90
|
||||
be_t<v128> savedContextR80ToR127[48]; // 0x2CA0
|
||||
be_t<v128> savedContextFpscr; // 0x2FA0
|
||||
be_t<u32> savedWriteTagGroupQueryMask; // 0x2FB0
|
||||
be_t<u32> savedSpuWriteEventMask; // 0x2FB4
|
||||
be_t<u32> tasksetMgmtAddr; // 0x2FB8
|
||||
|
@ -1162,7 +1162,7 @@ void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) {
|
||||
auto taskset = vm::get_ptr<CellSpursTaskset>(spu.offset + 0x2700);
|
||||
|
||||
spu.GPR[2].clear();
|
||||
spu.GPR[3] = u128::from64r(taskArgs._u64[0], taskArgs._u64[1]);
|
||||
spu.GPR[3] = v128::from64r(taskArgs._u64[0], taskArgs._u64[1]);
|
||||
spu.GPR[4]._u64[1] = taskset->args;
|
||||
spu.GPR[4]._u64[0] = taskset->spurs.addr();
|
||||
for (auto i = 5; i < 128; i++) {
|
||||
@ -1183,7 +1183,7 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
|
||||
auto taskset = ctxt->taskset.priv_ptr();
|
||||
|
||||
// Verify taskset state is valid
|
||||
be_t<u128> _0(u128::from32(0));
|
||||
be_t<v128> _0(v128::from32(0));
|
||||
if ((taskset->waiting & taskset->running) != _0 || (taskset->ready & taskset->pending_ready) != _0 ||
|
||||
((taskset->running | taskset->ready | taskset->pending_ready | taskset->signalled | taskset->waiting) & ~taskset->enabled) != _0) {
|
||||
assert(!"Invalid taskset state");
|
||||
@ -1199,13 +1199,13 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
|
||||
}
|
||||
}
|
||||
|
||||
u128 readyButNotRunning;
|
||||
v128 readyButNotRunning;
|
||||
u8 selectedTaskId;
|
||||
u128 running = taskset->running.value();
|
||||
u128 waiting = taskset->waiting.value();
|
||||
u128 enabled = taskset->enabled.value();
|
||||
u128 signalled = (taskset->signalled & (taskset->ready | taskset->pending_ready));
|
||||
u128 ready = (taskset->signalled | taskset->ready | taskset->pending_ready);
|
||||
v128 running = taskset->running.value();
|
||||
v128 waiting = taskset->waiting.value();
|
||||
v128 enabled = taskset->enabled.value();
|
||||
v128 signalled = (taskset->signalled & (taskset->ready | taskset->pending_ready));
|
||||
v128 ready = (taskset->signalled | taskset->ready | taskset->pending_ready);
|
||||
|
||||
switch (request) {
|
||||
case SPURS_TASKSET_REQUEST_POLL_SIGNAL:
|
||||
@ -1235,7 +1235,7 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
|
||||
case SPURS_TASKSET_REQUEST_POLL:
|
||||
readyButNotRunning = ready & ~running;
|
||||
if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK) {
|
||||
readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->wkl_flag_wait_task));
|
||||
readyButNotRunning = readyButNotRunning & ~(v128::fromBit(taskset->wkl_flag_wait_task));
|
||||
}
|
||||
|
||||
rc = readyButNotRunning != _0 ? 1 : 0;
|
||||
@ -1260,7 +1260,7 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 *
|
||||
case SPURS_TASKSET_REQUEST_SELECT_TASK:
|
||||
readyButNotRunning = ready & ~running;
|
||||
if (taskset->wkl_flag_wait_task < CELL_SPURS_MAX_TASK) {
|
||||
readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->wkl_flag_wait_task));
|
||||
readyButNotRunning = readyButNotRunning & ~(v128::fromBit(taskset->wkl_flag_wait_task));
|
||||
}
|
||||
|
||||
// Select a task from the readyButNotRunning set to run. Start from the task after the last scheduled task to ensure fairness.
|
||||
@ -1402,7 +1402,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) {
|
||||
|
||||
u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F;
|
||||
u32 lsBlocks = 0;
|
||||
u128 ls_pattern = u128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]);
|
||||
v128 ls_pattern = v128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]);
|
||||
for (auto i = 0; i < 128; i++) {
|
||||
if (ls_pattern._bit[i]) {
|
||||
lsBlocks++;
|
||||
@ -1421,7 +1421,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) {
|
||||
}
|
||||
|
||||
// Get the processor context
|
||||
u128 r;
|
||||
v128 r;
|
||||
spu.FPSCR.Read(r);
|
||||
ctxt->savedContextFpscr = r;
|
||||
ctxt->savedSpuWriteEventMask = spu.get_ch_value(SPU_RdEventMask);
|
||||
@ -1486,7 +1486,7 @@ void spursTasksetDispatch(SPUThread & spu) {
|
||||
|
||||
//spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
|
||||
|
||||
ctxt->savedContextLr = u128::from32r(entryPoint);
|
||||
ctxt->savedContextLr = v128::from32r(entryPoint);
|
||||
ctxt->guidAddr = lowestLoadAddr;
|
||||
ctxt->tasksetMgmtAddr = 0x2700;
|
||||
ctxt->x2FC0 = 0;
|
||||
@ -1516,8 +1516,8 @@ void spursTasksetDispatch(SPUThread & spu) {
|
||||
}
|
||||
|
||||
// If the entire LS is saved then there is no need to load the ELF as it will be saved in the context save area as well
|
||||
u128 ls_pattern = u128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]);
|
||||
if (ls_pattern != u128::from64r(0x03FFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull)) {
|
||||
v128 ls_pattern = v128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]);
|
||||
if (ls_pattern != v128::from64r(0x03FFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull)) {
|
||||
// Load the ELF
|
||||
u32 entryPoint;
|
||||
if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf.addr(), true) != CELL_OK) {
|
||||
|
@ -53,7 +53,7 @@ namespace ppu_func_detail
|
||||
template<typename T, u32 g_count, u32 f_count, u32 v_count>
|
||||
struct bind_arg<T, ARG_VECTOR, g_count, f_count, v_count>
|
||||
{
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid function argument type for ARG_VECTOR");
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid function argument type for ARG_VECTOR");
|
||||
|
||||
static force_inline T get_arg(PPUThread& CPU)
|
||||
{
|
||||
@ -124,7 +124,7 @@ namespace ppu_func_detail
|
||||
template<typename T>
|
||||
struct bind_result<T, ARG_VECTOR>
|
||||
{
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, u128>::value, "Invalid function result type for ARG_VECTOR");
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, v128>::value, "Invalid function result type for ARG_VECTOR");
|
||||
|
||||
static force_inline void put_result(PPUThread& CPU, const T& result)
|
||||
{
|
||||
@ -176,7 +176,7 @@ namespace ppu_func_detail
|
||||
|
||||
// TODO: check calculations
|
||||
const bool is_float = std::is_floating_point<T>::value;
|
||||
const bool is_vector = std::is_same<std::remove_cv_t<T>, u128>::value;
|
||||
const bool is_vector = std::is_same<std::remove_cv_t<T>, v128>::value;
|
||||
const bool is_context = std::is_same<T, PPUThread&>::value;
|
||||
const bool is_variadic = std::is_same<std::remove_cv_t<T>, ppu_va_args_t>::value;
|
||||
const bool is_general = !is_float && !is_vector && !is_context && !is_variadic;
|
||||
@ -201,7 +201,7 @@ namespace ppu_func_detail
|
||||
static_assert(!std::is_pointer<RT>::value, "Invalid function result type (pointer)");
|
||||
static_assert(!std::is_reference<RT>::value, "Invalid function result type (reference)");
|
||||
static const bool is_float = std::is_floating_point<RT>::value;
|
||||
static const bool is_vector = std::is_same<std::remove_cv_t<RT>, u128>::value;
|
||||
static const bool is_vector = std::is_same<std::remove_cv_t<RT>, v128>::value;
|
||||
static const arg_class value = is_float ? ARG_FLOAT : (is_vector ? ARG_VECTOR : ARG_GENERAL);
|
||||
};
|
||||
|
||||
|
@ -318,10 +318,10 @@ s32 sys_spu_thread_group_start(u32 id)
|
||||
|
||||
t->PC = image->entry_point;
|
||||
t->run();
|
||||
t->GPR[3] = u128::from64(0, args.arg1);
|
||||
t->GPR[4] = u128::from64(0, args.arg2);
|
||||
t->GPR[5] = u128::from64(0, args.arg3);
|
||||
t->GPR[6] = u128::from64(0, args.arg4);
|
||||
t->GPR[3] = v128::from64(0, args.arg1);
|
||||
t->GPR[4] = v128::from64(0, args.arg2);
|
||||
t->GPR[5] = v128::from64(0, args.arg3);
|
||||
t->GPR[6] = v128::from64(0, args.arg4);
|
||||
|
||||
t->status.exchange(SPU_STATUS_RUNNING);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user