mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-25 04:02:42 +01:00
PPU: rewrite LVSL/LVSR instructions
Make the tables endian-agnostic.
This commit is contained in:
parent
628354ba92
commit
aa7c9dd15d
@ -500,70 +500,6 @@ inline u32 ppu_record_call(ppu_thread& ppu, u32 new_cia, ppu_opcode_t op, bool i
|
||||
}
|
||||
}
|
||||
|
||||
// Scalar byte-shuffle fallback: for every one of the 16 lanes, picks the byte
// of `data` selected by the low four bits of the matching `index` byte, then
// clears every lane whose `index` byte is negative (top bit set).
extern SAFE_BUFFERS(__m128i) sse_pshufb(__m128i data, __m128i index)
{
	// Only the low nibble of each selector addresses a source byte.
	v128 selectors = _mm_and_si128(index, _mm_set1_epi8(0xf));
	v128 source = data;
	v128 shuffled;

	int lane = 0;
	while (lane != 16)
	{
		const auto pick = selectors._u8[lane];
		shuffled._u8[lane] = source._u8[pick];
		++lane;
	}

	// All-ones where the selector byte is greater than -1 (i.e. non-negative), zero elsewhere.
	const __m128i keep = _mm_cmpgt_epi8(index, _mm_set1_epi8(-1));
	return _mm_and_si128(shuffled, keep);
}
|
||||
|
||||
// Returns the LVSL vector for `addr`. Only the low four bits of `addr` matter:
// stored byte i of the result equals 0x0f - i + (addr & 0xf), so the bytes run
// from 0x0f + shift down to shift in storage order.
extern __m128i sse_altivec_lvsl(u64 addr)
{
	const int shift = static_cast<int>(addr & 0xf);

	// 16-byte aligned scratch row, required by the aligned load below.
	alignas(16) u8 row[0x10];

	for (int pos = 0; pos < 0x10; ++pos)
	{
		row[pos] = static_cast<u8>(0x0f + shift - pos);
	}

	return _mm_load_si128(reinterpret_cast<const __m128i*>(&row[0]));
}
|
||||
|
||||
// Returns the LVSR vector for `addr`. Only the low four bits of `addr` matter:
// stored byte i of the result equals 0x1f - i - (addr & 0xf), so the bytes run
// from 0x1f - shift down to 0x10 - shift in storage order.
extern __m128i sse_altivec_lvsr(u64 addr)
{
	const int shift = static_cast<int>(addr & 0xf);

	// 16-byte aligned scratch row, required by the aligned load below.
	alignas(16) u8 row[0x10];

	for (int pos = 0; pos < 0x10; ++pos)
	{
		row[pos] = static_cast<u8>(0x1f - shift - pos);
	}

	return _mm_load_si128(reinterpret_cast<const __m128i*>(&row[0]));
}
|
||||
|
||||
template<typename T>
|
||||
struct add_flags_result_t
|
||||
{
|
||||
@ -3739,16 +3675,40 @@ auto TW()
|
||||
};
|
||||
}
|
||||
|
||||
// Seed vector for the LVSL/LVSR lookup tables, built from the two 64-bit
// halves 0x0001020304050607 and 0x08090a0b0c0d0e0f (byte values 0x00..0x0f).
// NOTE(review): the in-memory byte order is decided by v128::from64r, which is
// defined elsewhere - confirm before relying on a particular layout.
const v128 s_lvsl_base = v128::from64r(0x0001020304050607, 0x08090a0b0c0d0e0f);
|
||||
|
||||
// Lookup table with one vector per value of (addr % 16), read by LVSL().
// Entry n is gv_add8(s_lvsl_base, gv_bcst8(n)) for n = 0..15.
// NOTE(review): presumably gv_bcst8(n) broadcasts n to every byte and gv_add8
// adds byte-wise, giving s_lvsl_base with n added to each byte - confirm
// against the helper definitions, which are outside this view.
// Must stay declared after s_lvsl_base, which its initializers read.
const v128 s_lvsl_consts[16] =
|
||||
{
|
||||
gv_add8(s_lvsl_base, gv_bcst8(0)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(1)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(2)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(3)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(4)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(5)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(6)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(7)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(8)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(9)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(10)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(11)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(12)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(13)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(14)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(15)),
|
||||
};
|
||||
|
||||
template <u32 Build, ppu_exec_bit... Flags>
|
||||
auto LVSL()
|
||||
{
|
||||
if constexpr (Build == 0xf1a6)
|
||||
return ppu_exec_select<Flags...>::template select<>();
|
||||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
||||
ppu.vr[op.vd] = sse_altivec_lvsl(addr);
|
||||
ppu.vr[op.vd] = s_lvsl_consts[addr % 16];
|
||||
};
|
||||
|
||||
RETURN_(ppu, op);
|
||||
}
|
||||
|
||||
@ -3982,16 +3942,38 @@ auto CMPL()
|
||||
RETURN_(ppu, op);
|
||||
}
|
||||
|
||||
// Lookup table with one vector per value of (addr % 16), read by LVSR().
// Entry n is gv_add8(s_lvsl_base, gv_bcst8(16 - n)) for n = 0..15, i.e. the
// broadcast operand counts down from 16 to 1.
// NOTE(review): presumably gv_bcst8(k) broadcasts k to every byte and gv_add8
// adds byte-wise, giving s_lvsl_base with (16 - n) added to each byte -
// confirm against the helper definitions, which are outside this view.
// Must stay declared after s_lvsl_base, which its initializers read.
const v128 s_lvsr_consts[16] =
|
||||
{
|
||||
gv_add8(s_lvsl_base, gv_bcst8(16)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(15)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(14)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(13)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(12)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(11)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(10)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(9)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(8)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(7)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(6)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(5)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(4)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(3)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(2)),
|
||||
gv_add8(s_lvsl_base, gv_bcst8(1)),
|
||||
};
|
||||
|
||||
template <u32 Build, ppu_exec_bit... Flags>
|
||||
auto LVSR()
|
||||
{
|
||||
if constexpr (Build == 0xf1a6)
|
||||
return ppu_exec_select<Flags...>::template select<>();
|
||||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
||||
ppu.vr[op.vd] = sse_altivec_lvsr(addr);
|
||||
ppu.vr[op.vd] = s_lvsr_consts[addr % 16];
|
||||
};
|
||||
|
||||
RETURN_(ppu, op);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user