1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2025-01-31 20:41:45 +01:00

PPU: redesign LVSL, LVSR instructions

This commit is contained in:
Nekotekina 2017-12-16 03:21:55 +03:00
parent d3198ddf60
commit 140a581d23
2 changed files with 49 additions and 43 deletions

View File

@ -94,24 +94,24 @@ extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C)
extern __m128i sse_altivec_lvsl(u64 addr)
{
alignas(16) static const u64 lvsl_values[0x10][2] =
alignas(16) static const u8 lvsl_values[0x10][0x10] =
{
{ 0x08090A0B0C0D0E0F, 0x0001020304050607 },
{ 0x090A0B0C0D0E0F10, 0x0102030405060708 },
{ 0x0A0B0C0D0E0F1011, 0x0203040506070809 },
{ 0x0B0C0D0E0F101112, 0x030405060708090A },
{ 0x0C0D0E0F10111213, 0x0405060708090A0B },
{ 0x0D0E0F1011121314, 0x05060708090A0B0C },
{ 0x0E0F101112131415, 0x060708090A0B0C0D },
{ 0x0F10111213141516, 0x0708090A0B0C0D0E },
{ 0x1011121314151617, 0x08090A0B0C0D0E0F },
{ 0x1112131415161718, 0x090A0B0C0D0E0F10 },
{ 0x1213141516171819, 0x0A0B0C0D0E0F1011 },
{ 0x131415161718191A, 0x0B0C0D0E0F101112 },
{ 0x1415161718191A1B, 0x0C0D0E0F10111213 },
{ 0x15161718191A1B1C, 0x0D0E0F1011121314 },
{ 0x161718191A1B1C1D, 0x0E0F101112131415 },
{ 0x1718191A1B1C1D1E, 0x0F10111213141516 },
{ 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 },
{ 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01 },
{ 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02 },
{ 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03 },
{ 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04 },
{ 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05 },
{ 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06 },
{ 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07 },
{ 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 },
{ 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09 },
{ 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a },
{ 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b },
{ 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c },
{ 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d },
{ 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e },
{ 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f },
};
return _mm_load_si128((__m128i*)lvsl_values[addr & 0xf]);
@ -119,24 +119,24 @@ extern __m128i sse_altivec_lvsl(u64 addr)
extern __m128i sse_altivec_lvsr(u64 addr)
{
alignas(16) static const u64 lvsr_values[0x10][2] =
alignas(16) static const u8 lvsr_values[0x10][0x10] =
{
{ 0x18191A1B1C1D1E1F, 0x1011121314151617 },
{ 0x1718191A1B1C1D1E, 0x0F10111213141516 },
{ 0x161718191A1B1C1D, 0x0E0F101112131415 },
{ 0x15161718191A1B1C, 0x0D0E0F1011121314 },
{ 0x1415161718191A1B, 0x0C0D0E0F10111213 },
{ 0x131415161718191A, 0x0B0C0D0E0F101112 },
{ 0x1213141516171819, 0x0A0B0C0D0E0F1011 },
{ 0x1112131415161718, 0x090A0B0C0D0E0F10 },
{ 0x1011121314151617, 0x08090A0B0C0D0E0F },
{ 0x0F10111213141516, 0x0708090A0B0C0D0E },
{ 0x0E0F101112131415, 0x060708090A0B0C0D },
{ 0x0D0E0F1011121314, 0x05060708090A0B0C },
{ 0x0C0D0E0F10111213, 0x0405060708090A0B },
{ 0x0B0C0D0E0F101112, 0x030405060708090A },
{ 0x0A0B0C0D0E0F1011, 0x0203040506070809 },
{ 0x090A0B0C0D0E0F10, 0x0102030405060708 },
{ 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10 },
{ 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f },
{ 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e },
{ 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d },
{ 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c },
{ 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b },
{ 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a },
{ 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09 },
{ 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 },
{ 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07 },
{ 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06 },
{ 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05 },
{ 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04 },
{ 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03 },
{ 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02 },
{ 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01 },
};
return _mm_load_si128((__m128i*)lvsr_values[addr & 0xf]);
@ -2050,7 +2050,7 @@ bool ppu_interpreter::VSRB(ppu_thread& ppu, ppu_opcode_t op)
auto& d = ppu.vr[op.vd];
const auto& a = ppu.vr[op.va];
const auto& b = ppu.vr[op.vb];
for (uint i = 0; i < 16; i++)
{
d._u8[i] = a._u8[i] >> (b._u8[i] & 0x7);
@ -2964,7 +2964,7 @@ bool ppu_interpreter::BCCTR(ppu_thread& ppu, ppu_opcode_t op)
if (op.lk) ppu.lr = link;
return false;
}
return true;
}
@ -3163,7 +3163,7 @@ bool ppu_interpreter::MFOCRF(ppu_thread& ppu, ppu_opcode_t op)
const u32 n = cntlz32(op.crm) & 7;
const u32 p = n * 4;
const u32 v = ppu.cr[p + 0] << 3 | ppu.cr[p + 1] << 2 | ppu.cr[p + 2] << 1 | ppu.cr[p + 3] << 0;
ppu.gpr[op.rd] = v << (p ^ 0x1c);
}
else

View File

@ -122,7 +122,7 @@ Type* PPUTranslator::GetContextType()
Function* PPUTranslator::Translate(const ppu_function& info)
{
m_function = m_module->getFunction(info.name);
std::fill(std::begin(m_globals), std::end(m_globals), nullptr);
std::fill(std::begin(m_locals), std::end(m_locals), nullptr);
@ -207,7 +207,7 @@ Value* PPUTranslator::GetAddr(u64 _add)
// Load segment address from global variable, compute actual instruction address
return m_ir->CreateAdd(m_ir->getInt64(m_addr + _add), m_ir->CreateLoad(m_segs[m_reloc - m_info.segs.data()]));
}
return m_ir->getInt64(m_addr + _add);
}
@ -342,7 +342,7 @@ Value* PPUTranslator::Solid(Value* value)
const u32 size = value->getType()->getPrimitiveSizeInBits();
/* Workarounds (casting bool vectors directly may produce invalid code) */
if (value->getType() == GetType<bool[4]>())
{
return m_ir->CreateBitCast(SExt(value, GetType<u32[4]>()), m_ir->getIntNTy(128));
@ -1759,7 +1759,7 @@ void PPUTranslator::B(ppu_opcode_t op)
{
RegStore(GetAddr(+4), m_lr);
}
FlushRegisters();
CallFunction(target);
}
@ -1972,7 +1972,7 @@ void PPUTranslator::RLWNM(ppu_opcode_t op)
// Generic op
result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs, 32), GetGpr(op.rb, 32)), mask);
}
// Extend 32-bit op result
result = ZExt(result);
}
@ -2064,7 +2064,7 @@ void PPUTranslator::RLDICL(ppu_opcode_t op)
// Generic op, including CLRLDI mnemonic
result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), sh), mask);
}
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
@ -2196,6 +2196,9 @@ void PPUTranslator::TW(ppu_opcode_t op)
// LVSL: stores into vector register vd the u8[16] value returned by the "__lvsl" helper.
// The helper is given gpr[ra] + gpr[rb], or gpr[rb] alone when the ra field is zero.
void PPUTranslator::LVSL(ppu_opcode_t op)
{
	// Start from rb; ra only contributes when its field is non-zero
	Value* addr = GetGpr(op.rb);

	if (op.ra)
	{
		addr = m_ir->CreateAdd(GetGpr(op.ra), addr);
	}

	// Disabled inline variant, kept for reference: it appears to broadcast (addr & 0xf)
	// to all 16 byte lanes and add it to the constant vector {15, 14, ..., 0}.
	// NOTE(review): the reason it is disabled is not visible here - confirm before re-enabling.
	//const auto _add = m_ir->CreateInsertElement(ConstantVector::getSplat(16, m_ir->getInt8(0)), Trunc(m_ir->CreateAnd(addr, 0xf), GetType<u8>()), m_ir->getInt32(0));
	//const auto base = ConstantDataVector::get(m_context, std::vector<u8>{15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
	//SetVr(op.vd, m_ir->CreateAdd(base, Shuffle(_add, nullptr, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})));

	// Active path: call "__lvsl" (declared with m_pure_attr) and write its result to vd
	const auto result = Call(GetType<u8[16]>(), m_pure_attr, "__lvsl", addr);
	SetVr(op.vd, result);
}
@ -2351,6 +2354,9 @@ void PPUTranslator::CMPL(ppu_opcode_t op)
// LVSR: stores into vector register vd the u8[16] value returned by the "__lvsr" helper.
// The helper is given gpr[ra] + gpr[rb], or gpr[rb] alone when the ra field is zero.
void PPUTranslator::LVSR(ppu_opcode_t op)
{
	// Start from rb; ra only contributes when its field is non-zero
	Value* addr = GetGpr(op.rb);

	if (op.ra)
	{
		addr = m_ir->CreateAdd(GetGpr(op.ra), addr);
	}

	// Disabled inline variant, kept for reference: it appears to broadcast (addr & 0xf)
	// to all 16 byte lanes and subtract it from the constant vector {31, 30, ..., 16}.
	// NOTE(review): the reason it is disabled is not visible here - confirm before re-enabling.
	//const auto _add = m_ir->CreateInsertElement(ConstantVector::getSplat(16, m_ir->getInt8(0)), Trunc(m_ir->CreateAnd(addr, 0xf), GetType<u8>()), m_ir->getInt32(0));
	//const auto base = ConstantDataVector::get(m_context, std::vector<u8>{31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16});
	//SetVr(op.vd, m_ir->CreateSub(base, Shuffle(_add, nullptr, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})));

	// Active path: call "__lvsr" (declared with m_pure_attr) and write its result to vd
	const auto result = Call(GetType<u8[16]>(), m_pure_attr, "__lvsr", addr);
	SetVr(op.vd, result);
}