mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-31 20:41:45 +01:00
PPU: redesign LVSL, LVSR instructions
This commit is contained in:
parent
d3198ddf60
commit
140a581d23
@ -94,24 +94,24 @@ extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C)
|
||||
|
||||
extern __m128i sse_altivec_lvsl(u64 addr)
|
||||
{
|
||||
alignas(16) static const u64 lvsl_values[0x10][2] =
|
||||
alignas(16) static const u8 lvsl_values[0x10][0x10] =
|
||||
{
|
||||
{ 0x08090A0B0C0D0E0F, 0x0001020304050607 },
|
||||
{ 0x090A0B0C0D0E0F10, 0x0102030405060708 },
|
||||
{ 0x0A0B0C0D0E0F1011, 0x0203040506070809 },
|
||||
{ 0x0B0C0D0E0F101112, 0x030405060708090A },
|
||||
{ 0x0C0D0E0F10111213, 0x0405060708090A0B },
|
||||
{ 0x0D0E0F1011121314, 0x05060708090A0B0C },
|
||||
{ 0x0E0F101112131415, 0x060708090A0B0C0D },
|
||||
{ 0x0F10111213141516, 0x0708090A0B0C0D0E },
|
||||
{ 0x1011121314151617, 0x08090A0B0C0D0E0F },
|
||||
{ 0x1112131415161718, 0x090A0B0C0D0E0F10 },
|
||||
{ 0x1213141516171819, 0x0A0B0C0D0E0F1011 },
|
||||
{ 0x131415161718191A, 0x0B0C0D0E0F101112 },
|
||||
{ 0x1415161718191A1B, 0x0C0D0E0F10111213 },
|
||||
{ 0x15161718191A1B1C, 0x0D0E0F1011121314 },
|
||||
{ 0x161718191A1B1C1D, 0x0E0F101112131415 },
|
||||
{ 0x1718191A1B1C1D1E, 0x0F10111213141516 },
|
||||
{ 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 },
|
||||
{ 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01 },
|
||||
{ 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02 },
|
||||
{ 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03 },
|
||||
{ 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04 },
|
||||
{ 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05 },
|
||||
{ 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06 },
|
||||
{ 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07 },
|
||||
{ 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 },
|
||||
{ 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09 },
|
||||
{ 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a },
|
||||
{ 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b },
|
||||
{ 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c },
|
||||
{ 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d },
|
||||
{ 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e },
|
||||
{ 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f },
|
||||
};
|
||||
|
||||
return _mm_load_si128((__m128i*)lvsl_values[addr & 0xf]);
|
||||
@ -119,24 +119,24 @@ extern __m128i sse_altivec_lvsl(u64 addr)
|
||||
|
||||
extern __m128i sse_altivec_lvsr(u64 addr)
|
||||
{
|
||||
alignas(16) static const u64 lvsr_values[0x10][2] =
|
||||
alignas(16) static const u8 lvsr_values[0x10][0x10] =
|
||||
{
|
||||
{ 0x18191A1B1C1D1E1F, 0x1011121314151617 },
|
||||
{ 0x1718191A1B1C1D1E, 0x0F10111213141516 },
|
||||
{ 0x161718191A1B1C1D, 0x0E0F101112131415 },
|
||||
{ 0x15161718191A1B1C, 0x0D0E0F1011121314 },
|
||||
{ 0x1415161718191A1B, 0x0C0D0E0F10111213 },
|
||||
{ 0x131415161718191A, 0x0B0C0D0E0F101112 },
|
||||
{ 0x1213141516171819, 0x0A0B0C0D0E0F1011 },
|
||||
{ 0x1112131415161718, 0x090A0B0C0D0E0F10 },
|
||||
{ 0x1011121314151617, 0x08090A0B0C0D0E0F },
|
||||
{ 0x0F10111213141516, 0x0708090A0B0C0D0E },
|
||||
{ 0x0E0F101112131415, 0x060708090A0B0C0D },
|
||||
{ 0x0D0E0F1011121314, 0x05060708090A0B0C },
|
||||
{ 0x0C0D0E0F10111213, 0x0405060708090A0B },
|
||||
{ 0x0B0C0D0E0F101112, 0x030405060708090A },
|
||||
{ 0x0A0B0C0D0E0F1011, 0x0203040506070809 },
|
||||
{ 0x090A0B0C0D0E0F10, 0x0102030405060708 },
|
||||
{ 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10 },
|
||||
{ 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f },
|
||||
{ 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e },
|
||||
{ 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d },
|
||||
{ 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c },
|
||||
{ 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b },
|
||||
{ 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a },
|
||||
{ 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09 },
|
||||
{ 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 },
|
||||
{ 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07 },
|
||||
{ 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06 },
|
||||
{ 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05 },
|
||||
{ 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04 },
|
||||
{ 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03 },
|
||||
{ 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02 },
|
||||
{ 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01 },
|
||||
};
|
||||
|
||||
return _mm_load_si128((__m128i*)lvsr_values[addr & 0xf]);
|
||||
@ -2050,7 +2050,7 @@ bool ppu_interpreter::VSRB(ppu_thread& ppu, ppu_opcode_t op)
|
||||
auto& d = ppu.vr[op.vd];
|
||||
const auto& a = ppu.vr[op.va];
|
||||
const auto& b = ppu.vr[op.vb];
|
||||
|
||||
|
||||
for (uint i = 0; i < 16; i++)
|
||||
{
|
||||
d._u8[i] = a._u8[i] >> (b._u8[i] & 0x7);
|
||||
@ -2964,7 +2964,7 @@ bool ppu_interpreter::BCCTR(ppu_thread& ppu, ppu_opcode_t op)
|
||||
if (op.lk) ppu.lr = link;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -3163,7 +3163,7 @@ bool ppu_interpreter::MFOCRF(ppu_thread& ppu, ppu_opcode_t op)
|
||||
const u32 n = cntlz32(op.crm) & 7;
|
||||
const u32 p = n * 4;
|
||||
const u32 v = ppu.cr[p + 0] << 3 | ppu.cr[p + 1] << 2 | ppu.cr[p + 2] << 1 | ppu.cr[p + 3] << 0;
|
||||
|
||||
|
||||
ppu.gpr[op.rd] = v << (p ^ 0x1c);
|
||||
}
|
||||
else
|
||||
|
@ -122,7 +122,7 @@ Type* PPUTranslator::GetContextType()
|
||||
Function* PPUTranslator::Translate(const ppu_function& info)
|
||||
{
|
||||
m_function = m_module->getFunction(info.name);
|
||||
|
||||
|
||||
std::fill(std::begin(m_globals), std::end(m_globals), nullptr);
|
||||
std::fill(std::begin(m_locals), std::end(m_locals), nullptr);
|
||||
|
||||
@ -207,7 +207,7 @@ Value* PPUTranslator::GetAddr(u64 _add)
|
||||
// Load segment address from global variable, compute actual instruction address
|
||||
return m_ir->CreateAdd(m_ir->getInt64(m_addr + _add), m_ir->CreateLoad(m_segs[m_reloc - m_info.segs.data()]));
|
||||
}
|
||||
|
||||
|
||||
return m_ir->getInt64(m_addr + _add);
|
||||
}
|
||||
|
||||
@ -342,7 +342,7 @@ Value* PPUTranslator::Solid(Value* value)
|
||||
const u32 size = value->getType()->getPrimitiveSizeInBits();
|
||||
|
||||
/* Workarounds (casting bool vectors directly may produce invalid code) */
|
||||
|
||||
|
||||
if (value->getType() == GetType<bool[4]>())
|
||||
{
|
||||
return m_ir->CreateBitCast(SExt(value, GetType<u32[4]>()), m_ir->getIntNTy(128));
|
||||
@ -1759,7 +1759,7 @@ void PPUTranslator::B(ppu_opcode_t op)
|
||||
{
|
||||
RegStore(GetAddr(+4), m_lr);
|
||||
}
|
||||
|
||||
|
||||
FlushRegisters();
|
||||
CallFunction(target);
|
||||
}
|
||||
@ -1972,7 +1972,7 @@ void PPUTranslator::RLWNM(ppu_opcode_t op)
|
||||
// Generic op
|
||||
result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs, 32), GetGpr(op.rb, 32)), mask);
|
||||
}
|
||||
|
||||
|
||||
// Extend 32-bit op result
|
||||
result = ZExt(result);
|
||||
}
|
||||
@ -2064,7 +2064,7 @@ void PPUTranslator::RLDICL(ppu_opcode_t op)
|
||||
// Generic op, including CLRLDI mnemonic
|
||||
result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), sh), mask);
|
||||
}
|
||||
|
||||
|
||||
SetGpr(op.ra, result);
|
||||
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
|
||||
}
|
||||
@ -2196,6 +2196,9 @@ void PPUTranslator::TW(ppu_opcode_t op)
|
||||
// Translates the PPU LVSL instruction: computes the effective address and stores
// into vector register `vd` the u8[16] value returned by the "__lvsl" call.
void PPUTranslator::LVSL(ppu_opcode_t op)
|
||||
{
|
||||
// Effective address: GPR[ra] + GPR[rb], or GPR[rb] alone when the ra field is zero.
const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
|
||||
// Disabled inline variant, kept for reference: base bytes {15..0} plus a
// 16-lane splat of the low 4 bits of the address (addr & 0xf).
//const auto _add = m_ir->CreateInsertElement(ConstantVector::getSplat(16, m_ir->getInt8(0)), Trunc(m_ir->CreateAnd(addr, 0xf), GetType<u8>()), m_ir->getInt32(0));
|
||||
//const auto base = ConstantDataVector::get(m_context, std::vector<u8>{15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
|
||||
//SetVr(op.vd, m_ir->CreateAdd(base, Shuffle(_add, nullptr, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})));
|
||||
// Active path: call the function named "__lvsl" with the address.
// NOTE(review): presumably bound to sse_altivec_lvsl (table lookup on addr & 0xf) - confirm at the link site.
SetVr(op.vd, Call(GetType<u8[16]>(), m_pure_attr, "__lvsl", addr));
|
||||
}
|
||||
|
||||
@ -2351,6 +2354,9 @@ void PPUTranslator::CMPL(ppu_opcode_t op)
|
||||
// Translates the PPU LVSR instruction: computes the effective address and stores
// into vector register `vd` the u8[16] value returned by the "__lvsr" call.
void PPUTranslator::LVSR(ppu_opcode_t op)
|
||||
{
|
||||
// Effective address: GPR[ra] + GPR[rb], or GPR[rb] alone when the ra field is zero.
const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
|
||||
// Disabled inline variant, kept for reference: base bytes {31..16} minus a
// 16-lane splat of the low 4 bits of the address (addr & 0xf).
//const auto _add = m_ir->CreateInsertElement(ConstantVector::getSplat(16, m_ir->getInt8(0)), Trunc(m_ir->CreateAnd(addr, 0xf), GetType<u8>()), m_ir->getInt32(0));
|
||||
//const auto base = ConstantDataVector::get(m_context, std::vector<u8>{31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16});
|
||||
//SetVr(op.vd, m_ir->CreateSub(base, Shuffle(_add, nullptr, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})));
|
||||
// Active path: call the function named "__lvsr" with the address.
// NOTE(review): presumably bound to sse_altivec_lvsr (table lookup on addr & 0xf) - confirm at the link site.
SetVr(op.vd, Call(GetType<u8[16]>(), m_pure_attr, "__lvsr", addr));
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user