mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-23 11:13:19 +01:00
PPU: Implemented some instructions in the recompiler. Fixed some bugs in the interpreter.
This commit is contained in:
parent
205e1d88b3
commit
6ea50567b6
@ -834,11 +834,11 @@ private:
|
||||
}
|
||||
void VCTSXS(u32 vd, u32 uimm5, u32 vb)
|
||||
{
|
||||
int nScale = 1 << uimm5;
|
||||
|
||||
u32 nScale = 1 << uimm5;
|
||||
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
float result = CPU.VPR[vb]._f[w] * nScale;
|
||||
{
|
||||
double result = (double)CPU.VPR[vb]._f[w] * nScale;
|
||||
|
||||
if (result > 0x7fffffff)
|
||||
{
|
||||
@ -856,12 +856,12 @@ private:
|
||||
}
|
||||
void VCTUXS(u32 vd, u32 uimm5, u32 vb)
|
||||
{
|
||||
int nScale = 1 << uimm5;
|
||||
u32 nScale = 1 << uimm5;
|
||||
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
// C rounding = Round towards 0
|
||||
float result = CPU.VPR[vb]._f[w] * nScale;
|
||||
double result = (double)CPU.VPR[vb]._f[w] * nScale;
|
||||
|
||||
if (result > 0xffffffffu)
|
||||
{
|
||||
@ -1078,26 +1078,32 @@ private:
|
||||
}
|
||||
void VMRGLB(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[vd]._u8[15 - h*2] = CPU.VPR[va]._u8[7 - h];
|
||||
CPU.VPR[vd]._u8[15 - h*2 - 1] = CPU.VPR[vb]._u8[7 - h];
|
||||
CPU.VPR[vd]._u8[15 - h*2] = VA._u8[7 - h];
|
||||
CPU.VPR[vd]._u8[15 - h*2 - 1] = VB._u8[7 - h];
|
||||
}
|
||||
}
|
||||
void VMRGLH(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._u16[7 - w*2] = CPU.VPR[va]._u16[3 - w];
|
||||
CPU.VPR[vd]._u16[7 - w*2 - 1] = CPU.VPR[vb]._u16[3 - w];
|
||||
CPU.VPR[vd]._u16[7 - w*2] = VA._u16[3 - w];
|
||||
CPU.VPR[vd]._u16[7 - w*2 - 1] = VB._u16[3 - w];
|
||||
}
|
||||
}
|
||||
void VMRGLW(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint d = 0; d < 2; d++)
|
||||
{
|
||||
CPU.VPR[vd]._u32[3 - d*2] = CPU.VPR[va]._u32[1 - d];
|
||||
CPU.VPR[vd]._u32[3 - d*2 - 1] = CPU.VPR[vb]._u32[1 - d];
|
||||
CPU.VPR[vd]._u32[3 - d*2] = VA._u32[1 - d];
|
||||
CPU.VPR[vd]._u32[3 - d*2 - 1] = VB._u32[1 - d];
|
||||
}
|
||||
}
|
||||
void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) //nf
|
||||
@ -1168,7 +1174,7 @@ private:
|
||||
|
||||
for (uint b = 0; b < 4; b++)
|
||||
{
|
||||
result += CPU.VPR[va]._u8[w*4 + b] * CPU.VPR[vb]._u8[w*4 + b];
|
||||
result += (u32)CPU.VPR[va]._u8[w*4 + b] * (u32)CPU.VPR[vb]._u8[w*4 + b];
|
||||
}
|
||||
|
||||
result += CPU.VPR[vc]._u32[w];
|
||||
@ -1183,7 +1189,7 @@ private:
|
||||
|
||||
for (uint h = 0; h < 2; h++)
|
||||
{
|
||||
result += CPU.VPR[va]._u16[w*2 + h] * CPU.VPR[vb]._u16[w*2 + h];
|
||||
result += (u32)CPU.VPR[va]._u16[w*2 + h] * (u32)CPU.VPR[vb]._u16[w*2 + h];
|
||||
}
|
||||
|
||||
result += CPU.VPR[vc]._u32[w];
|
||||
@ -1199,7 +1205,7 @@ private:
|
||||
|
||||
for (uint h = 0; h < 2; h++)
|
||||
{
|
||||
result += CPU.VPR[va]._u16[w*2 + h] * CPU.VPR[vb]._u16[w*2 + h];
|
||||
result += (u64)CPU.VPR[va]._u16[w*2 + h] * (u64)CPU.VPR[vb]._u16[w*2 + h];
|
||||
}
|
||||
|
||||
result += CPU.VPR[vc]._u32[w];
|
||||
@ -1307,16 +1313,18 @@ private:
|
||||
}
|
||||
void VPKPX(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
u16 bb7 = CPU.VPR[vb]._u8[15 - (h*4 + 0)] & 0x1;
|
||||
u16 bb8 = CPU.VPR[vb]._u8[15 - (h*4 + 1)] >> 3;
|
||||
u16 bb16 = CPU.VPR[vb]._u8[15 - (h*4 + 2)] >> 3;
|
||||
u16 bb24 = CPU.VPR[vb]._u8[15 - (h*4 + 3)] >> 3;
|
||||
u16 ab7 = CPU.VPR[va]._u8[15 - (h*4 + 0)] & 0x1;
|
||||
u16 ab8 = CPU.VPR[va]._u8[15 - (h*4 + 1)] >> 3;
|
||||
u16 ab16 = CPU.VPR[va]._u8[15 - (h*4 + 2)] >> 3;
|
||||
u16 ab24 = CPU.VPR[va]._u8[15 - (h*4 + 3)] >> 3;
|
||||
u16 bb7 = VB._u8[15 - (h*4 + 0)] & 0x1;
|
||||
u16 bb8 = VB._u8[15 - (h*4 + 1)] >> 3;
|
||||
u16 bb16 = VB._u8[15 - (h*4 + 2)] >> 3;
|
||||
u16 bb24 = VB._u8[15 - (h*4 + 3)] >> 3;
|
||||
u16 ab7 = VA._u8[15 - (h*4 + 0)] & 0x1;
|
||||
u16 ab8 = VA._u8[15 - (h*4 + 1)] >> 3;
|
||||
u16 ab16 = VA._u8[15 - (h*4 + 2)] >> 3;
|
||||
u16 ab24 = VA._u8[15 - (h*4 + 3)] >> 3;
|
||||
|
||||
CPU.VPR[vd]._u16[3 - h] = (bb7 << 15) | (bb8 << 10) | (bb16 << 5) | bb24;
|
||||
CPU.VPR[vd]._u16[4 + (3 - h)] = (ab7 << 15) | (ab8 << 10) | (ab16 << 5) | ab24;
|
||||
@ -1324,9 +1332,11 @@ private:
|
||||
}
|
||||
void VPKSHSS(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint b = 0; b < 8; b++)
|
||||
{
|
||||
s16 result = CPU.VPR[va]._s16[b];
|
||||
s16 result = VA._s16[b];
|
||||
|
||||
if (result > INT8_MAX)
|
||||
{
|
||||
@ -1341,7 +1351,7 @@ private:
|
||||
|
||||
CPU.VPR[vd]._s8[b+8] = (s8)result;
|
||||
|
||||
result = CPU.VPR[vb]._s16[b];
|
||||
result = VB._s16[b];
|
||||
|
||||
if (result > INT8_MAX)
|
||||
{
|
||||
@ -1359,9 +1369,11 @@ private:
|
||||
}
|
||||
void VPKSHUS(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint b = 0; b < 8; b++)
|
||||
{
|
||||
s16 result = CPU.VPR[va]._s16[b];
|
||||
s16 result = VA._s16[b];
|
||||
|
||||
if (result > UINT8_MAX)
|
||||
{
|
||||
@ -1376,7 +1388,7 @@ private:
|
||||
|
||||
CPU.VPR[vd]._u8[b+8] = (u8)result;
|
||||
|
||||
result = CPU.VPR[vb]._s16[b];
|
||||
result = VB._s16[b];
|
||||
|
||||
if (result > UINT8_MAX)
|
||||
{
|
||||
@ -1394,9 +1406,11 @@ private:
|
||||
}
|
||||
void VPKSWSS(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
s32 result = CPU.VPR[va]._s32[h];
|
||||
s32 result = VA._s32[h];
|
||||
|
||||
if (result > INT16_MAX)
|
||||
{
|
||||
@ -1411,7 +1425,7 @@ private:
|
||||
|
||||
CPU.VPR[vd]._s16[h+4] = result;
|
||||
|
||||
result = CPU.VPR[vb]._s32[h];
|
||||
result = VB._s32[h];
|
||||
|
||||
if (result > INT16_MAX)
|
||||
{
|
||||
@ -1429,9 +1443,11 @@ private:
|
||||
}
|
||||
void VPKSWUS(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
s32 result = CPU.VPR[va]._s32[h];
|
||||
s32 result = VA._s32[h];
|
||||
|
||||
if (result > UINT16_MAX)
|
||||
{
|
||||
@ -1446,7 +1462,7 @@ private:
|
||||
|
||||
CPU.VPR[vd]._u16[h+4] = result;
|
||||
|
||||
result = CPU.VPR[vb]._s32[h];
|
||||
result = VB._s32[h];
|
||||
|
||||
if (result > UINT16_MAX)
|
||||
{
|
||||
@ -1464,17 +1480,21 @@ private:
|
||||
}
|
||||
void VPKUHUM(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint b = 0; b < 8; b++)
|
||||
{
|
||||
CPU.VPR[vd]._u8[b+8] = CPU.VPR[va]._u8[b*2];
|
||||
CPU.VPR[vd]._u8[b ] = CPU.VPR[vb]._u8[b*2];
|
||||
CPU.VPR[vd]._u8[b+8] = VA._u8[b*2];
|
||||
CPU.VPR[vd]._u8[b ] = VB._u8[b*2];
|
||||
}
|
||||
}
|
||||
void VPKUHUS(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint b = 0; b < 8; b++)
|
||||
{
|
||||
u16 result = CPU.VPR[va]._u16[b];
|
||||
u16 result = VA._u16[b];
|
||||
|
||||
if (result > UINT8_MAX)
|
||||
{
|
||||
@ -1484,7 +1504,7 @@ private:
|
||||
|
||||
CPU.VPR[vd]._u8[b+8] = (u8)result;
|
||||
|
||||
result = CPU.VPR[vb]._u16[b];
|
||||
result = VB._u16[b];
|
||||
|
||||
if (result > UINT8_MAX)
|
||||
{
|
||||
@ -1497,17 +1517,21 @@ private:
|
||||
}
|
||||
void VPKUWUM(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
CPU.VPR[vd]._u16[h+4] = CPU.VPR[va]._u16[h*2];
|
||||
CPU.VPR[vd]._u16[h ] = CPU.VPR[vb]._u16[h*2];
|
||||
CPU.VPR[vd]._u16[h+4] = VA._u16[h*2];
|
||||
CPU.VPR[vd]._u16[h ] = VB._u16[h*2];
|
||||
}
|
||||
}
|
||||
void VPKUWUS(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 4; h++)
|
||||
{
|
||||
u32 result = CPU.VPR[va]._u32[h];
|
||||
u32 result = VA._u32[h];
|
||||
|
||||
if (result > UINT16_MAX)
|
||||
{
|
||||
@ -1517,7 +1541,7 @@ private:
|
||||
|
||||
CPU.VPR[vd]._u16[h+4] = result;
|
||||
|
||||
result = CPU.VPR[vb]._u32[h];
|
||||
result = VB._u32[h];
|
||||
|
||||
if (result > UINT16_MAX)
|
||||
{
|
||||
@ -1539,30 +1563,28 @@ private:
|
||||
{
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._f[w] = floor(CPU.VPR[vb]._f[w]);
|
||||
CPU.VPR[vd]._f[w] = floorf(CPU.VPR[vb]._f[w]);
|
||||
}
|
||||
}
|
||||
void VRFIN(u32 vd, u32 vb)
|
||||
{
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._f[w] = floor(CPU.VPR[vb]._f[w] + 0.5f);
|
||||
CPU.VPR[vd]._f[w] = nearbyintf(CPU.VPR[vb]._f[w]);
|
||||
}
|
||||
}
|
||||
void VRFIP(u32 vd, u32 vb)
|
||||
{
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._f[w] = ceil(CPU.VPR[vb]._f[w]);
|
||||
CPU.VPR[vd]._f[w] = ceilf(CPU.VPR[vb]._f[w]);
|
||||
}
|
||||
}
|
||||
void VRFIZ(u32 vd, u32 vb)
|
||||
{
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
float f;
|
||||
modff(CPU.VPR[vb]._f[w], &f);
|
||||
CPU.VPR[vd]._f[w] = f;
|
||||
CPU.VPR[vd]._f[w] = truncf(CPU.VPR[vb]._f[w]);
|
||||
}
|
||||
}
|
||||
void VRLB(u32 vd, u32 va, u32 vb) //nf
|
||||
@ -1605,12 +1627,13 @@ private:
|
||||
}
|
||||
void VSL(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u8 sh = CPU.VPR[vb]._u8[0] & 0x7;
|
||||
|
||||
CPU.VPR[vd]._u8[0] = CPU.VPR[va]._u8[0] << sh;
|
||||
CPU.VPR[vd]._u8[0] = VA._u8[0] << sh;
|
||||
for (uint b = 1; b < 16; b++)
|
||||
{
|
||||
CPU.VPR[vd]._u8[b] = (CPU.VPR[va]._u8[b] << sh) | (CPU.VPR[va]._u8[b-1] >> (8 - sh));
|
||||
CPU.VPR[vd]._u8[b] = (VA._u8[b] << sh) | (VA._u8[b-1] >> (8 - sh));
|
||||
}
|
||||
}
|
||||
void VSLB(u32 vd, u32 va, u32 vb)
|
||||
@ -1635,18 +1658,19 @@ private:
|
||||
{
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] << (CPU.VPR[vb]._u8[h*2] & 0xf);
|
||||
CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] << (CPU.VPR[vb]._u16[h] & 0xf);
|
||||
}
|
||||
}
|
||||
void VSLO(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf;
|
||||
|
||||
CPU.VPR[vd].clear();
|
||||
|
||||
for (u8 b = 0; b < 16 - nShift; b++)
|
||||
{
|
||||
CPU.VPR[vd]._u8[15 - b] = CPU.VPR[va]._u8[15 - (b + nShift)];
|
||||
CPU.VPR[vd]._u8[15 - b] = VA._u8[15 - (b + nShift)];
|
||||
}
|
||||
}
|
||||
void VSLW(u32 vd, u32 va, u32 vb)
|
||||
@ -1710,12 +1734,13 @@ private:
|
||||
}
|
||||
void VSR(u32 vd, u32 va, u32 vb) //nf
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u8 sh = CPU.VPR[vb]._u8[0] & 0x7;
|
||||
|
||||
CPU.VPR[vd]._u8[15] = CPU.VPR[va]._u8[15] >> sh;
|
||||
CPU.VPR[vd]._u8[15] = VA._u8[15] >> sh;
|
||||
for (uint b = 14; ~b; b--)
|
||||
{
|
||||
CPU.VPR[vd]._u8[b] = (CPU.VPR[va]._u8[b] >> sh) | (CPU.VPR[va]._u8[b+1] << (8 - sh));
|
||||
CPU.VPR[vd]._u8[b] = (VA._u8[b] >> sh) | (VA._u8[b+1] << (8 - sh));
|
||||
}
|
||||
}
|
||||
void VSRAB(u32 vd, u32 va, u32 vb) //nf
|
||||
@ -1729,14 +1754,14 @@ private:
|
||||
{
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[vd]._s16[h] = CPU.VPR[va]._s16[h] >> (CPU.VPR[vb]._u8[h*2] & 0xf);
|
||||
CPU.VPR[vd]._s16[h] = CPU.VPR[va]._s16[h] >> (CPU.VPR[vb]._u16[h] & 0xf);
|
||||
}
|
||||
}
|
||||
void VSRAW(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._s32[w] = CPU.VPR[va]._s32[w] >> (CPU.VPR[vb]._u8[w*4] & 0x1f);
|
||||
CPU.VPR[vd]._s32[w] = CPU.VPR[va]._s32[w] >> (CPU.VPR[vb]._u32[w] & 0x1f);
|
||||
}
|
||||
}
|
||||
void VSRB(u32 vd, u32 va, u32 vb)
|
||||
@ -1750,25 +1775,26 @@ private:
|
||||
{
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] >> (CPU.VPR[vb]._u8[h*2] & 0xf);
|
||||
CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] >> (CPU.VPR[vb]._u16[h] & 0xf);
|
||||
}
|
||||
}
|
||||
void VSRO(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
u128 VA = CPU.VPR[va];
|
||||
u8 nShift = (CPU.VPR[vb]._u8[0] >> 3) & 0xf;
|
||||
|
||||
CPU.VPR[vd].clear();
|
||||
|
||||
for (u8 b = 0; b < 16 - nShift; b++)
|
||||
{
|
||||
CPU.VPR[vd]._u8[b] = CPU.VPR[va]._u8[b + nShift];
|
||||
CPU.VPR[vd]._u8[b] = VA._u8[b + nShift];
|
||||
}
|
||||
}
|
||||
void VSRW(u32 vd, u32 va, u32 vb)
|
||||
{
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] >> (CPU.VPR[vb]._u8[w*4] & 0x1f);
|
||||
CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] >> (CPU.VPR[vb]._u32[w] & 0x1f);
|
||||
}
|
||||
}
|
||||
void VSUBCUW(u32 vd, u32 va, u32 vb) //nf
|
||||
@ -2029,50 +2055,56 @@ private:
|
||||
}
|
||||
void VUPKHPX(u32 vd, u32 vb)
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._s8[(3 - w)*4 + 3] = CPU.VPR[vb]._s8[w*2 + 0] >> 7; // signed shift sign extends
|
||||
CPU.VPR[vd]._u8[(3 - w)*4 + 2] = (CPU.VPR[vb]._u8[w*2 + 0] >> 2) & 0x1f;
|
||||
CPU.VPR[vd]._u8[(3 - w)*4 + 1] = ((CPU.VPR[vb]._u8[w*2 + 0] & 0x3) << 3) | ((CPU.VPR[vb]._u8[w*2 + 1] >> 5) & 0x7);
|
||||
CPU.VPR[vd]._u8[(3 - w)*4 + 0] = CPU.VPR[vb]._u8[w*2 + 1] & 0x1f;
|
||||
CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[8 + w*2 + 1] >> 7; // signed shift sign extends
|
||||
CPU.VPR[vd]._u8[w*4 + 2] = (VB._u8[8 + w*2 + 1] >> 2) & 0x1f;
|
||||
CPU.VPR[vd]._u8[w*4 + 1] = ((VB._u8[8 + w*2 + 1] & 0x3) << 3) | ((VB._u8[8 + w*2 + 0] >> 5) & 0x7);
|
||||
CPU.VPR[vd]._u8[w*4 + 0] = VB._u8[8 + w*2 + 0] & 0x1f;
|
||||
}
|
||||
}
|
||||
void VUPKHSB(u32 vd, u32 vb)
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[vd]._s16[h] = CPU.VPR[vb]._s8[h];
|
||||
CPU.VPR[vd]._s16[h] = VB._s8[8 + h];
|
||||
}
|
||||
}
|
||||
void VUPKHSH(u32 vd, u32 vb)
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._s32[w] = CPU.VPR[vb]._s16[w];
|
||||
CPU.VPR[vd]._s32[w] = VB._s16[4 + w];
|
||||
}
|
||||
}
|
||||
void VUPKLPX(u32 vd, u32 vb)
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._s8[(3 - w)*4 + 3] = CPU.VPR[vb]._s8[8 + w*2 + 0] >> 7; // signed shift sign extends
|
||||
CPU.VPR[vd]._u8[(3 - w)*4 + 2] = (CPU.VPR[vb]._u8[8 + w*2 + 0] >> 2) & 0x1f;
|
||||
CPU.VPR[vd]._u8[(3 - w)*4 + 1] = ((CPU.VPR[vb]._u8[8 + w*2 + 0] & 0x3) << 3) | ((CPU.VPR[vb]._u8[8 + w*2 + 1] >> 5) & 0x7);
|
||||
CPU.VPR[vd]._u8[(3 - w)*4 + 0] = CPU.VPR[vb]._u8[8 + w*2 + 1] & 0x1f;
|
||||
CPU.VPR[vd]._s8[w*4 + 3] = VB._s8[w*2 + 1] >> 7; // signed shift sign extends
|
||||
CPU.VPR[vd]._u8[w*4 + 2] = (VB._u8[w*2 + 1] >> 2) & 0x1f;
|
||||
CPU.VPR[vd]._u8[w*4 + 1] = ((VB._u8[w*2 + 1] & 0x3) << 3) | ((VB._u8[w*2 + 0] >> 5) & 0x7);
|
||||
CPU.VPR[vd]._u8[w*4 + 0] = VB._u8[w*2 + 0] & 0x1f;
|
||||
}
|
||||
}
|
||||
void VUPKLSB(u32 vd, u32 vb) //nf
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint h = 0; h < 8; h++)
|
||||
{
|
||||
CPU.VPR[vd]._s16[h] = CPU.VPR[vb]._s8[8 + h];
|
||||
CPU.VPR[vd]._s16[h] = VB._s8[h];
|
||||
}
|
||||
}
|
||||
void VUPKLSH(u32 vd, u32 vb)
|
||||
{
|
||||
u128 VB = CPU.VPR[vb];
|
||||
for (uint w = 0; w < 4; w++)
|
||||
{
|
||||
CPU.VPR[vd]._s32[w] = CPU.VPR[vb]._s16[4 + w];
|
||||
CPU.VPR[vd]._s32[w] = VB._s16[w];
|
||||
}
|
||||
}
|
||||
void VXOR(u32 vd, u32 va, u32 vb)
|
||||
@ -2792,7 +2824,7 @@ private:
|
||||
return;
|
||||
}
|
||||
const u8 eb = (addr & 0xf) >> 1;
|
||||
vm::write16((u32)addr, CPU.VPR[vs]._u16[7 - eb]);
|
||||
vm::write16((u32)addr & 0xFFFFFFFE, CPU.VPR[vs]._u16[7 - eb]);
|
||||
}
|
||||
void STDUX(u32 rs, u32 ra, u32 rb)
|
||||
{
|
||||
@ -2828,7 +2860,7 @@ private:
|
||||
return;
|
||||
}
|
||||
const u8 eb = (addr & 0xf) >> 2;
|
||||
vm::write32((u32)addr, CPU.VPR[vs]._u32[3 - eb]);
|
||||
vm::write32((u32)addr & 0xFFFFFFFC, CPU.VPR[vs]._u32[3 - eb]);
|
||||
}
|
||||
void ADDZE(u32 rd, u32 ra, u32 oe, bool rc)
|
||||
{
|
||||
|
@ -1016,7 +1016,26 @@ void Compiler::VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) {
|
||||
}
|
||||
|
||||
void Compiler::VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) {
|
||||
InterpreterCall("VMSUMSHS", &PPUInterpreter::VMSUMSHS, vd, va, vb, vc);
|
||||
auto va_v8i16 = GetVrAsIntVec(va, 16);
|
||||
auto vb_v8i16 = GetVrAsIntVec(vb, 16);
|
||||
auto vc_v4i32 = GetVrAsIntVec(vc, 32);
|
||||
auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16);
|
||||
|
||||
auto tmp1_v4i32 = m_ir_builder->CreateLShr(vc_v4i32, 31);
|
||||
tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF)));
|
||||
auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
|
||||
auto tmp2_v4i32 = m_ir_builder->CreateXor(vc_v4i32, res_v4i32);
|
||||
tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32);
|
||||
auto sum_v4i32 = m_ir_builder->CreateAdd(vc_v4i32, res_v4i32);
|
||||
auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
|
||||
auto tmp3_v4i32 = m_ir_builder->CreateXor(vc_v4i32, sum_v4i32);
|
||||
tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32);
|
||||
tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31);
|
||||
auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16));
|
||||
auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8);
|
||||
SetVr(vd, res_v16i8);
|
||||
|
||||
// TODO: Set VSCR.SAT
|
||||
}
|
||||
|
||||
void Compiler::VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) {
|
||||
@ -1074,7 +1093,31 @@ void Compiler::VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) {
|
||||
}
|
||||
|
||||
void Compiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) {
|
||||
InterpreterCall("VMSUMUHS", &PPUInterpreter::VMSUMUHS, vd, va, vb, vc);
|
||||
auto va_v8i16 = GetVrAsIntVec(va, 16);
|
||||
auto vb_v8i16 = GetVrAsIntVec(vb, 16);
|
||||
auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8));
|
||||
auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8));
|
||||
auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32);
|
||||
|
||||
auto undef_v8i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8));
|
||||
u32 mask1_v4i32[4] = {0, 2, 4, 6};
|
||||
auto tmp1_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32));
|
||||
u32 mask2_v4i32[4] = {1, 3, 5, 7};
|
||||
auto tmp2_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32));
|
||||
|
||||
auto vc_v4i32 = GetVrAsIntVec(vc, 32);
|
||||
auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32);
|
||||
auto cmp_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, tmp1_v4i32);
|
||||
auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
|
||||
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32);
|
||||
res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32);
|
||||
cmp_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, vc_v4i32);
|
||||
cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4));
|
||||
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32);
|
||||
|
||||
SetVr(vd, res_v4i32);
|
||||
|
||||
// TODO: Set VSCR.SAT
|
||||
}
|
||||
|
||||
void Compiler::VMULESB(u32 vd, u32 va, u32 vb) {
|
||||
@ -1204,7 +1247,37 @@ void Compiler::VPERM(u32 vd, u32 va, u32 vb, u32 vc) {
|
||||
}
|
||||
|
||||
void Compiler::VPKPX(u32 vd, u32 va, u32 vb) {
|
||||
InterpreterCall("VPKPX", &PPUInterpreter::VPKPX, vd, va, vb);
|
||||
auto va_v4i32 = GetVrAsIntVec(va, 32);
|
||||
auto vb_v4i32 = GetVrAsIntVec(vb, 32);
|
||||
|
||||
auto tmpa_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7)));
|
||||
tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000)));
|
||||
va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10)));
|
||||
va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000)));
|
||||
tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32);
|
||||
tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000)));
|
||||
va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3)));
|
||||
va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000)));
|
||||
tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32);
|
||||
auto tmpa_v8i16 = m_ir_builder->CreateBitCast(tmpa_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8));
|
||||
|
||||
auto tmpb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7)));
|
||||
tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000)));
|
||||
vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10)));
|
||||
vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000)));
|
||||
tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32);
|
||||
tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000)));
|
||||
vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3)));
|
||||
vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000)));
|
||||
tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32);
|
||||
auto tmpb_v8i16 = m_ir_builder->CreateBitCast(tmpb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8));
|
||||
|
||||
u32 mask_v8i32[8] = {1, 3, 5, 7, 9, 11, 13, 15};
|
||||
auto res_v8i16 = m_ir_builder->CreateShuffleVector(tmpb_v8i16, tmpa_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
|
||||
|
||||
SetVr(vd, res_v8i16);
|
||||
|
||||
// TODO: Implement with pext on CPUs with BMI
|
||||
}
|
||||
|
||||
void Compiler::VPKSHSS(u32 vd, u32 va, u32 vb) {
|
||||
@ -1669,27 +1742,69 @@ void Compiler::VSUM4UBS(u32 vd, u32 va, u32 vb) {
|
||||
}
|
||||
|
||||
void Compiler::VUPKHPX(u32 vd, u32 vb) {
|
||||
InterpreterCall("VUPKHPX", &PPUInterpreter::VUPKHPX, vd, vb);
|
||||
auto vb_v8i16 = GetVrAsIntVec(vb, 16);
|
||||
u32 mask_v8i32[8] = { 4, 4, 5, 5, 6, 6, 7, 7 };
|
||||
vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
|
||||
|
||||
auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
|
||||
vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10)));
|
||||
auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3)));
|
||||
tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00)));
|
||||
auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6)));
|
||||
tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F)));
|
||||
auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000)));
|
||||
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32);
|
||||
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32);
|
||||
SetVr(vd, res_v4i32);
|
||||
}
|
||||
|
||||
void Compiler::VUPKHSB(u32 vd, u32 vb) {
|
||||
InterpreterCall("VUPKHSB", &PPUInterpreter::VUPKHSB, vd, vb);
|
||||
auto vb_v16i8 = GetVrAsIntVec(vb, 8);
|
||||
u32 mask_v8i32[8] = { 8, 9, 10, 11, 12, 13, 14, 15 };
|
||||
auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
|
||||
auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8));
|
||||
SetVr(vd, res_v8i16);
|
||||
}
|
||||
|
||||
void Compiler::VUPKHSH(u32 vd, u32 vb) {
|
||||
InterpreterCall("VUPKHSH", &PPUInterpreter::VUPKHSH, vd, vb);
|
||||
auto vb_v8i16 = GetVrAsIntVec(vb, 16);
|
||||
u32 mask_v4i32[4] = { 4, 5, 6, 7 };
|
||||
auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32));
|
||||
auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
|
||||
SetVr(vd, res_v4i32);
|
||||
}
|
||||
|
||||
void Compiler::VUPKLPX(u32 vd, u32 vb) {
|
||||
InterpreterCall("VUPKLPX", &PPUInterpreter::VUPKLPX, vd, vb);
|
||||
auto vb_v8i16 = GetVrAsIntVec(vb, 16);
|
||||
u32 mask_v8i32[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
|
||||
vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
|
||||
|
||||
auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
|
||||
vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10)));
|
||||
auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3)));
|
||||
tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00)));
|
||||
auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6)));
|
||||
tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F)));
|
||||
auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000)));
|
||||
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32);
|
||||
res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32);
|
||||
SetVr(vd, res_v4i32);
|
||||
}
|
||||
|
||||
void Compiler::VUPKLSB(u32 vd, u32 vb) {
|
||||
InterpreterCall("VUPKLSB", &PPUInterpreter::VUPKLSB, vd, vb);
|
||||
auto vb_v16i8 = GetVrAsIntVec(vb, 8);
|
||||
u32 mask_v8i32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
|
||||
auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32));
|
||||
auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8));
|
||||
SetVr(vd, res_v8i16);
|
||||
}
|
||||
|
||||
void Compiler::VUPKLSH(u32 vd, u32 vb) {
|
||||
InterpreterCall("VUPKLSH", &PPUInterpreter::VUPKLSH, vd, vb);
|
||||
auto vb_v8i16 = GetVrAsIntVec(vb, 16);
|
||||
u32 mask_v4i32[4] = { 0, 1, 2, 3 };
|
||||
auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32));
|
||||
auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4));
|
||||
SetVr(vd, res_v4i32);
|
||||
}
|
||||
|
||||
void Compiler::VXOR(u32 vd, u32 va, u32 vb) {
|
||||
@ -5250,9 +5365,9 @@ std::shared_ptr<RecompilationEngine> RecompilationEngine::s_the_instance = nullp
|
||||
|
||||
RecompilationEngine::RecompilationEngine()
|
||||
: ThreadBase("PPU Recompilation Engine")
|
||||
, m_log(nullptr)
|
||||
, m_next_ordinal(0)
|
||||
, m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn)
|
||||
, m_log(nullptr) {
|
||||
, m_compiler(*this, ExecutionEngine::ExecuteFunction, ExecutionEngine::ExecuteTillReturn) {
|
||||
m_compiler.RunAllTests();
|
||||
}
|
||||
|
||||
|
@ -1022,6 +1022,9 @@ namespace ppu_recompiler_llvm {
|
||||
};
|
||||
};
|
||||
|
||||
/// Log
|
||||
llvm::raw_fd_ostream * m_log;
|
||||
|
||||
/// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue.
|
||||
std::mutex m_pending_execution_traces_lock;
|
||||
|
||||
@ -1047,9 +1050,6 @@ namespace ppu_recompiler_llvm {
|
||||
/// PPU Compiler
|
||||
Compiler m_compiler;
|
||||
|
||||
/// Log
|
||||
llvm::raw_fd_ostream * m_log;
|
||||
|
||||
/// Executable lookup table
|
||||
Executable m_executable_lookup[10000]; // TODO: Adjust size
|
||||
|
||||
|
@ -432,9 +432,10 @@ void Compiler::RunAllTests() {
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLW, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMMBM, 0, 5, 0, 1, 2, 3);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHM, 0, 5, 0, 1, 2, 3);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHS, 0, 5, 0, 1, 2, 3);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUBM, 0, 5, 0, 1, 2, 3);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHM, 0, 5, 0, 1, 2, 3);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNMSUBFP, 0, 5, 0, 1, 2, 3);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHS, 0, 5, 0, 1, 2, 3);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESB, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESH, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULEUB, 0, 5, 0, 1, 2);
|
||||
@ -443,9 +444,11 @@ void Compiler::RunAllTests() {
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOSH, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUB, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUH, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNMSUBFP, 0, 5, 0, 1, 2, 3);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNOR, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VOR, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPERM, 0, 5, 0, 1, 2, 3);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKPX, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHSS, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHUS, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSWSS, 0, 5, 0, 1, 2);
|
||||
@ -494,6 +497,12 @@ void Compiler::RunAllTests() {
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHS, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWM, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWS, 0, 5, 0, 1, 2);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHPX, 0, 5, 0, 1);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSB, 0, 5, 0, 1);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSH, 0, 5, 0, 1);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLPX, 0, 5, 0, 1);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLSB, 0, 5, 0, 1);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLSH, 0, 5, 0, 1);
|
||||
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VXOR, 0, 5, 0, 1, 2);
|
||||
// TODO: Rest of the vector instructions
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user