mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-26 04:32:35 +01:00
SPU LLVM: Microfixes
- Avoid vpermb path in shufb when op.ra == op.rb
- Reverse indices with (c ^ 0xf) rather than (~c) in vpermb path; vpternlogd is a 3-input operation and requires needless mov instructions to avoid destroying inputs
This commit is contained in:
parent
004d9b09b8
commit
64616f1408
@@ -7695,7 +7695,7 @@ public:
|
||||
if (auto [ok, bs] = match_expr(b, byteswap(match<u8[16]>())); ok)
|
||||
{
|
||||
// Undo endian swapping, and rely on pshufb/vperm2b to re-reverse endianness
|
||||
if (m_use_avx512_icl)
|
||||
if (m_use_avx512_icl && (op.ra != op.rb))
|
||||
{
|
||||
if (perm_only)
|
||||
{
|
||||
@@ -7757,19 +7757,19 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if (m_use_avx512_icl)
|
||||
if (m_use_avx512_icl && (op.ra != op.rb))
|
||||
{
|
||||
if (perm_only)
|
||||
{
|
||||
set_vr(op.rt4, vperm2b256to128(b, a, eval(~c)));
|
||||
set_vr(op.rt4, vperm2b256to128(a, b, eval(c ^ 0xf)));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
|
||||
const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
|
||||
const auto cr = eval(~c);
|
||||
const auto ab = vperm2b256to128(b, a, cr);
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(cr) >= 0, mm, ab));
|
||||
const auto cr = eval(c ^ 0xf);
|
||||
const auto ab = vperm2b256to128(a, b, cr);
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
|
||||
return;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user