mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-31 20:41:45 +01:00
SPU LLVM: Optimize GB/GBH/GBB with a GFNI path
- Abuses GFNI to extract bits from bytes, from 5->2 instructions in most cases
This commit is contained in:
parent
0140925e65
commit
d1bea790f3
@ -154,6 +154,16 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
|
||||
m_use_vnni = true;
|
||||
}
|
||||
|
||||
// Test GFNI feature (TODO)
|
||||
if (cpu == "tremont" ||
|
||||
cpu == "gracemont" ||
|
||||
cpu == "alderlake" ||
|
||||
cpu == "raptorlake" ||
|
||||
cpu == "meteorlake")
|
||||
{
|
||||
m_use_gfni = true;
|
||||
}
|
||||
|
||||
// Test AVX-512_icelake features (TODO)
|
||||
if (cpu == "icelake" ||
|
||||
cpu == "icelake-client" ||
|
||||
@ -168,6 +178,7 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
|
||||
m_use_avx512 = true;
|
||||
m_use_avx512_icl = true;
|
||||
m_use_vnni = true;
|
||||
m_use_gfni = true;
|
||||
}
|
||||
|
||||
// Aarch64 CPUs
|
||||
|
@ -2971,6 +2971,9 @@ protected:
|
||||
// Allow VNNI
|
||||
bool m_use_vnni = false;
|
||||
|
||||
// Allow GFNI
|
||||
bool m_use_gfni = false;
|
||||
|
||||
// Allow Icelake tier AVX-512
|
||||
bool m_use_avx512_icl = false;
|
||||
|
||||
|
@ -8134,6 +8134,18 @@ public:
|
||||
|
||||
// GB: derives one bit from each of the four 32-bit lanes of register ra, packs the
// four bits into a single value and stores it in lane 3 of rt; every other lane of
// rt is written as zero (see the generic path at the bottom).
void GB(spu_opcode_t op)
|
||||
{
|
||||
// GFNI trick to extract selected bit from bytes
|
||||
// By treating the first input as constant, and the second input as variable,
|
||||
// with only 1 bit set in our constant, gf2p8affineqb will extract that selected bit
|
||||
// from each byte of the second operand
|
||||
// This path is only taken when m_use_gfni is set.
if (m_use_gfni)
|
||||
{
|
||||
// Reinterpret ra as 16 bytes so individual bytes can be selected
const auto a = get_vr<u8[16]>(op.ra);
|
||||
// The last four shuffle slots take source bytes 12, 8, 4 and 0; every other slot
// uses index 16, which is past the 16-byte source (presumably a zero lane in
// zshuffle - TODO confirm against its definition)
const auto as = zshuffle(a, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 12, 8, 4, 0);
|
||||
// Constant operand: 0x01 in two bytes, zero elsewhere; variable operand: the
// shuffled bytes; immediate: 0.
// NOTE(review): the first eight shuffle slots are all index 16, so the first 0x01
// in the constant looks like it cannot contribute anything - confirm against the
// per-qword behaviour of gf2p8affineqb.
set_vr(op.rt, gf2p8affineqb(build<u8[16]>(0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0), as, 0x0));
|
||||
return;
|
||||
}
|
||||
|
||||
// Generic path: view ra as four signed 32-bit lanes
const auto a = get_vr<s32[4]>(op.ra);
|
||||
// Truncate every lane to a single bit, pack the four bits into an i4 and
// zero-extend the result to 32 bits
const auto m = zext<u32>(bitcast<i4>(trunc<bool[4]>(a)));
|
||||
// Place the packed value in lane 3 of an otherwise all-zero u32[4] vector
set_vr(op.rt, insert(splat<u32[4]>(0), 3, eval(m)));
|
||||
@ -8141,6 +8153,14 @@ public:
|
||||
|
||||
// GBH: derives one bit from each of the eight 16-bit lanes of register ra, packs the
// eight bits into a single value and stores it in lane 3 of rt; every other lane of
// rt is written as zero (see the generic path at the bottom).
void GBH(spu_opcode_t op)
|
||||
{
|
||||
// GFNI path, only taken when m_use_gfni is set: uses gf2p8affineqb with a constant
// first operand and the shuffled source bytes as second operand.
if (m_use_gfni)
|
||||
{
|
||||
// Reinterpret ra as 16 bytes so individual bytes can be selected
const auto a = get_vr<u8[16]>(op.ra);
|
||||
// The last eight shuffle slots take the even source bytes 14, 12, ..., 2, 0; the
// first eight slots use index 16, which is past the 16-byte source (presumably a
// zero lane in zshuffle - TODO confirm against its definition)
const auto as = zshuffle(a, 16, 16, 16, 16, 16, 16, 16, 16, 14, 12, 10, 8, 6, 4, 2, 0);
|
||||
// Constant operand: 0x01 in two bytes, zero elsewhere; variable operand: the
// shuffled bytes; immediate: 0.
// NOTE(review): the first eight shuffle slots are all index 16, so the first 0x01
// in the constant looks like it cannot contribute anything - confirm against the
// per-qword behaviour of gf2p8affineqb.
set_vr(op.rt, gf2p8affineqb(build<u8[16]>(0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0), as, 0x0));
|
||||
return;
|
||||
}
|
||||
|
||||
// Generic path: view ra as eight signed 16-bit lanes
const auto a = get_vr<s16[8]>(op.ra);
|
||||
// Truncate every lane to a single bit, pack the eight bits into a u8 and
// zero-extend the result to 32 bits
const auto m = zext<u32>(bitcast<u8>(trunc<bool[8]>(a)));
|
||||
// Place the packed value in lane 3 of an otherwise all-zero u32[4] vector
set_vr(op.rt, insert(splat<u32[4]>(0), 3, eval(m)));
|
||||
@ -8148,7 +8168,16 @@ public:
|
||||
|
||||
// GBB: derives one bit from each of the 16 bytes of register ra, packs the 16 bits
// into a single value and stores it in rt. In the generic path the value is
// zero-extended to 32 bits and written to lane 3 of an otherwise all-zero u32[4]
// vector; the GFNI path is an alternative sequence for the same instruction.
void GBB(spu_opcode_t op)
{
	// Single declaration of the source register, viewed as 16 unsigned bytes.
	// (The text previously also carried a stale s8[16] declaration of the same
	// name, which is a redefinition in this scope.)
	const auto a = get_vr<u8[16]>(op.ra);

	if (m_use_gfni)
	{
		// GFNI trick: with a constant first operand that has a single bit set, and
		// the source bytes as second operand, gf2p8affineqb extracts that selected
		// bit from each byte of the second operand.

		// Reverse the byte order inside each 8-byte half of the source
		const auto as = zshuffle(a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);

		// Constant has 0x01 in slots 7 and 8 only, so only those two result bytes
		// can be non-zero (assuming a zero immediate leaves other bytes at zero)
		const auto m = gf2p8affineqb(build<u8[16]>(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x01, 0x01, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0), as, 0x0);

		// Shift the result up by five byte slots (slots 7/8 land in 12/13); index 16
		// is past the 16-byte source (presumably a zero lane in zshuffle - TODO confirm)
		set_vr(op.rt, zshuffle(m, 16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
		return;
	}

	// Generic path: truncate every byte to a single bit, pack the 16 bits into a
	// u16 and zero-extend to 32 bits
	const auto m = zext<u32>(bitcast<u16>(trunc<bool[16]>(a)));

	// Place the packed value in lane 3 of an otherwise all-zero u32[4] vector
	set_vr(op.rt, insert(splat<u32[4]>(0), 3, eval(m)));
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user