mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 10:42:36 +01:00
SPU LLVM: Add optimized path for spu_re_acc special cases
- Uses vfixupimmps (AVX-512), 5 instructions down to 1
This commit is contained in:
parent
4832267307
commit
8ca60df1ab
@ -3729,6 +3729,22 @@ public:
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T1, typename T2, typename T3>
|
||||||
|
value_t<f32[4]> vfixupimmps(T1 a, T2 b, T3 c, u8 d, u8 e)
|
||||||
|
{
|
||||||
|
value_t<f32[4]> result;
|
||||||
|
|
||||||
|
const auto data0 = a.eval(m_ir);
|
||||||
|
const auto data1 = b.eval(m_ir);
|
||||||
|
const auto data2 = c.eval(m_ir);
|
||||||
|
const auto immediate = (llvm_const_int<u32>{d});
|
||||||
|
const auto imm32 = immediate.eval(m_ir);
|
||||||
|
const auto immediate2 = (llvm_const_int<u8>{e});
|
||||||
|
const auto imm8 = immediate2.eval(m_ir);
|
||||||
|
result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_mask_fixupimm_ps_128), {data0, data1, data2, imm32, imm8});\
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
llvm::Value* load_const(llvm::GlobalVariable* g, llvm::Value* i, llvm::Type* type = nullptr)
|
llvm::Value* load_const(llvm::GlobalVariable* g, llvm::Value* i, llvm::Type* type = nullptr)
|
||||||
{
|
{
|
||||||
return m_ir->CreateLoad(type ? type : g->getValueType(), m_ir->CreateGEP(g->getValueType(), g, {m_ir->getInt64(0), m_ir->CreateZExtOrTrunc(i, get_type<u64>())}));
|
return m_ir->CreateLoad(type ? type : g->getValueType(), m_ir->CreateGEP(g->getValueType(), g, {m_ir->getInt64(0), m_ir->CreateZExtOrTrunc(i, get_type<u64>())}));
|
||||||
|
@ -6704,8 +6704,22 @@ public:
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci)
|
if (m_use_avx512)
|
||||||
{
|
{
|
||||||
|
register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci)
|
||||||
|
{
|
||||||
|
const auto div = value<f32[4]>(ci->getOperand(0));
|
||||||
|
const auto the_one = value<f32[4]>(ci->getOperand(1));
|
||||||
|
|
||||||
|
const auto div_result = the_one / div;
|
||||||
|
|
||||||
|
return vfixupimmps(div_result, div_result, splat<u32[4]>(0x00330088u), 0, 0xff);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci)
|
||||||
|
{
|
||||||
const auto div = value<f32[4]>(ci->getOperand(0));
|
const auto div = value<f32[4]>(ci->getOperand(0));
|
||||||
const auto the_one = value<f32[4]>(ci->getOperand(1));
|
const auto the_one = value<f32[4]>(ci->getOperand(1));
|
||||||
|
|
||||||
@ -6718,10 +6732,11 @@ public:
|
|||||||
|
|
||||||
const auto and_mask = bitcast<u32[4]>(result_cmp_nan) & splat<u32[4]>(0xFFFFFFFFu);
|
const auto and_mask = bitcast<u32[4]>(result_cmp_nan) & splat<u32[4]>(0xFFFFFFFFu);
|
||||||
const auto or_mask = bitcast<u32[4]>(result_cmp_inf) & splat<u32[4]>(0xFFFFFFFu);
|
const auto or_mask = bitcast<u32[4]>(result_cmp_inf) & splat<u32[4]>(0xFFFFFFFu);
|
||||||
|
|
||||||
return bitcast<f32[4]>((bitcast<u32[4]>(div_result) & and_mask) | or_mask);
|
return bitcast<f32[4]>((bitcast<u32[4]>(div_result) & and_mask) | or_mask);
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
const auto [a, b, c] = get_vrs<f32[4]>(op.ra, op.rb, op.rc);
|
const auto [a, b, c] = get_vrs<f32[4]>(op.ra, op.rb, op.rc);
|
||||||
static const auto MT = match<f32[4]>();
|
static const auto MT = match<f32[4]>();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user