mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 10:42:36 +01:00
SPU LLVM: rewrite comparison on non-xfloat path of CFLTU, CFLTS
CFLTU on the non-xfloat path is now as accurate as the xfloat path.
* Also optimize FCTIW like FCTIWZ (PPU)
This commit is contained in:
parent
ef8872c0d7
commit
923cd7ad72
@ -4832,9 +4832,9 @@ bool ppu_interpreter::FCMPU(ppu_thread& ppu, ppu_opcode_t op)
|
||||
|
||||
bool ppu_interpreter::FCTIW(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const f64 b = ppu.fpr[op.frb];
|
||||
const s32 res = b >= f64(INT32_MAX) ? INT32_MAX : _mm_cvtsd_si32(_mm_load_sd(&b));
|
||||
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(res);
|
||||
const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
|
||||
const auto res = _mm_xor_si128(_mm_cvtpd_epi32(b), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(0x80000000))));
|
||||
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(_mm_cvtsi128_si32(res));
|
||||
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
|
||||
return true;
|
||||
}
|
||||
|
@ -4004,8 +4004,10 @@ void PPUTranslator::FRSP(ppu_opcode_t op)
|
||||
void PPUTranslator::FCTIW(ppu_opcode_t op)
|
||||
{
|
||||
const auto b = GetFpr(op.frb);
|
||||
SetFpr(op.frd, m_ir->CreateSelect(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), f64(INT32_MAX))), m_ir->getInt32(INT32_MAX),
|
||||
Call(GetType<s32>(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
|
||||
const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::exp2l(31.))), GetType<s32>());
|
||||
|
||||
// fix result saturation (0x80000000 -> 0x7fffffff)
|
||||
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s32>(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
|
||||
|
||||
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fr", b));
|
||||
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fi", b));
|
||||
|
@ -7509,7 +7509,7 @@ public:
|
||||
|
||||
value_t<s32[4]> r;
|
||||
r.value = m_ir->CreateFPToSI(a.value, get_type<s32[4]>());
|
||||
set_vr(op.rt, r ^ sext<s32[4]>(fcmp_ord(a >= fsplat<f32[4]>(std::exp2(31.f)))));
|
||||
set_vr(op.rt, r ^ sext<s32[4]>(bitcast<s32[4]>(a) > splat<s32[4]>(((31 + 127) << 23) - 1)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -7568,7 +7568,7 @@ public:
|
||||
}
|
||||
|
||||
r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
|
||||
set_vr(op.rt, select(fcmp_uno(a >= fsplat<f64[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & sext<s32[4]>(fcmp_ord(a >= fsplat<f64[4]>(0.)))));
|
||||
set_vr(op.rt, select(fcmp_ord(a >= fsplat<f64[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & sext<s32[4]>(fcmp_ord(a >= fsplat<f64[4]>(0.)))));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -7583,7 +7583,7 @@ public:
|
||||
|
||||
value_t<s32[4]> r;
|
||||
r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
|
||||
set_vr(op.rt, select(fcmp_uno(a >= fsplat<f32[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
|
||||
set_vr(op.rt, select(bitcast<s32[4]>(a) > splat<s32[4]>(((32 + 127) << 23) - 1), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user