1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 10:42:36 +01:00

SPU LLVM: rewrite comparison on non-xfloat path of CFLTU, CFLTS

CFLTU on non-xfloat path is accurate as xfloat path now.
* Also optimize FCTIW like FCTIWZ (PPU)
This commit is contained in:
Eladash 2019-10-25 11:03:49 +03:00 committed by Ivan
parent ef8872c0d7
commit 923cd7ad72
3 changed files with 10 additions and 8 deletions

View File

@ -4832,9 +4832,9 @@ bool ppu_interpreter::FCMPU(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::FCTIW(ppu_thread& ppu, ppu_opcode_t op)
{
const f64 b = ppu.fpr[op.frb];
const s32 res = b >= f64(INT32_MAX) ? INT32_MAX : _mm_cvtsd_si32(_mm_load_sd(&b));
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(res);
const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
const auto res = _mm_xor_si128(_mm_cvtpd_epi32(b), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(0x80000000))));
ppu.fpr[op.frd] = std::bit_cast<f64, s64>(_mm_cvtsi128_si32(res));
if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
return true;
}

View File

@ -4004,8 +4004,10 @@ void PPUTranslator::FRSP(ppu_opcode_t op)
void PPUTranslator::FCTIW(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
SetFpr(op.frd, m_ir->CreateSelect(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), f64(INT32_MAX))), m_ir->getInt32(INT32_MAX),
Call(GetType<s32>(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::exp2l(31.))), GetType<s32>());
// fix result saturation (0x80000000 -> 0x7fffffff)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s32>(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fr", b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fi", b));

View File

@ -7509,7 +7509,7 @@ public:
value_t<s32[4]> r;
r.value = m_ir->CreateFPToSI(a.value, get_type<s32[4]>());
set_vr(op.rt, r ^ sext<s32[4]>(fcmp_ord(a >= fsplat<f32[4]>(std::exp2(31.f)))));
set_vr(op.rt, r ^ sext<s32[4]>(bitcast<s32[4]>(a) > splat<s32[4]>(((31 + 127) << 23) - 1)));
}
}
@ -7568,7 +7568,7 @@ public:
}
r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
set_vr(op.rt, select(fcmp_uno(a >= fsplat<f64[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & sext<s32[4]>(fcmp_ord(a >= fsplat<f64[4]>(0.)))));
set_vr(op.rt, select(fcmp_ord(a >= fsplat<f64[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & sext<s32[4]>(fcmp_ord(a >= fsplat<f64[4]>(0.)))));
}
else
{
@ -7583,7 +7583,7 @@ public:
value_t<s32[4]> r;
r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
set_vr(op.rt, select(fcmp_uno(a >= fsplat<f32[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
set_vr(op.rt, select(bitcast<s32[4]>(a) > splat<s32[4]>(((32 + 127) << 23) - 1), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
}
}