From a560498cd4d18b4dff820c35f9cb3a5f00fbf630 Mon Sep 17 00:00:00 2001
From: Eladash
Date: Wed, 31 Jul 2019 20:35:08 +0300
Subject: [PATCH] ppu: Improve FCTIW, FCTIWZ, FCTID and FCTIDZ

Saturate the positive-overflow case of the PPU float-to-integer
conversions, in both the interpreter and the LLVM translator:

- FCTIW: select INT32_MAX when the source is >= f64(INT32_MAX),
  otherwise use the cvtsd2si result.
- FCTIWZ: XOR the truncating 32-bit conversion with the compare mask
  for source >= 2^31 (0x80000000 -> 0x7fffffff).
- FCTID/FCTIDZ: same XOR fix-up on the 64-bit conversions for
  source >= 2^63 (0x8000000000000000 -> 0x7fffffffffffffff).
  FCTIDZ keeps the truncating conversion (cvttsd2si64).

---
Review notes:
- Interpreter FCTIDZ uses _mm_cvttsd_si64 (truncating), as the line it
  replaces did; an earlier copy of this patch used the rounding
  _mm_cvtsd_si64 there.
- Translator FCTID/FCTIDZ compare against std::exp2l(63.), not
  std::exp2l(31.), matching the 64-bit saturation comment and the
  interpreter's f64(1ull << 63) threshold.
- NOTE(review): template argument lists (e.g. on std::bit_cast, GetType,
  GetUndef) and the author address look stripped in this copy - restore
  them from upstream before applying. The blob ids on the "index" lines
  predate the two fixes above.

 rpcs3/Emu/Cell/PPUInterpreter.cpp | 16 ++++++++++++----
 rpcs3/Emu/Cell/PPUTranslator.cpp  | 26 ++++++++++++++------------
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp
index 98428b20ea..5b7bf8c1c5 100644
--- a/rpcs3/Emu/Cell/PPUInterpreter.cpp
+++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp
@@ -4735,14 +4735,18 @@ bool ppu_interpreter::FRSP(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter::FCTIW(ppu_thread& ppu, ppu_opcode_t op)
 {
-	ppu.fpr[op.frd] = std::bit_cast(_mm_cvtsd_si32(_mm_load_sd(&ppu.fpr[op.frb])));
+	const f64 b = ppu.fpr[op.frb];
+	const s32 res = b >= f64(INT32_MAX) ? INT32_MAX : _mm_cvtsd_si32(_mm_load_sd(&b));
+	ppu.fpr[op.frd] = std::bit_cast(res);
 	if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
 	return true;
 }
 
 bool ppu_interpreter::FCTIWZ(ppu_thread& ppu, ppu_opcode_t op)
 {
-	ppu.fpr[op.frd] = std::bit_cast(_mm_cvttsd_si32(_mm_load_sd(&ppu.fpr[op.frb])));
+	const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
+	const auto res = _mm_xor_si128(_mm_cvttpd_epi32(b), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(0x80000000))));
+	ppu.fpr[op.frd] = std::bit_cast(_mm_extract_epi32(res, 0));
 	if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
 	return true;
 }
@@ -4859,14 +4863,18 @@ bool ppu_interpreter::FABS(ppu_thread& ppu, ppu_opcode_t op)
 
 bool ppu_interpreter::FCTID(ppu_thread& ppu, ppu_opcode_t op)
 {
-	ppu.fpr[op.frd] = std::bit_cast(_mm_cvtsd_si64(_mm_load_sd(&ppu.fpr[op.frb])));
+	const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
+	const auto res = _mm_xor_si128(_mm_set1_epi64x(_mm_cvtsd_si64(b)), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(f64(1ull << 63)))));
+	ppu.fpr[op.frd] = std::bit_cast(_mm_extract_epi64(res, 0));
 	if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
 	return true;
 }
 
 bool ppu_interpreter::FCTIDZ(ppu_thread& ppu, ppu_opcode_t op)
 {
-	ppu.fpr[op.frd] = std::bit_cast(_mm_cvttsd_si64(_mm_load_sd(&ppu.fpr[op.frb])));
+	const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
+	const auto res = _mm_xor_si128(_mm_set1_epi64x(_mm_cvttsd_si64(b)), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(f64(1ull << 63)))));
+	ppu.fpr[op.frd] = std::bit_cast(_mm_extract_epi64(res, 0));
 	if (UNLIKELY(op.rc)) fmt::throw_exception("%s: op.rc", __func__); //ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
 	return true;
 }
diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp
index 9fefa7bd17..30d360a47a 100644
--- a/rpcs3/Emu/Cell/PPUTranslator.cpp
+++ b/rpcs3/Emu/Cell/PPUTranslator.cpp
@@ -4004,11 +4004,8 @@ void PPUTranslator::FRSP(ppu_opcode_t op)
 void PPUTranslator::FCTIW(ppu_opcode_t op)
 {
 	const auto b = GetFpr(op.frb);
-	//const auto sat_l = m_ir->CreateFCmpULT(b, ConstantFP::get(GetType(), -std::pow(2, 31))); // TODO ???
-	//const auto sat_h = m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType(), std::pow(2, 31)));
-	//const auto converted = m_ir->CreateFPToSI(FP_SAT_OP(sat_l, b), GetType());
-	//SetFpr(op.frd, m_ir->CreateSelect(sat_h, m_ir->getInt64(0x7fffffff), converted));
-	SetFpr(op.frd, Call(GetType(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef(), b, uint64_t{0})));
+	SetFpr(op.frd, m_ir->CreateSelect(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType(), f64(INT32_MAX))), m_ir->getInt32(INT32_MAX),
+		Call(GetType(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef(), b, u64{0}))));
 
 	//SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fctiw_get_fr", b));
 	//SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fctiw_get_fi", b));
@@ -4021,7 +4018,10 @@ void PPUTranslator::FCTIW(ppu_opcode_t op)
 void PPUTranslator::FCTIWZ(ppu_opcode_t op)
 {
 	const auto b = GetFpr(op.frb);
-	SetFpr(op.frd, Call(GetType(), "llvm.x86.sse2.cvttsd2si", m_ir->CreateInsertElement(GetUndef(), b, uint64_t{0})));
+	const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType(), std::exp2l(31.))), GetType());
+
+	// fix result saturation (0x80000000 -> 0x7fffffff)
+	SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType(), "llvm.x86.sse2.cvttsd2si", m_ir->CreateInsertElement(GetUndef(), b, u64{0}))));
 }
 
 void PPUTranslator::FDIV(ppu_opcode_t op)
@@ -4248,11 +4248,10 @@ void PPUTranslator::FABS(ppu_opcode_t op)
 void PPUTranslator::FCTID(ppu_opcode_t op)
 {
 	const auto b = GetFpr(op.frb);
-	//const auto sat_l = m_ir->CreateFCmpULT(b, ConstantFP::get(GetType(), -std::pow(2, 63)));
-	//const auto sat_h = m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType(), std::pow(2, 63)));
-	//const auto converted = m_ir->CreateFPToSI(FP_SAT_OP(sat_l, b), GetType());
-	//SetFpr(op.frd, m_ir->CreateSelect(sat_h, m_ir->getInt64(0x7fffffffffffffff), converted));
-	SetFpr(op.frd, Call(GetType(), "llvm.x86.sse2.cvtsd2si64", m_ir->CreateInsertElement(GetUndef(), b, uint64_t{0})));
+	const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType(), std::exp2l(63.))), GetType());
+
+	// fix result saturation (0x8000000000000000 -> 0x7fffffffffffffff)
+	SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType(), "llvm.x86.sse2.cvtsd2si64", m_ir->CreateInsertElement(GetUndef(), b, u64{0}))));
 
 	//SetFPSCR_FR(Call(GetType(), m_pure_attr, "__fctid_get_fr", b));
 	//SetFPSCR_FI(Call(GetType(), m_pure_attr, "__fctid_get_fi", b));
@@ -4265,7 +4264,10 @@ void PPUTranslator::FCTID(ppu_opcode_t op)
 void PPUTranslator::FCTIDZ(ppu_opcode_t op)
 {
 	const auto b = GetFpr(op.frb);
-	SetFpr(op.frd, Call(GetType(), "llvm.x86.sse2.cvttsd2si64", m_ir->CreateInsertElement(GetUndef(), b, uint64_t{0})));
+	const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType(), std::exp2l(63.))), GetType());
+
+	// fix result saturation (0x8000000000000000 -> 0x7fffffffffffffff)
+	SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType(), "llvm.x86.sse2.cvttsd2si64", m_ir->CreateInsertElement(GetUndef(), b, u64{0}))));
 }
 
 void PPUTranslator::FCFID(ppu_opcode_t op)