From 349f251d146af388a29d64b3644eb6f00e7d52b3 Mon Sep 17 00:00:00 2001
From: Nekotekina
Date: Thu, 20 Jan 2022 11:44:04 +0300
Subject: [PATCH] PPU LLVM: use masked stores for STVLX/STVRX

Drop maskmove intrinsic, not portable.
Its implicit NT hint may also hurt performance.
---
 rpcs3/Emu/Cell/PPUTranslator.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp
index 1b0c80aeac..5a0edd23fd 100644
--- a/rpcs3/Emu/Cell/PPUTranslator.cpp
+++ b/rpcs3/Emu/Cell/PPUTranslator.cpp
@@ -3319,9 +3319,10 @@ void PPUTranslator::STVLX(ppu_opcode_t op)
 {
 	const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
 	const auto data = pshufb(get_vr(op.vs), build(127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112) + vsplat(trunc(value(addr) & 0xf)));
-	const auto mask = sext(bitcast(splat(0xffff) << trunc(value(addr) & 0xf)));
-	const auto ptr = value(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType()));
-	eval(llvm_calli{"llvm.x86.sse2.maskmov.dqu", {data, mask, ptr}});
+	const auto mask = bitcast(splat(0xffff) << trunc(value(addr) & 0xf));
+	const auto ptr = value(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType()));
+	const auto align = splat(16);
+	eval(llvm_calli{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}});
 }
 
 void PPUTranslator::STDBRX(ppu_opcode_t op)
@@ -3348,9 +3349,10 @@ void PPUTranslator::STVRX(ppu_opcode_t op)
 {
 	const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
 	const auto data = pshufb(get_vr(op.vs), build(255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240) + vsplat(trunc(value(addr) & 0xf)));
-	const auto mask = sext(bitcast(trunc(splat(0xffff) << (value(addr) & 0xf) >> 16)));
-	const auto ptr = value(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType()));
-	eval(llvm_calli{"llvm.x86.sse2.maskmov.dqu", {data, mask, ptr}});
+	const auto mask = bitcast(trunc(splat(0xffff) << (value(addr) & 0xf) >> 16));
+	const auto ptr = value(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType()));
+	const auto align = splat(16);
+	eval(llvm_calli{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}});
 }
 
 void PPUTranslator::STFSUX(ppu_opcode_t op)