From b1b67a13c62d18c7fa0e153267e1f95c445f8fb0 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 14 Apr 2020 19:41:31 +0300 Subject: [PATCH] Revert "Replace rotate utils with std::rotl" (partial) This reverts commit 4d8bfe328bdb5e0b988e300e44269f759044ad82. --- Utilities/asm.h | 127 +++++++++++++++++++++++++ Utilities/cfmt.h | 1 - Utilities/cond.h | 1 - rpcs3/Emu/Cell/PPUAnalyser.cpp | 32 +++---- rpcs3/Emu/Cell/PPUInterpreter.cpp | 24 ++--- rpcs3/Emu/Cell/PPUOpcodes.h | 2 +- rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 3 +- rpcs3/Emu/Cell/SPUInterpreter.cpp | 5 +- rpcs3/Emu/Cell/SPUThread.cpp | 29 +++--- 9 files changed, 176 insertions(+), 48 deletions(-) diff --git a/Utilities/asm.h b/Utilities/asm.h index 4aaa10b595..951dd2fd2a 100644 --- a/Utilities/asm.h +++ b/Utilities/asm.h @@ -7,6 +7,94 @@ namespace utils // Rotate helpers #if defined(__GNUG__) + inline u8 rol8(u8 x, u8 n) + { +#if __has_builtin(__builtin_rotateleft8) + return __builtin_rotateleft8(x, n); +#else + u8 result = x; + __asm__("rolb %[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; +#endif + } + + inline u8 ror8(u8 x, u8 n) + { +#if __has_builtin(__builtin_rotateright8) + return __builtin_rotateright8(x, n); +#else + u8 result = x; + __asm__("rorb %[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; +#endif + } + + inline u16 rol16(u16 x, u16 n) + { +#if __has_builtin(__builtin_rotateleft16) + return __builtin_rotateleft16(x, n); +#else + u16 result = x; + __asm__("rolw %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; +#endif + } + + inline u16 ror16(u16 x, u16 n) + { +#if __has_builtin(__builtin_rotateright16) + return __builtin_rotateright16(x, n); +#else + u16 result = x; + __asm__("rorw %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; +#endif + } + + inline u32 rol32(u32 x, u32 n) + { +#if __has_builtin(__builtin_rotateleft32) + return __builtin_rotateleft32(x, n); +#else + u32 result = x; + __asm__("roll %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; +#endif + } + + inline u32 ror32(u32 x, u32 n) + { +#if __has_builtin(__builtin_rotateright32) + return __builtin_rotateright32(x, n); +#else + u32 result = x; + __asm__("rorl %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; +#endif + } + + inline u64 rol64(u64 x, u64 n) + { +#if __has_builtin(__builtin_rotateleft64) + return __builtin_rotateleft64(x, n); +#else + u64 result = x; + __asm__("rolq %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; +#endif + } + + inline u64 ror64(u64 x, u64 n) + { +#if __has_builtin(__builtin_rotateright64) + return __builtin_rotateright64(x, n); +#else + u64 result = x; + __asm__("rorq %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; +#endif + } + constexpr u64 umulh64(u64 a, u64 b) { const __uint128_t x = a; @@ -48,6 +136,45 @@ namespace utils } #elif defined(_MSC_VER) + inline u8 rol8(u8 x, u8 n) + { + return _rotl8(x, n); + } + + inline u8 ror8(u8 x, u8 n) + { + return _rotr8(x, n); + } + + inline u16 rol16(u16 x, u16 n) + { + return _rotl16(x, (u8)n); + } + + inline u16 ror16(u16 x, u16 n) + { + return _rotr16(x, (u8)n); + } + + inline u32 rol32(u32 x, u32 n) + { + return _rotl(x, (int)n); + } + + inline u32 ror32(u32 x, u32 n) + { + return _rotr(x, (int)n); + } + + inline u64 rol64(u64 x, u64 n) + { + return _rotl64(x, (int)n); + } + + inline u64 ror64(u64 x, u64 n) + { + return _rotr64(x, (int)n); + } inline u64 umulh64(u64 x, u64 y) { diff --git a/Utilities/cfmt.h b/Utilities/cfmt.h index 04d826b7b1..50e32e4a71 100644 --- a/Utilities/cfmt.h +++ b/Utilities/cfmt.h @@ -1,7 +1,6 @@ #pragma once #include "types.h" -#include "asm.h" #include #include #include diff --git a/Utilities/cond.h b/Utilities/cond.h index 7f151c032f..496b9ada96 100644 --- a/Utilities/cond.h +++ b/Utilities/cond.h @@ -3,7 +3,6 @@ #include "types.h" #include "util/atomic.hpp" #include -#include "asm.h" // Lightweight condition variable class cond_variable diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp index d2353733de..259d3aa723 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.cpp +++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp @@ -2245,14 +2245,14 @@ void ppu_acontext::RLWIMI(ppu_opcode_t op) if (op.mb32 <= op.me32) { // 32-bit op, including mnemonics: INSLWI, INSRWI (TODO) - min = std::rotl(static_cast(min), op.sh32) & mask; - max = std::rotl(static_cast(max), op.sh32) & mask; + min = utils::rol32(static_cast(min), op.sh32) & mask; + max = utils::rol32(static_cast(max), op.sh32) & mask; } else { // Full 64-bit op with duplication - min = std::rotl(static_cast(min) | min << 32, op.sh32) & mask; - max = std::rotl(static_cast(max) | max << 32, op.sh32) & mask; + min = utils::rol64(static_cast(min) | min << 32, op.sh32) & mask; + max = utils::rol64(static_cast(max) | max << 32, op.sh32) & mask; } if (mask != umax) @@ -2301,14 +2301,14 @@ void ppu_acontext::RLWINM(ppu_opcode_t op) // EXTRWI and other possible mnemonics } - min = std::rotl(static_cast(min), op.sh32) & mask; - max = std::rotl(static_cast(max), op.sh32) & mask; + min = utils::rol32(static_cast(min), op.sh32) & mask; + max = utils::rol32(static_cast(max), op.sh32) & mask; } else { // Full 64-bit op with duplication - min = std::rotl(static_cast(min) | min << 32, op.sh32) & mask; - max = std::rotl(static_cast(max) | max << 32, op.sh32) & mask; + min = utils::rol64(static_cast(min) | min << 32, op.sh32) & mask; + max = utils::rol64(static_cast(max) | max << 32, op.sh32) & mask; } gpr[op.ra] = spec_gpr::approx(min, max); @@ -2396,8 +2396,8 @@ void ppu_acontext::RLDICL(ppu_opcode_t op) return; } - min = std::rotl(min, sh) & mask; - max = std::rotl(max, sh) & mask; + min = utils::rol64(min, sh) & mask; + max = utils::rol64(max, sh) & mask; gpr[op.ra] = spec_gpr::approx(min, max); } @@ -2425,8 +2425,8 @@ void ppu_acontext::RLDICR(ppu_opcode_t op) return; } - min = std::rotl(min, sh) & mask; - max = std::rotl(max, sh) & mask; + min = utils::rol64(min, sh) & mask; + max = utils::rol64(max, sh) & mask; gpr[op.ra] = spec_gpr::approx(min, max); } @@ -2451,8 +2451,8 @@ void ppu_acontext::RLDIC(ppu_opcode_t op) return; } - min = std::rotl(min, sh) & mask; - max = std::rotl(max, sh) & mask; + min = utils::rol64(min, sh) & mask; + max = utils::rol64(max, sh) & mask; gpr[op.ra] = spec_gpr::approx(min, max); } @@ -2474,8 +2474,8 @@ void ppu_acontext::RLDIMI(ppu_opcode_t op) // INSRDI mnemonic } - min = std::rotl(min, sh) & mask; - max = std::rotl(max, sh) & mask; + min = utils::rol64(min, sh) & mask; + max = utils::rol64(max, sh) & mask; if (mask != umax) { diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 0b68195f09..47519bb4f2 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -1873,7 +1873,7 @@ bool ppu_interpreter::VRLB(ppu_thread& ppu, ppu_opcode_t op) for (uint i = 0; i < 16; i++) { - d._u8[i] = std::rotl(a._u8[i], b._u8[i]); + d._u8[i] = utils::rol8(a._u8[i], b._u8[i]); } return true; } @@ -1886,7 +1886,7 @@ bool ppu_interpreter::VRLH(ppu_thread& ppu, ppu_opcode_t op) for (uint i = 0; i < 8; i++) { - d._u16[i] = std::rotl(a._u16[i], b._u8[i * 2] & 0xf); + d._u16[i] = utils::rol16(a._u16[i], b._u8[i * 2] & 0xf); } return true; } @@ -1899,7 +1899,7 @@ bool ppu_interpreter::VRLW(ppu_thread& ppu, ppu_opcode_t op) for (uint w = 0; w < 4; w++) { - d._u32[w] = std::rotl(a._u32[w], b._u8[w * 4] & 0x1f); + d._u32[w] = utils::rol32(a._u32[w], b._u8[w * 4] & 0x1f); } return true; } @@ -3063,21 +3063,21 @@ bool ppu_interpreter::BCCTR(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::RLWIMI(ppu_thread& ppu, ppu_opcode_t op) { const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32); - ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(std::rotl(static_cast(ppu.gpr[op.rs]), op.sh32)) & mask); + ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(utils::rol32(static_cast(ppu.gpr[op.rs]), op.sh32)) & mask); if (op.rc) [[unlikely]] ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLWINM(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = dup32(std::rotl(static_cast(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32); + ppu.gpr[op.ra] = dup32(utils::rol32(static_cast(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32); if (op.rc) [[unlikely]] ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLWNM(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = dup32(std::rotl(static_cast(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32); + ppu.gpr[op.ra] = dup32(utils::rol32(static_cast(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32); if (op.rc) [[unlikely]] ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } @@ -3122,21 +3122,21 @@ bool ppu_interpreter::ANDIS(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::RLDICL(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = std::rotl(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64); + ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64); if (op.rc) [[unlikely]] ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLDICR(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = std::rotl(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63)); + ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63)); if (op.rc) [[unlikely]] ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLDIC(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = std::rotl(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63); + ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63); if (op.rc) [[unlikely]] ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } @@ -3144,21 +3144,21 @@ bool ppu_interpreter::RLDIC(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::RLDIMI(ppu_thread& ppu, ppu_opcode_t op) { const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63); - ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (std::rotl(ppu.gpr[op.rs], op.sh64) & mask); + ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (utils::rol64(ppu.gpr[op.rs], op.sh64) & mask); if (op.rc) [[unlikely]] ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLDCL(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = std::rotl(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64); + ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64); if (op.rc) [[unlikely]] ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLDCR(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = std::rotl(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63)); + ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63)); if (op.rc) [[unlikely]] ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } diff --git a/rpcs3/Emu/Cell/PPUOpcodes.h b/rpcs3/Emu/Cell/PPUOpcodes.h index 42dde74580..4a34f846f7 100644 --- a/rpcs3/Emu/Cell/PPUOpcodes.h +++ b/rpcs3/Emu/Cell/PPUOpcodes.h @@ -63,7 +63,7 @@ union ppu_opcode_t constexpr u64 ppu_rotate_mask(u32 mb, u32 me) { - return std::rotr(~0ull << (~(me - mb) & 63), mb); + return std::rotr(~0ull << (~(me - mb) & 63), mb & 63); } constexpr u32 ppu_decode(u32 inst) diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 8495cf79df..149a37a203 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -8,6 +8,7 @@ #include "SPUThread.h" #include "SPUInterpreter.h" #include "Utilities/sysinfo.h" +#include "Utilities/asm.h" #include "PPUAnalyser.h" #include "Crypto/sha1.h" @@ -3275,7 +3276,7 @@ void spu_recompiler::ROTQBYI(spu_opcode_t op) } else if (s == 4 || s == 8 || s == 12) { - c->pshufd(va, va, std::rotl(0xE4, s / 2)); + c->pshufd(va, va, utils::rol8(0xE4, s / 2)); } else if (utils::has_ssse3()) { diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 38d0f1b5e8..2f26a73253 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -3,6 +3,7 @@ #include "Utilities/JIT.h" #include "Utilities/sysinfo.h" +#include "Utilities/asm.h" #include "SPUThread.h" #include "Emu/Cell/Common.h" @@ -231,7 +232,7 @@ bool spu_interpreter::ROT(spu_thread& spu, spu_opcode_t op) for (u32 i = 0; i < 4; i++) { - spu.gpr[op.rt]._u32[i] = std::rotl(a._u32[i], b._u32[i]); + spu.gpr[op.rt]._u32[i] = utils::rol32(a._u32[i], b._u32[i]); } return true; } @@ -282,7 +283,7 @@ bool spu_interpreter::ROTH(spu_thread& spu, spu_opcode_t op) for (u32 i = 0; i < 8; i++) { - spu.gpr[op.rt]._u16[i] = std::rotl(a._u16[i], b._u16[i]); + spu.gpr[op.rt]._u16[i] = utils::rol16(a._u16[i], b._u16[i]); } return true; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index dc70546624..98a08469e5 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "Utilities/JIT.h" +#include "Utilities/asm.h" #include "Utilities/sysinfo.h" #include "Emu/Memory/vm_ptr.h" #include "Emu/Memory/vm_reservation.h" @@ -1062,7 +1063,7 @@ std::string spu_thread::dump_misc() const { ret += '\n'; } - + fmt::append(ret, "\nWaiting: %fs", (get_system_time() - _time) / 1000000.); } @@ -1548,7 +1549,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args) bool spu_thread::do_dma_check(const spu_mfc_cmd& args) { - const u32 mask = std::rotl(1, args.tag); + const u32 mask = utils::rol32(1, args.tag); if (mfc_barrier & mask || (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK) && mfc_fence & mask)) [[unlikely]] { @@ -1564,13 +1565,13 @@ bool spu_thread::do_dma_check(const spu_mfc_cmd& args) if ((mfc_queue[i].cmd & ~0xc) == MFC_BARRIER_CMD) { mfc_barrier |= -1; - mfc_fence |= std::rotl(1, mfc_queue[i].tag); + mfc_fence |= utils::rol32(1, mfc_queue[i].tag); continue; } if (true) { - const u32 _mask = std::rotl(1u, mfc_queue[i].tag); + const u32 _mask = utils::rol32(1u, mfc_queue[i].tag); // A command with barrier hard blocks that tag until it's been dealt with if (mfc_queue[i].cmd & MFC_BARRIER_MASK) @@ -1671,14 +1672,14 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args) if (items[index].sb & 0x8000) [[unlikely]] { - ch_stall_mask |= std::rotl(1, args.tag); + ch_stall_mask |= utils::rol32(1, args.tag); if (!ch_stall_stat.get_count()) { ch_event_stat |= SPU_EVENT_SN; } - ch_stall_stat.set_value(std::rotl(1, args.tag) | ch_stall_stat.get_value()); + ch_stall_stat.set_value(utils::rol32(1, args.tag) | ch_stall_stat.get_value()); args.tag |= 0x80; // Set stalled status return false; @@ -1773,7 +1774,7 @@ void spu_thread::do_mfc(bool wait) static_cast(std::remove_if(mfc_queue + 0, mfc_queue + mfc_size, [&](spu_mfc_cmd& args) { // Select tag bit in the tag mask or the stall mask - const u32 mask = std::rotl(1, args.tag); + const u32 mask = utils::rol32(1, args.tag); if ((args.cmd & ~0xc) == MFC_BARRIER_CMD) { @@ -2130,7 +2131,7 @@ bool spu_thread::process_mfc_cmd() } case MFC_PUTQLLUC_CMD: { - const u32 mask = std::rotl(1, ch_mfc_cmd.tag); + const u32 mask = utils::rol32(1, ch_mfc_cmd.tag); if ((mfc_barrier | mfc_fence) & mask) [[unlikely]] { @@ -2178,11 +2179,11 @@ bool spu_thread::process_mfc_cmd() } mfc_queue[mfc_size++] = ch_mfc_cmd; - mfc_fence |= std::rotl(1, ch_mfc_cmd.tag); + mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag); if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK) { - mfc_barrier |= std::rotl(1, ch_mfc_cmd.tag); + mfc_barrier |= utils::rol32(1, ch_mfc_cmd.tag); } return true; @@ -2214,11 +2215,11 @@ bool spu_thread::process_mfc_cmd() } mfc_size++; - mfc_fence |= std::rotl(1, cmd.tag); + mfc_fence |= utils::rol32(1, cmd.tag); if (cmd.cmd & MFC_BARRIER_MASK) { - mfc_barrier |= std::rotl(1, cmd.tag); + mfc_barrier |= utils::rol32(1, cmd.tag); } return true; @@ -2238,7 +2239,7 @@ bool spu_thread::process_mfc_cmd() { mfc_queue[mfc_size++] = ch_mfc_cmd; mfc_barrier |= -1; - mfc_fence |= std::rotl(1, ch_mfc_cmd.tag); + mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag); } return true; @@ -2838,7 +2839,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value) case MFC_WrListStallAck: { // Reset stall status for specified tag - const u32 tag_mask = std::rotl(1, value); + const u32 tag_mask = utils::rol32(1, value); if (ch_stall_mask & tag_mask) {