From ed9fb8405b149f81081b5ebfca70931df9bb1bd0 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 5 Sep 2018 19:57:52 +0300 Subject: [PATCH] Move rotate/cntlz/cnttz helpers to Utilities/asm.h --- Utilities/asm.h | 171 +++++++++++++++++++++++++ Utilities/cfmt.h | 15 ++- Utilities/types.h | 126 ------------------ rpcs3/Emu/Cell/Modules/cellGcmSys.cpp | 1 - rpcs3/Emu/Cell/Modules/cellSpurs.cpp | 6 +- rpcs3/Emu/Cell/Modules/cellSync.cpp | 6 +- rpcs3/Emu/Cell/Modules/sys_libc.cpp | 3 - rpcs3/Emu/Cell/PPUAnalyser.cpp | 40 +++--- rpcs3/Emu/Cell/PPUAnalyser.h | 4 +- rpcs3/Emu/Cell/PPUInterpreter.cpp | 39 +++--- rpcs3/Emu/Cell/PPUOpcodes.h | 5 +- rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 5 +- rpcs3/Emu/Cell/SPUInterpreter.cpp | 8 +- rpcs3/Emu/Memory/vm.cpp | 5 +- rpcs3/Emu/RSX/rsx_utils.h | 5 +- 15 files changed, 242 insertions(+), 197 deletions(-) create mode 100644 Utilities/asm.h diff --git a/Utilities/asm.h b/Utilities/asm.h new file mode 100644 index 0000000000..835b70bb60 --- /dev/null +++ b/Utilities/asm.h @@ -0,0 +1,171 @@ +#pragma once + +#include "types.h" + +namespace utils +{ + inline u32 cntlz32(u32 arg, bool nonzero = false) + { +#ifdef _MSC_VER + ulong res; + return _BitScanReverse(&res, arg) || nonzero ? res ^ 31 : 32; +#else + return arg || nonzero ? __builtin_clz(arg) : 32; +#endif + } + + inline u64 cntlz64(u64 arg, bool nonzero = false) + { +#ifdef _MSC_VER + ulong res; + return _BitScanReverse64(&res, arg) || nonzero ? res ^ 63 : 64; +#else + return arg || nonzero ? __builtin_clzll(arg) : 64; +#endif + } + + inline u32 cnttz32(u32 arg, bool nonzero = false) + { +#ifdef _MSC_VER + ulong res; + return _BitScanForward(&res, arg) || nonzero ? res : 32; +#else + return arg || nonzero ? __builtin_ctz(arg) : 32; +#endif + } + + inline u64 cnttz64(u64 arg, bool nonzero = false) + { +#ifdef _MSC_VER + ulong res; + return _BitScanForward64(&res, arg) || nonzero ? res : 64; +#else + return arg || nonzero ? __builtin_ctzll(arg) : 64; +#endif + } + +// Rotate helpers +#if defined(__GNUG__) + + inline u8 rol8(u8 x, u8 n) + { + u8 result = x; + __asm__("rolb %[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; + } + + inline u8 ror8(u8 x, u8 n) + { + u8 result = x; + __asm__("rorb %[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; + } + + inline u16 rol16(u16 x, u16 n) + { + u16 result = x; + __asm__("rolw %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; + } + + inline u16 ror16(u16 x, u16 n) + { + u16 result = x; + __asm__("rorw %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; + } + + inline u32 rol32(u32 x, u32 n) + { + u32 result = x; + __asm__("roll %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; + } + + inline u32 ror32(u32 x, u32 n) + { + u32 result = x; + __asm__("rorl %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; + } + + inline u64 rol64(u64 x, u64 n) + { + u64 result = x; + __asm__("rolq %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; + } + + inline u64 ror64(u64 x, u64 n) + { + u64 result = x; + __asm__("rorq %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n)); + return result; + } + + inline u64 umulh64(u64 a, u64 b) + { + u64 result; + __asm__("mulq %[b]" : "=d"(result) : [a] "a"(a), [b] "rm"(b)); + return result; + } + + inline s64 mulh64(s64 a, s64 b) + { + s64 result; + __asm__("imulq %[b]" : "=d"(result) : [a] "a"(a), [b] "rm"(b)); + return result; + } + +#elif defined(_MSC_VER) + inline u8 rol8(u8 x, u8 n) + { + return _rotl8(x, n); + } + + inline u8 ror8(u8 x, u8 n) + { + return _rotr8(x, n); + } + + inline u16 rol16(u16 x, u16 n) + { + return _rotl16(x, (u8)n); + } + + inline u16 ror16(u16 x, u16 n) + { + return _rotr16(x, (u8)n); + } + + inline u32 rol32(u32 x, u32 n) + { + return _rotl(x, (int)n); + } + + inline u32 ror32(u32 x, u32 n) + { + return _rotr(x, (int)n); + } + + inline u64 rol64(u64 x, u64 n) + { + return _rotl64(x, (int)n); + } + + inline u64 ror64(u64 x, u64 n) + { + return _rotr64(x, (int)n); + } + + inline u64 umulh64(u64 x, u64 y) + { + return __umulh(x, y); + } + + inline s64 mulh64(s64 x, s64 y) + { + return __mulh(x, y); + } +#endif +} // namespace utils diff --git a/Utilities/cfmt.h b/Utilities/cfmt.h index 90de3c9a06..da58eeec95 100644 --- a/Utilities/cfmt.h +++ b/Utilities/cfmt.h @@ -1,6 +1,7 @@ #pragma once #include "types.h" +#include "asm.h" #include #include #include @@ -56,7 +57,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src) const auto write_octal = [&](u64 value, u64 min_num) { - out.resize(out.size() + std::max(min_num, 66 / 3 - (cntlz64(value | 1, true) + 2) / 3), '0'); + out.resize(out.size() + std::max(min_num, 66 / 3 - (utils::cntlz64(value | 1, true) + 2) / 3), '0'); // Write in reversed order for (auto i = out.rbegin(); value; i++, value /= 8) @@ -67,8 +68,8 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src) const auto write_hex = [&](u64 value, bool upper, u64 min_num) { - out.resize(out.size() + std::max(min_num, 64 / 4 - cntlz64(value | 1, true) / 4), '0'); - + out.resize(out.size() + std::max(min_num, 64 / 4 - utils::cntlz64(value | 1, true) / 4), '0'); + // Write in reversed order for (auto i = out.rbegin(); value; i++, value /= 16) { @@ -141,7 +142,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src) { ctx.width = read_decimal(ch - '0'); } - + break; } @@ -157,7 +158,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src) ctx.width = std::abs(warg); ctx.left |= warg < 0; } - + break; } @@ -307,7 +308,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src) const std::size_t start = out.size(); const std::size_t size1 = src.fmt_string(out, ctx.args); - + if (ctx.dot && size1 > ctx.prec) { // Shrink if necessary @@ -576,7 +577,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src) // Add padding if necessary out.insert(ctx.left ? out.end() : out.begin() + start, ctx.width - size2, ' '); } - + src.skip(ctx.args); ctx = {0}; break; diff --git a/Utilities/types.h b/Utilities/types.h index 30221f616b..7f7b3c5347 100644 --- a/Utilities/types.h +++ b/Utilities/types.h @@ -433,46 +433,6 @@ struct offset32_detail } }; -inline u32 cntlz32(u32 arg, bool nonzero = false) -{ -#ifdef _MSC_VER - ulong res; - return _BitScanReverse(&res, arg) || nonzero ? res ^ 31 : 32; -#else - return arg || nonzero ? __builtin_clzll(arg) - 32 : 32; -#endif -} - -inline u64 cntlz64(u64 arg, bool nonzero = false) -{ -#ifdef _MSC_VER - ulong res; - return _BitScanReverse64(&res, arg) || nonzero ? res ^ 63 : 64; -#else - return arg || nonzero ? __builtin_clzll(arg) : 64; -#endif -} - -inline u32 cnttz32(u32 arg, bool nonzero = false) -{ -#ifdef _MSC_VER - ulong res; - return _BitScanForward(&res, arg) || nonzero ? res : 32; -#else - return arg || nonzero ? __builtin_ctzll(arg) : 32; -#endif -} - -inline u64 cnttz64(u64 arg, bool nonzero = false) -{ -#ifdef _MSC_VER - ulong res; - return _BitScanForward64(&res, arg) || nonzero ? res : 64; -#else - return arg || nonzero ? __builtin_ctzll(arg) : 64; -#endif -} - // Helper function, used by ""_u16, ""_u32, ""_u64 constexpr u8 to_u8(char c) { @@ -848,89 +808,3 @@ inline void busy_wait(std::size_t cycles = 3000) const u64 s = __rdtsc(); do _mm_pause(); while (__rdtsc() - s < cycles); } - -// Rotate helpers -#if defined(__GNUG__) - -inline u8 rol8(u8 x, u8 n) -{ - u8 result = x; - __asm__("rolb %[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); - return result; -} - -inline u8 ror8(u8 x, u8 n) -{ - u8 result = x; - __asm__("rorb %[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); - return result; -} - -inline u16 rol16(u16 x, u16 n) -{ - u16 result = x; - __asm__("rolw %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); - return result; -} - -inline u16 ror16(u16 x, u16 n) -{ - u16 result = x; - __asm__("rorw %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); - return result; -} - -inline u32 rol32(u32 x, u32 n) -{ - u32 result = x; - __asm__("roll %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); - return result; -} - -inline u32 ror32(u32 x, u32 n) -{ - u32 result = x; - __asm__("rorl %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); - return result; -} - -inline u64 rol64(u64 x, u64 n) -{ - u64 result = x; - __asm__("rolq %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); - return result; -} - -inline u64 ror64(u64 x, u64 n) -{ - u64 result = x; - __asm__("rorq %b[n], %[result]" : [result] "+g" (result) : [n] "c" (n)); - return result; -} - -inline u64 umulh64(u64 a, u64 b) -{ - u64 result; - __asm__("mulq %[b]" : "=d" (result) : [a] "a" (a), [b] "rm" (b)); - return result; -} - -inline s64 mulh64(s64 a, s64 b) -{ - s64 result; - __asm__("imulq %[b]" : "=d" (result) : [a] "a" (a), [b] "rm" (b)); - return result; -} - -#elif defined(_MSC_VER) -inline u8 rol8(u8 x, u8 n) { return _rotl8(x, n); } -inline u8 ror8(u8 x, u8 n) { return _rotr8(x, n); } -inline u16 rol16(u16 x, u16 n) { return _rotl16(x, (u8)n); } -inline u16 ror16(u16 x, u16 n) { return _rotr16(x, (u8)n); } -inline u32 rol32(u32 x, u32 n) { return _rotl(x, (int)n); } -inline u32 ror32(u32 x, u32 n) { return _rotr(x, (int)n); } -inline u64 rol64(u64 x, u64 n) { return _rotl64(x, (int)n); } -inline u64 ror64(u64 x, u64 n) { return _rotr64(x, (int)n); } -inline u64 umulh64(u64 x, u64 y) { return __umulh(x, y); } -inline s64 mulh64(s64 x, s64 y) { return __mulh(x, y); } -#endif diff --git a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp index 11aa84253b..0154893123 100644 --- a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp @@ -3,7 +3,6 @@ #include "Emu/IdManager.h" #include "Emu/Cell/PPUModule.h" -#include "Emu/Cell/PPUOpcodes.h" #include "Emu/Memory/vm.h" #include "Emu/RSX/GSRender.h" #include "cellGcmSys.h" diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp index 8966202832..ab52624184 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp @@ -2,7 +2,7 @@ #include "Emu/System.h" #include "Emu/IdManager.h" #include "Emu/Cell/PPUModule.h" - +#include "Utilities/asm.h" #include "Emu/Cell/SPUThread.h" #include "Emu/Cell/lv2/sys_lwmutex.h" #include "Emu/Cell/lv2/sys_lwcond.h" @@ -2126,7 +2126,7 @@ s32 _spurs::add_workload(vm::ptr spurs, vm::ptr wid, vm::cptrflags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u; // TODO: check if can be changed spurs->wklEnabled.atomic_op([spurs, wmax, &wnum](be_t& value) { - wnum = cntlz32(~(u32)value); // found empty position + wnum = utils::cntlz32(~(u32)value); // found empty position if (wnum < wmax) { value |= (u32)(0x80000000ull >> wnum); // set workload bit @@ -2249,7 +2249,7 @@ s32 _spurs::add_workload(vm::ptr spurs, vm::ptr wid, vm::cptr> current->uniqueId; - res_wkl = cntlz32(~k); + res_wkl = utils::cntlz32(~k); } } } diff --git a/rpcs3/Emu/Cell/Modules/cellSync.cpp b/rpcs3/Emu/Cell/Modules/cellSync.cpp index 7911f709bd..307edba4c0 100644 --- a/rpcs3/Emu/Cell/Modules/cellSync.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSync.cpp @@ -1,7 +1,7 @@ #include "stdafx.h" #include "Emu/System.h" #include "Emu/Cell/PPUModule.h" - +#include "Utilities/asm.h" #include "Emu/Cell/lv2/sys_event.h" #include "Emu/Cell/lv2/sys_process.h" #include "cellSync.h" @@ -966,7 +966,7 @@ error_code _cellSyncLFQueueCompletePushPointer(ppu_thread& ppu, vm::ptr - #include "yaml-cpp/yaml.h" - - +#include "Utilities/asm.h" const ppu_decoder s_ppu_itype; @@ -2051,7 +2049,7 @@ void ppu_acontext::MULLI(ppu_opcode_t op) max = amax * op.simm16; // Check overflow - if (min >> 63 != ::mulh64(amin, op.simm16) || max >> 63 != ::mulh64(amax, op.simm16)) + if (min >> 63 != utils::mulh64(amin, op.simm16) || max >> 63 != utils::mulh64(amax, op.simm16)) { min = 0; max = -1; @@ -2062,7 +2060,7 @@ void ppu_acontext::MULLI(ppu_opcode_t op) } } - gpr[op.rd] = spec_gpr::range(min, max, gpr[op.ra].tz() + ::cnttz64(op.simm16)); + gpr[op.rd] = spec_gpr::range(min, max, gpr[op.ra].tz() + utils::cnttz64(op.simm16)); } void ppu_acontext::SUBFIC(ppu_opcode_t op) @@ -2163,14 +2161,14 @@ void ppu_acontext::RLWIMI(ppu_opcode_t op) if (op.mb32 <= op.me32) { // 32-bit op, including mnemonics: INSLWI, INSRWI (TODO) - min = ::rol32((u32)min, op.sh32) & mask; - max = ::rol32((u32)max, op.sh32) & mask; + min = utils::rol32((u32)min, op.sh32) & mask; + max = utils::rol32((u32)max, op.sh32) & mask; } else { // Full 64-bit op with duplication - min = ::rol64((u32)min | min << 32, op.sh32) & mask; - max = ::rol64((u32)max | max << 32, op.sh32) & mask; + min = utils::rol64((u32)min | min << 32, op.sh32) & mask; + max = utils::rol64((u32)max | max << 32, op.sh32) & mask; } if (mask != -1) @@ -2219,14 +2217,14 @@ void ppu_acontext::RLWINM(ppu_opcode_t op) // EXTRWI and other possible mnemonics } - min = ::rol32((u32)min, op.sh32) & mask; - max = ::rol32((u32)max, op.sh32) & mask; + min = utils::rol32((u32)min, op.sh32) & mask; + max = utils::rol32((u32)max, op.sh32) & mask; } else { // Full 64-bit op with duplication - min = ::rol64((u32)min | min << 32, op.sh32) & mask; - max = ::rol64((u32)max | max << 32, op.sh32) & mask; + min = utils::rol64((u32)min | min << 32, op.sh32) & mask; + max = utils::rol64((u32)max | max << 32, op.sh32) & mask; } gpr[op.ra] = spec_gpr::approx(min, max); @@ -2314,8 +2312,8 @@ void ppu_acontext::RLDICL(ppu_opcode_t op) return; } - min = ::rol64(min, sh) & mask; - max = ::rol64(max, sh) & mask; + min = utils::rol64(min, sh) & mask; + max = utils::rol64(max, sh) & mask; gpr[op.ra] = spec_gpr::approx(min, max); } @@ -2343,8 +2341,8 @@ void ppu_acontext::RLDICR(ppu_opcode_t op) return; } - min = ::rol64(min, sh) & mask; - max = ::rol64(max, sh) & mask; + min = utils::rol64(min, sh) & mask; + max = utils::rol64(max, sh) & mask; gpr[op.ra] = spec_gpr::approx(min, max); } @@ -2369,8 +2367,8 @@ void ppu_acontext::RLDIC(ppu_opcode_t op) return; } - min = ::rol64(min, sh) & mask; - max = ::rol64(max, sh) & mask; + min = utils::rol64(min, sh) & mask; + max = utils::rol64(max, sh) & mask; gpr[op.ra] = spec_gpr::approx(min, max); } @@ -2392,8 +2390,8 @@ void ppu_acontext::RLDIMI(ppu_opcode_t op) // INSRDI mnemonic } - min = ::rol64(min, sh) & mask; - max = ::rol64(max, sh) & mask; + min = utils::rol64(min, sh) & mask; + max = utils::rol64(max, sh) & mask; if (mask != -1) { diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h index aa32bfe200..7e9a912b3f 100644 --- a/rpcs3/Emu/Cell/PPUAnalyser.h +++ b/rpcs3/Emu/Cell/PPUAnalyser.h @@ -1006,7 +1006,7 @@ struct ppu_acontext // Return number of trailing zero bits u64 tz() const { - return ::cnttz64(mask()); + return utils::cnttz64(mask()); } // Range NOT @@ -1255,7 +1255,7 @@ struct ppu_acontext if (min < max) { // Inverted constant MSB mask - const u64 mix = ~0ull >> ::cntlz64(min ^ max, true); + const u64 mix = ~0ull >> utils::cntlz64(min ^ max, true); r.bmin |= min & ~mix; r.bmax &= max | mix; diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 24e54dd868..b1f300ac32 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -2,6 +2,7 @@ #include "Emu/System.h" #include "PPUThread.h" #include "PPUInterpreter.h" +#include "Utilities/asm.h" #include @@ -1851,7 +1852,7 @@ bool ppu_interpreter::VRLB(ppu_thread& ppu, ppu_opcode_t op) for (uint i = 0; i < 16; i++) { - d._u8[i] = rol8(a._u8[i], b._u8[i]); + d._u8[i] = utils::rol8(a._u8[i], b._u8[i]); } return true; } @@ -1864,7 +1865,7 @@ bool ppu_interpreter::VRLH(ppu_thread& ppu, ppu_opcode_t op) for (uint i = 0; i < 8; i++) { - d._u16[i] = rol16(a._u16[i], b._u8[i * 2] & 0xf); + d._u16[i] = utils::rol16(a._u16[i], b._u8[i * 2] & 0xf); } return true; } @@ -1877,7 +1878,7 @@ bool ppu_interpreter::VRLW(ppu_thread& ppu, ppu_opcode_t op) for (uint w = 0; w < 4; w++) { - d._u32[w] = rol32(a._u32[w], b._u8[w * 4] & 0x1f); + d._u32[w] = utils::rol32(a._u32[w], b._u8[w * 4] & 0x1f); } return true; } @@ -3036,21 +3037,21 @@ bool ppu_interpreter::BCCTR(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::RLWIMI(ppu_thread& ppu, ppu_opcode_t op) { const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32); - ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(rol32(u32(ppu.gpr[op.rs]), op.sh32)) & mask); + ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(utils::rol32(u32(ppu.gpr[op.rs]), op.sh32)) & mask); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLWINM(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = dup32(rol32(u32(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32); + ppu.gpr[op.ra] = dup32(utils::rol32(u32(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLWNM(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = dup32(rol32(u32(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32); + ppu.gpr[op.ra] = dup32(utils::rol32(u32(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } @@ -3095,21 +3096,21 @@ bool ppu_interpreter::ANDIS(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::RLDICL(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64); + ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLDICR(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63)); + ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63)); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLDIC(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63); + ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } @@ -3117,21 +3118,21 @@ bool ppu_interpreter::RLDIC(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::RLDIMI(ppu_thread& ppu, ppu_opcode_t op) { const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63); - ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (rol64(ppu.gpr[op.rs], op.sh64) & mask); + ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (utils::rol64(ppu.gpr[op.rs], op.sh64) & mask); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLDCL(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64); + ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } bool ppu_interpreter::RLDCR(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63)); + ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63)); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } @@ -3194,7 +3195,7 @@ bool ppu_interpreter::SUBFC(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::MULHDU(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.rd] = umulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]); + ppu.gpr[op.rd] = utils::umulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.rd], 0); return true; } @@ -3225,7 +3226,7 @@ bool ppu_interpreter::MFOCRF(ppu_thread& ppu, ppu_opcode_t op) if (op.l11) { // MFOCRF - const u32 n = cntlz32(op.crm) & 7; + const u32 n = utils::cntlz32(op.crm) & 7; const u32 p = n * 4; const u32 v = ppu.cr[p + 0] << 3 | ppu.cr[p + 1] << 2 | ppu.cr[p + 2] << 1 | ppu.cr[p + 3] << 0; @@ -3274,7 +3275,7 @@ bool ppu_interpreter::SLW(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::CNTLZW(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = cntlz32(u32(ppu.gpr[op.rs])); + ppu.gpr[op.ra] = utils::cntlz32(u32(ppu.gpr[op.rs])); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } @@ -3354,7 +3355,7 @@ bool ppu_interpreter::LWZUX(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::CNTLZD(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.ra] = cntlz64(ppu.gpr[op.rs]); + ppu.gpr[op.ra] = utils::cntlz64(ppu.gpr[op.rs]); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.ra], 0); return true; } @@ -3392,7 +3393,7 @@ bool ppu_interpreter::LVEWX(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::MULHD(ppu_thread& ppu, ppu_opcode_t op) { - ppu.gpr[op.rd] = mulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]); + ppu.gpr[op.rd] = utils::mulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]); if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.rd], 0); return true; } @@ -3516,7 +3517,7 @@ bool ppu_interpreter::MTOCRF(ppu_thread& ppu, ppu_opcode_t op) { // MTOCRF - const u32 n = cntlz32(op.crm) & 7; + const u32 n = utils::cntlz32(op.crm) & 7; const u32 p = n * 4; const u64 v = (s >> (p ^ 0x1c)) & 0xf; *(u32*)(u8*)(ppu.cr + p) = *(u32*)(s_table + v); @@ -3641,7 +3642,7 @@ bool ppu_interpreter::MULLD(ppu_thread& ppu, ppu_opcode_t op) ppu.gpr[op.rd] = (s64)(RA * RB); if (UNLIKELY(op.oe)) { - const s64 high = mulh64(RA, RB); + const s64 high = utils::mulh64(RA, RB); ppu_ov_set(ppu, high != s64(ppu.gpr[op.rd]) >> 63); } if (UNLIKELY(op.rc)) ppu_cr_set(ppu, 0, ppu.gpr[op.rd], 0); diff --git a/rpcs3/Emu/Cell/PPUOpcodes.h b/rpcs3/Emu/Cell/PPUOpcodes.h index b2c01d0470..397294d64a 100644 --- a/rpcs3/Emu/Cell/PPUOpcodes.h +++ b/rpcs3/Emu/Cell/PPUOpcodes.h @@ -1,6 +1,7 @@ #pragma once -#include "../../../Utilities/BitField.h" +#include "Utilities/BitField.h" +#include "Utilities/asm.h" template using ppu_bf_t = bf_t; @@ -63,7 +64,7 @@ union ppu_opcode_t inline u64 ppu_rotate_mask(u32 mb, u32 me) { - return ror64(~0ull << (63 ^ (me - mb)), mb); + return utils::ror64(~0ull << (63 ^ (me - mb)), mb); } inline u32 ppu_decode(u32 inst) diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 7458aa5bc6..03402567c0 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -7,6 +7,7 @@ #include "SPUThread.h" #include "SPUInterpreter.h" #include "Utilities/sysinfo.h" +#include "Utilities/asm.h" #include "PPUAnalyser.h" #include @@ -630,7 +631,7 @@ spu_function_t spu_recompiler::compile(std::vector&& func_rv) } // Determine which value will be duplicated at hole positions - const u32 w3 = func.at((j - start + ~::cntlz32(cmask, true) % 4 * 4) / 4 + 1); + const u32 w3 = func.at((j - start + ~utils::cntlz32(cmask, true) % 4 * 4) / 4 + 1); words.push_back(cmask & 1 ? func[(j - start + 0) / 4 + 1] : w3); words.push_back(cmask & 2 ? func[(j - start + 4) / 4 + 1] : w3); words.push_back(cmask & 4 ? func[(j - start + 8) / 4 + 1] : w3); @@ -3413,7 +3414,7 @@ void spu_recompiler::ROTQBYI(spu_opcode_t op) } else if (s == 4 || s == 8 || s == 12) { - c->pshufd(va, va, ::rol8(0xE4, s / 2)); + c->pshufd(va, va, utils::rol8(0xE4, s / 2)); } else if (utils::has_ssse3()) { diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index f56ea5c19e..6f5a6e0355 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -3,7 +3,7 @@ #include "Emu/System.h" #include "Utilities/JIT.h" #include "Utilities/sysinfo.h" - +#include "Utilities/asm.h" #include "SPUThread.h" #include "SPUInterpreter.h" @@ -209,7 +209,7 @@ bool spu_interpreter::ROT(SPUThread& spu, spu_opcode_t op) for (u32 i = 0; i < 4; i++) { - spu.gpr[op.rt]._u32[i] = rol32(a._u32[i], b._u32[i]); + spu.gpr[op.rt]._u32[i] = utils::rol32(a._u32[i], b._u32[i]); } return true; } @@ -260,7 +260,7 @@ bool spu_interpreter::ROTH(SPUThread& spu, spu_opcode_t op) for (u32 i = 0; i < 8; i++) { - spu.gpr[op.rt]._u16[i] = rol16(a._u16[i], b._u16[i]); + spu.gpr[op.rt]._u16[i] = utils::rol16(a._u16[i], b._u16[i]); } return true; } @@ -855,7 +855,7 @@ bool spu_interpreter::CLZ(SPUThread& spu, spu_opcode_t op) { for (u32 i = 0; i < 4; i++) { - spu.gpr[op.rt]._u32[i] = cntlz32(spu.gpr[op.ra]._u32[i]); + spu.gpr[op.rt]._u32[i] = utils::cntlz32(spu.gpr[op.ra]._u32[i]); } return true; } diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index 32e54fad4d..2376385b6d 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -4,6 +4,7 @@ #include "Utilities/cond.h" #include "Utilities/Thread.h" #include "Utilities/VirtualMemory.h" +#include "Utilities/asm.h" #include "Emu/CPU/CPUThread.h" #include "Emu/Cell/lv2/sys_memory.h" #include "Emu/RSX/GSRender.h" @@ -599,7 +600,7 @@ namespace vm const u32 size = ::align(orig_size, min_page_size); // Check alignment (it's page allocation, so passing small values there is just silly) - if (align < min_page_size || align != (0x80000000u >> cntlz32(align, true))) + if (align < min_page_size || align != (0x80000000u >> utils::cntlz32(align, true))) { fmt::throw_exception("Invalid alignment (size=0x%x, align=0x%x)" HERE, size, align); } @@ -852,7 +853,7 @@ namespace vm const u32 size = ::align(orig_size, 0x10000); // Check alignment - if (align < 0x10000 || align != (0x80000000u >> ::cntlz32(align, true))) + if (align < 0x10000 || align != (0x80000000u >> utils::cntlz32(align, true))) { fmt::throw_exception("Invalid alignment (size=0x%x, align=0x%x)" HERE, size, align); } diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 693a6d524a..221561336d 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -2,6 +2,7 @@ #include "../System.h" #include "Utilities/geometry.h" +#include "Utilities/asm.h" #include "gcm_enums.h" #include #include @@ -299,14 +300,14 @@ namespace rsx // static inline u32 ceil_log2(u32 value) { - return value <= 1 ? 0 : ::cntlz32((value - 1) << 1, true) ^ 31; + return value <= 1 ? 0 : utils::cntlz32((value - 1) << 1, true) ^ 31; } static inline u32 next_pow2(u32 x) { if (x <= 2) return x; - return static_cast((1ULL << 32) >> ::cntlz32(x - 1, true)); + return static_cast((1ULL << 32) >> utils::cntlz32(x - 1, true)); } // Returns interleaved bits of X|Y|Z used as Z-order curve indices