From 11ee1f3eb288650e5163885f177df66fd352e37d Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sun, 23 Jan 2022 15:22:00 +0300 Subject: [PATCH] Improve JIT profiling on Linux Add JIT object dumping functionality. Add source for objdump interception utility. --- Utilities/JIT.cpp | 42 +++++-- Utilities/JIT.h | 3 - objdump.cpp | 195 +++++++++++++++++++++++++++++++ rpcs3/Emu/Cell/SPURecompiler.cpp | 8 +- 4 files changed, 230 insertions(+), 18 deletions(-) create mode 100644 objdump.cpp diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index a6c8bb2015..855d8207b4 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -24,6 +24,37 @@ void jit_announce(uptr func, usz size, std::string_view name) return; } + if (!name.empty()) + { + // If directory ASMJIT doesn't exist, nothing will be written + static const fs::file s_asm = []() + { + fs::remove_all(fs::get_cache_dir() + "/ASMJIT/", false); + + return fs::file(fmt::format("%s/ASMJIT/.objects", fs::get_cache_dir()), fs::rewrite + fs::append); + }(); + + if (s_asm) + { + // Dump object: addr + size + bytes + s_asm.write(fmt::format("%s%s%s", + std::string_view(reinterpret_cast(&func), 8), + std::string_view(reinterpret_cast(&size), 8), + std::string_view(reinterpret_cast(func), size))); + } + + if (s_asm && name[0] != '_') + { + // Save some objects separately + fs::file dump(fmt::format("%s/ASMJIT/%s", fs::get_cache_dir(), name), fs::rewrite); + + if (dump) + { + dump.write(reinterpret_cast(func), size); + } + } + } + #ifdef __linux__ static const fs::file s_map(fmt::format("/tmp/perf-%d.map", getpid()), fs::rewrite + fs::append); @@ -146,17 +177,6 @@ void* jit_runtime_base::_add(asmjit::CodeHolder* code) noexcept } } - if (!dump_name.empty()) - { - // If directory ASMJIT doesn't exist, nothing will be written - fs::file dump(fmt::format("%s/ASMJIT/%s", fs::get_cache_dir(), dump_name), fs::rewrite); - - if (dump) - { - dump.write(p, codeSize); - } - } - return p; } diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 
d18901dd1c..3e1a1f8cae 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -77,8 +77,6 @@ struct jit_runtime_base const asmjit::Environment& environment() const noexcept; void* _add(asmjit::CodeHolder* code) noexcept; virtual uchar* _alloc(usz size, usz align) noexcept = 0; - - std::string_view dump_name; }; // ASMJIT runtime for emitting code in a single 2G region @@ -257,7 +255,6 @@ inline FT build_function_asm(std::string_view name, F&& builder) builder(compiler, args); } - rt.dump_name = name; const auto result = rt._add(&code); jit_announce(result, code.codeSize(), name); return reinterpret_cast(uptr(result)); diff --git a/objdump.cpp b/objdump.cpp new file mode 100644 index 0000000000..eb0200221a --- /dev/null +++ b/objdump.cpp @@ -0,0 +1,195 @@ +// objdump injection utility for Linux perf tools. +// Profiling JIT generated code is always problematic. +// On Linux, perf annotation tools do not automatically +// disassemble runtime-generated code. +// However, it's possible to override objdump utility +// which is used to disassemble executables. +// This tool intercepts objdump commands, and if they +// correspond to JIT generated objects in RPCS3, +// it should be able to correctly disassemble them. +// Usage: +// 1. Make sure ~/.cache/rpcs3/ASMJIT directory exists. +// 2. Build this utility, for example: +// g++-11 objdump.cpp -o objdump +// 3. Run perf, for example: +// perf record -b -p `pgrep rpcs3` +// 4. 
// Specify --objdump override, for example:
//    perf report --objdump=./objdump --gtk

#include <cerrno>
#include <charconv>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>

#include <fcntl.h>
#include <sys/sendfile.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

// Format `value` as lowercase hexadecimal without leading zeros.
// The result is prefixed with "0x" unless `prfx` is false.
std::string to_hex(std::uint64_t value, bool prfx = true)
{
	// 16 digits + "0x" + terminating NUL fit into 20 zero-initialized bytes
	char buf[20]{}, *ptr = buf + 19;

	do
	{
		*--ptr = "0123456789abcdef"[value % 16];
		value /= 16;
	}
	while (value);

	if (!prfx)
		return ptr;

	*--ptr = 'x';
	*--ptr = '0';
	return ptr;
}

// Write the whole buffer to `fd`, retrying after EINTR and short writes.
// Returns false on any other write error.
static bool write_all(int fd, const char* data, std::size_t size)
{
	while (size)
	{
		const ssize_t done = write(fd, data, size);

		if (done < 0)
		{
			if (errno == EINTR)
				continue;

			return false;
		}

		data += done;
		size -= static_cast<std::size_t>(done);
	}

	return true;
}

// Copy `size` bytes starting at offset `off` of `src_fd` into `path`
// (created or truncated, mode 0644). Returns false if the destination cannot
// be opened/closed or the copy fails; a source that ends early is not an error.
static bool extract_object(int src_fd, std::uint64_t off, std::uint64_t size, const char* path)
{
	const int out_fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);

	if (out_fd < 0)
		return false;

	off_t pos = static_cast<off_t>(off);
	bool ok = true;

	// sendfile may transfer fewer bytes than requested, so loop until done
	while (size)
	{
		const ssize_t sent = sendfile(out_fd, src_fd, &pos, size);

		if (sent < 0)
		{
			if (errno == EINTR)
				continue;

			ok = false;
			break;
		}

		if (sent == 0)
			break; // Source ended early: keep what was copied

		size -= static_cast<std::uint64_t>(sent);
	}

	const bool closed = close(out_fd) == 0;
	return ok && closed;
}

// Write one line of objdump output to stdout. If the line starts with a
// lowercase hex number smaller than `addr`, that number is treated as an
// address truncated to 32 bits and is replaced by the full 64-bit address.
static bool emit_line(std::string& line, std::uint64_t addr)
{
	const char first = line.empty() ? '\0' : line[0];

	if ((first >= '0' && first <= '9') || (first >= 'a' && first <= 'f'))
	{
		std::uint64_t parsed = UINT64_MAX;
		const auto cvt = std::from_chars(line.data(), line.data() + line.size(), parsed, 16);

		if (cvt.ec == std::errc() && parsed < addr)
		{
			const std::uint64_t low = static_cast<std::uint32_t>(addr);
			std::uint64_t fixed = (parsed - low) + addr;

			// The low 32 bits wrapped around inside the object
			if (parsed < low)
				fixed += std::uint64_t{1} << 32;

			const std::string fix = to_hex(fixed, false);

			if (!write_all(STDOUT_FILENO, fix.data(), fix.size()))
				return false;

			// Drop the broken address, keep the rest of the line
			line.erase(0, static_cast<std::size_t>(cvt.ptr - line.data()));
		}
	}

	return write_all(STDOUT_FILENO, line.data(), line.size());
}

// Read objdump output from `in_fd` until EOF and forward it line by line to
// stdout through emit_line(). NUL bytes are dropped. Returns false on I/O error.
static bool filter_output(int in_fd, std::uint64_t addr)
{
	char chunk[4096];
	std::string line;

	while (true)
	{
		const ssize_t got = read(in_fd, chunk, sizeof(chunk));

		if (got < 0)
		{
			if (errno == EINTR)
				continue;

			return false;
		}

		if (got == 0)
			break;

		for (ssize_t i = 0; i < got; i++)
		{
			const char c = chunk[i];

			if (!c)
				continue;

			line += c;

			if (c == '\n')
			{
				if (!emit_line(line, addr))
					return false;

				line.clear();
			}
		}
	}

	// Do not lose a final line that is not newline-terminated
	return line.empty() || emit_line(line, addr);
}

// Entry point: rewrites the objdump command line when --start-address matches
// an object recorded in <cache>/rpcs3/ASMJIT/.objects, then executes
// /usr/bin/objdump (optionally piping its output through filter_output()).
// Returns 1 if .objects cannot be opened or an object cannot be extracted.
int main(int argc, char* argv[])
{
	// argv[0] is overwritten below, so it must exist
	if (argc < 1)
		return 1;

	// Resolve the cache root.
	// NOTE(review): presumably mirrors how RPCS3 resolves fs::get_cache_dir() - confirm
	std::string home;

	if (const char* cache = ::getenv("XDG_CACHE_HOME"))
		home = cache;
	else if (const char* config = ::getenv("XDG_CONFIG_HOME"))
		home = config;
	else if (const char* user = ::getenv("HOME"))
		home = user, home += "/.cache";

	// Get cache path
	home += "/rpcs3/ASMJIT/";

	// Get object index
	const int fd = open((home + ".objects").c_str(), O_RDONLY);

	if (fd < 0)
		return 1;

	// Addr -> offset;size in .objects
	std::unordered_map<std::uint64_t, std::pair<std::uint64_t, std::uint64_t>> objects;

	while (true)
	{
		// Record layout: 8-byte address, 8-byte object size, then the object bytes
		std::uint64_t ptr, size;

		if (read(fd, &ptr, 8) != 8 || read(fd, &size, 8) != 8)
			break;

		const std::uint64_t off = lseek(fd, 0, SEEK_CUR);
		objects.emplace(ptr, std::make_pair(off, size));
		lseek(fd, size, SEEK_CUR);
	}

	static constexpr const char* tmp_path = "/tmp/rpcs3.objdump.tmp";
	static constexpr std::string_view start_prefix = "--start-address=0x";
	static constexpr std::string_view stop_prefix = "--stop-address=0x";

	std::vector<std::string> args;
	args.reserve(static_cast<std::size_t>(argc) + 5);

	std::uint64_t addr = 0;
	auto obj = objects.find(addr);

	for (int i = 0; i < argc; i++)
	{
		// Replace args
		std::string arg = argv[i];

		if (arg.compare(0, start_prefix.size(), start_prefix) == 0)
		{
			std::from_chars(arg.data() + start_prefix.size(), arg.data() + arg.size(), addr, 16);
			obj = objects.find(addr);

			if (obj != objects.end())
			{
				// Extract object into a tmp file
				if (!extract_object(fd, obj->second.first, obj->second.second, tmp_path))
				{
					std::perror("objdump wrapper: cannot extract JIT object");
					close(fd);
					return 1;
				}

				args.emplace_back("--adjust-vma=" + to_hex(addr));
				continue;
			}
		}

		const bool known = obj != objects.end();

		// The extracted file is disassembled as a whole
		if (known && arg.compare(0, stop_prefix.size(), stop_prefix) == 0)
		{
			continue;
		}

		if (known && arg == "-d")
		{
			arg = "-D";
		}

		if (arg == "-l")
		{
			arg = "-Mintel,x86-64";
		}

		args.emplace_back(std::move(arg));
	}

	// Do not leak the index descriptor into objdump
	close(fd);

	const bool is_jit = obj != objects.end();

	if (is_jit)
	{
		// Replace the last argument (presumably the input file passed by perf)
		// with the extracted raw binary; never remove argv[0]
		if (args.size() > 1)
			args.pop_back();

		args.emplace_back("-b");
		args.emplace_back("binary");
		args.emplace_back("-m");
		args.emplace_back("i386");
		args.emplace_back(tmp_path);
	}

	if (args.empty())
		args.emplace_back();

	args[0] = "/usr/bin/objdump";

	std::vector<char*> new_argv;
	new_argv.reserve(args.size() + 1);

	for (auto& arg : args)
	{
		new_argv.push_back(arg.data());
	}

	new_argv.push_back(nullptr);

	int fds[2];

	if (is_jit && pipe(fds) == 0)
	{
		const pid_t pid = fork();

		// objdump is broken; fix address truncation
		if (pid > 0)
		{
			// Parent: filter the child's output
			close(fds[1]);
			const bool ok = filter_output(fds[0], addr);
			close(fds[0]);

			// Reap objdump and forward its exit status
			int status = 0;
			pid_t reaped;

			while ((reaped = waitpid(pid, &status, 0)) == -1 && errno == EINTR) {}

			if (!ok || reaped != pid)
				return 1;

			return WIFEXITED(status) ? WEXITSTATUS(status) : 1;
		}

		if (pid == 0)
		{
			// Child: send objdump's stdout into the pipe
			int rc;

			while ((rc = dup2(fds[1], STDOUT_FILENO)) == -1 && errno == EINTR) {}

			close(fds[1]);
			close(fds[0]);

			if (rc == -1)
				return 1;

			// Fallthrough to execv
		}
		else
		{
			// fork failed: run objdump unfiltered instead of writing into a dead pipe
			close(fds[1]);
			close(fds[0]);
		}
	}

	return execv(new_argv[0], new_argv.data());
}

// Patch residue preserved verbatim (beginning of the next file's diff, continued on the following line):
// diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 64ba48884f..445b502d4f 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -3487,7 +3487,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator #endif // Get function chunk name - const std::string name = fmt::format("spu-cx%05x-%s", addr, fmt::base57(be_t{m_hash_start})); + const std::string name = fmt::format("__spu-cx%05x-%s", addr,
fmt::base57(be_t{m_hash_start})); llvm::Function* result = llvm::cast(m_module->getOrInsertFunction(name, chunk_type).getCallee()); // Set parameters @@ -3512,7 +3512,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // 5. $3 const auto func_type = get_ftype(); - const std::string fname = fmt::format("spu-fx%05x-%s", addr, fmt::base57(be_t{m_hash_start})); + const std::string fname = fmt::format("__spu-fx%05x-%s", addr, fmt::base57(be_t{m_hash_start})); llvm::Function* fn = llvm::cast(m_module->getOrInsertFunction(fname, func_type).getCallee()); fn->setLinkage(llvm::GlobalValue::InternalLinkage); @@ -4381,7 +4381,7 @@ public: sha1_finish(&ctx, output); m_hash.clear(); - fmt::append(m_hash, "spu-0x%05x-%s", func.entry_point, fmt::base57(output)); + fmt::append(m_hash, "__spu-0x%05x-%s", func.entry_point, fmt::base57(output)); be_t hash_start; std::memcpy(&hash_start, output, sizeof(hash_start)); @@ -4649,7 +4649,7 @@ public: m_ir->CreateUnreachable(); } - m_dispatch = cast(_module->getOrInsertFunction("spu-null", entry_chunk->chunk->getFunctionType()).getCallee()); + m_dispatch = cast(_module->getOrInsertFunction("__spu-null", entry_chunk->chunk->getFunctionType()).getCallee()); m_dispatch->setLinkage(llvm::GlobalValue::InternalLinkage); m_dispatch->setCallingConv(entry_chunk->chunk->getCallingConv()); set_function(m_dispatch);