1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 02:32:36 +01:00

Improve JIT profiling on Linux

Add JIT object dumping functionality.
Add source for objdump interception utility.
This commit is contained in:
Nekotekina 2022-01-23 15:22:00 +03:00
parent 14951d8713
commit 11ee1f3eb2
4 changed files with 230 additions and 18 deletions

View File

@ -24,6 +24,37 @@ void jit_announce(uptr func, usz size, std::string_view name)
return; return;
} }
if (!name.empty())
{
// If directory ASMJIT doesn't exist, nothing will be written
static const fs::file s_asm = []()
{
fs::remove_all(fs::get_cache_dir() + "/ASMJIT/", false);
return fs::file(fmt::format("%s/ASMJIT/.objects", fs::get_cache_dir()), fs::rewrite + fs::append);
}();
if (s_asm)
{
// Dump object: addr + size + bytes
s_asm.write(fmt::format("%s%s%s",
std::string_view(reinterpret_cast<char*>(&func), 8),
std::string_view(reinterpret_cast<char*>(&size), 8),
std::string_view(reinterpret_cast<char*>(func), size)));
}
if (s_asm && name[0] != '_')
{
// Save some objects separately
fs::file dump(fmt::format("%s/ASMJIT/%s", fs::get_cache_dir(), name), fs::rewrite);
if (dump)
{
dump.write(reinterpret_cast<uchar*>(func), size);
}
}
}
#ifdef __linux__ #ifdef __linux__
static const fs::file s_map(fmt::format("/tmp/perf-%d.map", getpid()), fs::rewrite + fs::append); static const fs::file s_map(fmt::format("/tmp/perf-%d.map", getpid()), fs::rewrite + fs::append);
@ -146,17 +177,6 @@ void* jit_runtime_base::_add(asmjit::CodeHolder* code) noexcept
} }
} }
if (!dump_name.empty())
{
// If directory ASMJIT doesn't exist, nothing will be written
fs::file dump(fmt::format("%s/ASMJIT/%s", fs::get_cache_dir(), dump_name), fs::rewrite);
if (dump)
{
dump.write(p, codeSize);
}
}
return p; return p;
} }

View File

@ -77,8 +77,6 @@ struct jit_runtime_base
const asmjit::Environment& environment() const noexcept; const asmjit::Environment& environment() const noexcept;
void* _add(asmjit::CodeHolder* code) noexcept; void* _add(asmjit::CodeHolder* code) noexcept;
virtual uchar* _alloc(usz size, usz align) noexcept = 0; virtual uchar* _alloc(usz size, usz align) noexcept = 0;
std::string_view dump_name;
}; };
// ASMJIT runtime for emitting code in a single 2G region // ASMJIT runtime for emitting code in a single 2G region
@ -257,7 +255,6 @@ inline FT build_function_asm(std::string_view name, F&& builder)
builder(compiler, args); builder(compiler, args);
} }
rt.dump_name = name;
const auto result = rt._add(&code); const auto result = rt._add(&code);
jit_announce(result, code.codeSize(), name); jit_announce(result, code.codeSize(), name);
return reinterpret_cast<FT>(uptr(result)); return reinterpret_cast<FT>(uptr(result));

195
objdump.cpp Normal file
View File

@ -0,0 +1,195 @@
// objdump injection utility for Linux perf tools.
// Profiling JIT generated code is always problematic.
// On Linux, perf annotation tools do not automatically
// disassemble runtime-generated code.
// However, it's possible to override objdump utility
// which is used to disassemble executables.
// This tool intercepts objdump commands, and if they
// correspond to JIT generated objects in RPCS3,
// it should be able to correctly disassemble them.
// Usage:
// 1. Make sure ~/.cache/rpcs3/ASMJIT directory exists.
// 2. Build this utility, for example:
// g++-11 objdump.cpp -o objdump
// 3. Run perf, for example:
// perf record -b -p `pgrep rpcs3`
// 4. Specify --objdump override, for example:
// perf report --objdump=./objdump --gtk
#include <cerrno>
#include <cstring>
#include <cstdio>
#include <cstdint>
#include <cstdlib>
#include <fcntl.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/wait.h>
#include <sys/sendfile.h>
#include <spawn.h>
#include <unordered_map>
#include <string>
#include <vector>
#include <charconv>
// Render `value` as lowercase hexadecimal without leading zeros
// (zero becomes a single "0"). When `prfx` is true the result is
// prefixed with "0x".
std::string to_hex(std::uint64_t value, bool prfx = true)
{
	static constexpr char alphabet[] = "0123456789abcdef";

	// A 64-bit value never needs more than 16 hex digits.
	// Digits are collected least significant first.
	char digits[16];
	std::size_t count = 0;

	do
	{
		digits[count++] = alphabet[value & 0xf];
		value >>= 4;
	}
	while (value != 0);

	std::string out;

	if (prfx)
	{
		out = "0x";
	}

	// Emit the collected digits in most-significant-first order
	while (count != 0)
	{
		out += digits[--count];
	}

	return out;
}
// Write the whole buffer to `fd`, retrying after partial writes and EINTR.
// Returns false if the data could not be written completely.
static bool write_all(int fd, const char* data, std::size_t size)
{
	while (size > 0)
	{
		const ssize_t done = write(fd, data, size);

		if (done < 0 && errno == EINTR)
		{
			continue;
		}

		if (done <= 0)
		{
			return false;
		}

		data += done;
		size -= static_cast<std::size_t>(done);
	}

	return true;
}

// Drop-in replacement for objdump.
// The command line is forwarded to /usr/bin/objdump. If --start-address
// matches an object recorded in <cache>/rpcs3/ASMJIT/.objects, that object
// is extracted into a temporary raw binary, the arguments are rewritten to
// disassemble that file instead, and the addresses printed by objdump are
// patched back to full 64-bit values.
// Returns 1 if the .objects file cannot be opened or a required system
// call fails; otherwise the process is replaced by objdump (or, for a JIT
// object, the filtering parent returns 0 once objdump's output ends).
int main(int argc, char* argv[])
{
	// Resolve the cache directory from the environment
	std::string home;

	if (const char* cache = ::getenv("XDG_CACHE_HOME"))
	{
		home = cache;
	}
	else if (const char* config = ::getenv("XDG_CONFIG_HOME"))
	{
		home = config;
	}
	else if (const char* user = ::getenv("HOME"))
	{
		home = user;
		home += "/.cache";
	}

	// Get cache path
	home += "/rpcs3/ASMJIT/";

	// Open the JIT object dump
	const int fd = open((home + ".objects").c_str(), O_RDONLY);

	if (fd < 0)
	{
		return 1;
	}

	// Addr -> offset;size in .objects
	std::unordered_map<std::uint64_t, std::pair<std::uint64_t, std::uint64_t>> objects;

	while (true)
	{
		// Each record is: 8-byte address, 8-byte object size, then `size` bytes of code
		std::uint64_t ptr, size;

		if (read(fd, &ptr, 8) != 8 || read(fd, &size, 8) != 8)
		{
			break;
		}

		// Remember where the object bytes start, then skip over them
		const std::uint64_t off = lseek(fd, 0, SEEK_CUR);
		objects.emplace(ptr, std::make_pair(off, size));
		lseek(fd, size, SEEK_CUR);
	}

	std::vector<std::string> args;
	std::uint64_t addr = 0;

	// Record for the current `addr`, or end() if it is not a known JIT object
	auto obj = objects.find(addr);

	for (int i = 0; i < argc; i++)
	{
		// Replace args
		std::string arg = argv[i];

		if (arg.rfind("--start-address=0x", 0) == 0)
		{
			std::from_chars(arg.data() + strlen("--start-address=0x"), arg.data() + arg.size(), addr, 16);
			obj = objects.find(addr);

			if (obj != objects.end())
			{
				// Extract object into a tmp file
				if (lseek(fd, obj->second.first, SEEK_SET) < 0)
				{
					return 1;
				}

				const int fd2 = open("/tmp/rpcs3.objdump.tmp", O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);

				if (fd2 < 0)
				{
					return 1;
				}

				// sendfile may transfer less than requested, so loop until done.
				// A short object (EOF in .objects) is still passed on as-is.
				std::uint64_t left = obj->second.second;
				bool failed = false;

				while (left > 0)
				{
					const ssize_t sent = sendfile(fd2, fd, nullptr, left);

					if (sent < 0 && errno == EINTR)
					{
						continue;
					}

					if (sent < 0)
					{
						failed = true;
						break;
					}

					if (sent == 0)
					{
						break;
					}

					left -= static_cast<std::uint64_t>(sent);
				}

				close(fd2);

				if (failed)
				{
					return 1;
				}

				// The raw binary has no load address; tell objdump where it lived
				args.emplace_back("--adjust-vma=" + to_hex(addr));
				continue;
			}
		}

		const bool jit_arg = obj != objects.end();

		if (jit_arg && arg.rfind("--stop-address=0x", 0) == 0)
		{
			// The whole extracted object is disassembled
			continue;
		}

		if (jit_arg && arg == "-d")
		{
			// A raw binary has no code sections, so all contents must be disassembled
			arg = "-D";
		}

		if (arg == "-l")
		{
			arg = "-Mintel,x86-64";
		}

		args.emplace_back(std::move(arg));
	}

	// The object dump is no longer needed; do not leak it into objdump
	close(fd);

	const bool is_jit = obj != objects.end();

	if (is_jit)
	{
		// Replace the trailing file name with the extracted raw binary
		if (!args.empty())
		{
			args.pop_back();
		}

		args.emplace_back("-b");
		args.emplace_back("binary");
		args.emplace_back("-m");
		args.emplace_back("i386");
		args.emplace_back("/tmp/rpcs3.objdump.tmp");
	}

	if (args.empty())
	{
		// Guard against an empty argv (argc == 0)
		args.emplace_back();
	}

	args[0] = "/usr/bin/objdump";

	std::vector<char*> new_argv;

	for (auto& arg : args)
	{
		new_argv.push_back(arg.data());
	}

	new_argv.push_back(nullptr);

	if (is_jit)
	{
		int fds[2];

		if (pipe(fds) != 0)
		{
			return 1;
		}

		// objdump is broken; fix address truncation
		const pid_t pid = fork();

		if (pid < 0)
		{
			return 1;
		}

		if (pid > 0)
		{
			// Parent: filter objdump output line by line
			close(fds[1]);

			// Patch the leading address of a line (if any) and forward it to stdout
			const auto emit_line = [addr](std::string& line) -> bool
			{
				// Replace broken address
				if ((line[0] >= '0' && line[0] <= '9') || (line[0] >= 'a' && line[0] <= 'f'))
				{
					std::uint64_t ptr = -1;
					const auto cvt = std::from_chars(line.data(), line.data() + line.size(), ptr, 16);

					if (cvt.ec == std::errc() && ptr < addr)
					{
						const auto fix = to_hex((ptr - std::uint32_t(addr)) + addr, false);
						line.replace(0, cvt.ptr - line.data(), fix);
					}
				}

				const bool ok = write_all(STDOUT_FILENO, line.data(), line.size());
				line.clear();
				return ok;
			};

			std::string buf;

			while (true)
			{
				char c = 0;
				const ssize_t got = read(fds[0], &c, 1);

				if (got < 0 && errno == EINTR)
				{
					continue;
				}

				if (got <= 0)
				{
					// EOF or unrecoverable error
					break;
				}

				if (!c)
				{
					// NUL bytes are dropped from the output
					continue;
				}

				buf += c;

				if (c == '\n' && !emit_line(buf))
				{
					return 1;
				}
			}

			// Do not lose a final line that lacks a trailing newline
			if (!buf.empty() && !emit_line(buf))
			{
				return 1;
			}

			return 0;
		}

		// Child: redirect stdout into the pipe, then become objdump
		while ((dup2(fds[1], STDOUT_FILENO) == -1) && (errno == EINTR)) {}
		close(fds[1]);
		close(fds[0]);
	}

	return execv(new_argv[0], new_argv.data());
}

View File

@ -3487,7 +3487,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
#endif #endif
// Get function chunk name // Get function chunk name
const std::string name = fmt::format("spu-cx%05x-%s", addr, fmt::base57(be_t<u64>{m_hash_start})); const std::string name = fmt::format("__spu-cx%05x-%s", addr, fmt::base57(be_t<u64>{m_hash_start}));
llvm::Function* result = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(name, chunk_type).getCallee()); llvm::Function* result = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(name, chunk_type).getCallee());
// Set parameters // Set parameters
@ -3512,7 +3512,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// 5. $3 // 5. $3
const auto func_type = get_ftype<u32[4], u8*, u8*, u32, u32[4], u32[4]>(); const auto func_type = get_ftype<u32[4], u8*, u8*, u32, u32[4], u32[4]>();
const std::string fname = fmt::format("spu-fx%05x-%s", addr, fmt::base57(be_t<u64>{m_hash_start})); const std::string fname = fmt::format("__spu-fx%05x-%s", addr, fmt::base57(be_t<u64>{m_hash_start}));
llvm::Function* fn = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(fname, func_type).getCallee()); llvm::Function* fn = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(fname, func_type).getCallee());
fn->setLinkage(llvm::GlobalValue::InternalLinkage); fn->setLinkage(llvm::GlobalValue::InternalLinkage);
@ -4381,7 +4381,7 @@ public:
sha1_finish(&ctx, output); sha1_finish(&ctx, output);
m_hash.clear(); m_hash.clear();
fmt::append(m_hash, "spu-0x%05x-%s", func.entry_point, fmt::base57(output)); fmt::append(m_hash, "__spu-0x%05x-%s", func.entry_point, fmt::base57(output));
be_t<u64> hash_start; be_t<u64> hash_start;
std::memcpy(&hash_start, output, sizeof(hash_start)); std::memcpy(&hash_start, output, sizeof(hash_start));
@ -4649,7 +4649,7 @@ public:
m_ir->CreateUnreachable(); m_ir->CreateUnreachable();
} }
m_dispatch = cast<Function>(_module->getOrInsertFunction("spu-null", entry_chunk->chunk->getFunctionType()).getCallee()); m_dispatch = cast<Function>(_module->getOrInsertFunction("__spu-null", entry_chunk->chunk->getFunctionType()).getCallee());
m_dispatch->setLinkage(llvm::GlobalValue::InternalLinkage); m_dispatch->setLinkage(llvm::GlobalValue::InternalLinkage);
m_dispatch->setCallingConv(entry_chunk->chunk->getCallingConv()); m_dispatch->setCallingConv(entry_chunk->chunk->getCallingConv());
set_function(m_dispatch); set_function(m_dispatch);