mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 02:32:36 +01:00
Improve JIT profiling on Linux
Add JIT object dumping functionality. Add source for objdump interception utility.
This commit is contained in:
parent
14951d8713
commit
11ee1f3eb2
@ -24,6 +24,37 @@ void jit_announce(uptr func, usz size, std::string_view name)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!name.empty())
|
||||||
|
{
|
||||||
|
// If directory ASMJIT doesn't exist, nothing will be written
|
||||||
|
static const fs::file s_asm = []()
|
||||||
|
{
|
||||||
|
fs::remove_all(fs::get_cache_dir() + "/ASMJIT/", false);
|
||||||
|
|
||||||
|
return fs::file(fmt::format("%s/ASMJIT/.objects", fs::get_cache_dir()), fs::rewrite + fs::append);
|
||||||
|
}();
|
||||||
|
|
||||||
|
if (s_asm)
|
||||||
|
{
|
||||||
|
// Dump object: addr + size + bytes
|
||||||
|
s_asm.write(fmt::format("%s%s%s",
|
||||||
|
std::string_view(reinterpret_cast<char*>(&func), 8),
|
||||||
|
std::string_view(reinterpret_cast<char*>(&size), 8),
|
||||||
|
std::string_view(reinterpret_cast<char*>(func), size)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s_asm && name[0] != '_')
|
||||||
|
{
|
||||||
|
// Save some objects separately
|
||||||
|
fs::file dump(fmt::format("%s/ASMJIT/%s", fs::get_cache_dir(), name), fs::rewrite);
|
||||||
|
|
||||||
|
if (dump)
|
||||||
|
{
|
||||||
|
dump.write(reinterpret_cast<uchar*>(func), size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
static const fs::file s_map(fmt::format("/tmp/perf-%d.map", getpid()), fs::rewrite + fs::append);
|
static const fs::file s_map(fmt::format("/tmp/perf-%d.map", getpid()), fs::rewrite + fs::append);
|
||||||
|
|
||||||
@ -146,17 +177,6 @@ void* jit_runtime_base::_add(asmjit::CodeHolder* code) noexcept
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!dump_name.empty())
|
|
||||||
{
|
|
||||||
// If directory ASMJIT doesn't exist, nothing will be written
|
|
||||||
fs::file dump(fmt::format("%s/ASMJIT/%s", fs::get_cache_dir(), dump_name), fs::rewrite);
|
|
||||||
|
|
||||||
if (dump)
|
|
||||||
{
|
|
||||||
dump.write(p, codeSize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,8 +77,6 @@ struct jit_runtime_base
|
|||||||
const asmjit::Environment& environment() const noexcept;
|
const asmjit::Environment& environment() const noexcept;
|
||||||
void* _add(asmjit::CodeHolder* code) noexcept;
|
void* _add(asmjit::CodeHolder* code) noexcept;
|
||||||
virtual uchar* _alloc(usz size, usz align) noexcept = 0;
|
virtual uchar* _alloc(usz size, usz align) noexcept = 0;
|
||||||
|
|
||||||
std::string_view dump_name;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// ASMJIT runtime for emitting code in a single 2G region
|
// ASMJIT runtime for emitting code in a single 2G region
|
||||||
@ -257,7 +255,6 @@ inline FT build_function_asm(std::string_view name, F&& builder)
|
|||||||
builder(compiler, args);
|
builder(compiler, args);
|
||||||
}
|
}
|
||||||
|
|
||||||
rt.dump_name = name;
|
|
||||||
const auto result = rt._add(&code);
|
const auto result = rt._add(&code);
|
||||||
jit_announce(result, code.codeSize(), name);
|
jit_announce(result, code.codeSize(), name);
|
||||||
return reinterpret_cast<FT>(uptr(result));
|
return reinterpret_cast<FT>(uptr(result));
|
||||||
|
195
objdump.cpp
Normal file
195
objdump.cpp
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
// objdump injection utility for Linux perf tools.
|
||||||
|
// Profiling JIT generated code is always problematic.
|
||||||
|
// On Linux, perf annotation tools do not automatically
|
||||||
|
// disassemble runtime-generated code.
|
||||||
|
// However, it's possible to override objdump utility
|
||||||
|
// which is used to disassemble executables.
|
||||||
|
// This tool intercepts objdump commands, and if they
|
||||||
|
// correspond to JIT generated objects in RPCS3,
|
||||||
|
// it should be able to correctly disassemble them.
|
||||||
|
// Usage:
|
||||||
|
// 1. Make sure ~/.cache/rpcs3/ASMJIT directory exists.
|
||||||
|
// 2. Build this utility, for example:
|
||||||
|
// g++-11 objdump.cpp -o objdump
|
||||||
|
// 3. Run perf, for example:
|
||||||
|
// perf record -b -p `pgrep rpcs3`
|
||||||
|
// 4. Specify --objdump override, for example:
|
||||||
|
// perf report --objdump=./objdump --gtk
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/file.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <sys/sendfile.h>
|
||||||
|
#include <spawn.h>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <charconv>
|
||||||
|
|
||||||
|
// Render `value` as lowercase hexadecimal without leading zeros.
// A "0x" prefix is prepended unless `prfx` is false. Zero is rendered as "0".
std::string to_hex(std::uint64_t value, bool prfx = true)
{
	static constexpr char digits[] = "0123456789abcdef";

	// Built least-significant digit first, then flipped on return
	std::string reversed;

	do
	{
		reversed.push_back(digits[value & 0xf]);
		value >>= 4;
	}
	while (value != 0);

	if (prfx)
	{
		// Stored backwards so that the final reversal yields "0x"
		reversed += "x0";
	}

	return std::string(reversed.rbegin(), reversed.rend());
}
|
||||||
|
|
||||||
|
int main(int argc, char* argv[])
|
||||||
|
{
|
||||||
|
std::string home;
|
||||||
|
|
||||||
|
if (const char* d = ::getenv("XDG_CACHE_HOME"))
|
||||||
|
home = d;
|
||||||
|
else if (const char* d = ::getenv("XDG_CONFIG_HOME"))
|
||||||
|
home = d;
|
||||||
|
else if (const char* d = ::getenv("HOME"))
|
||||||
|
home = d, home += "/.cache";
|
||||||
|
|
||||||
|
// Get cache path
|
||||||
|
home += "/rpcs3/ASMJIT/";
|
||||||
|
|
||||||
|
// Get object names
|
||||||
|
int fd = open((home + ".objects").c_str(), O_RDONLY);
|
||||||
|
|
||||||
|
if (fd < 0)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
// Addr -> offset;size in .objects
|
||||||
|
std::unordered_map<std::uint64_t, std::pair<std::uint64_t, std::uint64_t>> objects;
|
||||||
|
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
// Size is name size, not object size
|
||||||
|
std::uint64_t ptr, size;
|
||||||
|
if (read(fd, &ptr, 8) != 8 || read(fd, &size, 8) != 8)
|
||||||
|
break;
|
||||||
|
std::uint64_t off = lseek(fd, 0, SEEK_CUR);
|
||||||
|
objects.emplace(ptr, std::make_pair(off, size));
|
||||||
|
lseek(fd, size, SEEK_CUR);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::string> args;
|
||||||
|
|
||||||
|
std::uint64_t addr = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < argc; i++)
|
||||||
|
{
|
||||||
|
// Replace args
|
||||||
|
std::string arg = argv[i];
|
||||||
|
|
||||||
|
if (arg.find("--start-address=0x") == 0)
|
||||||
|
{
|
||||||
|
std::from_chars(arg.data() + strlen("--start-address=0x"), arg.data() + arg.size(), addr, 16);
|
||||||
|
|
||||||
|
if (objects.count(addr))
|
||||||
|
{
|
||||||
|
// Extract object into a tmp file
|
||||||
|
lseek(fd, objects[addr].first, SEEK_SET);
|
||||||
|
const int fd2 = open("/tmp/rpcs3.objdump.tmp", O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
|
||||||
|
sendfile(fd2, fd, nullptr, objects[addr].second);
|
||||||
|
close(fd2);
|
||||||
|
|
||||||
|
args.emplace_back("--adjust-vma=" + to_hex(addr));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (objects.count(addr) && arg.find("--stop-address=0x") == 0)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (objects.count(addr) && arg == "-d")
|
||||||
|
{
|
||||||
|
arg = "-D";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (arg == "-l")
|
||||||
|
{
|
||||||
|
arg = "-Mintel,x86-64";
|
||||||
|
}
|
||||||
|
|
||||||
|
args.emplace_back(std::move(arg));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (objects.count(addr))
|
||||||
|
{
|
||||||
|
args.pop_back();
|
||||||
|
args.emplace_back("-b");
|
||||||
|
args.emplace_back("binary");
|
||||||
|
args.emplace_back("-m");
|
||||||
|
args.emplace_back("i386");
|
||||||
|
args.emplace_back("/tmp/rpcs3.objdump.tmp");
|
||||||
|
}
|
||||||
|
|
||||||
|
args[0] = "/usr/bin/objdump";
|
||||||
|
|
||||||
|
std::vector<char*> new_argv;
|
||||||
|
|
||||||
|
for (auto& arg : args)
|
||||||
|
{
|
||||||
|
new_argv.push_back(arg.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
new_argv.push_back(nullptr);
|
||||||
|
|
||||||
|
if (objects.count(addr))
|
||||||
|
{
|
||||||
|
int fds[2];
|
||||||
|
pipe(fds);
|
||||||
|
|
||||||
|
// objdump is broken; fix address truncation
|
||||||
|
if (fork() > 0)
|
||||||
|
{
|
||||||
|
close(fds[1]);
|
||||||
|
char c = 0;
|
||||||
|
std::string buf;
|
||||||
|
|
||||||
|
while (read(fds[0], &c, 1) != 0)
|
||||||
|
{
|
||||||
|
if (c)
|
||||||
|
{
|
||||||
|
buf += c;
|
||||||
|
|
||||||
|
if (c == '\n')
|
||||||
|
{
|
||||||
|
// Replace broken address
|
||||||
|
if ((buf[0] >= '0' && buf[0] <= '9') || (buf[0] >= 'a' && buf[0] <= 'f'))
|
||||||
|
{
|
||||||
|
std::uint64_t ptr = -1;
|
||||||
|
auto cvt = std::from_chars(buf.data(), buf.data() + buf.size(), ptr, 16);
|
||||||
|
|
||||||
|
if (cvt.ec == std::errc() && ptr < addr)
|
||||||
|
{
|
||||||
|
auto fix = to_hex((ptr - std::uint32_t(addr)) + addr, false);
|
||||||
|
write(STDOUT_FILENO, fix.data(), fix.size());
|
||||||
|
buf = std::string(cvt.ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write(STDOUT_FILENO, buf.data(), buf.size());
|
||||||
|
buf.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
c = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while ((dup2(fds[1], STDOUT_FILENO) == -1) && (errno == EINTR)) {}
|
||||||
|
close(fds[1]);
|
||||||
|
close(fds[0]);
|
||||||
|
// Fallthrough
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return execv(new_argv[0], new_argv.data());
|
||||||
|
}
|
@ -3487,7 +3487,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Get function chunk name
|
// Get function chunk name
|
||||||
const std::string name = fmt::format("spu-cx%05x-%s", addr, fmt::base57(be_t<u64>{m_hash_start}));
|
const std::string name = fmt::format("__spu-cx%05x-%s", addr, fmt::base57(be_t<u64>{m_hash_start}));
|
||||||
llvm::Function* result = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(name, chunk_type).getCallee());
|
llvm::Function* result = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(name, chunk_type).getCallee());
|
||||||
|
|
||||||
// Set parameters
|
// Set parameters
|
||||||
@ -3512,7 +3512,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||||||
// 5. $3
|
// 5. $3
|
||||||
const auto func_type = get_ftype<u32[4], u8*, u8*, u32, u32[4], u32[4]>();
|
const auto func_type = get_ftype<u32[4], u8*, u8*, u32, u32[4], u32[4]>();
|
||||||
|
|
||||||
const std::string fname = fmt::format("spu-fx%05x-%s", addr, fmt::base57(be_t<u64>{m_hash_start}));
|
const std::string fname = fmt::format("__spu-fx%05x-%s", addr, fmt::base57(be_t<u64>{m_hash_start}));
|
||||||
llvm::Function* fn = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(fname, func_type).getCallee());
|
llvm::Function* fn = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(fname, func_type).getCallee());
|
||||||
|
|
||||||
fn->setLinkage(llvm::GlobalValue::InternalLinkage);
|
fn->setLinkage(llvm::GlobalValue::InternalLinkage);
|
||||||
@ -4381,7 +4381,7 @@ public:
|
|||||||
sha1_finish(&ctx, output);
|
sha1_finish(&ctx, output);
|
||||||
|
|
||||||
m_hash.clear();
|
m_hash.clear();
|
||||||
fmt::append(m_hash, "spu-0x%05x-%s", func.entry_point, fmt::base57(output));
|
fmt::append(m_hash, "__spu-0x%05x-%s", func.entry_point, fmt::base57(output));
|
||||||
|
|
||||||
be_t<u64> hash_start;
|
be_t<u64> hash_start;
|
||||||
std::memcpy(&hash_start, output, sizeof(hash_start));
|
std::memcpy(&hash_start, output, sizeof(hash_start));
|
||||||
@ -4649,7 +4649,7 @@ public:
|
|||||||
m_ir->CreateUnreachable();
|
m_ir->CreateUnreachable();
|
||||||
}
|
}
|
||||||
|
|
||||||
m_dispatch = cast<Function>(_module->getOrInsertFunction("spu-null", entry_chunk->chunk->getFunctionType()).getCallee());
|
m_dispatch = cast<Function>(_module->getOrInsertFunction("__spu-null", entry_chunk->chunk->getFunctionType()).getCallee());
|
||||||
m_dispatch->setLinkage(llvm::GlobalValue::InternalLinkage);
|
m_dispatch->setLinkage(llvm::GlobalValue::InternalLinkage);
|
||||||
m_dispatch->setCallingConv(entry_chunk->chunk->getCallingConv());
|
m_dispatch->setCallingConv(entry_chunk->chunk->getCallingConv());
|
||||||
set_function(m_dispatch);
|
set_function(m_dispatch);
|
||||||
|
Loading…
Reference in New Issue
Block a user