1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-25 12:12:50 +01:00
rpcs3/Utilities/JITLLVM.cpp

806 lines
19 KiB
C++
Raw Normal View History

2020-12-12 13:01:29 +01:00
#include "util/types.hpp"
#include "util/sysinfo.hpp"
#include "JIT.h"
#include "StrFmt.h"
#include "File.h"
2020-03-07 10:29:23 +01:00
#include "util/logs.hpp"
#include "mutex.h"
#include "util/vm.hpp"
#include "util/asm.hpp"
#include "Crypto/unzip.h"
#include <charconv>
LOG_CHANNEL(jit_log, "JIT");
#ifdef LLVM_AVAILABLE
#include <unordered_map>
#ifdef _MSC_VER
#pragma warning(push, 0)
2019-11-30 00:11:28 +01:00
#else
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wall"
#pragma GCC diagnostic ignored "-Wextra"
#pragma GCC diagnostic ignored "-Wold-style-cast"
2021-03-05 20:05:37 +01:00
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
2021-03-23 20:32:50 +01:00
#pragma GCC diagnostic ignored "-Wredundant-decls"
2021-03-30 17:31:46 +02:00
#pragma GCC diagnostic ignored "-Weffc++"
#pragma GCC diagnostic ignored "-Wmissing-noreturn"
#endif
2024-03-10 12:56:16 +01:00
#include <llvm/Support/CodeGen.h>
#include "llvm/Support/TargetSelect.h"
2023-09-25 17:04:48 +02:00
#include "llvm/TargetParser/Host.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolSize.h"
#ifdef _MSC_VER
#pragma warning(pop)
2019-11-30 00:11:28 +01:00
#else
#pragma GCC diagnostic pop
#endif
// One-time LLVM setup, run while this translation unit's globals are initialized:
// registers the native target with its assembly printer and parser, and links in MCJIT.
// The stored value is always true; only the side effects matter.
const bool jit_initialize = []
{
	llvm::InitializeNativeTarget();
	llvm::InitializeNativeTargetAsmPrinter();
	llvm::InitializeNativeTargetAsmParser();
	LLVMLinkInMCJIT();

	return true;
}();
// Jump target of every stub generated by make_null_function().
// Never returns: forwards to fmt::throw_exception with the name of the unresolved function.
// `name` is formatted with %s, so it must point to a null-terminated string.
[[noreturn]] static void null(const char* name)
{
	fmt::throw_exception("Null function: %s", name);
}
namespace vm
{
	// Defined elsewhere; make_null_function() uses it as the base for "__0x<hex>" placeholder symbols
	extern u8* const g_sudo_addr;
}

// Held by make_null_function() while it accesses null_funcs
static shared_mutex null_mtx;

// Generated null stubs keyed by symbol name (value: stub address, 0 until the stub is built)
static std::unordered_map<std::string, u64> null_funcs;
static u64 make_null_function(const std::string& name)
{
if (name.starts_with("__0x"))
{
u32 addr = -1;
auto res = std::from_chars(name.c_str() + 4, name.c_str() + name.size(), addr, 16);
if (res.ec == std::errc() && res.ptr == name.c_str() + name.size() && addr < 0x8000'0000)
{
// Point the garbage to reserved, non-executable memory
return reinterpret_cast<u64>(vm::g_sudo_addr + addr);
}
}
std::lock_guard lock(null_mtx);
if (u64& func_ptr = null_funcs[name]) [[likely]]
{
// Already exists
return func_ptr;
}
else
{
using namespace asmjit;
// Build a "null" function that contains its name
const auto func = build_function_asm<void (*)()>("NULL", [&](native_asm& c, auto& args)
{
#if defined(ARCH_X64)
Label data = c.newLabel();
c.lea(args[0], x86::qword_ptr(data, 0));
2021-12-28 20:25:36 +01:00
c.jmp(Imm(&null));
c.align(AlignMode::kCode, 16);
c.bind(data);
// Copy function name bytes
for (char ch : name)
c.db(ch);
c.db(0);
2021-12-28 20:25:36 +01:00
c.align(AlignMode::kData, 16);
PPU LLVM arm64+macOS port (#12115) * BufferUtils: use naive function pointer on Apple arm64 Use naive function pointer on Apple arm64 because ASLR breaks asmjit. See BufferUtils.cpp comment for explanation on why this happens and how to fix if you want to use asmjit. * build-macos: fix source maps for Mac Tell Qt not to strip debug symbols when we're in debug or relwithdebinfo modes. * LLVM PPU: fix aarch64 on macOS Force MachO on macOS to fix LLVM being unable to patch relocations during codegen. Adds Aarch64 NEON intrinsics for x86 intrinsics used by PPUTranslator/Recompiler. * virtual memory: use 16k pages on aarch64 macOS Temporary hack to get things working by using 16k pages instead of 4k pages in VM emulation. * PPU/SPU: fix NEON intrinsics and compilation for arm64 macOS Fixes some intrinsics usage and patches usages of asmjit to properly emit absolute jmps so ASLR doesn't cause out of bounds rel jumps. Also patches the SPU recompiler to properly work on arm64 by telling LLVM to target arm64. * virtual memory: fix W^X toggles on macOS aarch64 Fixes W^X on macOS aarch64 by setting all JIT mmap'd regions to default to RW mode. For both SPU and PPU execution threads, when initialization finishes we toggle to RX mode. This exploits Apple's per-thread setting for RW/RX to let us be technically compliant with the OS's W^X enforcement while not needing to actually separate the memory allocated for code/data. * PPU: implement aarch64 specific functions Implements ppu_gateway for arm64 and patches LLVM initialization to use the correct triple. Adds some fixes for macOS W^X JIT restrictions when entering/exiting JITed code. * PPU: Mark rpcs3 calls as non-tail Strictly speaking, rpcs3 JIT -> C++ calls are not tail calls. If you call a function inside e.g. an L2 syscall, it will clobber LR on arm64 and subtly break returns in emulated code. Only JIT -> JIT "calls" should be tail. 
* macOS/arm64: compatibility fixes * vm: patch virtual memory for arm64 macOS Tag mmap calls with MAP_JIT to allow W^X on macOS. Fix mmap calls to existing mmap'd addresses that were tagged with MAP_JIT on macOS. Fix memory unmapping on 16K page machines with a hack to mark "unmapped" pages as RW. * PPU: remove wrong comment * PPU: fix a merge regression * vm: remove 16k page hacks * PPU: formatting fixes * PPU: fix arm64 null function assembly * ppu: clean up arch-specific instructions
2022-06-14 14:28:38 +02:00
#else
// AArch64 implementation
Label data = c.newLabel();
Label jump_address = c.newLabel();
c.ldr(args[0], arm::ptr(data, 0));
c.ldr(a64::x14, arm::ptr(jump_address, 0));
c.br(a64::x14);
// Data frame
PPU LLVM arm64+macOS port (#12115) * BufferUtils: use naive function pointer on Apple arm64 Use naive function pointer on Apple arm64 because ASLR breaks asmjit. See BufferUtils.cpp comment for explanation on why this happens and how to fix if you want to use asmjit. * build-macos: fix source maps for Mac Tell Qt not to strip debug symbols when we're in debug or relwithdebinfo modes. * LLVM PPU: fix aarch64 on macOS Force MachO on macOS to fix LLVM being unable to patch relocations during codegen. Adds Aarch64 NEON intrinsics for x86 intrinsics used by PPUTranslator/Recompiler. * virtual memory: use 16k pages on aarch64 macOS Temporary hack to get things working by using 16k pages instead of 4k pages in VM emulation. * PPU/SPU: fix NEON intrinsics and compilation for arm64 macOS Fixes some intrinsics usage and patches usages of asmjit to properly emit absolute jmps so ASLR doesn't cause out of bounds rel jumps. Also patches the SPU recompiler to properly work on arm64 by telling LLVM to target arm64. * virtual memory: fix W^X toggles on macOS aarch64 Fixes W^X on macOS aarch64 by setting all JIT mmap'd regions to default to RW mode. For both SPU and PPU execution threads, when initialization finishes we toggle to RX mode. This exploits Apple's per-thread setting for RW/RX to let us be technically compliant with the OS's W^X enforcement while not needing to actually separate the memory allocated for code/data. * PPU: implement aarch64 specific functions Implements ppu_gateway for arm64 and patches LLVM initialization to use the correct triple. Adds some fixes for macOS W^X JIT restrictions when entering/exiting JITed code. * PPU: Mark rpcs3 calls as non-tail Strictly speaking, rpcs3 JIT -> C++ calls are not tail calls. If you call a function inside e.g. an L2 syscall, it will clobber LR on arm64 and subtly break returns in emulated code. Only JIT -> JIT "calls" should be tail. 
* macOS/arm64: compatibility fixes * vm: patch virtual memory for arm64 macOS Tag mmap calls with MAP_JIT to allow W^X on macOS. Fix mmap calls to existing mmap'd addresses that were tagged with MAP_JIT on macOS. Fix memory unmapping on 16K page machines with a hack to mark "unmapped" pages as RW. * PPU: remove wrong comment * PPU: fix a merge regression * vm: remove 16k page hacks * PPU: formatting fixes * PPU: fix arm64 null function assembly * ppu: clean up arch-specific instructions
2022-06-14 14:28:38 +02:00
c.align(AlignMode::kCode, 16);
c.bind(jump_address);
c.embedUInt64(reinterpret_cast<u64>(&null));
PPU LLVM arm64+macOS port (#12115) * BufferUtils: use naive function pointer on Apple arm64 Use naive function pointer on Apple arm64 because ASLR breaks asmjit. See BufferUtils.cpp comment for explanation on why this happens and how to fix if you want to use asmjit. * build-macos: fix source maps for Mac Tell Qt not to strip debug symbols when we're in debug or relwithdebinfo modes. * LLVM PPU: fix aarch64 on macOS Force MachO on macOS to fix LLVM being unable to patch relocations during codegen. Adds Aarch64 NEON intrinsics for x86 intrinsics used by PPUTranslator/Recompiler. * virtual memory: use 16k pages on aarch64 macOS Temporary hack to get things working by using 16k pages instead of 4k pages in VM emulation. * PPU/SPU: fix NEON intrinsics and compilation for arm64 macOS Fixes some intrinsics usage and patches usages of asmjit to properly emit absolute jmps so ASLR doesn't cause out of bounds rel jumps. Also patches the SPU recompiler to properly work on arm64 by telling LLVM to target arm64. * virtual memory: fix W^X toggles on macOS aarch64 Fixes W^X on macOS aarch64 by setting all JIT mmap'd regions to default to RW mode. For both SPU and PPU execution threads, when initialization finishes we toggle to RX mode. This exploits Apple's per-thread setting for RW/RX to let us be technically compliant with the OS's W^X enforcement while not needing to actually separate the memory allocated for code/data. * PPU: implement aarch64 specific functions Implements ppu_gateway for arm64 and patches LLVM initialization to use the correct triple. Adds some fixes for macOS W^X JIT restrictions when entering/exiting JITed code. * PPU: Mark rpcs3 calls as non-tail Strictly speaking, rpcs3 JIT -> C++ calls are not tail calls. If you call a function inside e.g. an L2 syscall, it will clobber LR on arm64 and subtly break returns in emulated code. Only JIT -> JIT "calls" should be tail. 
* macOS/arm64: compatibility fixes * vm: patch virtual memory for arm64 macOS Tag mmap calls with MAP_JIT to allow W^X on macOS. Fix mmap calls to existing mmap'd addresses that were tagged with MAP_JIT on macOS. Fix memory unmapping on 16K page machines with a hack to mark "unmapped" pages as RW. * PPU: remove wrong comment * PPU: fix a merge regression * vm: remove 16k page hacks * PPU: formatting fixes * PPU: fix arm64 null function assembly * ppu: clean up arch-specific instructions
2022-06-14 14:28:38 +02:00
c.align(AlignMode::kData, 16);
PPU LLVM arm64+macOS port (#12115) * BufferUtils: use naive function pointer on Apple arm64 Use naive function pointer on Apple arm64 because ASLR breaks asmjit. See BufferUtils.cpp comment for explanation on why this happens and how to fix if you want to use asmjit. * build-macos: fix source maps for Mac Tell Qt not to strip debug symbols when we're in debug or relwithdebinfo modes. * LLVM PPU: fix aarch64 on macOS Force MachO on macOS to fix LLVM being unable to patch relocations during codegen. Adds Aarch64 NEON intrinsics for x86 intrinsics used by PPUTranslator/Recompiler. * virtual memory: use 16k pages on aarch64 macOS Temporary hack to get things working by using 16k pages instead of 4k pages in VM emulation. * PPU/SPU: fix NEON intrinsics and compilation for arm64 macOS Fixes some intrinsics usage and patches usages of asmjit to properly emit absolute jmps so ASLR doesn't cause out of bounds rel jumps. Also patches the SPU recompiler to properly work on arm64 by telling LLVM to target arm64. * virtual memory: fix W^X toggles on macOS aarch64 Fixes W^X on macOS aarch64 by setting all JIT mmap'd regions to default to RW mode. For both SPU and PPU execution threads, when initialization finishes we toggle to RX mode. This exploits Apple's per-thread setting for RW/RX to let us be technically compliant with the OS's W^X enforcement while not needing to actually separate the memory allocated for code/data. * PPU: implement aarch64 specific functions Implements ppu_gateway for arm64 and patches LLVM initialization to use the correct triple. Adds some fixes for macOS W^X JIT restrictions when entering/exiting JITed code. * PPU: Mark rpcs3 calls as non-tail Strictly speaking, rpcs3 JIT -> C++ calls are not tail calls. If you call a function inside e.g. an L2 syscall, it will clobber LR on arm64 and subtly break returns in emulated code. Only JIT -> JIT "calls" should be tail. 
* macOS/arm64: compatibility fixes * vm: patch virtual memory for arm64 macOS Tag mmap calls with MAP_JIT to allow W^X on macOS. Fix mmap calls to existing mmap'd addresses that were tagged with MAP_JIT on macOS. Fix memory unmapping on 16K page machines with a hack to mark "unmapped" pages as RW. * PPU: remove wrong comment * PPU: fix a merge regression * vm: remove 16k page hacks * PPU: formatting fixes * PPU: fix arm64 null function assembly * ppu: clean up arch-specific instructions
2022-06-14 14:28:38 +02:00
c.bind(data);
c.embed(name.c_str(), name.size());
c.embedUInt8(0U);
c.align(AlignMode::kData, 16);
#endif
});
func_ptr = reinterpret_cast<u64>(func);
return func_ptr;
}
}
// JIT event listener that reports every function symbol of a newly loaded object through jit_announce().
struct JITAnnouncer : llvm::JITEventListener
{
	// Called by the execution engine after an object has been loaded.
	// Symbols whose type, name or address cannot be obtained are skipped.
	void notifyObjectLoaded(u64, const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& info) override
	{
		using namespace llvm;

		object::OwningBinary<object::ObjectFile> debug_obj_ = info.getObjectForDebug(obj);

		if (!debug_obj_.getBinary())
		{
#ifdef __linux__
			jit_log.error("LLVM: Failed to announce JIT events (no debug object)");
#endif
			return;
		}

		const object::ObjectFile& debug_obj = *debug_obj_.getBinary();

		for (const auto& [sym, size] : computeSymbolSizes(debug_obj))
		{
			// A failed llvm::Expected must have its error consumed before it is destroyed,
			// otherwise LLVM builds with ABI-breaking checks enabled abort the process.
			Expected<object::SymbolRef::Type> type_ = sym.getType();

			if (!type_)
			{
				consumeError(type_.takeError());
				continue;
			}

			if (*type_ != object::SymbolRef::ST_Function)
				continue;

			Expected<StringRef> name = sym.getName();

			if (!name)
			{
				consumeError(name.takeError());
				continue;
			}

			Expected<u64> addr = sym.getAddress();

			if (!addr)
			{
				consumeError(addr.takeError());
				continue;
			}

			jit_announce(*addr, size, {name->data(), name->size()});
		}
	}
};
// Simple memory manager
struct MemoryManager1 : llvm::RTDyldMemoryManager
2017-06-24 17:36:49 +02:00
{
// 256 MiB for code or data
static constexpr u64 c_max_size = 0x20000000 / 2;
// Allocation unit (2M)
static constexpr u64 c_page_size = 2 * 1024 * 1024;
2017-06-24 17:36:49 +02:00
// Reserve 512 MiB
u8* const ptr = static_cast<u8*>(utils::memory_reserve(c_max_size * 2));
2017-06-24 17:36:49 +02:00
u64 code_ptr = 0;
u64 data_ptr = c_max_size;
2017-06-24 17:36:49 +02:00
MemoryManager1() = default;
2021-04-03 18:38:02 +02:00
MemoryManager1(const MemoryManager1&) = delete;
MemoryManager1& operator=(const MemoryManager1&) = delete;
~MemoryManager1() override
{
// Hack: don't release to prevent reuse of address space, see jit_announce
utils::memory_decommit(ptr, c_max_size * 2);
}
llvm::JITSymbol findSymbol(const std::string& name) override
{
u64 addr = RTDyldMemoryManager::getSymbolAddress(name);
if (!addr)
{
addr = make_null_function(name);
if (!addr)
{
fmt::throw_exception("Failed to link '%s'", name);
}
2017-03-11 17:49:32 +01:00
}
return {addr, llvm::JITSymbolFlags::Exported};
}
2020-12-12 14:29:55 +01:00
u8* allocate(u64& oldp, uptr size, uint align, utils::protection prot)
{
if (align > c_page_size)
{
2021-01-12 13:06:51 +01:00
jit_log.fatal("Unsupported alignment (size=0x%x, align=0x%x)", size, align);
return nullptr;
}
const u64 olda = utils::align(oldp, align);
const u64 newp = utils::align(olda + size, align);
if ((newp - 1) / c_max_size != oldp / c_max_size)
{
2021-01-12 13:06:51 +01:00
jit_log.fatal("Out of memory (size=0x%x, align=0x%x)", size, align);
return nullptr;
}
if ((oldp - 1) / c_page_size != (newp - 1) / c_page_size)
2017-03-20 14:23:50 +01:00
{
// Allocate pages on demand
const u64 pagea = utils::align(oldp, c_page_size);
const u64 psize = utils::align(newp - pagea, c_page_size);
utils::memory_commit(this->ptr + pagea, psize, prot);
2017-03-20 14:23:50 +01:00
}
// Update allocation counter
oldp = newp;
return this->ptr + olda;
}
2021-03-05 20:05:37 +01:00
u8* allocateCodeSection(uptr size, uint align, uint /*sec_id*/, llvm::StringRef /*sec_name*/) override
{
return allocate(code_ptr, size, align, utils::protection::wx);
}
2021-03-05 20:05:37 +01:00
u8* allocateDataSection(uptr size, uint align, uint /*sec_id*/, llvm::StringRef /*sec_name*/, bool /*is_ro*/) override
{
return allocate(data_ptr, size, align, utils::protection::rw);
}
bool finalizeMemory(std::string* = nullptr) override
{
return false;
}
2021-03-05 20:05:37 +01:00
void registerEHFrames(u8*, u64, usz) override
{
}
void deregisterEHFrames() override
{
}
};
// Simple memory manager
struct MemoryManager2 : llvm::RTDyldMemoryManager
{
MemoryManager2() = default;
~MemoryManager2() override
{
}
llvm::JITSymbol findSymbol(const std::string& name) override
{
u64 addr = RTDyldMemoryManager::getSymbolAddress(name);
if (!addr)
{
addr = make_null_function(name);
if (!addr)
{
fmt::throw_exception("Failed to link '%s' (MM2)", name);
}
}
return {addr, llvm::JITSymbolFlags::Exported};
}
2021-03-05 20:05:37 +01:00
u8* allocateCodeSection(uptr size, uint align, uint /*sec_id*/, llvm::StringRef /*sec_name*/) override
{
return jit_runtime::alloc(size, align, true);
}
2021-03-05 20:05:37 +01:00
u8* allocateDataSection(uptr size, uint align, uint /*sec_id*/, llvm::StringRef /*sec_name*/, bool /*is_ro*/) override
{
return jit_runtime::alloc(size, align, false);
}
bool finalizeMemory(std::string* = nullptr) override
{
return false;
}
2021-03-05 20:05:37 +01:00
void registerEHFrames(u8*, u64, usz) override
{
}
void deregisterEHFrames() override
{
}
};
// Helper class
class ObjectCache final : public llvm::ObjectCache
{
const std::string& m_path;
const std::add_pointer_t<jit_compiler> m_compiler = nullptr;
public:
ObjectCache(const std::string& path, jit_compiler* compiler = nullptr)
: m_path(path)
, m_compiler(compiler)
{
}
~ObjectCache() override = default;
void notifyObjectCompiled(const llvm::Module* _module, llvm::MemoryBufferRef obj) override
{
std::string name = m_path;
name.append(_module->getName().data());
//fs::file(name, fs::rewrite).write(obj.getBufferStart(), obj.getBufferSize());
name.append(".gz");
if (!obj.getBufferSize())
{
jit_log.error("LLVM: Nothing to write: %s", name);
return;
}
ensure(m_compiler);
fs::file module_file(name, fs::rewrite);
if (!module_file)
{
jit_log.error("LLVM: Failed to create module file: %s (%s)", name, fs::g_tls_error);
return;
}
// Bold assumption about upper limit of space consumption
const usz max_size = obj.getBufferSize() * 4;
if (!m_compiler->add_sub_disk_space(0 - max_size))
{
jit_log.error("LLVM: Failed to create module file: %s (not enough disk space left)", name);
return;
}
if (!zip(obj.getBufferStart(), obj.getBufferSize(), module_file))
{
jit_log.error("LLVM: Failed to compress module: %s", _module->getName().data());
module_file.close();
fs::remove_file(name);
return;
}
2024-07-27 11:20:49 +02:00
jit_log.trace("LLVM: Created module: %s", _module->getName().data());
// Restore space that was overestimated
ensure(m_compiler->add_sub_disk_space(max_size - module_file.size()));
}
2017-07-15 11:20:40 +02:00
static std::unique_ptr<llvm::MemoryBuffer> load(const std::string& path)
{
if (fs::file cached{path + ".gz", fs::read})
{
const std::vector<u8> cached_data = cached.to_vector<u8>();
if (cached_data.empty()) [[unlikely]]
{
return nullptr;
}
const std::vector<u8> out = unzip(cached_data);
if (out.empty())
{
jit_log.error("LLVM: Failed to unzip module: '%s'", path);
return nullptr;
}
auto buf = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(out.size());
std::memcpy(buf->getBufferStart(), out.data(), out.size());
return buf;
}
2017-07-15 11:20:40 +02:00
if (fs::file cached{path, fs::read})
{
if (cached.size() == 0) [[unlikely]]
{
return nullptr;
}
2018-05-01 12:20:36 +02:00
auto buf = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(cached.size());
cached.read(buf->getBufferStart(), buf->getBufferSize());
return buf;
}
2017-07-15 11:20:40 +02:00
return nullptr;
}
std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module* _module) override
2017-07-15 11:20:40 +02:00
{
std::string path = m_path;
path.append(_module->getName().data());
2017-07-15 11:20:40 +02:00
if (auto buf = load(path))
{
jit_log.notice("LLVM: Loaded module: %s", _module->getName().data());
2017-07-15 11:20:40 +02:00
return buf;
}
2017-07-15 11:20:40 +02:00
return nullptr;
}
};
std::string jit_compiler::cpu(const std::string& _cpu)
{
std::string m_cpu = _cpu;
2017-03-14 13:23:07 +01:00
if (m_cpu.empty())
{
m_cpu = llvm::sys::getHostCPUName().str();
if (m_cpu == "generic")
{
// Try to detect a best match based on other criteria
m_cpu = fallback_cpu_detection();
}
if (m_cpu == "sandybridge" ||
m_cpu == "ivybridge" ||
m_cpu == "haswell" ||
m_cpu == "broadwell" ||
m_cpu == "skylake" ||
m_cpu == "skylake-avx512" ||
2019-03-05 19:46:58 +01:00
m_cpu == "cascadelake" ||
2019-12-20 19:11:07 +01:00
m_cpu == "cooperlake" ||
2018-05-01 12:20:36 +02:00
m_cpu == "cannonlake" ||
m_cpu == "icelake" ||
m_cpu == "icelake-client" ||
2019-12-20 19:11:07 +01:00
m_cpu == "icelake-server" ||
m_cpu == "tigerlake" ||
m_cpu == "rocketlake" ||
m_cpu == "alderlake" ||
m_cpu == "raptorlake" ||
m_cpu == "meteorlake")
{
2018-05-01 12:20:36 +02:00
// Downgrade if AVX is not supported by some chips
if (!utils::has_avx())
{
m_cpu = "nehalem";
}
}
2018-05-01 12:20:36 +02:00
if (m_cpu == "skylake-avx512" ||
2019-03-05 19:46:58 +01:00
m_cpu == "cascadelake" ||
2019-12-20 19:11:07 +01:00
m_cpu == "cooperlake" ||
2018-05-01 12:20:36 +02:00
m_cpu == "cannonlake" ||
m_cpu == "icelake" ||
m_cpu == "icelake-client" ||
2019-12-20 19:11:07 +01:00
m_cpu == "icelake-server" ||
m_cpu == "tigerlake" ||
m_cpu == "rocketlake")
2018-05-01 12:20:36 +02:00
{
// Downgrade if AVX-512 is disabled or not supported
if (!utils::has_avx512())
2018-05-01 12:20:36 +02:00
{
m_cpu = "skylake";
}
}
2019-11-14 17:09:34 +01:00
if (m_cpu == "znver1" && utils::has_clwb())
{
// Upgrade
m_cpu = "znver2";
}
if ((m_cpu == "znver3" || m_cpu == "goldmont" || m_cpu == "alderlake" || m_cpu == "raptorlake" || m_cpu == "meteorlake") && utils::has_avx512_icl())
{
// Upgrade
m_cpu = "icelake-client";
}
if (m_cpu == "goldmont" && utils::has_avx2())
{
// Upgrade
m_cpu = "alderlake";
}
2017-03-14 13:23:07 +01:00
}
return m_cpu;
}
// Target triple assigned to the initial module in the jit_compiler constructor.
// macOS builds use a fixed Linux triple; every other platform uses the process triple.
std::string jit_compiler::triple1()
{
#if !defined(_WIN32) && defined(__APPLE__) && defined(ARCH_X64)
	const std::string triple = "x86_64-unknown-linux-gnu";
#elif !defined(_WIN32) && defined(__APPLE__) && defined(ARCH_ARM64)
	// Set environment to android to reserve x18
	const std::string triple = "aarch64-unknown-linux-android";
#else
	const std::string triple = llvm::sys::getProcessTriple();
#endif

	return llvm::Triple::normalize(triple);
}
// Target triple the constructor applies when it selects MemoryManager2.
// Windows and macOS builds use a fixed Linux triple for the known architectures;
// everything else uses the process triple.
std::string jit_compiler::triple2()
{
#if (defined(_WIN32) || defined(__APPLE__)) && defined(ARCH_X64)
	const std::string triple = "x86_64-unknown-linux-gnu";
#elif defined(_WIN32) && defined(ARCH_ARM64)
	const std::string triple = "aarch64-unknown-linux-gnu";
#elif defined(__APPLE__) && defined(ARCH_ARM64)
	// Set environment to android to reserve x18
	const std::string triple = "aarch64-unknown-linux-android";
#else
	const std::string triple = llvm::sys::getProcessTriple();
#endif

	return llvm::Triple::normalize(triple);
}
// Adjust the remaining disk space budget.
// space >= 0: adds to the budget (overflow is asserted against) and returns true.
// space <  0: subtracts -space only if that much is available; returns whether it was subtracted.
bool jit_compiler::add_sub_disk_space(ssz space)
{
	if (space < 0)
	{
		const usz requested = static_cast<usz>(0 - space);

		const auto result = m_disk_space.fetch_op([requested](usz& available)
		{
			if (available < requested)
			{
				return false;
			}

			available -= requested;
			return true;
		});

		return result.second;
	}

	// The previous value must leave room for `space` without wrapping around
	ensure(m_disk_space.fetch_add(space) < ~static_cast<usz>(space));
	return true;
}
// Create the LLVM execution engine.
//
// _link: symbol name -> address table registered as global mappings. When it is empty this is an
//        auxiliary JIT and `flags & 0x1` chooses between MemoryManager1 and MemoryManager2.
// _cpu:  CPU name override, see jit_compiler::cpu().
// flags: 0x1 - use MemoryManager1 when _link is empty; 0x2 - use the large code model.
//
// Throws (fmt::throw_exception) if the engine cannot be created.
jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, const std::string& _cpu, u32 flags)
	: m_context(new llvm::LLVMContext)
	, m_cpu(cpu(_cpu))
{
	std::string result;

	auto null_mod = std::make_unique<llvm::Module> ("null_", *m_context);
	null_mod->setTargetTriple(jit_compiler::triple1());

	std::unique_ptr<llvm::RTDyldMemoryManager> mem;

	if (_link.empty())
	{
		// Auxiliary JIT (does not use custom memory manager, only writes the objects)
		if (flags & 0x1)
		{
			mem = std::make_unique<MemoryManager1>();
		}
		else
		{
			mem = std::make_unique<MemoryManager2>();
			null_mod->setTargetTriple(jit_compiler::triple2());
		}
	}
	else
	{
		mem = std::make_unique<MemoryManager1>();
	}

	{
		m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
			.setErrorStr(&result)
			.setEngineKind(llvm::EngineKind::JIT)
			.setMCJITMemoryManager(std::move(mem))
#if LLVM_VERSION_MAJOR < 18
			.setOptLevel(llvm::CodeGenOpt::Aggressive)
#else
			.setOptLevel(llvm::CodeGenOptLevel::Aggressive)
#endif
			.setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small)
#ifdef __APPLE__
			//.setCodeModel(llvm::CodeModel::Large)
#endif
			.setRelocationModel(llvm::Reloc::Model::PIC_)
			.setMCPU(m_cpu)
			.create());
	}

	// Must be checked before the engine is used below: create() returns null on failure
	// and leaves the reason in `result`
	if (!m_engine)
	{
		fmt::throw_exception("LLVM: Failed to create ExecutionEngine: %s", result);
	}

	for (auto&& [name, addr] : _link)
	{
		m_engine->updateGlobalMapping(name, addr);
	}

	if (!_link.empty() || !(flags & 0x1))
	{
		m_engine->RegisterJITEventListener(llvm::JITEventListener::createIntelJITEventListener());
		m_engine->RegisterJITEventListener(new JITAnnouncer);
	}

	fs::device_stat stats{};

	if (fs::statfs(fs::get_cache_dir(), stats))
	{
		// Budget a quarter of the free space reported for the cache directory
		m_disk_space = stats.avail_free / 4;
	}
}
2017-06-24 17:36:49 +02:00
// Nothing to do beyond destroying the members; defined in this file, where the LLVM headers are included
jit_compiler::~jit_compiler() = default;
// Add a module and generate its code immediately, with an object cache rooted at `path`
// installed for the duration of the call.
void jit_compiler::add(std::unique_ptr<llvm::Module> _module, const std::string& path)
{
	// The engine takes ownership of the module; keep a raw pointer for the calls below
	const auto raw_module = _module.get();

	// The cache lives on the stack, so it is detached again before returning
	ObjectCache cache{path, this};
	m_engine->setObjectCache(&cache);

	m_engine->addModule(std::move(_module));
	m_engine->generateCodeForModule(raw_module);

	m_engine->setObjectCache(nullptr);

	// Delete IR to lower memory consumption
	for (auto& fn : raw_module->functions())
	{
		fn.deleteBody();
	}
}
// Add a module and generate its code immediately (this overload installs no object cache)
void jit_compiler::add(std::unique_ptr<llvm::Module> _module)
{
	// The engine takes ownership of the module; keep a raw pointer for the calls below
	const auto raw_module = _module.get();

	m_engine->addModule(std::move(_module));
	m_engine->generateCodeForModule(raw_module);

	// Delete IR to lower memory consumption
	for (auto& fn : raw_module->functions())
	{
		fn.deleteBody();
	}
}
bool jit_compiler::add(const std::string& path)
2017-07-15 11:20:40 +02:00
{
auto cache = ObjectCache::load(path);
if (!cache)
{
jit_log.error("ObjectCache: Failed to read file. (path='%s', error=%s)", path, fs::g_tls_error);
return false;
}
if (auto object_file = llvm::object::ObjectFile::createObjectFile(*cache))
{
m_engine->addObjectFile(llvm::object::OwningBinary<llvm::object::ObjectFile>(std::move(*object_file), std::move(cache)));
jit_log.trace("ObjectCache: Successfully added %s", path);
return true;
}
else
{
jit_log.error("ObjectCache: Adding failed: %s", path);
return false;
}
2017-07-15 11:20:40 +02:00
}
bool jit_compiler::check(const std::string& path)
{
if (auto cache = ObjectCache::load(path))
{
if (auto object_file = llvm::object::ObjectFile::createObjectFile(*cache))
{
return true;
}
if (fs::remove_file(path))
{
jit_log.error("ObjectCache: Removed damaged file: %s", path);
}
}
return false;
}
2022-08-17 15:53:05 +02:00
// Forward a (symbol name, address) mapping to the execution engine
void jit_compiler::update_global_mapping(const std::string& name, u64 addr)
{
	m_engine->updateGlobalMapping(name, addr);
}
2017-06-24 17:36:49 +02:00
void jit_compiler::fin()
2017-02-26 16:56:31 +01:00
{
m_engine->finalizeObject();
}
2017-06-29 16:25:39 +02:00
// Return the address the engine reports for the global value `name`
u64 jit_compiler::get(const std::string& name)
{
	const u64 address = m_engine->getGlobalValueAddress(name);
	return address;
}
// Guess an LLVM CPU name when LLVM itself only reports "generic".
// x86-64: decided from the CPU brand string, family/model and the feature checks in utils::.
// ARM64:  a fixed baseline is returned.
// Returns "generic" when no guess can be made.
llvm::StringRef fallback_cpu_detection()
{
#if defined (ARCH_X64)
	// If we got here we either have a very old and outdated CPU or a new CPU that has not been seen by LLVM yet.
	const std::string brand = utils::get_cpu_brand();
	const auto family = utils::get_cpu_family();
	const auto model = utils::get_cpu_model();

	jit_log.error("CPU wasn't identified by LLVM, brand = %s, family = 0x%x, model = 0x%x", brand, family, model);

	if (brand.starts_with("AMD"))
	{
		switch (family)
		{
		case 0x10:
		case 0x12: // Unimplemented in LLVM
			return "amdfam10";
		case 0x15:
			// Bulldozer class, includes piledriver, excavator, steamroller, etc
			return utils::has_avx2() ? "bdver4" : "bdver1";
		case 0x17:
		case 0x18:
			// No major differences between znver1 and znver2, return the lesser
			return "znver1";
		case 0x19:
			// Models 0-Fh are zen3 as are 20h-60h. The rest we can assume are zen4
			return ((model >= 0x20 && model <= 0x60) || model < 0x10) ? "znver3" : "znver4";
		case 0x1a:
			// Only one generation in family 1a so far, zen5, which we do not support yet.
			// Return zen4 as a workaround until the next LLVM upgrade.
			return "znver4";
		default:
			// Safest guesses
			return utils::has_avx512() ? "znver4" :
				utils::has_avx2() ? "znver1" :
				utils::has_avx() ? "bdver1" :
				"nehalem";
		}
	}
	else if (brand.find("Intel") != std::string::npos)
	{
		// Walk up the feature tiers; each missing feature selects the tier below it
		if (!utils::has_avx())
		{
			return "nehalem";
		}

		if (!utils::has_avx2())
		{
			return "ivybridge";
		}

		if (!utils::has_avx512())
		{
			return "skylake";
		}

		// The Ice Lake name is only valid when the ICL AVX-512 extensions are present
		// (jit_compiler::cpu applies the same rule when it upgrades to "icelake-client")
		if (!utils::has_avx512_icl())
		{
			return "cannonlake";
		}

		return "icelake-client";
	}
	else if (brand.starts_with("VirtualApple"))
	{
		// No AVX. This will change in MacOS 15+, at which point we may revise this.
		return utils::has_avx() ? "haswell" : "nehalem";
	}

#elif defined(ARCH_ARM64)
	// TODO: Read the data from /proc/cpuinfo. ARM CPU registers are not accessible from usermode.
	// This will be a pain when supporting snapdragon on windows but we'll cross that bridge when we get there.
	// Require at least armv8-2a. Older chips are going to be useless anyway.
	return "cortex-a78";
#endif

	// Failed to guess, use generic fallback
	return "generic";
}
2023-12-29 15:14:22 +01:00
#endif // LLVM_AVAILABLE