mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-24 11:43:05 +01:00
Update to LLVM 16.0.0, switch to upstream LLVM
This commit is contained in:
parent
7081b89e97
commit
fb88e1c1c9
@ -7,9 +7,9 @@ git submodule -q update --init --depth 1 $(awk '/path/ && !/llvm/ { print $3 }'
|
||||
|
||||
# Prefer newer Clang than in base system (see also .ci/install-freebsd.sh)
|
||||
# libc++ isn't in llvm* packages, so download manually
|
||||
fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-15.0.6/llvm-project-15.0.6.src.tar.xz
|
||||
fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.0/llvm-project-16.0.0.src.tar.xz
|
||||
tar xf llvm*.tar.xz
|
||||
export CC=clang15 CXX=clang++15
|
||||
export CC=clang16 CXX=clang++16
|
||||
cmake -B libcxx_build -G Ninja -S llvm*/libcxx \
|
||||
-DLLVM_CCACHE_BUILD=ON \
|
||||
-DLIBCXX_INCLUDE_BENCHMARKS=OFF \
|
||||
@ -20,7 +20,7 @@ export CXXFLAGS="$CXXFLAGS -nostdinc++ -isystem$PWD/libcxx_prefix/include/c++/v1
|
||||
export LDFLAGS="$LDFLAGS -nostdlib++ -L$PWD/libcxx_prefix/lib -l:libc++.a -lcxxrt"
|
||||
|
||||
CONFIGURE_ARGS="
|
||||
-DWITH_LLVM=OFF
|
||||
-DWITH_LLVM=ON
|
||||
-DUSE_SDL=OFF
|
||||
-DUSE_PRECOMPILED_HEADERS=OFF
|
||||
-DUSE_NATIVE_INSTRUCTIONS=OFF
|
||||
|
@ -9,16 +9,11 @@ if [ -z "$CIRRUS_CI" ]; then
|
||||
cd rpcs3 || exit 1
|
||||
fi
|
||||
|
||||
# Pull all the submodules except llvm, since it is built separately and we just download that build
|
||||
# Pull all the submodules except llvm
|
||||
# Note: Tried to use git submodule status, but it takes over 20 seconds
|
||||
# shellcheck disable=SC2046
|
||||
git submodule -q update --init $(awk '/path/ && !/llvm/ { print $3 }' .gitmodules)
|
||||
|
||||
# Download pre-compiled llvm libs
|
||||
curl -sLO https://github.com/RPCS3/llvm-mirror/releases/download/custom-build/llvmlibs-linux.tar.gz
|
||||
mkdir llvmlibs
|
||||
tar -xzf ./llvmlibs-linux.tar.gz -C llvmlibs
|
||||
|
||||
mkdir build && cd build || exit 1
|
||||
|
||||
if [ "$COMPILER" = "gcc" ]; then
|
||||
@ -42,8 +37,6 @@ export CFLAGS="$CFLAGS -fuse-ld=${LINKER}"
|
||||
|
||||
cmake .. \
|
||||
-DCMAKE_INSTALL_PREFIX=/usr \
|
||||
-DBUILD_LLVM_SUBMODULE=OFF \
|
||||
-DLLVM_DIR=llvmlibs/lib/cmake/llvm/ \
|
||||
-DUSE_NATIVE_INSTRUCTIONS=OFF \
|
||||
-DUSE_PRECOMPILED_HEADERS=OFF \
|
||||
-DCMAKE_C_FLAGS="$CFLAGS" \
|
||||
|
@ -1,10 +1,10 @@
|
||||
#!/bin/sh -ex
|
||||
|
||||
brew install -f --overwrite llvm@14 nasm ninja git p7zip create-dmg ccache
|
||||
brew install -f --overwrite llvm@16 nasm ninja git p7zip create-dmg ccache
|
||||
|
||||
#/usr/sbin/softwareupdate --install-rosetta --agree-to-license
|
||||
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
||||
arch -x86_64 /usr/local/homebrew/bin/brew install -f --overwrite llvm@14 sdl2 glew cmake
|
||||
arch -x86_64 /usr/local/homebrew/bin/brew install -f --overwrite llvm@16 sdl2 glew cmake
|
||||
|
||||
#export MACOSX_DEPLOYMENT_TARGET=12.0
|
||||
export CXX=clang++
|
||||
@ -33,7 +33,7 @@ cd ..
|
||||
export Qt5_DIR="$WORKDIR/qt-downloader/5.15.2/clang_64/lib/cmake/Qt5"
|
||||
export SDL2_DIR="$BREW_X64_PATH/opt/sdl2/lib/cmake/SDL2"
|
||||
|
||||
export PATH="$BREW_PATH/opt/llvm@14/bin:$WORKDIR/qt-downloader/5.15.2/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH"
|
||||
export PATH="$BREW_PATH/opt/llvm@16/bin:$WORKDIR/qt-downloader/5.15.2/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH"
|
||||
export LDFLAGS="-L$BREW_X64_PATH/lib -Wl,-rpath,$BREW_X64_PATH/lib"
|
||||
export CPPFLAGS="-I$BREW_X64_PATH/include -msse -msse2 -mcx16 -no-pie"
|
||||
export LIBRARY_PATH="$BREW_X64_PATH/lib"
|
||||
|
@ -15,7 +15,7 @@ echo "AVVER=$AVVER" >> ../.ci/ci-vars.env
|
||||
cd bin
|
||||
mkdir "rpcs3.app/Contents/lib/"
|
||||
|
||||
cp "/usr/local/Homebrew/opt/llvm@14/lib/c++/libc++abi.1.0.dylib" "rpcs3.app/Contents/lib/libc++abi.1.dylib"
|
||||
cp "/usr/local/Homebrew/opt/llvm@16/lib/c++/libc++abi.1.0.dylib" "rpcs3.app/Contents/lib/libc++abi.1.dylib"
|
||||
|
||||
rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \
|
||||
"rpcs3.app/Contents/Frameworks/QtQml.framework" \
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/sh -ex
|
||||
|
||||
curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z.sha256"
|
||||
curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z.sha256"
|
||||
curl -L -o "./glslang.lock" "https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z.sha256"
|
||||
|
@ -9,7 +9,7 @@ export ASSUME_ALWAYS_YES=true
|
||||
pkg info # debug
|
||||
|
||||
# Prefer newer Clang than in base system (see also .ci/build-freebsd.sh)
|
||||
pkg install llvm15
|
||||
pkg install llvm16
|
||||
|
||||
# Mandatory dependencies (qt5-dbus and qt5-gui are pulled via qt5-widgets)
|
||||
pkg install git ccache cmake ninja qt5-qmake qt5-buildtools qt5-widgets qt5-concurrent qt5-multimedia qt5-svg glew openal-soft ffmpeg
|
||||
|
@ -19,7 +19,7 @@ QT_DECL_URL="${QT_HOST}${QT_PREFIX}qtdeclarative${QT_SUFFIX}"
|
||||
QT_TOOL_URL="${QT_HOST}${QT_PREFIX}qttools${QT_SUFFIX}"
|
||||
QT_MM_URL="${QT_HOST}${QT_PREFIX}qtmultimedia${QT_SUFFIX}"
|
||||
QT_SVG_URL="${QT_HOST}${QT_PREFIX}qtsvg${QT_SUFFIX}"
|
||||
LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z'
|
||||
LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z'
|
||||
GLSLANG_URL='https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z'
|
||||
VULKAN_SDK_URL="https://www.dropbox.com/s/cs77c3iv5mbo0bt/VulkanSDK-${VULKAN_VER}-Installer.exe"
|
||||
|
||||
|
@ -61,7 +61,7 @@ windows_task:
|
||||
|
||||
linux_task:
|
||||
container:
|
||||
image: rpcs3/rpcs3-ci-bionic:1.6
|
||||
image: rpcs3/rpcs3-ci-bionic:1.7
|
||||
cpu: 4
|
||||
memory: 16G
|
||||
env:
|
||||
|
34
3rdparty/llvm.cmake
vendored
34
3rdparty/llvm.cmake
vendored
@ -2,7 +2,7 @@ if(WITH_LLVM)
|
||||
CHECK_CXX_COMPILER_FLAG("-msse -msse2 -mcx16" COMPILER_X86)
|
||||
CHECK_CXX_COMPILER_FLAG("-march=armv8-a+lse" COMPILER_ARM)
|
||||
|
||||
if(BUILD_LLVM_SUBMODULE)
|
||||
if(BUILD_LLVM)
|
||||
message(STATUS "LLVM will be built from the submodule.")
|
||||
|
||||
set(LLVM_TARGETS_TO_BUILD "AArch64;X86")
|
||||
@ -38,49 +38,33 @@ if(WITH_LLVM)
|
||||
set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD})
|
||||
|
||||
# Now try to find LLVM again
|
||||
find_package(LLVM 13.0 CONFIG)
|
||||
find_package(LLVM 16.0 CONFIG)
|
||||
if(NOT LLVM_FOUND)
|
||||
message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`")
|
||||
endif()
|
||||
|
||||
else()
|
||||
message(STATUS "Using prebuilt LLVM")
|
||||
message(STATUS "Using prebuilt or system LLVM")
|
||||
|
||||
if (LLVM_DIR AND NOT IS_ABSOLUTE "${LLVM_DIR}")
|
||||
# change relative LLVM_DIR to be relative to the source dir
|
||||
set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR})
|
||||
endif()
|
||||
|
||||
find_package(LLVM 13.0 CONFIG)
|
||||
find_package(LLVM 16.0 CONFIG)
|
||||
|
||||
if (NOT LLVM_FOUND)
|
||||
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 11)
|
||||
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 11.0. \
|
||||
Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.")
|
||||
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 16)
|
||||
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 16. \
|
||||
Enable BUILD_LLVM option to build LLVM from included as a git submodule.")
|
||||
endif()
|
||||
|
||||
message(FATAL_ERROR "Can't find LLVM libraries from the CMAKE_PREFIX_PATH path or LLVM_DIR. \
|
||||
Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.")
|
||||
Enable BUILD_LLVM option to build LLVM from included as a git submodule.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(LLVM_LIBS LLVMMCJIT)
|
||||
|
||||
if(COMPILER_X86)
|
||||
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser)
|
||||
endif()
|
||||
|
||||
if(COMPILER_ARM)
|
||||
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser LLVMAArch64CodeGen LLVMAArch64AsmParser)
|
||||
endif()
|
||||
|
||||
if(WIN32 OR CMAKE_SYSTEM MATCHES "Linux")
|
||||
set(LLVM_LIBS ${LLVM_LIBS} LLVMIntelJITEvents)
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM MATCHES "Linux")
|
||||
set(LLVM_LIBS ${LLVM_LIBS} LLVMPerfJITEvents)
|
||||
endif()
|
||||
set(LLVM_LIBS LLVM)
|
||||
|
||||
add_library(3rdparty_llvm INTERFACE)
|
||||
target_link_libraries(3rdparty_llvm INTERFACE ${LLVM_LIBS})
|
||||
|
@ -111,7 +111,7 @@ git submodule update --init
|
||||
|
||||
Open `rpcs3.sln`. The recommended build configuration is `Release`. (On older revisions: `Release - LLVM`)
|
||||
|
||||
You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z) and extract them to the root rpcs3 folder (which contains `rpcs3.sln`), as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled).
|
||||
You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z) and extract them to the root rpcs3 folder (which contains `rpcs3.sln`), as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled).
|
||||
|
||||
If you're not using the precompiled libs, build the following projects in the *__BUILD_BEFORE* folder by right-clicking on each project > *Build*:
|
||||
* glslang
|
||||
|
@ -14,7 +14,7 @@ endif()
|
||||
|
||||
option(USE_NATIVE_INSTRUCTIONS "USE_NATIVE_INSTRUCTIONS makes rpcs3 compile with -march=native, which is useful for local builds, but not good for packages." ON)
|
||||
option(WITH_LLVM "Enable usage of LLVM library" ON)
|
||||
option(BUILD_LLVM_SUBMODULE "Build LLVM from git submodule" ON)
|
||||
option(BUILD_LLVM "Build LLVM from git submodule" OFF)
|
||||
option(USE_FAUDIO "FAudio audio backend" ON)
|
||||
option(USE_LIBEVDEV "libevdev-based joystick support" ON)
|
||||
option(USE_DISCORD_RPC "Discord rich presence integration" OFF)
|
||||
|
@ -198,6 +198,9 @@ static u8* add_jit_memory(usz size, uint align)
|
||||
});
|
||||
}
|
||||
|
||||
ensure(pointer + pos >= get_jit_memory() + Off);
|
||||
ensure(pointer + pos < get_jit_memory() + Off + 0x40000000);
|
||||
|
||||
return pointer + pos;
|
||||
}
|
||||
|
||||
@ -1319,7 +1322,10 @@ std::string jit_compiler::cpu(const std::string& _cpu)
|
||||
m_cpu == "icelake-client" ||
|
||||
m_cpu == "icelake-server" ||
|
||||
m_cpu == "tigerlake" ||
|
||||
m_cpu == "rocketlake")
|
||||
m_cpu == "rocketlake" ||
|
||||
m_cpu == "alderlake" ||
|
||||
m_cpu == "raptorlake" ||
|
||||
m_cpu == "meteorlake")
|
||||
{
|
||||
// Downgrade if AVX is not supported by some chips
|
||||
if (!utils::has_avx())
|
||||
@ -1350,6 +1356,18 @@ std::string jit_compiler::cpu(const std::string& _cpu)
|
||||
// Upgrade
|
||||
m_cpu = "znver2";
|
||||
}
|
||||
|
||||
if ((m_cpu == "znver3" || m_cpu == "goldmont" || m_cpu == "alderlake" || m_cpu == "raptorlake" || m_cpu == "meteorlake") && utils::has_avx512_icl())
|
||||
{
|
||||
// Upgrade
|
||||
m_cpu = "icelake-client";
|
||||
}
|
||||
|
||||
if (m_cpu == "goldmont" && utils::has_avx2())
|
||||
{
|
||||
// Upgrade
|
||||
m_cpu = "alderlake";
|
||||
}
|
||||
}
|
||||
|
||||
return m_cpu;
|
||||
@ -1362,15 +1380,13 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
|
||||
std::string result;
|
||||
|
||||
auto null_mod = std::make_unique<llvm::Module> ("null_", *m_context);
|
||||
#if defined(__APPLE__) && defined(ARCH_ARM64)
|
||||
// Force override triple on Apple arm64 or we'll get linking errors.
|
||||
null_mod->setTargetTriple(llvm::Triple::normalize(utils::c_llvm_default_triple));
|
||||
#endif
|
||||
null_mod->setTargetTriple(llvm::Triple::normalize(llvm::sys::getProcessTriple()));
|
||||
|
||||
std::unique_ptr<llvm::RTDyldMemoryManager> mem;
|
||||
|
||||
if (_link.empty())
|
||||
{
|
||||
std::unique_ptr<llvm::RTDyldMemoryManager> mem;
|
||||
|
||||
// Auxiliary JIT (does not use custom memory manager, only writes the objects)
|
||||
if (flags & 0x1)
|
||||
{
|
||||
mem = std::make_unique<MemoryManager1>();
|
||||
@ -1378,31 +1394,33 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
|
||||
else
|
||||
{
|
||||
mem = std::make_unique<MemoryManager2>();
|
||||
null_mod->setTargetTriple(llvm::Triple::normalize(utils::c_llvm_default_triple));
|
||||
#if defined(_WIN32) && defined(ARCH_X64)
|
||||
null_mod->setTargetTriple(llvm::Triple::normalize("x86_64-unknown-linux-gnu"));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
mem = std::make_unique<MemoryManager1>();
|
||||
}
|
||||
|
||||
// Auxiliary JIT (does not use custom memory manager, only writes the objects)
|
||||
{
|
||||
m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
|
||||
.setErrorStr(&result)
|
||||
.setEngineKind(llvm::EngineKind::JIT)
|
||||
.setMCJITMemoryManager(std::move(mem))
|
||||
.setOptLevel(llvm::CodeGenOpt::Aggressive)
|
||||
.setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small)
|
||||
#ifdef __APPLE__
|
||||
.setCodeModel(llvm::CodeModel::Large)
|
||||
#endif
|
||||
.setRelocationModel(llvm::Reloc::Model::PIC_)
|
||||
.setMCPU(m_cpu)
|
||||
.create());
|
||||
}
|
||||
else
|
||||
{
|
||||
// Primary JIT
|
||||
m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
|
||||
.setErrorStr(&result)
|
||||
.setEngineKind(llvm::EngineKind::JIT)
|
||||
.setMCJITMemoryManager(std::make_unique<MemoryManager1>())
|
||||
.setOptLevel(llvm::CodeGenOpt::Aggressive)
|
||||
.setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small)
|
||||
.setMCPU(m_cpu)
|
||||
.create());
|
||||
|
||||
if (!_link.empty())
|
||||
{
|
||||
for (auto&& [name, addr] : _link)
|
||||
{
|
||||
m_engine->updateGlobalMapping(name, addr);
|
||||
|
@ -16,54 +16,97 @@
|
||||
<Link>
|
||||
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Debug'">%(AdditionalLibraryDirectories);..\llvm_build\Debug\lib</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>%(AdditionalDependencies);
|
||||
LLVMProfileData.lib;
|
||||
LLVMDebugInfoCodeView.lib;
|
||||
LLVMDebugInfoMSF.lib;
|
||||
LLVMInstrumentation.lib;
|
||||
LLVMMCJIT.lib;
|
||||
LLVMRuntimeDyld.lib;
|
||||
LLVMVectorize.lib;
|
||||
LLVMX86CodeGen.lib;
|
||||
LLVMGlobalISel.lib;
|
||||
LLVMX86Disassembler.lib;
|
||||
LLVMExecutionEngine.lib;
|
||||
LLVMAsmPrinter.lib;
|
||||
LLVMSelectionDAG.lib;
|
||||
LLVMCodeGen.lib;
|
||||
LLVMScalarOpts.lib;
|
||||
LLVMInstCombine.lib;
|
||||
LLVMTransformUtils.lib;
|
||||
LLVMAnalysis.lib;
|
||||
LLVMTarget.lib;
|
||||
LLVMX86Desc.lib;
|
||||
LLVMObject.lib;
|
||||
LLVMMCParser.lib;
|
||||
LLVMBitReader.lib;
|
||||
LLVMCore.lib;
|
||||
LLVMMC.lib;
|
||||
LLVMX86Info.lib;
|
||||
LLVMSupport.lib;
|
||||
LLVMMCDisassembler.lib;
|
||||
LLVMipo.lib;
|
||||
LLVMBinaryFormat.lib;
|
||||
LLVMPasses.lib;
|
||||
LLVMIRReader.lib;
|
||||
LLVMLinker.lib;
|
||||
LLVMAsmParser.lib;
|
||||
LLVMX86AsmParser.lib;
|
||||
LLVMDemangle.lib;
|
||||
LLVMDebugInfoDWARF.lib;
|
||||
LLVMRemarks.lib;
|
||||
LLVMBitstreamReader.lib;
|
||||
LLVMTextAPI.lib;
|
||||
LLVMCFGuard.lib;
|
||||
LLVMAggressiveInstCombine.lib;
|
||||
LLVMBitWriter.lib;
|
||||
LLVMCoroutines.lib;
|
||||
LLVMObjCARCOpts.lib;
|
||||
LLVMIntelJITEvents.lib;
|
||||
LLVMAggressiveInstCombine.lib;
|
||||
LLVMAnalysis.lib;
|
||||
LLVMAsmParser.lib;
|
||||
LLVMAsmPrinter.lib;
|
||||
LLVMBinaryFormat.lib;
|
||||
LLVMBitReader.lib;
|
||||
LLVMBitstreamReader.lib;
|
||||
LLVMBitWriter.lib;
|
||||
LLVMCFGuard.lib;
|
||||
LLVMCFIVerify.lib;
|
||||
LLVMCodeGen.lib;
|
||||
LLVMCore.lib;
|
||||
LLVMCoroutines.lib;
|
||||
LLVMCoverage.lib;
|
||||
LLVMDebugInfoCodeView.lib;
|
||||
LLVMDebuginfod.lib;
|
||||
LLVMDebugInfoDWARF.lib;
|
||||
LLVMDebugInfoGSYM.lib;
|
||||
LLVMDebugInfoLogicalView.lib;
|
||||
LLVMDebugInfoMSF.lib;
|
||||
LLVMDebugInfoPDB.lib;
|
||||
LLVMDemangle.lib;
|
||||
LLVMDiff.lib;
|
||||
LLVMDlltoolDriver.lib;
|
||||
LLVMDWARFLinker.lib;
|
||||
LLVMDWARFLinkerParallel.lib;
|
||||
LLVMDWP.lib;
|
||||
LLVMExecutionEngine.lib;
|
||||
LLVMExegesis.lib;
|
||||
LLVMExegesisX86.lib;
|
||||
LLVMExtensions.lib;
|
||||
LLVMFileCheck.lib;
|
||||
LLVMFrontendHLSL.lib;
|
||||
LLVMFrontendOpenACC.lib;
|
||||
LLVMFrontendOpenMP.lib;
|
||||
LLVMFuzzerCLI.lib;
|
||||
LLVMFuzzMutate.lib;
|
||||
LLVMGlobalISel.lib;
|
||||
LLVMInstCombine.lib;
|
||||
LLVMInstrumentation.lib;
|
||||
LLVMIntelJITEvents.lib;
|
||||
LLVMInterfaceStub.lib;
|
||||
LLVMInterpreter.lib;
|
||||
LLVMipo.lib;
|
||||
LLVMIRPrinter.lib;
|
||||
LLVMIRReader.lib;
|
||||
LLVMJITLink.lib;
|
||||
LLVMLibDriver.lib;
|
||||
LLVMLineEditor.lib;
|
||||
LLVMLinker.lib;
|
||||
LLVMLTO.lib;
|
||||
LLVMMCA.lib;
|
||||
LLVMMCDisassembler.lib;
|
||||
LLVMMCJIT.lib;
|
||||
LLVMMC.lib;
|
||||
LLVMMCParser.lib;
|
||||
LLVMMIRParser.lib;
|
||||
LLVMObjCARCOpts.lib;
|
||||
LLVMObjCopy.lib;
|
||||
LLVMObject.lib;
|
||||
LLVMObjectYAML.lib;
|
||||
LLVMOption.lib;
|
||||
LLVMOrcJIT.lib;
|
||||
LLVMOrcShared.lib;
|
||||
LLVMOrcTargetProcess.lib;
|
||||
LLVMPasses.lib;
|
||||
LLVMProfileData.lib;
|
||||
LLVMRemarks.lib;
|
||||
LLVMRuntimeDyld.lib;
|
||||
LLVMScalarOpts.lib;
|
||||
LLVMSelectionDAG.lib;
|
||||
LLVMSupport.lib;
|
||||
LLVMSymbolize.lib;
|
||||
LLVMTableGenGlobalISel.lib;
|
||||
LLVMTableGen.lib;
|
||||
LLVMTarget.lib;
|
||||
LLVMTargetParser.lib;
|
||||
LLVMTextAPI.lib;
|
||||
LLVMTransformUtils.lib;
|
||||
LLVMVectorize.lib;
|
||||
LLVMWindowsDriver.lib;
|
||||
LLVMWindowsManifest.lib;
|
||||
LLVMX86AsmParser.lib;
|
||||
LLVMX86CodeGen.lib;
|
||||
LLVMX86Desc.lib;
|
||||
LLVMX86Disassembler.lib;
|
||||
LLVMX86Info.lib;
|
||||
LLVMX86TargetMCA.lib;
|
||||
LLVMXRay.lib;
|
||||
</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup />
|
||||
</Project>
|
||||
</Project>
|
||||
|
@ -17,54 +17,97 @@
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Release'">%(AdditionalLibraryDirectories);..\llvm_build\Release\lib</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>%(AdditionalDependencies);
|
||||
LLVMProfileData.lib;
|
||||
LLVMDebugInfoCodeView.lib;
|
||||
LLVMDebugInfoMSF.lib;
|
||||
LLVMInstrumentation.lib;
|
||||
LLVMMCJIT.lib;
|
||||
LLVMRuntimeDyld.lib;
|
||||
LLVMVectorize.lib;
|
||||
LLVMX86CodeGen.lib;
|
||||
LLVMGlobalISel.lib;
|
||||
LLVMX86Disassembler.lib;
|
||||
LLVMExecutionEngine.lib;
|
||||
LLVMAsmPrinter.lib;
|
||||
LLVMSelectionDAG.lib;
|
||||
LLVMCodeGen.lib;
|
||||
LLVMScalarOpts.lib;
|
||||
LLVMInstCombine.lib;
|
||||
LLVMTransformUtils.lib;
|
||||
LLVMAnalysis.lib;
|
||||
LLVMTarget.lib;
|
||||
LLVMX86Desc.lib;
|
||||
LLVMObject.lib;
|
||||
LLVMMCParser.lib;
|
||||
LLVMBitReader.lib;
|
||||
LLVMCore.lib;
|
||||
LLVMMC.lib;
|
||||
LLVMX86Info.lib;
|
||||
LLVMSupport.lib;
|
||||
LLVMMCDisassembler.lib;
|
||||
LLVMipo.lib;
|
||||
LLVMBinaryFormat.lib;
|
||||
LLVMPasses.lib;
|
||||
LLVMIRReader.lib;
|
||||
LLVMLinker.lib;
|
||||
LLVMAsmParser.lib;
|
||||
LLVMX86AsmParser.lib;
|
||||
LLVMDemangle.lib;
|
||||
LLVMDebugInfoDWARF.lib;
|
||||
LLVMRemarks.lib;
|
||||
LLVMBitstreamReader.lib;
|
||||
LLVMTextAPI.lib;
|
||||
LLVMCFGuard.lib;
|
||||
LLVMAggressiveInstCombine.lib;
|
||||
LLVMBitWriter.lib;
|
||||
LLVMCoroutines.lib;
|
||||
LLVMObjCARCOpts.lib;
|
||||
LLVMIntelJITEvents.lib;
|
||||
LLVMAggressiveInstCombine.lib;
|
||||
LLVMAnalysis.lib;
|
||||
LLVMAsmParser.lib;
|
||||
LLVMAsmPrinter.lib;
|
||||
LLVMBinaryFormat.lib;
|
||||
LLVMBitReader.lib;
|
||||
LLVMBitstreamReader.lib;
|
||||
LLVMBitWriter.lib;
|
||||
LLVMCFGuard.lib;
|
||||
LLVMCFIVerify.lib;
|
||||
LLVMCodeGen.lib;
|
||||
LLVMCore.lib;
|
||||
LLVMCoroutines.lib;
|
||||
LLVMCoverage.lib;
|
||||
LLVMDebugInfoCodeView.lib;
|
||||
LLVMDebuginfod.lib;
|
||||
LLVMDebugInfoDWARF.lib;
|
||||
LLVMDebugInfoGSYM.lib;
|
||||
LLVMDebugInfoLogicalView.lib;
|
||||
LLVMDebugInfoMSF.lib;
|
||||
LLVMDebugInfoPDB.lib;
|
||||
LLVMDemangle.lib;
|
||||
LLVMDiff.lib;
|
||||
LLVMDlltoolDriver.lib;
|
||||
LLVMDWARFLinker.lib;
|
||||
LLVMDWARFLinkerParallel.lib;
|
||||
LLVMDWP.lib;
|
||||
LLVMExecutionEngine.lib;
|
||||
LLVMExegesis.lib;
|
||||
LLVMExegesisX86.lib;
|
||||
LLVMExtensions.lib;
|
||||
LLVMFileCheck.lib;
|
||||
LLVMFrontendHLSL.lib;
|
||||
LLVMFrontendOpenACC.lib;
|
||||
LLVMFrontendOpenMP.lib;
|
||||
LLVMFuzzerCLI.lib;
|
||||
LLVMFuzzMutate.lib;
|
||||
LLVMGlobalISel.lib;
|
||||
LLVMInstCombine.lib;
|
||||
LLVMInstrumentation.lib;
|
||||
LLVMIntelJITEvents.lib;
|
||||
LLVMInterfaceStub.lib;
|
||||
LLVMInterpreter.lib;
|
||||
LLVMipo.lib;
|
||||
LLVMIRPrinter.lib;
|
||||
LLVMIRReader.lib;
|
||||
LLVMJITLink.lib;
|
||||
LLVMLibDriver.lib;
|
||||
LLVMLineEditor.lib;
|
||||
LLVMLinker.lib;
|
||||
LLVMLTO.lib;
|
||||
LLVMMCA.lib;
|
||||
LLVMMCDisassembler.lib;
|
||||
LLVMMCJIT.lib;
|
||||
LLVMMC.lib;
|
||||
LLVMMCParser.lib;
|
||||
LLVMMIRParser.lib;
|
||||
LLVMObjCARCOpts.lib;
|
||||
LLVMObjCopy.lib;
|
||||
LLVMObject.lib;
|
||||
LLVMObjectYAML.lib;
|
||||
LLVMOption.lib;
|
||||
LLVMOrcJIT.lib;
|
||||
LLVMOrcShared.lib;
|
||||
LLVMOrcTargetProcess.lib;
|
||||
LLVMPasses.lib;
|
||||
LLVMProfileData.lib;
|
||||
LLVMRemarks.lib;
|
||||
LLVMRuntimeDyld.lib;
|
||||
LLVMScalarOpts.lib;
|
||||
LLVMSelectionDAG.lib;
|
||||
LLVMSupport.lib;
|
||||
LLVMSymbolize.lib;
|
||||
LLVMTableGenGlobalISel.lib;
|
||||
LLVMTableGen.lib;
|
||||
LLVMTarget.lib;
|
||||
LLVMTargetParser.lib;
|
||||
LLVMTextAPI.lib;
|
||||
LLVMTransformUtils.lib;
|
||||
LLVMVectorize.lib;
|
||||
LLVMWindowsDriver.lib;
|
||||
LLVMWindowsManifest.lib;
|
||||
LLVMX86AsmParser.lib;
|
||||
LLVMX86CodeGen.lib;
|
||||
LLVMX86Desc.lib;
|
||||
LLVMX86Disassembler.lib;
|
||||
LLVMX86Info.lib;
|
||||
LLVMX86TargetMCA.lib;
|
||||
LLVMXRay.lib;
|
||||
</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup />
|
||||
</Project>
|
||||
</Project>
|
||||
|
2
llvm
2
llvm
@ -1 +1 @@
|
||||
Subproject commit 9b52b6c39ae9f0759fbce7dd0db4b3290d6ebc56
|
||||
Subproject commit 89d5468e9505ddb04754eadbfed526f5b6ad4cbd
|
@ -75,6 +75,14 @@ cpu_translator::cpu_translator(llvm::Module* _module, bool is_be)
|
||||
return result;
|
||||
}
|
||||
});
|
||||
|
||||
// Custom intrinsic "any_select_by_bit4": per-byte select between operand 1 (a)
// and operand 2 (b), controlled by bit 4 of the matching byte of operand 0.
register_intrinsic("any_select_by_bit4", [&](llvm::CallInst* ci) -> llvm::Value*
{
	// Shifting the 64-bit lanes left by 3 moves bit 4 of every byte into that
	// byte's sign bit; the result is then viewed as 16 signed bytes.
	const auto s = bitcast<s8[16]>(m_ir->CreateShl(bitcast<u64[2]>(ci->getOperand(0)), 3));
	const auto a = bitcast<u8[16]>(ci->getOperand(1));
	const auto b = bitcast<u8[16]>(ci->getOperand(2));

	// Negative byte (bit 4 was set) selects b, otherwise a
	return m_ir->CreateSelect(m_ir->CreateICmpSLT(s, llvm::ConstantAggregateZero::get(get_type<s8[16]>())), b, a);
});
|
||||
}
|
||||
|
||||
void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine)
|
||||
@ -112,6 +120,8 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
|
||||
cpu == "broadwell" ||
|
||||
cpu == "skylake" ||
|
||||
cpu == "alderlake" ||
|
||||
cpu == "raptorlake" ||
|
||||
cpu == "meteorlake" ||
|
||||
cpu == "bdver2" ||
|
||||
cpu == "bdver3" ||
|
||||
cpu == "bdver4" ||
|
||||
@ -135,7 +145,9 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
|
||||
// Test VNNI feature (TODO)
|
||||
if (cpu == "cascadelake" ||
|
||||
cpu == "cooperlake" ||
|
||||
cpu == "alderlake")
|
||||
cpu == "alderlake" ||
|
||||
cpu == "raptorlake" ||
|
||||
cpu == "meteorlake")
|
||||
{
|
||||
m_use_vnni = true;
|
||||
}
|
||||
|
@ -19,7 +19,9 @@
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Support/KnownBits.h"
|
||||
#include "llvm/Analysis/ConstantFolding.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/IntrinsicsX86.h"
|
||||
#include "llvm/IR/IntrinsicsAArch64.h"
|
||||
|
||||
@ -59,6 +61,62 @@ concept DSLValue = requires (T& v)
|
||||
{ v.eval(std::declval<llvm::IRBuilder<>*>()) } -> LLVMValue;
|
||||
};
|
||||
|
||||
template <usz N>
|
||||
struct get_int_bits
|
||||
{
|
||||
};
|
||||
|
||||
template <>
|
||||
struct get_int_bits<1>
|
||||
{
|
||||
using utype = bool;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct get_int_bits<2>
|
||||
{
|
||||
using utype = i2;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct get_int_bits<4>
|
||||
{
|
||||
using utype = i4;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct get_int_bits<8>
|
||||
{
|
||||
using utype = u8;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct get_int_bits<16>
|
||||
{
|
||||
using utype = u16;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct get_int_bits<32>
|
||||
{
|
||||
using utype = u32;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct get_int_bits<64>
|
||||
{
|
||||
using utype = u64;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct get_int_bits<128>
|
||||
{
|
||||
using utype = u128;
|
||||
};
|
||||
|
||||
template <usz Bits>
|
||||
using get_int_vt = typename get_int_bits<Bits>::utype;
|
||||
|
||||
template <typename T = void>
|
||||
struct llvm_value_t
|
||||
{
|
||||
@ -3292,10 +3350,41 @@ public:
|
||||
|
||||
// Infinite-precision shift left
|
||||
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
|
||||
static auto inf_shl(T&& a, U&& b)
|
||||
auto inf_shl(T&& a, U&& b)
|
||||
{
|
||||
static constexpr u32 esz = llvm_value_t<CT>::esize;
|
||||
|
||||
if constexpr (esz == 32)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psllv.d", {std::forward<T>(a), std::forward<U>(b)}});
|
||||
#endif
|
||||
}
|
||||
|
||||
if constexpr (esz == 16)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psllv.w.128", {std::forward<T>(a), std::forward<U>(b)}});
|
||||
|
||||
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||
{
|
||||
using t32 = value_t<u32[4]>;
|
||||
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
|
||||
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
|
||||
auto sizeL = eval(b32 & 0xffff);
|
||||
auto sizeH = eval(b32 >> 16);
|
||||
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psllv.d", {a32, sizeL}});
|
||||
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psllv.d", {eval(a32 & 0xffff0000), sizeH}});
|
||||
return eval(bitcast<CT>((dataL & 0xffff) | dataH));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return eval(select(b < esz, a << b, splat<CT>(0)));
|
||||
|
||||
/*
|
||||
return expr(select(b < esz, a << b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
|
||||
{
|
||||
static const auto M = match<CT>();
|
||||
@ -3314,14 +3403,46 @@ public:
|
||||
value = nullptr;
|
||||
return {};
|
||||
});
|
||||
*/
|
||||
}
|
||||
|
||||
// Infinite-precision logical shift right (unsigned)
|
||||
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
|
||||
static auto inf_lshr(T&& a, U&& b)
|
||||
auto inf_lshr(T&& a, U&& b)
|
||||
{
|
||||
static constexpr u32 esz = llvm_value_t<CT>::esize;
|
||||
|
||||
if constexpr (esz == 32)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psrlv.d", {std::forward<T>(a), std::forward<U>(b)}});
|
||||
#endif
|
||||
}
|
||||
|
||||
if constexpr (esz == 16)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psrlv.w.128", {std::forward<T>(a), std::forward<U>(b)}});
|
||||
|
||||
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||
{
|
||||
using t32 = value_t<u32[4]>;
|
||||
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
|
||||
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
|
||||
auto sizeL = eval(b32 & 0xffff);
|
||||
auto sizeH = eval(b32 >> 16);
|
||||
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrlv.d", {eval(a32 & 0xffff), sizeL}});
|
||||
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrlv.d", {a32, sizeH}});
|
||||
return eval(bitcast<CT>(dataL | (dataH & 0xffff0000)));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return eval(select(b < esz, a >> b, splat<CT>(0)));
|
||||
|
||||
/*
|
||||
return expr(select(b < esz, a >> b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
|
||||
{
|
||||
static const auto M = match<CT>();
|
||||
@ -3340,14 +3461,46 @@ public:
|
||||
value = nullptr;
|
||||
return {};
|
||||
});
|
||||
*/
|
||||
}
|
||||
|
||||
// Infinite-precision arithmetic shift right (signed)
|
||||
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
|
||||
static auto inf_ashr(T&& a, U&& b)
|
||||
auto inf_ashr(T&& a, U&& b)
|
||||
{
|
||||
static constexpr u32 esz = llvm_value_t<CT>::esize;
|
||||
|
||||
if constexpr (esz == 32)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psrav.d", {std::forward<T>(a), std::forward<U>(b)}});
|
||||
#endif
|
||||
}
|
||||
|
||||
if constexpr (esz == 16)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psrav.w.128", {std::forward<T>(a), std::forward<U>(b)}});
|
||||
|
||||
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||
{
|
||||
using t32 = value_t<u32[4]>;
|
||||
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
|
||||
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
|
||||
auto sizeL = eval(b32 & 0xffff);
|
||||
auto sizeH = eval(b32 >> 16);
|
||||
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrav.d", {eval(a32 << 16), sizeL}});
|
||||
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrav.d", {a32, sizeH}});
|
||||
return eval(bitcast<CT>((dataL >> 16) | (dataH & 0xffff0000)));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return eval(a >> select(b > (esz - 1), splat<CT>(esz - 1), b));
|
||||
|
||||
/*
|
||||
return expr(a >> select(b > (esz - 1), splat<CT>(esz - 1), b), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
|
||||
{
|
||||
static const auto M = match<CT>();
|
||||
@ -3366,6 +3519,7 @@ public:
|
||||
value = nullptr;
|
||||
return {};
|
||||
});
|
||||
*/
|
||||
}
|
||||
|
||||
template <typename... Types>
|
||||
@ -3567,6 +3721,18 @@ public:
|
||||
template <typename T = v128>
|
||||
llvm::Constant* make_const_vector(T, llvm::Type*, u32 = __builtin_LINE());
|
||||
|
||||
template <typename T>
|
||||
llvm::KnownBits get_known_bits(T a)
|
||||
{
|
||||
return llvm::computeKnownBits(a.eval(m_ir), m_module->getDataLayout());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
llvm::KnownBits kbc(T value)
|
||||
{
|
||||
return llvm::KnownBits::makeConstant(llvm::APInt(sizeof(T) * 8, u64(value)));
|
||||
}
|
||||
|
||||
private:
|
||||
// Custom intrinsic table
|
||||
std::unordered_map<std::string_view, std::function<llvm::Value*(llvm::CallInst*)>> m_intrinsics;
|
||||
@ -3647,6 +3813,13 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
// Intended meaning per element: (m << 3) >= 0 ? a : b
// Builds a call expression to the custom intrinsic "any_select_by_bit4" with result type u8[16],
// forwarding the mask `m` and the two candidate operands `a` and `b`.
// Nothing is emitted here; the returned llvm_calli object only describes the call.
// NOTE(review): how "any_select_by_bit4" is lowered is not visible in this block — confirm against its registration.
template <typename T, typename U, typename V>
|
||||
static auto select_by_bit4(T&& m, U&& a, V&& b)
|
||||
{
|
||||
return llvm_calli<u8[16], T, U, V>{"any_select_by_bit4", {std::forward<T>(m), std::forward<U>(a), std::forward<V>(b)}};
|
||||
}
|
||||
|
||||
template <typename T, typename = std::enable_if_t<std::is_same_v<llvm_common_t<T>, f32[4]>>>
|
||||
static auto fre(T&& a)
|
||||
{
|
||||
|
@ -2130,10 +2130,10 @@ static void ppu_check(ppu_thread& ppu, u64 addr)
|
||||
{
|
||||
ppu.cia = ::narrow<u32>(addr);
|
||||
|
||||
// ppu_check() shall not return directly
|
||||
if (ppu.test_stopped())
|
||||
{
|
||||
return;
|
||||
}
|
||||
;
|
||||
ppu_escape(&ppu);
|
||||
}
|
||||
|
||||
static void ppu_trace(u64 addr)
|
||||
@ -3368,13 +3368,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
{
|
||||
std::unordered_map<std::string, u64> link_table
|
||||
{
|
||||
{ "sys_game_watchdog_start", reinterpret_cast<u64>(ppu_execute_syscall) },
|
||||
{ "sys_game_watchdog_stop", reinterpret_cast<u64>(ppu_execute_syscall) },
|
||||
{ "sys_game_watchdog_clear", reinterpret_cast<u64>(ppu_execute_syscall) },
|
||||
{ "sys_game_get_system_sw_version", reinterpret_cast<u64>(ppu_execute_syscall) },
|
||||
{ "sys_game_board_storage_read", reinterpret_cast<u64>(ppu_execute_syscall) },
|
||||
{ "sys_game_board_storage_write", reinterpret_cast<u64>(ppu_execute_syscall) },
|
||||
{ "sys_game_get_rtc_status", reinterpret_cast<u64>(ppu_execute_syscall) },
|
||||
{ "__trap", reinterpret_cast<u64>(&ppu_trap) },
|
||||
{ "__error", reinterpret_cast<u64>(&ppu_error) },
|
||||
{ "__check", reinterpret_cast<u64>(&ppu_check) },
|
||||
@ -3388,6 +3381,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||
{ "__dcbz", reinterpret_cast<u64>(+[](u32 addr){ alignas(64) static constexpr u8 z[128]{}; do_cell_atomic_128_store(addr, z); }) },
|
||||
{ "__resupdate", reinterpret_cast<u64>(vm::reservation_update) },
|
||||
{ "__resinterp", reinterpret_cast<u64>(ppu_reservation_fallback) },
|
||||
{ "__escape", reinterpret_cast<u64>(+ppu_escape) },
|
||||
};
|
||||
|
||||
for (u64 index = 0; index < 1024; index++)
|
||||
@ -3943,12 +3937,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
|
||||
std::unique_ptr<Module> _module = std::make_unique<Module>(obj_name, jit.get_context());
|
||||
|
||||
// Initialize target
|
||||
#if defined(__APPLE__) && defined(ARCH_ARM64)
|
||||
// Force target linux on macOS arm64 to bypass some 64-bit address space linking issues
|
||||
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple));
|
||||
#else
|
||||
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
|
||||
#endif
|
||||
_module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout());
|
||||
|
||||
// Initialize translator
|
||||
@ -3978,6 +3967,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
|
||||
}
|
||||
|
||||
{
|
||||
if (g_cfg.core.ppu_debug)
|
||||
{
|
||||
translator.build_interpreter();
|
||||
}
|
||||
|
||||
legacy::FunctionPassManager pm(_module.get());
|
||||
|
||||
// Basic optimizations
|
||||
|
@ -200,7 +200,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
||||
|
||||
// Create tail call to the check function
|
||||
m_ir->SetInsertPoint(vcheck);
|
||||
Call(GetType<void>(), "__check", m_thread, GetAddr());
|
||||
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
else
|
||||
@ -604,12 +604,12 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
|
||||
{
|
||||
// Read, byteswap, bitcast
|
||||
const auto int_type = m_ir->getIntNTy(size);
|
||||
const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align}, true);
|
||||
const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align});
|
||||
return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
|
||||
}
|
||||
|
||||
// Read normally
|
||||
return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align}, true);
|
||||
return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align});
|
||||
}
|
||||
|
||||
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
|
||||
@ -625,7 +625,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
|
||||
}
|
||||
|
||||
// Write
|
||||
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), llvm::MaybeAlign{align}, true);
|
||||
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), llvm::MaybeAlign{align});
|
||||
}
|
||||
|
||||
void PPUTranslator::CompilationError(const std::string& error)
|
||||
@ -1945,12 +1945,14 @@ void PPUTranslator::SC(ppu_opcode_t op)
|
||||
if (index < 1024)
|
||||
{
|
||||
Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
|
||||
@ -2507,6 +2509,7 @@ void PPUTranslator::LWARX(ppu_opcode_t op)
|
||||
RegStore(Trunc(GetAddr()), m_cia);
|
||||
FlushRegisters();
|
||||
Call(GetType<void>(), "__resinterp", m_thread);
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
return;
|
||||
}
|
||||
@ -2649,6 +2652,7 @@ void PPUTranslator::LDARX(ppu_opcode_t op)
|
||||
RegStore(Trunc(GetAddr()), m_cia);
|
||||
FlushRegisters();
|
||||
Call(GetType<void>(), "__resinterp", m_thread);
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
return;
|
||||
}
|
||||
@ -2786,11 +2790,7 @@ void PPUTranslator::MTOCRF(ppu_opcode_t op)
|
||||
const auto index = m_ir->CreateAnd(m_ir->CreateLShr(value, 28 - i * 4), 15);
|
||||
const auto src = m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_mtocr_table)->getValueType(), m_mtocr_table, {m_ir->getInt32(0), m_ir->CreateShl(index, 2)});
|
||||
const auto dst = bitcast(m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(m_cr - m_locals) + i * 4), GetType<u8*>());
|
||||
#if LLVM_VERSION_MAJOR < 15
|
||||
Call(GetType<void>(), "llvm.memcpy.p0i8.p0i8.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
|
||||
#else
|
||||
Call(GetType<void>(), "llvm.memcpy.p0.p0.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3313,7 +3313,7 @@ void PPUTranslator::STVLX(ppu_opcode_t op)
|
||||
const auto mask = bitcast<bool[16]>(splat<u16>(0xffff) << trunc<u16>(value<u64>(addr) & 0xf));
|
||||
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
|
||||
const auto align = splat<u32>(16);
|
||||
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}});
|
||||
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}});
|
||||
}
|
||||
|
||||
void PPUTranslator::STDBRX(ppu_opcode_t op)
|
||||
@ -3343,7 +3343,7 @@ void PPUTranslator::STVRX(ppu_opcode_t op)
|
||||
const auto mask = bitcast<bool[16]>(trunc<u16>(splat<u64>(0xffff) << (value<u64>(addr) & 0xf) >> 16));
|
||||
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
|
||||
const auto align = splat<u32>(16);
|
||||
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}});
|
||||
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}});
|
||||
}
|
||||
|
||||
void PPUTranslator::STFSUX(ppu_opcode_t op)
|
||||
@ -3524,7 +3524,7 @@ void PPUTranslator::DCBZ(ppu_opcode_t op)
|
||||
}
|
||||
else
|
||||
{
|
||||
Call(GetType<void>(), "llvm.memset.p0i8.i32", GetMemory(addr, GetType<u8>()), m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getTrue());
|
||||
Call(GetType<void>(), "llvm.memset.p0.i32", GetMemory(addr, GetType<u8>()), m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getFalse());
|
||||
}
|
||||
}
|
||||
|
||||
@ -4601,6 +4601,7 @@ void PPUTranslator::UNK(ppu_opcode_t op)
|
||||
{
|
||||
FlushRegisters();
|
||||
Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
|
||||
@ -4862,6 +4863,7 @@ Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right)
|
||||
void PPUTranslator::Trap()
|
||||
{
|
||||
Call(GetType<void>(), "__trap", m_thread, GetAddr());
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
|
||||
@ -4909,4 +4911,184 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Emits one standalone LLVM function per listed vector instruction into m_module.
// Each function is named "op_<INSTRUCTION>", returns void and takes a pointer to the thread type.
void PPUTranslator::build_interpreter()
|
||||
{
|
||||
// BUILD_VEC_INST(i) expands to a block that:
//  - gets or creates the function "op_<i>" and makes it the current m_function,
//  - resets every entry of m_globals and m_locals to nullptr,
//  - creates an "__entry" basic block and points m_ir at a builder positioned there,
//  - takes the function's first argument as m_thread,
//  - builds a zero-initialised opcode with vd = 0, va = 1, vb = 2, vc = 3,
//  - calls this->i(op) to generate the instruction body,
//  - flushes registers, emits `ret void`, and runs replace_intrinsics on the function.
// NOTE(review): m_ir is left pointing at the block-local `irb` once each expansion ends —
// confirm nothing dereferences m_ir after this without resetting it.
#define BUILD_VEC_INST(i) { \
|
||||
m_function = llvm::cast<llvm::Function>(m_module->getOrInsertFunction("op_" #i, get_type<void>(), m_thread_type->getPointerTo()).getCallee()); \
|
||||
std::fill(std::begin(m_globals), std::end(m_globals), nullptr); \
|
||||
std::fill(std::begin(m_locals), std::end(m_locals), nullptr); \
|
||||
IRBuilder<> irb(BasicBlock::Create(m_context, "__entry", m_function)); \
|
||||
m_ir = &irb; \
|
||||
m_thread = m_function->getArg(0); \
|
||||
ppu_opcode_t op{}; \
|
||||
op.vd = 0; \
|
||||
op.va = 1; \
|
||||
op.vb = 2; \
|
||||
op.vc = 3; \
|
||||
this->i(op); \
|
||||
FlushRegisters(); \
|
||||
m_ir->CreateRetVoid(); \
|
||||
replace_intrinsics(*m_function); \
|
||||
}
|
||||
|
||||
// One generated function per vector instruction handler named below.
BUILD_VEC_INST(VADDCUW);
|
||||
BUILD_VEC_INST(VADDFP);
|
||||
BUILD_VEC_INST(VADDSBS);
|
||||
BUILD_VEC_INST(VADDSHS);
|
||||
BUILD_VEC_INST(VADDSWS);
|
||||
BUILD_VEC_INST(VADDUBM);
|
||||
BUILD_VEC_INST(VADDUBS);
|
||||
BUILD_VEC_INST(VADDUHM);
|
||||
BUILD_VEC_INST(VADDUHS);
|
||||
BUILD_VEC_INST(VADDUWM);
|
||||
BUILD_VEC_INST(VADDUWS);
|
||||
BUILD_VEC_INST(VAND);
|
||||
BUILD_VEC_INST(VANDC);
|
||||
BUILD_VEC_INST(VAVGSB);
|
||||
BUILD_VEC_INST(VAVGSH);
|
||||
BUILD_VEC_INST(VAVGSW);
|
||||
BUILD_VEC_INST(VAVGUB);
|
||||
BUILD_VEC_INST(VAVGUH);
|
||||
BUILD_VEC_INST(VAVGUW);
|
||||
BUILD_VEC_INST(VCFSX);
|
||||
BUILD_VEC_INST(VCFUX);
|
||||
BUILD_VEC_INST(VCMPBFP);
|
||||
BUILD_VEC_INST(VCMPBFP_);
|
||||
BUILD_VEC_INST(VCMPEQFP);
|
||||
BUILD_VEC_INST(VCMPEQFP_);
|
||||
BUILD_VEC_INST(VCMPEQUB);
|
||||
BUILD_VEC_INST(VCMPEQUB_);
|
||||
BUILD_VEC_INST(VCMPEQUH);
|
||||
BUILD_VEC_INST(VCMPEQUH_);
|
||||
BUILD_VEC_INST(VCMPEQUW);
|
||||
BUILD_VEC_INST(VCMPEQUW_);
|
||||
BUILD_VEC_INST(VCMPGEFP);
|
||||
BUILD_VEC_INST(VCMPGEFP_);
|
||||
BUILD_VEC_INST(VCMPGTFP);
|
||||
BUILD_VEC_INST(VCMPGTFP_);
|
||||
BUILD_VEC_INST(VCMPGTSB);
|
||||
BUILD_VEC_INST(VCMPGTSB_);
|
||||
BUILD_VEC_INST(VCMPGTSH);
|
||||
BUILD_VEC_INST(VCMPGTSH_);
|
||||
BUILD_VEC_INST(VCMPGTSW);
|
||||
BUILD_VEC_INST(VCMPGTSW_);
|
||||
BUILD_VEC_INST(VCMPGTUB);
|
||||
BUILD_VEC_INST(VCMPGTUB_);
|
||||
BUILD_VEC_INST(VCMPGTUH);
|
||||
BUILD_VEC_INST(VCMPGTUH_);
|
||||
BUILD_VEC_INST(VCMPGTUW);
|
||||
BUILD_VEC_INST(VCMPGTUW_);
|
||||
BUILD_VEC_INST(VCTSXS);
|
||||
BUILD_VEC_INST(VCTUXS);
|
||||
BUILD_VEC_INST(VEXPTEFP);
|
||||
BUILD_VEC_INST(VLOGEFP);
|
||||
BUILD_VEC_INST(VMADDFP);
|
||||
BUILD_VEC_INST(VMAXFP);
|
||||
BUILD_VEC_INST(VMAXSB);
|
||||
BUILD_VEC_INST(VMAXSH);
|
||||
BUILD_VEC_INST(VMAXSW);
|
||||
BUILD_VEC_INST(VMAXUB);
|
||||
BUILD_VEC_INST(VMAXUH);
|
||||
BUILD_VEC_INST(VMAXUW);
|
||||
BUILD_VEC_INST(VMHADDSHS);
|
||||
BUILD_VEC_INST(VMHRADDSHS);
|
||||
BUILD_VEC_INST(VMINFP);
|
||||
BUILD_VEC_INST(VMINSB);
|
||||
BUILD_VEC_INST(VMINSH);
|
||||
BUILD_VEC_INST(VMINSW);
|
||||
BUILD_VEC_INST(VMINUB);
|
||||
BUILD_VEC_INST(VMINUH);
|
||||
BUILD_VEC_INST(VMINUW);
|
||||
BUILD_VEC_INST(VMLADDUHM);
|
||||
BUILD_VEC_INST(VMRGHB);
|
||||
BUILD_VEC_INST(VMRGHH);
|
||||
BUILD_VEC_INST(VMRGHW);
|
||||
BUILD_VEC_INST(VMRGLB);
|
||||
BUILD_VEC_INST(VMRGLH);
|
||||
BUILD_VEC_INST(VMRGLW);
|
||||
BUILD_VEC_INST(VMSUMMBM);
|
||||
BUILD_VEC_INST(VMSUMSHM);
|
||||
BUILD_VEC_INST(VMSUMSHS);
|
||||
BUILD_VEC_INST(VMSUMUBM);
|
||||
BUILD_VEC_INST(VMSUMUHM);
|
||||
BUILD_VEC_INST(VMSUMUHS);
|
||||
BUILD_VEC_INST(VMULESB);
|
||||
BUILD_VEC_INST(VMULESH);
|
||||
BUILD_VEC_INST(VMULEUB);
|
||||
BUILD_VEC_INST(VMULEUH);
|
||||
BUILD_VEC_INST(VMULOSB);
|
||||
BUILD_VEC_INST(VMULOSH);
|
||||
BUILD_VEC_INST(VMULOUB);
|
||||
BUILD_VEC_INST(VMULOUH);
|
||||
BUILD_VEC_INST(VNMSUBFP);
|
||||
BUILD_VEC_INST(VNOR);
|
||||
BUILD_VEC_INST(VOR);
|
||||
BUILD_VEC_INST(VPERM);
|
||||
BUILD_VEC_INST(VPKPX);
|
||||
BUILD_VEC_INST(VPKSHSS);
|
||||
BUILD_VEC_INST(VPKSHUS);
|
||||
BUILD_VEC_INST(VPKSWSS);
|
||||
BUILD_VEC_INST(VPKSWUS);
|
||||
BUILD_VEC_INST(VPKUHUM);
|
||||
BUILD_VEC_INST(VPKUHUS);
|
||||
BUILD_VEC_INST(VPKUWUM);
|
||||
BUILD_VEC_INST(VPKUWUS);
|
||||
BUILD_VEC_INST(VREFP);
|
||||
BUILD_VEC_INST(VRFIM);
|
||||
BUILD_VEC_INST(VRFIN);
|
||||
BUILD_VEC_INST(VRFIP);
|
||||
BUILD_VEC_INST(VRFIZ);
|
||||
BUILD_VEC_INST(VRLB);
|
||||
BUILD_VEC_INST(VRLH);
|
||||
BUILD_VEC_INST(VRLW);
|
||||
BUILD_VEC_INST(VRSQRTEFP);
|
||||
BUILD_VEC_INST(VSEL);
|
||||
BUILD_VEC_INST(VSL);
|
||||
BUILD_VEC_INST(VSLB);
|
||||
BUILD_VEC_INST(VSLDOI);
|
||||
BUILD_VEC_INST(VSLH);
|
||||
BUILD_VEC_INST(VSLO);
|
||||
BUILD_VEC_INST(VSLW);
|
||||
BUILD_VEC_INST(VSPLTB);
|
||||
BUILD_VEC_INST(VSPLTH);
|
||||
BUILD_VEC_INST(VSPLTISB);
|
||||
BUILD_VEC_INST(VSPLTISH);
|
||||
BUILD_VEC_INST(VSPLTISW);
|
||||
BUILD_VEC_INST(VSPLTW);
|
||||
BUILD_VEC_INST(VSR);
|
||||
BUILD_VEC_INST(VSRAB);
|
||||
BUILD_VEC_INST(VSRAH);
|
||||
BUILD_VEC_INST(VSRAW);
|
||||
BUILD_VEC_INST(VSRB);
|
||||
BUILD_VEC_INST(VSRH);
|
||||
BUILD_VEC_INST(VSRO);
|
||||
BUILD_VEC_INST(VSRW);
|
||||
BUILD_VEC_INST(VSUBCUW);
|
||||
BUILD_VEC_INST(VSUBFP);
|
||||
BUILD_VEC_INST(VSUBSBS);
|
||||
BUILD_VEC_INST(VSUBSHS);
|
||||
BUILD_VEC_INST(VSUBSWS);
|
||||
BUILD_VEC_INST(VSUBUBM);
|
||||
BUILD_VEC_INST(VSUBUBS);
|
||||
BUILD_VEC_INST(VSUBUHM);
|
||||
BUILD_VEC_INST(VSUBUHS);
|
||||
BUILD_VEC_INST(VSUBUWM);
|
||||
BUILD_VEC_INST(VSUBUWS);
|
||||
BUILD_VEC_INST(VSUMSWS);
|
||||
BUILD_VEC_INST(VSUM2SWS);
|
||||
BUILD_VEC_INST(VSUM4SBS);
|
||||
BUILD_VEC_INST(VSUM4SHS);
|
||||
BUILD_VEC_INST(VSUM4UBS);
|
||||
BUILD_VEC_INST(VUPKHPX);
|
||||
BUILD_VEC_INST(VUPKHSB);
|
||||
BUILD_VEC_INST(VUPKHSH);
|
||||
BUILD_VEC_INST(VUPKLPX);
|
||||
BUILD_VEC_INST(VUPKLSB);
|
||||
BUILD_VEC_INST(VUPKLSH);
|
||||
BUILD_VEC_INST(VXOR);
|
||||
// The helper macro is local to this function.
#undef BUILD_VEC_INST
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -856,6 +856,8 @@ public:
|
||||
void FCTID_(ppu_opcode_t op) { return FCTID(op); }
|
||||
void FCTIDZ_(ppu_opcode_t op) { return FCTIDZ(op); }
|
||||
void FCFID_(ppu_opcode_t op) { return FCFID(op); }
|
||||
|
||||
void build_interpreter();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -3914,6 +3914,7 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out)
|
||||
#if LLVM_VERSION_MAJOR < 17
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#endif
|
||||
#include "llvm/Support/Host.h"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/IR/InlineAsm.h"
|
||||
@ -5006,7 +5007,11 @@ public:
|
||||
|
||||
// Create LLVM module
|
||||
std::unique_ptr<Module> _module = std::make_unique<Module>(m_hash + ".obj", m_context);
|
||||
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple));
|
||||
#if defined(_WIN32) && defined(ARCH_X64)
|
||||
_module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu"));
|
||||
#else
|
||||
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
|
||||
#endif
|
||||
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
|
||||
m_module = _module.get();
|
||||
|
||||
@ -5227,6 +5232,7 @@ public:
|
||||
m_ir->CreateRetVoid();
|
||||
|
||||
m_ir->SetInsertPoint(label_stop);
|
||||
call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
|
||||
m_ir->SetInsertPoint(label_diff);
|
||||
@ -5681,7 +5687,11 @@ public:
|
||||
|
||||
// Create LLVM module
|
||||
std::unique_ptr<Module> _module = std::make_unique<Module>("spu_interpreter.obj", m_context);
|
||||
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple));
|
||||
#if defined(_WIN32) && defined(ARCH_X64)
|
||||
_module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu"));
|
||||
#else
|
||||
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
|
||||
#endif
|
||||
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
|
||||
m_module = _module.get();
|
||||
|
||||
@ -5982,7 +5992,8 @@ public:
|
||||
ncall->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
m_ir->SetInsertPoint(_stop);
|
||||
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc));
|
||||
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc), true);
|
||||
call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
}
|
||||
@ -6355,7 +6366,7 @@ public:
|
||||
|
||||
llvm::Value* get_rchcnt(u32 off, u64 inv = 0)
|
||||
{
|
||||
const auto val = m_ir->CreateLoad(get_type<u64>(), _ptr<u64>(m_thread, off), true);
|
||||
const auto val = m_ir->CreateLoad(get_type<u64>(), _ptr<u64>(m_thread, off));
|
||||
const auto shv = m_ir->CreateLShr(val, spu_channel::off_count);
|
||||
return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>());
|
||||
}
|
||||
@ -6415,20 +6426,20 @@ public:
|
||||
}
|
||||
case MFC_Cmd:
|
||||
{
|
||||
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size), true);
|
||||
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size));
|
||||
res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value);
|
||||
break;
|
||||
}
|
||||
case SPU_RdInMbox:
|
||||
{
|
||||
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_in_mbox), true);
|
||||
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_in_mbox));
|
||||
res.value = m_ir->CreateLShr(res.value, 8);
|
||||
res.value = m_ir->CreateAnd(res.value, 7);
|
||||
break;
|
||||
}
|
||||
case SPU_RdEventStat:
|
||||
{
|
||||
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>());
|
||||
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events)), 32), get_type<u32>());
|
||||
res.value = call("spu_get_events", &exec_get_events, m_thread, mask);
|
||||
break;
|
||||
}
|
||||
@ -6815,7 +6826,7 @@ public:
|
||||
if (csize > 0 && csize <= 16)
|
||||
{
|
||||
// Generate single copy operation
|
||||
m_ir->CreateStore(m_ir->CreateLoad(vtype, m_ir->CreateBitCast(src, vtype->getPointerTo()), true), m_ir->CreateBitCast(dst, vtype->getPointerTo()), true);
|
||||
m_ir->CreateStore(m_ir->CreateLoad(vtype, m_ir->CreateBitCast(src, vtype->getPointerTo())), m_ir->CreateBitCast(dst, vtype->getPointerTo()));
|
||||
}
|
||||
else if (csize <= stride * 16 && !(csize % 32))
|
||||
{
|
||||
@ -6826,7 +6837,7 @@ public:
|
||||
const auto _dst = m_ir->CreateGEP(get_type<u8>(), dst, m_ir->getInt32(i));
|
||||
if (csize - i < stride)
|
||||
{
|
||||
m_ir->CreateStore(m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(_src, get_type<u8(*)[16]>()), true), m_ir->CreateBitCast(_dst, get_type<u8(*)[16]>()), true);
|
||||
m_ir->CreateStore(m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(_src, get_type<u8(*)[16]>())), m_ir->CreateBitCast(_dst, get_type<u8(*)[16]>()));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -7086,18 +7097,51 @@ public:
|
||||
// ROTM: shifts each u32 lane of register ra right by the negated count taken from rb
// and stores the result in rt.
void ROTM(spu_opcode_t op)
|
||||
{
|
||||
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
|
||||
// NOTE(review): the next line looks like the pre-change one-line form shown by the diff
// rendering; every path below stores to op.rt again — confirm it is not part of the final body.
set_vr(op.rt, inf_lshr(a, -b & 63));
|
||||
|
||||
// Negated count. If b already matches the pattern -x, use x directly instead of negating again.
auto minusb = eval(-b);
|
||||
if (auto [ok, x] = match_expr(b, -match<u32[4]>()); ok)
|
||||
{
|
||||
minusb = eval(x);
|
||||
}
|
||||
|
||||
// If bit 5 (value 32) of the count is known to be zero, (count & 63) equals (count & 31),
// so a plain shift with the count masked to 0..31 is used.
if (auto k = get_known_bits(minusb); (k & kbc<u32>(32)).isZero())
|
||||
{
|
||||
set_vr(op.rt, a >> (minusb & 31));
|
||||
return;
|
||||
}
|
||||
|
||||
// General case: the count is masked to 0..63 and handed to inf_lshr.
// NOTE(review): presumably inf_lshr defines the result for counts >= 32 — confirm in its definition.
set_vr(op.rt, inf_lshr(a, minusb & 63));
|
||||
}
|
||||
|
||||
// ROTMA: shifts each s32 lane of register ra right by the negated count taken from rb
// and stores the result in rt.
void ROTMA(spu_opcode_t op)
|
||||
{
|
||||
const auto [a, b] = get_vrs<s32[4]>(op.ra, op.rb);
|
||||
// NOTE(review): the next line looks like the pre-change one-line form shown by the diff
// rendering; every path below stores to op.rt again — confirm it is not part of the final body.
set_vr(op.rt, inf_ashr(a, -b & 63));
|
||||
|
||||
// Negated count. If b already matches the pattern -x, use x directly instead of negating again.
auto minusb = eval(-b);
|
||||
if (auto [ok, x] = match_expr(b, -match<s32[4]>()); ok)
|
||||
{
|
||||
minusb = eval(x);
|
||||
}
|
||||
|
||||
// If bit 5 (value 32) of the count is known to be zero, (count & 63) equals (count & 31),
// so a plain shift with the count masked to 0..31 is used.
if (auto k = get_known_bits(minusb); (k & kbc<u32>(32)).isZero())
|
||||
{
|
||||
set_vr(op.rt, a >> (minusb & 31));
|
||||
return;
|
||||
}
|
||||
|
||||
// General case: the count is masked to 0..63 and handed to inf_ashr.
// NOTE(review): presumably inf_ashr defines the result for counts >= 32 — confirm in its definition.
set_vr(op.rt, inf_ashr(a, minusb & 63));
|
||||
}
|
||||
|
||||
// SHL: shifts each u32 lane of register ra left by the count taken from rb
// and stores the result in rt.
void SHL(spu_opcode_t op)
|
||||
{
|
||||
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
|
||||
|
||||
// If bit 5 (value 32) of the count is known to be zero, (b & 63) equals (b & 31),
// so a plain shift with the count masked to 0..31 is used.
if (auto k = get_known_bits(b); (k & kbc<u32>(32)).isZero())
|
||||
{
|
||||
set_vr(op.rt, a << (b & 31));
|
||||
return;
|
||||
}
|
||||
|
||||
// General case: the count is masked to 0..63 and handed to inf_shl.
// NOTE(review): presumably inf_shl defines the result for counts >= 32 — confirm in its definition.
set_vr(op.rt, inf_shl(a, b & 63));
|
||||
}
|
||||
|
||||
@ -7110,18 +7154,51 @@ public:
|
||||
// ROTHM: shifts each u16 lane of register ra right by the negated count taken from rb
// and stores the result in rt.
void ROTHM(spu_opcode_t op)
|
||||
{
|
||||
const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
|
||||
// NOTE(review): the next line looks like the pre-change one-line form shown by the diff
// rendering; every path below stores to op.rt again — confirm it is not part of the final body.
set_vr(op.rt, inf_lshr(a, -b & 31));
|
||||
|
||||
// Negated count. If b already matches the pattern -x, use x directly instead of negating again.
auto minusb = eval(-b);
|
||||
if (auto [ok, x] = match_expr(b, -match<u16[8]>()); ok)
|
||||
{
|
||||
minusb = eval(x);
|
||||
}
|
||||
|
||||
// If bit 4 (value 16) of the count is known to be zero, (count & 31) equals (count & 15),
// so a plain shift with the count masked to 0..15 is used.
if (auto k = get_known_bits(minusb); (k & kbc<u16>(16)).isZero())
|
||||
{
|
||||
set_vr(op.rt, a >> (minusb & 15));
|
||||
return;
|
||||
}
|
||||
|
||||
// General case: the count is masked to 0..31 and handed to inf_lshr.
// NOTE(review): presumably inf_lshr defines the result for counts >= 16 — confirm in its definition.
set_vr(op.rt, inf_lshr(a, minusb & 31));
|
||||
}
|
||||
|
||||
// ROTMAH: shifts each s16 lane of register ra right by the negated count taken from rb
// and stores the result in rt.
void ROTMAH(spu_opcode_t op)
|
||||
{
|
||||
const auto [a, b] = get_vrs<s16[8]>(op.ra, op.rb);
|
||||
// NOTE(review): the next line looks like the pre-change one-line form shown by the diff
// rendering; every path below stores to op.rt again — confirm it is not part of the final body.
set_vr(op.rt, inf_ashr(a, -b & 31));
|
||||
|
||||
// Negated count. If b already matches the pattern -x, use x directly instead of negating again.
auto minusb = eval(-b);
|
||||
if (auto [ok, x] = match_expr(b, -match<s16[8]>()); ok)
|
||||
{
|
||||
minusb = eval(x);
|
||||
}
|
||||
|
||||
// If bit 4 (value 16) of the count is known to be zero, (count & 31) equals (count & 15),
// so a plain shift with the count masked to 0..15 is used.
if (auto k = get_known_bits(minusb); (k & kbc<u16>(16)).isZero())
|
||||
{
|
||||
set_vr(op.rt, a >> (minusb & 15));
|
||||
return;
|
||||
}
|
||||
|
||||
// General case: the count is masked to 0..31 and handed to inf_ashr.
// NOTE(review): presumably inf_ashr defines the result for counts >= 16 — confirm in its definition.
set_vr(op.rt, inf_ashr(a, minusb & 31));
|
||||
}
|
||||
|
||||
// SHLH: shifts each u16 lane of register ra left by the count taken from rb
// and stores the result in rt.
void SHLH(spu_opcode_t op)
|
||||
{
|
||||
const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
|
||||
|
||||
// If bit 4 (value 16) of the count is known to be zero, (b & 31) equals (b & 15),
// so a plain shift with the count masked to 0..15 is used.
if (auto k = get_known_bits(b); (k & kbc<u16>(16)).isZero())
|
||||
{
|
||||
set_vr(op.rt, a << (b & 15));
|
||||
return;
|
||||
}
|
||||
|
||||
// General case: the count is masked to 0..31 and handed to inf_shl.
// NOTE(review): presumably inf_shl defines the result for counts >= 16 — confirm in its definition.
set_vr(op.rt, inf_shl(a, b & 31));
|
||||
}
|
||||
|
||||
@ -8093,6 +8170,12 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if (auto [ok, y] = match_expr(x, bitcast<bool[std::extent_v<VT>]>(match<get_int_vt<std::extent_v<VT>>>())); ok)
|
||||
{
|
||||
// Don't ruin FSMB/FSM/FSMH instructions
|
||||
return false;
|
||||
}
|
||||
|
||||
set_vr(op.rt4, select(x, get_vr<VT>(op.rb), get_vr<VT>(op.ra)));
|
||||
return true;
|
||||
}
|
||||
@ -8337,9 +8420,9 @@ public:
|
||||
const auto bx = pshufb(bs, c);
|
||||
|
||||
if (perm_only)
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, bx));
|
||||
set_vr(op.rt4, select_by_bit4(c, ax, bx));
|
||||
else
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, bx) | x);
|
||||
set_vr(op.rt4, select_by_bit4(c, ax, bx) | x);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -8352,9 +8435,9 @@ public:
|
||||
const auto ax = pshufb(as, c);
|
||||
|
||||
if (perm_only)
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, b));
|
||||
set_vr(op.rt4, select_by_bit4(c, ax, b));
|
||||
else
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, b) | x);
|
||||
set_vr(op.rt4, select_by_bit4(c, ax, b) | x);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -8371,9 +8454,9 @@ public:
|
||||
const auto bx = pshufb(bs, c);
|
||||
|
||||
if (perm_only)
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, a, bx));
|
||||
set_vr(op.rt4, select_by_bit4(c, a, bx));
|
||||
else
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, a, bx) | x);
|
||||
set_vr(op.rt4, select_by_bit4(c, a, bx) | x);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -8401,9 +8484,9 @@ public:
|
||||
const auto bx = pshufb(b, cr);
|
||||
|
||||
if (perm_only)
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(cr << 3) >= 0, ax, bx));
|
||||
set_vr(op.rt4, select_by_bit4(cr, ax, bx));
|
||||
else
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(cr << 3) >= 0, ax, bx) | x);
|
||||
set_vr(op.rt4, select_by_bit4(cr, ax, bx) | x);
|
||||
}
|
||||
|
||||
void MPYA(spu_opcode_t op)
|
||||
@ -9611,13 +9694,13 @@ public:
|
||||
void make_store_ls(value_t<u64> addr, value_t<u8[16]> data)
|
||||
{
|
||||
const auto bswapped = byteswap(data);
|
||||
m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()), true);
|
||||
m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()));
|
||||
}
|
||||
|
||||
auto make_load_ls(value_t<u64> addr)
|
||||
{
|
||||
value_t<u8[16]> data;
|
||||
data.value = m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()), true);
|
||||
data.value = m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()));
|
||||
return byteswap(data);
|
||||
}
|
||||
|
||||
@ -9839,7 +9922,7 @@ public:
|
||||
target->addIncoming(e_addr, e_exec);
|
||||
m_ir->CreateCondBr(get_imm<bool>(op.d).value, d_exec, d_done, m_md_unlikely);
|
||||
m_ir->SetInsertPoint(d_exec);
|
||||
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled))->setVolatile(true);
|
||||
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
|
||||
m_ir->CreateBr(d_done);
|
||||
m_ir->SetInsertPoint(d_done);
|
||||
m_ir->CreateBr(m_interp_bblock);
|
||||
@ -9890,7 +9973,7 @@ public:
|
||||
|
||||
if (op.d)
|
||||
{
|
||||
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled))->setVolatile(true);
|
||||
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
|
||||
}
|
||||
|
||||
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
|
||||
@ -10211,7 +10294,7 @@ public:
|
||||
|
||||
// Exit function on unexpected target
|
||||
m_ir->SetInsertPoint(sw->getDefaultDest());
|
||||
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc), true);
|
||||
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
|
||||
|
||||
if (m_finfo && m_finfo->fn)
|
||||
{
|
||||
|
@ -136,7 +136,7 @@ namespace psf
|
||||
{
|
||||
std::string_view value{value_array, CharN};
|
||||
value = value.substr(0, std::min<usz>(value.find_first_of('\0'), value.size()));
|
||||
return string(CharN, value, allow_truncate);
|
||||
return string(max_size, value, allow_truncate);
|
||||
}
|
||||
|
||||
// Make array entry
|
||||
|
@ -71,16 +71,4 @@ namespace utils
|
||||
u64 _get_main_tid();
|
||||
|
||||
inline const u64 main_tid = _get_main_tid();
|
||||
|
||||
#ifdef LLVM_AVAILABLE
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
const std::string c_llvm_default_triple = "x86_64-unknown-linux-gnu";
|
||||
#elif defined(ARCH_ARM64)
|
||||
const std::string c_llvm_default_triple = "arm64-unknown-linux-gnu";
|
||||
#else
|
||||
const std::string c_llvm_default_triple = "Unimplemented!"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
@ -115,7 +115,7 @@ namespace std
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__INTELLISENSE__)
|
||||
#if defined(__INTELLISENSE__) || (defined (__clang__) && (__clang_major__ <= 16))
|
||||
#define consteval constexpr
|
||||
#define constinit
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user