mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-21 18:22:33 +01:00
Update ASMJIT (new upstream API)
This commit is contained in:
parent
900d7df40f
commit
cb2748ae08
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -5,7 +5,7 @@
|
||||
[submodule "asmjit"]
|
||||
path = 3rdparty/asmjit/asmjit
|
||||
url = ../../asmjit/asmjit.git
|
||||
branch = oldstable
|
||||
branch = aarch64
|
||||
ignore = dirty
|
||||
[submodule "llvm"]
|
||||
path = llvm
|
||||
|
2
3rdparty/asmjit/asmjit
vendored
2
3rdparty/asmjit/asmjit
vendored
@ -1 +1 @@
|
||||
Subproject commit 723f58581afc0f4cb16ba13396ff77e425896847
|
||||
Subproject commit eae7197fce03fd52a6e71ca89207a88ce270fb1a
|
184
3rdparty/asmjit/asmjit.vcxproj
vendored
184
3rdparty/asmjit/asmjit.vcxproj
vendored
@ -11,66 +11,146 @@
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="asmjit\src\asmjit\base\arch.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\assembler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\codebuilder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\codecompiler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\codeemitter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\codeholder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\constpool.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\cpuinfo.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\func.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\globals.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\inst.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\logging.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\osutils.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\operand.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\regalloc.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\runtime.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\string.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\vmem.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\zone.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\archtraits.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\assembler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\builder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\codeholder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\codewriter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\compiler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\constpool.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\cpuinfo.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\emithelper.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\emitter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\emitterutils.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\environment.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\errorhandler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\formatter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\funcargscontext.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\func.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\globals.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\inst.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\jitallocator.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\jitruntime.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\logger.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\operand.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\osutils.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\ralocal.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\rapass.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\rastack.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\string.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\support.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\target.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\type.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\virtmem.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zone.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zonehash.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zonelist.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zonestack.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zonetree.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zonevector.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86assembler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86builder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86compiler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86inst.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86instimpl.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86internal.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86logging.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86emithelper.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86formatter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86func.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86instapi.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86instdb.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86operand.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86operand_regs.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86regalloc.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86rapass.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64assembler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64builder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64compiler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64emithelper.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64instapi.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64instdb.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64operand.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64rapass.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\armformatter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\armfunc.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="asmjit\src\asmjit\base\arch.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\assembler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\codebuilder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\codecompiler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\codeemitter.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\codeholder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\constpool.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\context_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\cpuinfo.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\func.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\logging.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\globals.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\inst.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\lock.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\operand.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\osutils.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\regalloc_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\runtime.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\string.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\vectypes.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\vmem.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\zone.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\api-build_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\api-config.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\archcommons.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\archtraits.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\assembler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\builder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\codebuffer.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\codeholder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\codewriter_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\compilerdefs.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\compiler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\constpool.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\cpuinfo.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\emithelper_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\emitter.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\emitterutils_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\environment.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\errorhandler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\formatter.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\formatter_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\funcargscontext_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\func.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\globals.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\inst.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\jitallocator.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\jitruntime.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\logger.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\misc_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\operand.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\osutils.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\osutils_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\raassignment_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\rabuilders_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\radefs_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\ralocal_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\rapass_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\rastack_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\string.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\support.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\target.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\type.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\virtmem.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zone.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonehash.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonelist.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonestack.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonestring.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonetree.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonevector.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86archtraits_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86assembler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86builder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86compiler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86inst.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86instimpl_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86internal_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86logging_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86emithelper_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86emitter.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86formatter_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86func_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86globals.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86instapi_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86instdb.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86instdb_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86opcode_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86operand.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86regalloc_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86rapass_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64assembler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64builder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64compiler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64emithelper_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64emitter.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64globals.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64instapi_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64instdb.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64instdb_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64operand.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64rapass_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64utils.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\armarchtraits_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\armformatter_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\armfunc_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\armglobals.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\armoperand.h" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{AC40FF01-426E-4838-A317-66354CEFAE88}</ProjectGuid>
|
||||
|
184
3rdparty/asmjit/asmjit.vcxproj.filters
vendored
184
3rdparty/asmjit/asmjit.vcxproj.filters
vendored
@ -1,65 +1,145 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<ClCompile Include="asmjit\src\asmjit\base\arch.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\assembler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\codebuilder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\codecompiler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\codeemitter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\codeholder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\constpool.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\cpuinfo.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\func.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\globals.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\inst.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\logging.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\osutils.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\operand.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\regalloc.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\runtime.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\string.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\vmem.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\base\zone.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\archtraits.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\assembler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\builder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\codeholder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\codewriter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\compiler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\constpool.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\cpuinfo.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\emithelper.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\emitter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\emitterutils.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\environment.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\errorhandler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\formatter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\funcargscontext.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\func.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\globals.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\inst.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\jitallocator.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\jitruntime.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\logger.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\operand.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\osutils.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\ralocal.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\rapass.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\rastack.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\string.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\support.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\target.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\type.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\virtmem.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zone.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zonehash.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zonelist.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zonestack.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zonetree.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\core\zonevector.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86assembler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86builder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86compiler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86inst.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86instimpl.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86internal.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86logging.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86emithelper.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86formatter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86func.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86instapi.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86instdb.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86operand.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86operand_regs.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86regalloc.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\x86\x86rapass.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64assembler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64builder.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64compiler.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64emithelper.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64instapi.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64instdb.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64operand.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\a64rapass.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\armformatter.cpp" />
|
||||
<ClCompile Include="asmjit\src\asmjit\arm\armfunc.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="asmjit\src\asmjit\base\arch.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\assembler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\codebuilder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\codecompiler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\codeemitter.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\codeholder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\constpool.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\context_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\cpuinfo.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\func.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\logging.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\globals.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\inst.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\lock.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\operand.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\osutils.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\regalloc_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\runtime.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\string.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\vectypes.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\vmem.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\base\zone.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\api-build_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\api-config.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\archcommons.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\archtraits.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\assembler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\builder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\codebuffer.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\codeholder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\codewriter_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\compilerdefs.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\compiler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\constpool.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\cpuinfo.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\emithelper_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\emitter.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\emitterutils_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\environment.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\errorhandler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\formatter.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\formatter_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\funcargscontext_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\func.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\globals.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\inst.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\jitallocator.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\jitruntime.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\logger.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\misc_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\operand.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\osutils.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\osutils_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\raassignment_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\rabuilders_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\radefs_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\ralocal_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\rapass_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\rastack_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\string.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\support.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\target.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\type.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\virtmem.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zone.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonehash.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonelist.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonestack.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonestring.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonetree.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\core\zonevector.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86archtraits_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86assembler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86builder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86compiler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86inst.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86instimpl_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86internal_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86logging_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86emithelper_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86emitter.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86formatter_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86func_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86globals.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86instapi_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86instdb.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86instdb_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86opcode_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86operand.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86regalloc_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\x86\x86rapass_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64assembler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64builder.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64compiler.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64emithelper_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64emitter.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64globals.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64instapi_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64instdb.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64instdb_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64operand.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64rapass_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\a64utils.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\armarchtraits_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\armformatter_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\armfunc_p.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\armglobals.h" />
|
||||
<ClInclude Include="asmjit\src\asmjit\arm\armoperand.h" />
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -113,8 +113,32 @@ static u8* add_jit_memory(usz size, uint align)
|
||||
return pointer + pos;
|
||||
}
|
||||
|
||||
const asmjit::Environment& jit_runtime_base::environment() const noexcept
|
||||
{
|
||||
static const asmjit::Environment g_env = asmjit::Environment::host();
|
||||
|
||||
return g_env;
|
||||
}
|
||||
|
||||
void* jit_runtime_base::_add(asmjit::CodeHolder* code) noexcept
|
||||
{
|
||||
ensure(!code->flatten());
|
||||
ensure(!code->resolveUnresolvedLinks());
|
||||
usz codeSize = ensure(code->codeSize());
|
||||
auto p = ensure(this->_alloc(codeSize, 64));
|
||||
ensure(!code->relocateToBase(uptr(p)));
|
||||
|
||||
asmjit::VirtMem::ProtectJitReadWriteScope rwScope(p, codeSize);
|
||||
|
||||
for (asmjit::Section* section : code->_sections)
|
||||
{
|
||||
std::memcpy(p + section->offset(), section->data(), section->bufferSize());
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
jit_runtime::jit_runtime()
|
||||
: HostRuntime()
|
||||
{
|
||||
}
|
||||
|
||||
@ -122,38 +146,9 @@ jit_runtime::~jit_runtime()
|
||||
{
|
||||
}
|
||||
|
||||
asmjit::Error jit_runtime::_add(void** dst, asmjit::CodeHolder* code) noexcept
|
||||
uchar* jit_runtime::_alloc(usz size, usz align) noexcept
|
||||
{
|
||||
usz codeSize = code->getCodeSize();
|
||||
if (!codeSize) [[unlikely]]
|
||||
{
|
||||
*dst = nullptr;
|
||||
return asmjit::kErrorNoCodeGenerated;
|
||||
}
|
||||
|
||||
void* p = jit_runtime::alloc(codeSize, 16);
|
||||
if (!p) [[unlikely]]
|
||||
{
|
||||
*dst = nullptr;
|
||||
return asmjit::kErrorNoVirtualMemory;
|
||||
}
|
||||
|
||||
usz relocSize = code->relocate(p);
|
||||
if (!relocSize) [[unlikely]]
|
||||
{
|
||||
*dst = nullptr;
|
||||
return asmjit::kErrorInvalidState;
|
||||
}
|
||||
|
||||
flush(p, relocSize);
|
||||
*dst = p;
|
||||
|
||||
return asmjit::kErrorOk;
|
||||
}
|
||||
|
||||
asmjit::Error jit_runtime::_release(void*) noexcept
|
||||
{
|
||||
return asmjit::kErrorOk;
|
||||
return jit_runtime::alloc(size, align, true);
|
||||
}
|
||||
|
||||
u8* jit_runtime::alloc(usz size, uint align, bool exec) noexcept
|
||||
@ -200,12 +195,12 @@ void jit_runtime::finalize() noexcept
|
||||
std::memcpy(alloc(s_data_init.size(), 1, false), s_data_init.data(), s_data_init.size());
|
||||
}
|
||||
|
||||
asmjit::Runtime& asmjit::get_global_runtime()
|
||||
jit_runtime_base& asmjit::get_global_runtime()
|
||||
{
|
||||
// 16 MiB for internal needs
|
||||
static constexpr u64 size = 1024 * 1024 * 16;
|
||||
|
||||
struct custom_runtime final : asmjit::HostRuntime
|
||||
struct custom_runtime final : jit_runtime_base
|
||||
{
|
||||
custom_runtime() noexcept
|
||||
{
|
||||
@ -214,7 +209,7 @@ asmjit::Runtime& asmjit::get_global_runtime()
|
||||
{
|
||||
if (auto ptr = utils::memory_reserve(size, reinterpret_cast<void*>(addr)))
|
||||
{
|
||||
m_pos.raw() = static_cast<std::byte*>(ptr);
|
||||
m_pos.raw() = static_cast<uchar*>(ptr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -226,49 +221,26 @@ asmjit::Runtime& asmjit::get_global_runtime()
|
||||
utils::memory_commit(m_pos, size, utils::protection::wx);
|
||||
}
|
||||
|
||||
custom_runtime(const custom_runtime&) = delete;
|
||||
|
||||
custom_runtime& operator=(const custom_runtime&) = delete;
|
||||
|
||||
asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override
|
||||
uchar* _alloc(usz size, usz align) noexcept override
|
||||
{
|
||||
usz codeSize = code->getCodeSize();
|
||||
if (!codeSize) [[unlikely]]
|
||||
return m_pos.atomic_op([&](uchar*& pos) -> uchar*
|
||||
{
|
||||
*dst = nullptr;
|
||||
return asmjit::kErrorNoCodeGenerated;
|
||||
}
|
||||
const auto r = reinterpret_cast<uchar*>(utils::align(uptr(pos), align));
|
||||
|
||||
void* p = m_pos.fetch_add(utils::align(codeSize, 64));
|
||||
if (!p || m_pos > m_max) [[unlikely]]
|
||||
{
|
||||
*dst = nullptr;
|
||||
jit_log.fatal("Out of memory (static asmjit)");
|
||||
return asmjit::kErrorNoVirtualMemory;
|
||||
}
|
||||
if (r >= pos && r + size > pos && r + size <= m_max)
|
||||
{
|
||||
pos = r + size;
|
||||
return r;
|
||||
}
|
||||
|
||||
usz relocSize = code->relocate(p);
|
||||
if (!relocSize) [[unlikely]]
|
||||
{
|
||||
*dst = nullptr;
|
||||
return asmjit::kErrorInvalidState;
|
||||
}
|
||||
|
||||
flush(p, relocSize);
|
||||
*dst = p;
|
||||
|
||||
return asmjit::kErrorOk;
|
||||
}
|
||||
|
||||
asmjit::Error _release(void*) noexcept override
|
||||
{
|
||||
return asmjit::kErrorOk;
|
||||
return nullptr;
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
atomic_t<std::byte*> m_pos{};
|
||||
atomic_t<uchar*> m_pos{};
|
||||
|
||||
std::byte* m_max{};
|
||||
uchar* m_max{};
|
||||
};
|
||||
|
||||
// Magic static
|
||||
@ -276,37 +248,17 @@ asmjit::Runtime& asmjit::get_global_runtime()
|
||||
return g_rt;
|
||||
}
|
||||
|
||||
asmjit::Error asmjit::inline_runtime::_add(void** dst, asmjit::CodeHolder* code) noexcept
|
||||
asmjit::inline_runtime::inline_runtime(uchar* data, usz size)
|
||||
: m_data(data)
|
||||
, m_size(size)
|
||||
{
|
||||
usz codeSize = code->getCodeSize();
|
||||
if (!codeSize) [[unlikely]]
|
||||
{
|
||||
*dst = nullptr;
|
||||
return asmjit::kErrorNoCodeGenerated;
|
||||
}
|
||||
|
||||
if (utils::align(codeSize, 4096) > m_size) [[unlikely]]
|
||||
{
|
||||
*dst = nullptr;
|
||||
return asmjit::kErrorNoVirtualMemory;
|
||||
}
|
||||
|
||||
usz relocSize = code->relocate(m_data);
|
||||
if (!relocSize) [[unlikely]]
|
||||
{
|
||||
*dst = nullptr;
|
||||
return asmjit::kErrorInvalidState;
|
||||
}
|
||||
|
||||
flush(m_data, relocSize);
|
||||
*dst = m_data;
|
||||
|
||||
return asmjit::kErrorOk;
|
||||
}
|
||||
|
||||
asmjit::Error asmjit::inline_runtime::_release(void*) noexcept
|
||||
uchar* asmjit::inline_runtime::_alloc(usz size, usz align) noexcept
|
||||
{
|
||||
return asmjit::kErrorOk;
|
||||
ensure(align <= 4096);
|
||||
|
||||
return size <= m_size ? m_data : nullptr;
|
||||
}
|
||||
|
||||
asmjit::inline_runtime::~inline_runtime()
|
||||
@ -397,19 +349,19 @@ static u64 make_null_function(const std::string& name)
|
||||
using namespace asmjit;
|
||||
|
||||
// Build a "null" function that contains its name
|
||||
const auto func = build_function_asm<void (*)()>("NULL", [&](X86Assembler& c, auto& args)
|
||||
const auto func = build_function_asm<void (*)()>("NULL", [&](x86::Assembler& c, auto& args)
|
||||
{
|
||||
Label data = c.newLabel();
|
||||
c.lea(args[0], x86::qword_ptr(data, 0));
|
||||
c.jmp(imm_ptr(&null));
|
||||
c.align(kAlignCode, 16);
|
||||
c.jmp(Imm(&null));
|
||||
c.align(AlignMode::kCode, 16);
|
||||
c.bind(data);
|
||||
|
||||
// Copy function name bytes
|
||||
for (char ch : name)
|
||||
c.db(ch);
|
||||
c.db(0);
|
||||
c.align(kAlignData, 16);
|
||||
c.align(AlignMode::kData, 16);
|
||||
});
|
||||
|
||||
func_ptr = reinterpret_cast<u64>(func);
|
||||
|
@ -4,7 +4,9 @@
|
||||
|
||||
// Include asmjit with warnings ignored
|
||||
#define ASMJIT_EMBED
|
||||
#define ASMJIT_DEBUG
|
||||
#define ASMJIT_STATIC
|
||||
#define ASMJIT_BUILD_DEBUG
|
||||
#undef Bool
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push, 0)
|
||||
@ -49,17 +51,27 @@ enum class jit_class
|
||||
spu_data,
|
||||
};
|
||||
|
||||
// Abstract base shared by the JIT code runtimes declared in this header
// (jit_runtime and asmjit::inline_runtime derive from it).
// Instances are non-copyable; derived runtimes supply the memory via _alloc().
struct jit_runtime_base
|
||||
{
|
||||
jit_runtime_base() noexcept = default;
|
||||
virtual ~jit_runtime_base() = default;
|
||||
|
||||
// Copying is disabled (copy constructor and copy assignment are deleted)
jit_runtime_base(const jit_runtime_base&) = delete;
|
||||
jit_runtime_base& operator=(const jit_runtime_base&) = delete;
|
||||
|
||||
// Environment handed to asmjit::CodeHolder::init() by the function builders
const asmjit::Environment& environment() const noexcept;
|
||||
// Returns the address of the emitted code for `code`; callers treat a null result as failure.
// NOTE(review): the definition is not visible here - presumably it obtains memory through _alloc(); confirm.
void* _add(asmjit::CodeHolder* code) noexcept;
|
||||
// Implemented by derived runtimes: provide storage for `size` bytes honoring `align`,
// or return nullptr when the request cannot be satisfied.
virtual uchar* _alloc(usz size, usz align) noexcept = 0;
|
||||
};
|
||||
|
||||
// ASMJIT runtime for emitting code in a single 2G region
|
||||
struct jit_runtime final : asmjit::HostRuntime
|
||||
struct jit_runtime final : jit_runtime_base
|
||||
{
|
||||
jit_runtime();
|
||||
~jit_runtime() override;
|
||||
|
||||
// Allocate executable memory
|
||||
asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override;
|
||||
|
||||
// Do nothing (deallocation is delayed)
|
||||
asmjit::Error _release(void* p) noexcept override;
|
||||
uchar* _alloc(usz size, usz align) noexcept override;
|
||||
|
||||
// Allocate memory
|
||||
static u8* alloc(usz size, uint align, bool exec = true) noexcept;
|
||||
@ -74,35 +86,25 @@ struct jit_runtime final : asmjit::HostRuntime
|
||||
namespace asmjit
|
||||
{
|
||||
// Should only be used to build global functions
|
||||
asmjit::Runtime& get_global_runtime();
|
||||
jit_runtime_base& get_global_runtime();
|
||||
|
||||
// Don't use directly
|
||||
class inline_runtime : public HostRuntime
|
||||
class inline_runtime : public jit_runtime_base
|
||||
{
|
||||
uchar* m_data;
|
||||
usz m_size;
|
||||
|
||||
public:
|
||||
inline_runtime(const inline_runtime&) = delete;
|
||||
|
||||
inline_runtime& operator=(const inline_runtime&) = delete;
|
||||
|
||||
inline_runtime(uchar* data, usz size)
|
||||
: m_data(data)
|
||||
, m_size(size)
|
||||
{
|
||||
}
|
||||
|
||||
asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override;
|
||||
|
||||
asmjit::Error _release(void*) noexcept override;
|
||||
inline_runtime(uchar* data, usz size);
|
||||
|
||||
~inline_runtime();
|
||||
|
||||
uchar* _alloc(usz size, usz align) noexcept override;
|
||||
};
|
||||
|
||||
// Emit xbegin and adjacent loop, return label at xbegin (don't use xabort please)
|
||||
template <typename F>
|
||||
[[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, F func)
|
||||
[[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::x86::Assembler& c, asmjit::Label fallback, F func)
|
||||
{
|
||||
Label fall = c.newLabel();
|
||||
Label begin = c.newLabel();
|
||||
@ -117,7 +119,7 @@ namespace asmjit
|
||||
func();
|
||||
|
||||
// Other bad statuses are ignored regardless of repeat flag (TODO)
|
||||
c.align(kAlignCode, 16);
|
||||
c.align(AlignMode::kCode, 16);
|
||||
c.bind(begin);
|
||||
return fall;
|
||||
|
||||
@ -125,7 +127,7 @@ namespace asmjit
|
||||
}
|
||||
|
||||
// Helper to spill RDX (EDX) register for RDTSC
|
||||
inline void build_swap_rdx_with(asmjit::X86Assembler& c, std::array<X86Gp, 4>& args, const asmjit::X86Gp& with)
|
||||
inline void build_swap_rdx_with(asmjit::x86::Assembler& c, std::array<x86::Gp, 4>& args, const asmjit::x86::Gp& with)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
c.xchg(args[1], with);
|
||||
@ -137,7 +139,7 @@ namespace asmjit
|
||||
}
|
||||
|
||||
// Get full RDTSC value into chosen register (clobbers rax/rdx or saves only rax with other target)
|
||||
inline void build_get_tsc(asmjit::X86Assembler& c, const asmjit::X86Gp& to = asmjit::x86::rax)
|
||||
inline void build_get_tsc(asmjit::x86::Assembler& c, const asmjit::x86::Gp& to = asmjit::x86::rax)
|
||||
{
|
||||
if (&to != &x86::rax && &to != &x86::rdx)
|
||||
{
|
||||
@ -164,6 +166,8 @@ namespace asmjit
|
||||
c.or_(to.r64(), x86::rdx);
|
||||
}
|
||||
}
|
||||
|
||||
using imm_ptr = Imm;
|
||||
}
|
||||
|
||||
// Build runtime function with asmjit::x86::Assembler
|
||||
@ -175,10 +179,9 @@ inline FT build_function_asm(std::string_view name, F&& builder)
|
||||
auto& rt = get_global_runtime();
|
||||
|
||||
CodeHolder code;
|
||||
code.init(rt.getCodeInfo());
|
||||
code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign;
|
||||
code.init(rt.environment());
|
||||
|
||||
std::array<X86Gp, 4> args;
|
||||
std::array<x86::Gp, 4> args;
|
||||
#ifdef _WIN32
|
||||
args[0] = x86::rcx;
|
||||
args[1] = x86::rdx;
|
||||
@ -191,19 +194,12 @@ inline FT build_function_asm(std::string_view name, F&& builder)
|
||||
args[3] = x86::rcx;
|
||||
#endif
|
||||
|
||||
X86Assembler compiler(&code);
|
||||
x86::Assembler compiler(&code);
|
||||
compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
|
||||
builder(std::ref(compiler), args);
|
||||
ensure(compiler.getLastError() == 0);
|
||||
|
||||
FT result;
|
||||
|
||||
if (rt.add(&result, &code))
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
jit_announce(result, code.getCodeSize(), name);
|
||||
return result;
|
||||
const auto result = rt._add(&code);
|
||||
jit_announce(result, code.codeSize(), name);
|
||||
return reinterpret_cast<FT>(uptr(result));
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
@ -253,10 +249,9 @@ public:
|
||||
inline_runtime rt(m_data, Size);
|
||||
|
||||
CodeHolder code;
|
||||
code.init(rt.getCodeInfo());
|
||||
code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign;
|
||||
code.init(rt.environment());
|
||||
|
||||
std::array<X86Gp, 4> args;
|
||||
std::array<x86::Gp, 4> args;
|
||||
#ifdef _WIN32
|
||||
args[0] = x86::rcx;
|
||||
args[1] = x86::rdx;
|
||||
@ -269,19 +264,10 @@ public:
|
||||
args[3] = x86::rcx;
|
||||
#endif
|
||||
|
||||
X86Assembler compiler(&code);
|
||||
x86::Assembler compiler(&code);
|
||||
compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
|
||||
builder(std::ref(compiler), args);
|
||||
|
||||
FT result;
|
||||
|
||||
if (compiler.getLastError() || rt.add(&result, &code))
|
||||
{
|
||||
ensure(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
jit_announce(result, code.getCodeSize(), name);
|
||||
}
|
||||
jit_announce(rt._add(&code), code.codeSize(), name);
|
||||
}
|
||||
|
||||
operator FT() const noexcept
|
||||
|
@ -2190,7 +2190,7 @@ thread_base::native_entry thread_base::finalize(u64 _self) noexcept
|
||||
|
||||
thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base* _base))
|
||||
{
|
||||
return build_function_asm<native_entry>("thread_base_trampoline", [&](asmjit::X86Assembler& c, auto& args)
|
||||
return build_function_asm<native_entry>("thread_base_trampoline", [&](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -2203,7 +2203,7 @@ thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base*
|
||||
|
||||
// Call finalize, return if zero
|
||||
c.mov(args[0], x86::rax);
|
||||
c.call(imm_ptr<native_entry(*)(u64)>(finalize));
|
||||
c.call(imm_ptr(static_cast<native_entry(*)(u64)>(&finalize)));
|
||||
c.test(x86::rax, x86::rax);
|
||||
c.jz(_ret);
|
||||
|
||||
|
@ -1910,14 +1910,14 @@ std::vector<ppu_function_t>& ppu_function_manager::access(bool ghc)
|
||||
|
||||
static std::vector<ppu_function_t> list_ghc
|
||||
{
|
||||
build_function_asm<ppu_function_t>("ppu_unregistered", [](asmjit::X86Assembler& c, auto& args)
|
||||
build_function_asm<ppu_function_t>("ppu_unregistered", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
c.mov(args[0], x86::rbp);
|
||||
c.jmp(imm_ptr(list[0]));
|
||||
}),
|
||||
build_function_asm<ppu_function_t>("ppu_return", [](asmjit::X86Assembler& c, auto& args)
|
||||
build_function_asm<ppu_function_t>("ppu_return", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -1937,7 +1937,7 @@ u32 ppu_function_manager::add_function(ppu_function_t function)
|
||||
list.push_back(function);
|
||||
|
||||
// Generate trampoline
|
||||
list2.push_back(build_function_asm<ppu_function_t>("ppu_trampolinea", [&](asmjit::X86Assembler& c, auto& args)
|
||||
list2.push_back(build_function_asm<ppu_function_t>("ppu_trampolinea", [&](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
|
@ -147,7 +147,7 @@ static bool ppu_break(ppu_thread& ppu, ppu_opcode_t op);
|
||||
|
||||
extern void do_cell_atomic_128_store(u32 addr, const void* to_write);
|
||||
|
||||
const auto ppu_gateway = built_function<void(*)(ppu_thread*)>("ppu_gateway", [](asmjit::X86Assembler& c, auto& args)
|
||||
const auto ppu_gateway = built_function<void(*)(ppu_thread*)>("ppu_gateway", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
// Gateway for PPU, converts from native to GHC calling convention, also saves RSP value for escape
|
||||
using namespace asmjit;
|
||||
@ -248,7 +248,7 @@ const auto ppu_gateway = built_function<void(*)(ppu_thread*)>("ppu_gateway", [](
|
||||
c.ret();
|
||||
});
|
||||
|
||||
const extern auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>("ppu_escape", [](asmjit::X86Assembler& c, auto& args)
|
||||
const extern auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>("ppu_escape", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -262,7 +262,7 @@ const extern auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>("ppu_esc
|
||||
|
||||
void ppu_recompiler_fallback(ppu_thread& ppu);
|
||||
|
||||
const auto ppu_recompiler_fallback_ghc = build_function_asm<void(*)(ppu_thread& ppu)>("ppu_trampolineb", [](asmjit::X86Assembler& c, auto& args)
|
||||
const auto ppu_recompiler_fallback_ghc = build_function_asm<void(*)(ppu_thread& ppu)>("ppu_trampolineb", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -1817,7 +1817,7 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr)
|
||||
return ppu_load_acquire_reservation<u64>(ppu, addr);
|
||||
}
|
||||
|
||||
const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, const void* _old, u64 _new)>("ppu_stcx_accurate_tx", [](asmjit::X86Assembler& c, auto& args)
|
||||
const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, const void* _old, u64 _new)>("ppu_stcx_accurate_tx", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -1859,10 +1859,10 @@ const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, co
|
||||
// Prepare data
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovups(x86::ymm0, x86::yword_ptr(args[2], 0));
|
||||
c.vmovups(x86::ymm1, x86::yword_ptr(args[2], 32));
|
||||
c.vmovups(x86::ymm2, x86::yword_ptr(args[2], 64));
|
||||
c.vmovups(x86::ymm3, x86::yword_ptr(args[2], 96));
|
||||
c.vmovups(x86::ymm0, x86::ymmword_ptr(args[2], 0));
|
||||
c.vmovups(x86::ymm1, x86::ymmword_ptr(args[2], 32));
|
||||
c.vmovups(x86::ymm2, x86::ymmword_ptr(args[2], 64));
|
||||
c.vmovups(x86::ymm3, x86::ymmword_ptr(args[2], 96));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1897,10 +1897,10 @@ const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, co
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(x86::rbp, 0));
|
||||
c.vxorps(x86::ymm1, x86::ymm1, x86::yword_ptr(x86::rbp, 32));
|
||||
c.vxorps(x86::ymm2, x86::ymm2, x86::yword_ptr(x86::rbp, 64));
|
||||
c.vxorps(x86::ymm3, x86::ymm3, x86::yword_ptr(x86::rbp, 96));
|
||||
c.vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(x86::rbp, 0));
|
||||
c.vxorps(x86::ymm1, x86::ymm1, x86::ymmword_ptr(x86::rbp, 32));
|
||||
c.vxorps(x86::ymm2, x86::ymm2, x86::ymmword_ptr(x86::rbp, 64));
|
||||
c.vxorps(x86::ymm3, x86::ymm3, x86::ymmword_ptr(x86::rbp, 96));
|
||||
c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
|
||||
c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
|
||||
c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
|
||||
@ -1943,10 +1943,10 @@ const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, co
|
||||
// Load old data to store back in rdata
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::ymm0, x86::yword_ptr(x86::rbp, 0));
|
||||
c.vmovaps(x86::ymm1, x86::yword_ptr(x86::rbp, 32));
|
||||
c.vmovaps(x86::ymm2, x86::yword_ptr(x86::rbp, 64));
|
||||
c.vmovaps(x86::ymm3, x86::yword_ptr(x86::rbp, 96));
|
||||
c.vmovaps(x86::ymm0, x86::ymmword_ptr(x86::rbp, 0));
|
||||
c.vmovaps(x86::ymm1, x86::ymmword_ptr(x86::rbp, 32));
|
||||
c.vmovaps(x86::ymm2, x86::ymmword_ptr(x86::rbp, 64));
|
||||
c.vmovaps(x86::ymm3, x86::ymmword_ptr(x86::rbp, 96));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1974,10 +1974,10 @@ const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, co
|
||||
// Store previous data back to rdata
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::yword_ptr(args[2], 0), x86::ymm0);
|
||||
c.vmovaps(x86::yword_ptr(args[2], 32), x86::ymm1);
|
||||
c.vmovaps(x86::yword_ptr(args[2], 64), x86::ymm2);
|
||||
c.vmovaps(x86::yword_ptr(args[2], 96), x86::ymm3);
|
||||
c.vmovaps(x86::ymmword_ptr(args[2], 0), x86::ymm0);
|
||||
c.vmovaps(x86::ymmword_ptr(args[2], 32), x86::ymm1);
|
||||
c.vmovaps(x86::ymmword_ptr(args[2], 64), x86::ymm2);
|
||||
c.vmovaps(x86::ymmword_ptr(args[2], 96), x86::ymm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -96,15 +96,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
using namespace asmjit;
|
||||
|
||||
StringLogger logger;
|
||||
logger.addOptions(Logger::kOptionBinaryForm);
|
||||
logger.addFlags(FormatFlags::kMachineCode);
|
||||
|
||||
std::string log;
|
||||
|
||||
CodeHolder code;
|
||||
code.init(m_asmrt.getCodeInfo());
|
||||
code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign;
|
||||
code.init(m_asmrt.environment());
|
||||
|
||||
X86Assembler compiler(&code);
|
||||
x86::Assembler compiler(&code);
|
||||
compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
|
||||
this->c = &compiler;
|
||||
|
||||
if (g_cfg.core.spu_debug && !add_loc->logged.exchange(1))
|
||||
@ -137,7 +137,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
this->qw1 = &x86::rcx;
|
||||
#endif
|
||||
|
||||
const std::array<const X86Xmm*, 16> vec_vars
|
||||
const std::array<const x86::Xmm*, 16> vec_vars
|
||||
{
|
||||
&x86::xmm0,
|
||||
&x86::xmm1,
|
||||
@ -333,20 +333,20 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
});
|
||||
|
||||
c->setExtraReg(x86::k7);
|
||||
c->z().vmovdqa32(x86::zmm0, x86::zword_ptr(*qw1, j - ls_off));
|
||||
c->z().vmovdqa32(x86::zmm0, x86::zmmword_ptr(*qw1, j - ls_off));
|
||||
}
|
||||
else
|
||||
{
|
||||
c->vmovdqa32(x86::zmm0, x86::zword_ptr(*qw1, j - ls_off));
|
||||
c->vmovdqa32(x86::zmm0, x86::zmmword_ptr(*qw1, j - ls_off));
|
||||
}
|
||||
|
||||
if (first)
|
||||
{
|
||||
c->vpcmpud(x86::k1, x86::zmm0, x86::zword_ptr(x86::rax, code_off), 4);
|
||||
c->vpcmpud(x86::k1, x86::zmm0, x86::zmmword_ptr(x86::rax, code_off), 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
c->vpcmpud(x86::k3, x86::zmm0, x86::zword_ptr(x86::rax, code_off), 4);
|
||||
c->vpcmpud(x86::k3, x86::zmm0, x86::zmmword_ptr(x86::rax, code_off), 4);
|
||||
c->korw(x86::k1, x86::k3, x86::k1);
|
||||
}
|
||||
|
||||
@ -378,15 +378,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
|
||||
if (cmask == 0xff)
|
||||
{
|
||||
c->vmovdqa(x86::ymm0, x86::yword_ptr(*ls, starta));
|
||||
c->vmovdqa(x86::ymm0, x86::ymmword_ptr(*ls, starta));
|
||||
}
|
||||
else
|
||||
{
|
||||
c->vpxor(x86::ymm0, x86::ymm0, x86::ymm0);
|
||||
c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask);
|
||||
c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask);
|
||||
}
|
||||
|
||||
c->vpxor(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
|
||||
c->vpxor(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
|
||||
c->vptest(x86::ymm0, x86::ymm0);
|
||||
c->jnz(label_diff);
|
||||
|
||||
@ -401,9 +401,9 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
const u32 cmask1 = get_code_mask(starta + 32, enda);
|
||||
|
||||
c->vpxor(x86::ymm0, x86::ymm0, x86::ymm0);
|
||||
c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask0);
|
||||
c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta + 32), cmask1);
|
||||
c->vpxor(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
|
||||
c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask0);
|
||||
c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta + 32), cmask1);
|
||||
c->vpxor(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
|
||||
c->vptest(x86::ymm0, x86::ymm0);
|
||||
c->jnz(label_diff);
|
||||
|
||||
@ -453,21 +453,21 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
xmm2z = true;
|
||||
}
|
||||
|
||||
c->vpblendd(x86::ymm1, x86::ymm2, x86::yword_ptr(*qw1, j - ls_off), cmask);
|
||||
c->vpblendd(x86::ymm1, x86::ymm2, x86::ymmword_ptr(*qw1, j - ls_off), cmask);
|
||||
}
|
||||
else
|
||||
{
|
||||
c->vmovdqa32(x86::ymm1, x86::yword_ptr(*qw1, j - ls_off));
|
||||
c->vmovdqa32(x86::ymm1, x86::ymmword_ptr(*qw1, j - ls_off));
|
||||
}
|
||||
|
||||
// Perform bitwise comparison and accumulate
|
||||
if (first)
|
||||
{
|
||||
c->vpxor(x86::ymm0, x86::ymm1, x86::yword_ptr(x86::rax, code_off));
|
||||
c->vpxor(x86::ymm0, x86::ymm1, x86::ymmword_ptr(x86::rax, code_off));
|
||||
}
|
||||
else
|
||||
{
|
||||
c->vpternlogd(x86::ymm0, x86::ymm1, x86::yword_ptr(x86::rax, code_off), 0xf6 /* orAxorBC */);
|
||||
c->vpternlogd(x86::ymm0, x86::ymm1, x86::ymmword_ptr(x86::rax, code_off), 0xf6 /* orAxorBC */);
|
||||
}
|
||||
|
||||
for (u32 i = j; i < j + 32; i += 4)
|
||||
@ -500,15 +500,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
|
||||
if (cmask == 0xff)
|
||||
{
|
||||
c->vmovaps(x86::ymm0, x86::yword_ptr(*ls, starta));
|
||||
c->vmovaps(x86::ymm0, x86::ymmword_ptr(*ls, starta));
|
||||
}
|
||||
else
|
||||
{
|
||||
c->vxorps(x86::ymm0, x86::ymm0, x86::ymm0);
|
||||
c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask);
|
||||
c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask);
|
||||
}
|
||||
|
||||
c->vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
|
||||
c->vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
|
||||
c->vptest(x86::ymm0, x86::ymm0);
|
||||
c->jnz(label_diff);
|
||||
|
||||
@ -523,9 +523,9 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
const u32 cmask1 = get_code_mask(starta + 32, enda);
|
||||
|
||||
c->vxorps(x86::ymm0, x86::ymm0, x86::ymm0);
|
||||
c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask0);
|
||||
c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta + 32), cmask1);
|
||||
c->vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
|
||||
c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask0);
|
||||
c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta + 32), cmask1);
|
||||
c->vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
|
||||
c->vptest(x86::ymm0, x86::ymm0);
|
||||
c->jnz(label_diff);
|
||||
|
||||
@ -586,21 +586,21 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
xmm2z = true;
|
||||
}
|
||||
|
||||
c->vblendps(reg1, x86::ymm2, x86::yword_ptr(*ls, j - ls_off), cmask);
|
||||
c->vblendps(reg1, x86::ymm2, x86::ymmword_ptr(*ls, j - ls_off), cmask);
|
||||
}
|
||||
else
|
||||
{
|
||||
c->vmovaps(reg1, x86::yword_ptr(*ls, j - ls_off));
|
||||
c->vmovaps(reg1, x86::ymmword_ptr(*ls, j - ls_off));
|
||||
}
|
||||
|
||||
// Perform bitwise comparison and accumulate
|
||||
if (!order++)
|
||||
{
|
||||
c->vxorps(reg0, reg1, x86::yword_ptr(x86::rax, code_off));
|
||||
c->vxorps(reg0, reg1, x86::ymmword_ptr(x86::rax, code_off));
|
||||
}
|
||||
else
|
||||
{
|
||||
c->vxorps(reg1, reg1, x86::yword_ptr(x86::rax, code_off));
|
||||
c->vxorps(reg1, reg1, x86::ymmword_ptr(x86::rax, code_off));
|
||||
c->vorps(reg0, reg1, reg0);
|
||||
}
|
||||
|
||||
@ -800,7 +800,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
{
|
||||
if (m_preds.count(pos))
|
||||
{
|
||||
c->align(kAlignCode, 16);
|
||||
c->align(AlignMode::kCode, 16);
|
||||
}
|
||||
|
||||
c->bind(found->second);
|
||||
@ -832,7 +832,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
}
|
||||
|
||||
// Simply return
|
||||
c->align(kAlignCode, 16);
|
||||
c->align(AlignMode::kCode, 16);
|
||||
c->bind(label_stop);
|
||||
c->add(x86::rsp, 0x28);
|
||||
c->ret();
|
||||
@ -840,7 +840,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
if (g_cfg.core.spu_verification)
|
||||
{
|
||||
// Dispatch
|
||||
c->align(kAlignCode, 16);
|
||||
c->align(AlignMode::kCode, 16);
|
||||
c->bind(label_diff);
|
||||
c->inc(SPU_OFF_64(block_failure));
|
||||
c->add(x86::rsp, 0x28);
|
||||
@ -855,7 +855,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
// Build instruction dispatch table
|
||||
if (instr_table.isValid())
|
||||
{
|
||||
c->align(kAlignData, 8);
|
||||
c->align(AlignMode::kData, 8);
|
||||
c->bind(instr_table);
|
||||
|
||||
// Get actual instruction table bounds
|
||||
@ -877,7 +877,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
}
|
||||
}
|
||||
|
||||
c->align(kAlignData, words_align);
|
||||
c->align(AlignMode::kData, words_align);
|
||||
c->bind(label_code);
|
||||
for (u32 d : words)
|
||||
c->dd(d);
|
||||
@ -893,20 +893,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
xmm_consts.clear();
|
||||
|
||||
// Compile and get function address
|
||||
spu_function_t fn;
|
||||
spu_function_t fn = reinterpret_cast<spu_function_t>(m_asmrt._add(&code));
|
||||
|
||||
if (auto err = m_asmrt.add(&fn, &code))
|
||||
if (!fn)
|
||||
{
|
||||
if (err == asmjit::ErrorCode::kErrorNoVirtualMemory)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
spu_log.fatal("Failed to build a function");
|
||||
}
|
||||
else
|
||||
{
|
||||
jit_announce(fn, code.getCodeSize(), fmt::format("spu-b-%s", fmt::base57(be_t<u64>(m_hash_start))));
|
||||
jit_announce(fn, code.codeSize(), fmt::format("spu-b-%s", fmt::base57(be_t<u64>(m_hash_start))));
|
||||
}
|
||||
|
||||
// Install compiled function pointer
|
||||
@ -927,7 +922,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
||||
{
|
||||
// Add ASMJIT logs
|
||||
fmt::append(log, "Address: %p\n\n", fn);
|
||||
log += logger.getString();
|
||||
log.append(logger._content.data(), logger._content.size());
|
||||
log += "\n\n\n";
|
||||
|
||||
// Append log file
|
||||
@ -962,7 +957,7 @@ spu_recompiler::XmmLink spu_recompiler::XmmGet(s8 reg, XmmType type) // get xmm
|
||||
return result;
|
||||
}
|
||||
|
||||
inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data)
|
||||
inline asmjit::x86::Mem spu_recompiler::XmmConst(const v128& data)
|
||||
{
|
||||
// Find existing const
|
||||
auto& xmm_label = xmm_consts[std::make_pair(data._u64[0], data._u64[1])];
|
||||
@ -973,7 +968,7 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data)
|
||||
|
||||
consts.emplace_back([=, this]
|
||||
{
|
||||
c->align(asmjit::kAlignData, 16);
|
||||
c->align(asmjit::AlignMode::kData, 16);
|
||||
c->bind(xmm_label);
|
||||
c->dq(data._u64[0]);
|
||||
c->dq(data._u64[1]);
|
||||
@ -983,17 +978,17 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data)
|
||||
return asmjit::x86::oword_ptr(xmm_label);
|
||||
}
|
||||
|
||||
inline asmjit::X86Mem spu_recompiler::XmmConst(const __m128& data)
|
||||
inline asmjit::x86::Mem spu_recompiler::XmmConst(const __m128& data)
|
||||
{
|
||||
return XmmConst(v128::fromF(data));
|
||||
}
|
||||
|
||||
inline asmjit::X86Mem spu_recompiler::XmmConst(const __m128i& data)
|
||||
inline asmjit::x86::Mem spu_recompiler::XmmConst(const __m128i& data)
|
||||
{
|
||||
return XmmConst(v128::fromV(data));
|
||||
}
|
||||
|
||||
inline asmjit::X86Mem spu_recompiler::get_pc(u32 addr)
|
||||
inline asmjit::x86::Mem spu_recompiler::get_pc(u32 addr)
|
||||
{
|
||||
return asmjit::x86::qword_ptr(*pc0, addr - m_base);
|
||||
}
|
||||
@ -1108,7 +1103,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->mov(*arg0, *cpu);
|
||||
c->add(x86::rsp, 0x28);
|
||||
c->jmp(imm_ptr<void(*)(spu_thread*)>(_throw));
|
||||
c->jmp(imm_ptr(+_throw));
|
||||
|
||||
// Save addr in srr0 and disable interrupts
|
||||
c->bind(intr);
|
||||
@ -1123,7 +1118,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
|
||||
c->test(*addr, 0xff80007f);
|
||||
c->cmovnz(*addr, rip->r32());
|
||||
c->shr(*addr, 5);
|
||||
c->align(kAlignCode, 16);
|
||||
c->align(AlignMode::kCode, 16);
|
||||
c->bind(no_intr);
|
||||
}
|
||||
|
||||
@ -1210,7 +1205,7 @@ void spu_recompiler::branch_set_link(u32 target)
|
||||
after.emplace_back([=, this, target = local->second]
|
||||
{
|
||||
// Clear return info after use
|
||||
c->align(kAlignCode, 16);
|
||||
c->align(AlignMode::kCode, 16);
|
||||
c->bind(ret);
|
||||
c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
|
||||
c->and_(qw1->r32(), 0x3fff0);
|
||||
@ -1246,9 +1241,9 @@ void spu_recompiler::fall(spu_opcode_t op)
|
||||
c->and_(*addr, 0x3fffc);
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->mov(arg1->r32(), op.opcode);
|
||||
c->mov(*qw0, asmjit::imm_ptr(asmjit::Internal::ptr_cast<void*>(g_spu_interpreter_fast.decode(op.opcode))));
|
||||
c->mov(*qw0, asmjit::imm_ptr(g_spu_interpreter_fast.decode(op.opcode)));
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(asmjit::imm_ptr<void(*)(spu_thread*, u32, spu_inter_func_t)>(gate));
|
||||
c->call(asmjit::imm_ptr(+gate));
|
||||
}
|
||||
|
||||
void spu_recompiler::UNK(spu_opcode_t op)
|
||||
@ -1266,7 +1261,7 @@ void spu_recompiler::UNK(spu_opcode_t op)
|
||||
c->mov(arg1->r32(), op.opcode);
|
||||
c->mov(*arg0, *cpu);
|
||||
c->add(asmjit::x86::rsp, 0x28);
|
||||
c->jmp(asmjit::imm_ptr<void(*)(spu_thread*, u32)>(gate));
|
||||
c->jmp(asmjit::imm_ptr(+gate));
|
||||
m_pos = -1;
|
||||
}
|
||||
|
||||
@ -1295,7 +1290,7 @@ void spu_recompiler::STOP(spu_opcode_t op)
|
||||
c->mov(arg1->r32(), op.opcode & 0x3fff);
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(imm_ptr(spu_stop));
|
||||
c->align(kAlignCode, 16);
|
||||
c->align(AlignMode::kCode, 16);
|
||||
c->bind(ret);
|
||||
|
||||
c->add(SPU_OFF_32(pc), 4);
|
||||
@ -1362,14 +1357,14 @@ void spu_recompiler::RDCH(spu_opcode_t op)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
auto read_channel = [&](X86Mem channel_ptr, bool sync = true)
|
||||
auto read_channel = [&](x86::Mem channel_ptr, bool sync = true)
|
||||
{
|
||||
Label wait = c->newLabel();
|
||||
Label again = c->newLabel();
|
||||
Label ret = c->newLabel();
|
||||
c->mov(addr->r64(), channel_ptr);
|
||||
c->xor_(qw0->r32(), qw0->r32());
|
||||
c->align(kAlignCode, 16);
|
||||
c->align(AlignMode::kCode, 16);
|
||||
c->bind(again);
|
||||
c->bt(addr->r64(), spu_channel::off_count);
|
||||
c->jnc(wait);
|
||||
@ -1380,7 +1375,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
|
||||
c->lea(addr->r64(), get_pc(pos));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->mov(arg1->r32(), op.ra);
|
||||
c->mov(arg1->r32(), +op.ra);
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(imm_ptr(spu_rdch));
|
||||
c->jmp(ret);
|
||||
@ -1482,13 +1477,12 @@ void spu_recompiler::RDCH(spu_opcode_t op)
|
||||
*_res = v128::from32r(out);
|
||||
};
|
||||
|
||||
using ftype = void (*)(spu_thread*, v128*);
|
||||
c->lea(addr->r64(), get_pc(m_pos));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->lea(*arg1, SPU_OFF_128(gpr, op.rt));
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr<ftype>(sub1) : asmjit::imm_ptr<ftype>(sub2));
|
||||
c->call(asmjit::imm_ptr(g_cfg.core.spu_loop_detection ? +sub1 : +sub2));
|
||||
return;
|
||||
}
|
||||
case SPU_RdEventMask:
|
||||
@ -1523,7 +1517,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
|
||||
c->lea(addr->r64(), get_pc(m_pos));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->mov(arg1->r32(), op.ra);
|
||||
c->mov(arg1->r32(), +op.ra);
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(imm_ptr(spu_rdch));
|
||||
c->movd(x86::xmm0, *addr);
|
||||
@ -1540,7 +1534,7 @@ void spu_recompiler::RCHCNT(spu_opcode_t op)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
auto ch_cnt = [&](X86Mem channel_ptr, bool inv = false)
|
||||
auto ch_cnt = [&](x86::Mem channel_ptr, bool inv = false)
|
||||
{
|
||||
// Load channel count
|
||||
const XmmLink& vr = XmmAlloc();
|
||||
@ -1631,7 +1625,7 @@ void spu_recompiler::RCHCNT(spu_opcode_t op)
|
||||
c->lea(addr->r64(), get_pc(m_pos));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->mov(arg1->r32(), op.ra);
|
||||
c->mov(arg1->r32(), +op.ra);
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(imm_ptr(spu_rchcnt));
|
||||
break;
|
||||
@ -2320,7 +2314,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
|
||||
Label ret = c->newLabel();
|
||||
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
|
||||
c->mov(addr->r64(), SPU_OFF_64(ch_out_mbox));
|
||||
c->align(kAlignCode, 16);
|
||||
c->align(AlignMode::kCode, 16);
|
||||
c->bind(again);
|
||||
c->mov(qw0->r32(), qw0->r32());
|
||||
c->bt(addr->r64(), spu_channel::off_count);
|
||||
@ -2332,7 +2326,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
|
||||
c->lea(addr->r64(), get_pc(pos));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->mov(arg1->r32(), op.ra);
|
||||
c->mov(arg1->r32(), +op.ra);
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(imm_ptr(spu_wrch));
|
||||
c->jmp(ret);
|
||||
@ -2359,7 +2353,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
|
||||
c->lea(addr->r64(), get_pc(pos));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->lea(arg1->r32(), MFC_WrTagMask);
|
||||
c->mov(arg1->r32(), MFC_WrTagMask);
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(imm_ptr(spu_wrch));
|
||||
c->jmp(ret);
|
||||
@ -2383,7 +2377,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
|
||||
c->lea(addr->r64(), get_pc(pos));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->mov(arg1->r32(), op.ra);
|
||||
c->mov(arg1->r32(), +op.ra);
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(imm_ptr(spu_wrch));
|
||||
c->jmp(ret);
|
||||
@ -2476,7 +2470,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
|
||||
c->btr(SPU_OFF_32(ch_stall_mask), arg1->r32());
|
||||
c->jnc(ret);
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(imm_ptr<void(*)(spu_thread*, u32)>(sub));
|
||||
c->call(imm_ptr(+sub));
|
||||
c->bind(ret);
|
||||
return;
|
||||
}
|
||||
@ -2488,7 +2482,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
|
||||
};
|
||||
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(imm_ptr<void(*)(spu_thread*)>(sub));
|
||||
c->call(imm_ptr(+sub));
|
||||
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
|
||||
c->mov(SPU_OFF_32(ch_dec_value), qw0->r32());
|
||||
return;
|
||||
@ -2515,7 +2509,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
|
||||
c->lea(addr->r64(), get_pc(m_pos));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
c->mov(SPU_OFF_32(pc), *addr);
|
||||
c->mov(arg1->r32(), op.ra);
|
||||
c->mov(arg1->r32(), +op.ra);
|
||||
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(imm_ptr(spu_wrch));
|
||||
@ -2529,7 +2523,7 @@ void spu_recompiler::BIZ(spu_opcode_t op)
|
||||
|
||||
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->align(asmjit::AlignMode::kCode, 16);
|
||||
c->bind(branch_label);
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
@ -2545,7 +2539,7 @@ void spu_recompiler::BINZ(spu_opcode_t op)
|
||||
|
||||
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->align(asmjit::AlignMode::kCode, 16);
|
||||
c->bind(branch_label);
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
@ -2561,7 +2555,7 @@ void spu_recompiler::BIHZ(spu_opcode_t op)
|
||||
|
||||
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->align(asmjit::AlignMode::kCode, 16);
|
||||
c->bind(branch_label);
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
@ -2577,7 +2571,7 @@ void spu_recompiler::BIHNZ(spu_opcode_t op)
|
||||
|
||||
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->align(asmjit::AlignMode::kCode, 16);
|
||||
c->bind(branch_label);
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
c->and_(*addr, 0x3fffc);
|
||||
@ -2669,13 +2663,13 @@ void spu_recompiler::BISLED(spu_opcode_t op)
|
||||
|
||||
asmjit::Label branch_label = c->newLabel();
|
||||
c->mov(*arg0, *cpu);
|
||||
c->call(asmjit::imm_ptr<u32(*)(spu_thread*)>(get_events));
|
||||
c->call(asmjit::imm_ptr(+get_events));
|
||||
c->test(*addr, 1);
|
||||
c->jne(branch_label);
|
||||
|
||||
after.emplace_back([=, this]()
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->align(asmjit::AlignMode::kCode, 16);
|
||||
c->bind(branch_label);
|
||||
c->and_(*addr, 0x3fffc);
|
||||
branch_indirect(op, true, false);
|
||||
@ -2895,7 +2889,7 @@ void spu_recompiler::CDX(spu_opcode_t op)
|
||||
const XmmLink& vr = XmmAlloc();
|
||||
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
|
||||
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
|
||||
c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
|
||||
c->mov(*qw0, asmjit::Imm(0x0001020304050607ull));
|
||||
c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0);
|
||||
}
|
||||
|
||||
@ -3028,7 +3022,7 @@ void spu_recompiler::CBD(spu_opcode_t op)
|
||||
//}
|
||||
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
if (op.i7) c->add(*addr, op.i7);
|
||||
if (op.i7) c->add(*addr, +op.i7);
|
||||
c->not_(*addr);
|
||||
c->and_(*addr, 0xf);
|
||||
|
||||
@ -3052,7 +3046,7 @@ void spu_recompiler::CHD(spu_opcode_t op)
|
||||
//}
|
||||
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
if (op.i7) c->add(*addr, op.i7);
|
||||
if (op.i7) c->add(*addr, +op.i7);
|
||||
c->not_(*addr);
|
||||
c->and_(*addr, 0xe);
|
||||
|
||||
@ -3076,7 +3070,7 @@ void spu_recompiler::CWD(spu_opcode_t op)
|
||||
//}
|
||||
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
if (op.i7) c->add(*addr, op.i7);
|
||||
if (op.i7) c->add(*addr, +op.i7);
|
||||
c->not_(*addr);
|
||||
c->and_(*addr, 0xc);
|
||||
|
||||
@ -3100,14 +3094,14 @@ void spu_recompiler::CDD(spu_opcode_t op)
|
||||
//}
|
||||
|
||||
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
|
||||
if (op.i7) c->add(*addr, op.i7);
|
||||
if (op.i7) c->add(*addr, +op.i7);
|
||||
c->not_(*addr);
|
||||
c->and_(*addr, 0x8);
|
||||
|
||||
const XmmLink& vr = XmmAlloc();
|
||||
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
|
||||
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
|
||||
c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
|
||||
c->mov(*qw0, asmjit::Imm(0x0001020304050607ull));
|
||||
c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0);
|
||||
}
|
||||
|
||||
@ -4048,7 +4042,7 @@ void spu_recompiler::BRZ(spu_opcode_t op)
|
||||
|
||||
after.emplace_back([=, this]()
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->align(asmjit::AlignMode::kCode, 16);
|
||||
c->bind(branch_label);
|
||||
branch_fixed(target);
|
||||
});
|
||||
@ -4088,7 +4082,7 @@ void spu_recompiler::BRNZ(spu_opcode_t op)
|
||||
|
||||
after.emplace_back([=, this]()
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->align(asmjit::AlignMode::kCode, 16);
|
||||
c->bind(branch_label);
|
||||
branch_fixed(target);
|
||||
});
|
||||
@ -4109,7 +4103,7 @@ void spu_recompiler::BRHZ(spu_opcode_t op)
|
||||
|
||||
after.emplace_back([=, this]()
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->align(asmjit::AlignMode::kCode, 16);
|
||||
c->bind(branch_label);
|
||||
branch_fixed(target);
|
||||
});
|
||||
@ -4130,7 +4124,7 @@ void spu_recompiler::BRHNZ(spu_opcode_t op)
|
||||
|
||||
after.emplace_back([=, this]()
|
||||
{
|
||||
c->align(asmjit::kAlignCode, 16);
|
||||
c->align(asmjit::AlignMode::kCode, 16);
|
||||
c->bind(branch_label);
|
||||
branch_fixed(target);
|
||||
});
|
||||
@ -4459,7 +4453,7 @@ void spu_recompiler::CGTBI(spu_opcode_t op)
|
||||
|
||||
void spu_recompiler::HGTI(spu_opcode_t op)
|
||||
{
|
||||
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_s32, 3), op.si10);
|
||||
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_s32, 3), +op.si10);
|
||||
|
||||
asmjit::Label label = c->newLabel();
|
||||
asmjit::Label ret = c->newLabel();
|
||||
@ -4503,7 +4497,7 @@ void spu_recompiler::CLGTBI(spu_opcode_t op)
|
||||
|
||||
void spu_recompiler::HLGTI(spu_opcode_t op)
|
||||
{
|
||||
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), op.si10);
|
||||
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), +op.si10);
|
||||
|
||||
asmjit::Label label = c->newLabel();
|
||||
asmjit::Label ret = c->newLabel();
|
||||
@ -4565,7 +4559,7 @@ void spu_recompiler::CEQBI(spu_opcode_t op)
|
||||
|
||||
void spu_recompiler::HEQI(spu_opcode_t op)
|
||||
{
|
||||
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), op.si10);
|
||||
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), +op.si10);
|
||||
|
||||
asmjit::Label label = c->newLabel();
|
||||
asmjit::Label ret = c->newLabel();
|
||||
@ -4636,12 +4630,12 @@ void spu_recompiler::SHUFB(spu_opcode_t op)
|
||||
c->vpcmpub(asmjit::x86::k1, vc, XmmConst(_mm_set1_epi8(-0x40)), 5 /* GE */);
|
||||
c->vpxor(vm, vc, XmmConst(_mm_set1_epi8(0xf)));
|
||||
c->setExtraReg(asmjit::x86::k1);
|
||||
c->z().vblendmb(vc, vc, XmmConst(_mm_set1_epi8(-1))); // {k1}
|
||||
c->z().vpblendmb(vc, vc, XmmConst(_mm_set1_epi8(-1))); // {k1}
|
||||
c->vpcmpub(asmjit::x86::k2, vm, XmmConst(_mm_set1_epi8(-0x20)), 5 /* GE */);
|
||||
c->vptestmb(asmjit::x86::k1, vm, XmmConst(_mm_set1_epi8(0x10)));
|
||||
c->vpshufb(vt, va, vm);
|
||||
c->setExtraReg(asmjit::x86::k2);
|
||||
c->z().vblendmb(va, va, XmmConst(_mm_set1_epi8(0x7f))); // {k2}
|
||||
c->z().vpblendmb(va, va, XmmConst(_mm_set1_epi8(0x7f))); // {k2}
|
||||
c->setExtraReg(asmjit::x86::k1);
|
||||
c->vpshufb(vt, vb, vm); // {k1}
|
||||
c->vpternlogd(vt, va, vc, 0xf6 /* orAxorBC */);
|
||||
|
@ -24,23 +24,23 @@ private:
|
||||
u32 m_base;
|
||||
|
||||
// emitter:
|
||||
asmjit::X86Assembler* c;
|
||||
asmjit::x86::Assembler* c;
|
||||
|
||||
// arguments:
|
||||
const asmjit::X86Gp* cpu;
|
||||
const asmjit::X86Gp* ls;
|
||||
const asmjit::X86Gp* rip;
|
||||
const asmjit::X86Gp* pc0;
|
||||
const asmjit::x86::Gp* cpu;
|
||||
const asmjit::x86::Gp* ls;
|
||||
const asmjit::x86::Gp* rip;
|
||||
const asmjit::x86::Gp* pc0;
|
||||
|
||||
// Native args or temp variables:
|
||||
const asmjit::X86Gp* arg0;
|
||||
const asmjit::X86Gp* arg1;
|
||||
const asmjit::X86Gp* qw0;
|
||||
const asmjit::X86Gp* qw1;
|
||||
const asmjit::x86::Gp* arg0;
|
||||
const asmjit::x86::Gp* arg1;
|
||||
const asmjit::x86::Gp* qw0;
|
||||
const asmjit::x86::Gp* qw1;
|
||||
|
||||
// temporary:
|
||||
const asmjit::X86Gp* addr;
|
||||
std::array<const asmjit::X86Xmm*, 16> vec;
|
||||
const asmjit::x86::Gp* addr;
|
||||
std::array<const asmjit::x86::Xmm*, 16> vec;
|
||||
|
||||
// workload for the end of function:
|
||||
std::vector<std::function<void()>> after;
|
||||
@ -60,10 +60,10 @@ private:
|
||||
|
||||
class XmmLink
|
||||
{
|
||||
const asmjit::X86Xmm* m_var;
|
||||
const asmjit::x86::Xmm* m_var;
|
||||
|
||||
public:
|
||||
XmmLink(const asmjit::X86Xmm*& xmm_var)
|
||||
XmmLink(const asmjit::x86::Xmm*& xmm_var)
|
||||
: m_var(xmm_var)
|
||||
{
|
||||
xmm_var = nullptr;
|
||||
@ -71,7 +71,7 @@ private:
|
||||
|
||||
XmmLink(XmmLink&&) = default; // MoveConstructible + delete copy constructor and copy/move operators
|
||||
|
||||
operator const asmjit::X86Xmm&() const
|
||||
operator const asmjit::x86::Xmm&() const
|
||||
{
|
||||
return *m_var;
|
||||
}
|
||||
@ -87,11 +87,11 @@ private:
|
||||
XmmLink XmmAlloc();
|
||||
XmmLink XmmGet(s8 reg, XmmType type);
|
||||
|
||||
asmjit::X86Mem XmmConst(const v128& data);
|
||||
asmjit::X86Mem XmmConst(const __m128& data);
|
||||
asmjit::X86Mem XmmConst(const __m128i& data);
|
||||
asmjit::x86::Mem XmmConst(const v128& data);
|
||||
asmjit::x86::Mem XmmConst(const __m128& data);
|
||||
asmjit::x86::Mem XmmConst(const __m128i& data);
|
||||
|
||||
asmjit::X86Mem get_pc(u32 addr);
|
||||
asmjit::x86::Mem get_pc(u32 addr);
|
||||
void branch_fixed(u32 target, bool absolute = false);
|
||||
void branch_indirect(spu_opcode_t op, bool jt = false, bool ret = true);
|
||||
void branch_set_link(u32 target);
|
||||
|
@ -43,7 +43,7 @@ namespace asmjit
|
||||
static constexpr spu_opcode_t s_op{};
|
||||
|
||||
template <uint I, uint N>
|
||||
static void build_spu_gpr_load(X86Assembler& c, X86Xmm x, const bf_t<u32, I, N>&, bool store = false)
|
||||
static void build_spu_gpr_load(x86::Assembler& c, x86::Xmm x, const bf_t<u32, I, N>&, bool store = false)
|
||||
{
|
||||
static_assert(N == 7, "Invalid bitfield");
|
||||
|
||||
@ -87,7 +87,7 @@ namespace asmjit
|
||||
}
|
||||
|
||||
template <uint I, uint N>
|
||||
static void build_spu_gpr_store(X86Assembler& c, X86Xmm x, const bf_t<u32, I, N>&, bool store = true)
|
||||
static void build_spu_gpr_store(x86::Assembler& c, x86::Xmm x, const bf_t<u32, I, N>&, bool store = true)
|
||||
{
|
||||
build_spu_gpr_load(c, x, bf_t<u32, I, N>{}, store);
|
||||
}
|
||||
@ -1733,7 +1733,7 @@ bool spu_interpreter::SHUFB(spu_thread& spu, spu_opcode_t op)
|
||||
return true;
|
||||
}
|
||||
|
||||
const spu_inter_func_t optimized_shufb = build_function_asm<spu_inter_func_t>("spu_shufb", [](asmjit::X86Assembler& c, auto& /*args*/)
|
||||
const spu_inter_func_t optimized_shufb = build_function_asm<spu_inter_func_t>("spu_shufb", [](asmjit::x86::Assembler& c, auto& /*args*/)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -1793,7 +1793,7 @@ const spu_inter_func_t optimized_shufb = build_function_asm<spu_inter_func_t>("s
|
||||
c.mov(x86::eax, 1);
|
||||
c.ret();
|
||||
|
||||
c.align(kAlignData, 16);
|
||||
c.align(AlignMode::kData, 16);
|
||||
c.bind(xc0);
|
||||
c.dq(0xc0c0c0c0c0c0c0c0);
|
||||
c.dq(0xc0c0c0c0c0c0c0c0);
|
||||
|
@ -160,7 +160,7 @@ DECLARE(spu_runtime::tr_all) = []
|
||||
return reinterpret_cast<spu_function_t>(trptr);
|
||||
}();
|
||||
|
||||
DECLARE(spu_runtime::g_gateway) = built_function<spu_function_t>("spu_gateway", [](asmjit::X86Assembler& c, auto& args)
|
||||
DECLARE(spu_runtime::g_gateway) = built_function<spu_function_t>("spu_gateway", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
// Gateway for SPU dispatcher, converts from native to GHC calling convention, also saves RSP value for spu_escape
|
||||
using namespace asmjit;
|
||||
@ -249,7 +249,7 @@ DECLARE(spu_runtime::g_gateway) = built_function<spu_function_t>("spu_gateway",
|
||||
c.ret();
|
||||
});
|
||||
|
||||
DECLARE(spu_runtime::g_escape) = build_function_asm<void(*)(spu_thread*)>("spu_escape", [](asmjit::X86Assembler& c, auto& args)
|
||||
DECLARE(spu_runtime::g_escape) = build_function_asm<void(*)(spu_thread*)>("spu_escape", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -261,7 +261,7 @@ DECLARE(spu_runtime::g_escape) = build_function_asm<void(*)(spu_thread*)>("spu_e
|
||||
c.ret();
|
||||
});
|
||||
|
||||
DECLARE(spu_runtime::g_tail_escape) = build_function_asm<void(*)(spu_thread*, spu_function_t, u8*)>("spu_tail_escape", [](asmjit::X86Assembler& c, auto& args)
|
||||
DECLARE(spu_runtime::g_tail_escape) = build_function_asm<void(*)(spu_thread*, spu_function_t, u8*)>("spu_tail_escape", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
|
@ -405,7 +405,7 @@ std::array<u32, 2> op_branch_targets(u32 pc, spu_opcode_t op)
|
||||
return res;
|
||||
}
|
||||
|
||||
const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>("spu_putllc_tx", [](asmjit::X86Assembler& c, auto& args)
|
||||
const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>("spu_putllc_tx", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -455,14 +455,14 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
||||
// Prepare data
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovups(x86::ymm0, x86::yword_ptr(args[2], 0));
|
||||
c.vmovups(x86::ymm1, x86::yword_ptr(args[2], 32));
|
||||
c.vmovups(x86::ymm2, x86::yword_ptr(args[2], 64));
|
||||
c.vmovups(x86::ymm3, x86::yword_ptr(args[2], 96));
|
||||
c.vmovups(x86::ymm4, x86::yword_ptr(args[3], 0));
|
||||
c.vmovups(x86::ymm5, x86::yword_ptr(args[3], 32));
|
||||
c.vmovups(x86::ymm6, x86::yword_ptr(args[3], 64));
|
||||
c.vmovups(x86::ymm7, x86::yword_ptr(args[3], 96));
|
||||
c.vmovups(x86::ymm0, x86::ymmword_ptr(args[2], 0));
|
||||
c.vmovups(x86::ymm1, x86::ymmword_ptr(args[2], 32));
|
||||
c.vmovups(x86::ymm2, x86::ymmword_ptr(args[2], 64));
|
||||
c.vmovups(x86::ymm3, x86::ymmword_ptr(args[2], 96));
|
||||
c.vmovups(x86::ymm4, x86::ymmword_ptr(args[3], 0));
|
||||
c.vmovups(x86::ymm5, x86::ymmword_ptr(args[3], 32));
|
||||
c.vmovups(x86::ymm6, x86::ymmword_ptr(args[3], 64));
|
||||
c.vmovups(x86::ymm7, x86::ymmword_ptr(args[3], 96));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -506,10 +506,10 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(args[1], 0));
|
||||
c.vxorps(x86::ymm1, x86::ymm1, x86::yword_ptr(args[1], 32));
|
||||
c.vxorps(x86::ymm2, x86::ymm2, x86::yword_ptr(args[1], 64));
|
||||
c.vxorps(x86::ymm3, x86::ymm3, x86::yword_ptr(args[1], 96));
|
||||
c.vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(args[1], 0));
|
||||
c.vxorps(x86::ymm1, x86::ymm1, x86::ymmword_ptr(args[1], 32));
|
||||
c.vxorps(x86::ymm2, x86::ymm2, x86::ymmword_ptr(args[1], 64));
|
||||
c.vxorps(x86::ymm3, x86::ymm3, x86::ymmword_ptr(args[1], 96));
|
||||
c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
|
||||
c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
|
||||
c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
|
||||
@ -539,10 +539,10 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::yword_ptr(args[1], 0), x86::ymm4);
|
||||
c.vmovaps(x86::yword_ptr(args[1], 32), x86::ymm5);
|
||||
c.vmovaps(x86::yword_ptr(args[1], 64), x86::ymm6);
|
||||
c.vmovaps(x86::yword_ptr(args[1], 96), x86::ymm7);
|
||||
c.vmovaps(x86::ymmword_ptr(args[1], 0), x86::ymm4);
|
||||
c.vmovaps(x86::ymmword_ptr(args[1], 32), x86::ymm5);
|
||||
c.vmovaps(x86::ymmword_ptr(args[1], 64), x86::ymm6);
|
||||
c.vmovaps(x86::ymmword_ptr(args[1], 96), x86::ymm7);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -569,10 +569,10 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
||||
// Load previous data to store back to rdata
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::ymm0, x86::yword_ptr(args[1], 0));
|
||||
c.vmovaps(x86::ymm1, x86::yword_ptr(args[1], 32));
|
||||
c.vmovaps(x86::ymm2, x86::yword_ptr(args[1], 64));
|
||||
c.vmovaps(x86::ymm3, x86::yword_ptr(args[1], 96));
|
||||
c.vmovaps(x86::ymm0, x86::ymmword_ptr(args[1], 0));
|
||||
c.vmovaps(x86::ymm1, x86::ymmword_ptr(args[1], 32));
|
||||
c.vmovaps(x86::ymm2, x86::ymmword_ptr(args[1], 64));
|
||||
c.vmovaps(x86::ymm3, x86::ymmword_ptr(args[1], 96));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -601,10 +601,10 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
||||
// Store previous data back to rdata
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::yword_ptr(args[2], 0), x86::ymm0);
|
||||
c.vmovaps(x86::yword_ptr(args[2], 32), x86::ymm1);
|
||||
c.vmovaps(x86::yword_ptr(args[2], 64), x86::ymm2);
|
||||
c.vmovaps(x86::yword_ptr(args[2], 96), x86::ymm3);
|
||||
c.vmovaps(x86::ymmword_ptr(args[2], 0), x86::ymm0);
|
||||
c.vmovaps(x86::ymmword_ptr(args[2], 32), x86::ymm1);
|
||||
c.vmovaps(x86::ymmword_ptr(args[2], 64), x86::ymm2);
|
||||
c.vmovaps(x86::ymmword_ptr(args[2], 96), x86::ymm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -663,7 +663,7 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
||||
c.ret();
|
||||
});
|
||||
|
||||
const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata, u64* _stx, u64* _ftx)>("spu_putlluc_tx", [](asmjit::X86Assembler& c, auto& args)
|
||||
const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata, u64* _stx, u64* _ftx)>("spu_putlluc_tx", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -694,10 +694,10 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
|
||||
// Prepare data
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovups(x86::ymm0, x86::yword_ptr(args[1], 0));
|
||||
c.vmovups(x86::ymm1, x86::yword_ptr(args[1], 32));
|
||||
c.vmovups(x86::ymm2, x86::yword_ptr(args[1], 64));
|
||||
c.vmovups(x86::ymm3, x86::yword_ptr(args[1], 96));
|
||||
c.vmovups(x86::ymm0, x86::ymmword_ptr(args[1], 0));
|
||||
c.vmovups(x86::ymm1, x86::ymmword_ptr(args[1], 32));
|
||||
c.vmovups(x86::ymm2, x86::ymmword_ptr(args[1], 64));
|
||||
c.vmovups(x86::ymm3, x86::ymmword_ptr(args[1], 96));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -733,10 +733,10 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::yword_ptr(x86::r11, 0), x86::ymm0);
|
||||
c.vmovaps(x86::yword_ptr(x86::r11, 32), x86::ymm1);
|
||||
c.vmovaps(x86::yword_ptr(x86::r11, 64), x86::ymm2);
|
||||
c.vmovaps(x86::yword_ptr(x86::r11, 96), x86::ymm3);
|
||||
c.vmovaps(x86::ymmword_ptr(x86::r11, 0), x86::ymm0);
|
||||
c.vmovaps(x86::ymmword_ptr(x86::r11, 32), x86::ymm1);
|
||||
c.vmovaps(x86::ymmword_ptr(x86::r11, 64), x86::ymm2);
|
||||
c.vmovaps(x86::ymmword_ptr(x86::r11, 96), x86::ymm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -789,7 +789,7 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
|
||||
c.ret();
|
||||
});
|
||||
|
||||
const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>("spu_getllar_tx", [](asmjit::X86Assembler& c, auto& args)
|
||||
const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>("spu_getllar_tx", [](asmjit::x86::Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -847,10 +847,10 @@ const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_th
|
||||
// Just read data to registers
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovups(x86::ymm0, x86::yword_ptr(x86::rbp, 0));
|
||||
c.vmovups(x86::ymm1, x86::yword_ptr(x86::rbp, 32));
|
||||
c.vmovups(x86::ymm2, x86::yword_ptr(x86::rbp, 64));
|
||||
c.vmovups(x86::ymm3, x86::yword_ptr(x86::rbp, 96));
|
||||
c.vmovups(x86::ymm0, x86::ymmword_ptr(x86::rbp, 0));
|
||||
c.vmovups(x86::ymm1, x86::ymmword_ptr(x86::rbp, 32));
|
||||
c.vmovups(x86::ymm2, x86::ymmword_ptr(x86::rbp, 64));
|
||||
c.vmovups(x86::ymm3, x86::ymmword_ptr(x86::rbp, 96));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -872,10 +872,10 @@ const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_th
|
||||
// Store data
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::yword_ptr(args[1], 0), x86::ymm0);
|
||||
c.vmovaps(x86::yword_ptr(args[1], 32), x86::ymm1);
|
||||
c.vmovaps(x86::yword_ptr(args[1], 64), x86::ymm2);
|
||||
c.vmovaps(x86::yword_ptr(args[1], 96), x86::ymm3);
|
||||
c.vmovaps(x86::ymmword_ptr(args[1], 0), x86::ymm0);
|
||||
c.vmovaps(x86::ymmword_ptr(args[1], 32), x86::ymm1);
|
||||
c.vmovaps(x86::ymmword_ptr(args[1], 64), x86::ymm2);
|
||||
c.vmovaps(x86::ymmword_ptr(args[1], 96), x86::ymm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -144,7 +144,7 @@ namespace
|
||||
}
|
||||
|
||||
template <bool Compare, int Size, typename RT>
|
||||
void build_copy_data_swap_u32_avx3(asmjit::X86Assembler& c, std::array<asmjit::X86Gp, 4>& args, const RT& rmask, const RT& rload, const RT& rtest)
|
||||
void build_copy_data_swap_u32_avx3(asmjit::x86::Assembler& c, std::array<asmjit::x86::Gp, 4>& args, const RT& rmask, const RT& rload, const RT& rtest)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
@ -173,26 +173,26 @@ namespace
|
||||
c.and_(args[0], -Size * 4);
|
||||
c.add(args[2].r32(), args[3].r32());
|
||||
|
||||
c.k(x86::k1).z().vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u));
|
||||
c.k(x86::k1).z().vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u));
|
||||
c.vpshufb(rload, rload, rmask);
|
||||
if (Compare)
|
||||
c.k(x86::k1).z().vpxord(rtest, rload, X86Mem(args[0], 0, Size * 4u));
|
||||
c.k(x86::k1).vmovdqa32(X86Mem(args[0], 0, Size * 4u), rload);
|
||||
c.k(x86::k1).z().vpxord(rtest, rload, x86::Mem(args[0], 0, Size * 4u));
|
||||
c.k(x86::k1).vmovdqa32(x86::Mem(args[0], 0, Size * 4u), rload);
|
||||
c.lea(args[0], x86::qword_ptr(args[0], Size * 4));
|
||||
c.lea(args[1], x86::qword_ptr(args[1], Size * 4));
|
||||
c.sub(args[2].r32(), Size);
|
||||
|
||||
c.or_(x86::eax, -1);
|
||||
c.align(kAlignCode, 16);
|
||||
c.align(AlignMode::kCode, 16);
|
||||
|
||||
c.bind(loop);
|
||||
c.cmp(args[2].r32(), Size);
|
||||
c.jbe(tail);
|
||||
c.vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u));
|
||||
c.vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u));
|
||||
c.vpshufb(rload, rload, rmask);
|
||||
if (Compare)
|
||||
c.vpternlogd(rtest, rload, X86Mem(args[0], 0, Size * 4u), 0xf6); // orAxorBC
|
||||
c.vmovdqa32(X86Mem(args[0], 0, Size * 4u), rload);
|
||||
c.vpternlogd(rtest, rload, x86::Mem(args[0], 0, Size * 4u), 0xf6); // orAxorBC
|
||||
c.vmovdqa32(x86::Mem(args[0], 0, Size * 4u), rload);
|
||||
c.lea(args[0], x86::qword_ptr(args[0], Size * 4));
|
||||
c.lea(args[1], x86::qword_ptr(args[1], Size * 4));
|
||||
c.sub(args[2].r32(), Size);
|
||||
@ -202,11 +202,11 @@ namespace
|
||||
c.shlx(x86::eax, x86::eax, args[2].r32());
|
||||
c.not_(x86::eax);
|
||||
c.kmovw(x86::k1, x86::eax);
|
||||
c.k(x86::k1).z().vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u));
|
||||
c.k(x86::k1).z().vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u));
|
||||
c.vpshufb(rload, rload, rmask);
|
||||
if (Compare)
|
||||
c.k(x86::k1).vpternlogd(rtest, rload, X86Mem(args[0], 0, Size * 4u), 0xf6);
|
||||
c.k(x86::k1).vmovdqu32(X86Mem(args[0], 0, Size * 4u), rload);
|
||||
c.k(x86::k1).vpternlogd(rtest, rload, x86::Mem(args[0], 0, Size * 4u), 0xf6);
|
||||
c.k(x86::k1).vmovdqu32(x86::Mem(args[0], 0, Size * 4u), rload);
|
||||
|
||||
if (Compare)
|
||||
{
|
||||
@ -230,7 +230,7 @@ namespace
|
||||
}
|
||||
|
||||
template <bool Compare>
|
||||
void build_copy_data_swap_u32(asmjit::X86Assembler& c, std::array<asmjit::X86Gp, 4>& args)
|
||||
void build_copy_data_swap_u32(asmjit::x86::Assembler& c, std::array<asmjit::x86::Gp, 4>& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user