diff --git a/.gitmodules b/.gitmodules
index 0ac7381b73..84eabb87dd 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -42,3 +42,6 @@
[submodule "3rdparty/pugixml"]
path = 3rdparty/pugixml
url = https://github.com/zeux/pugixml
+[submodule "3rdparty/xxHash"]
+ path = 3rdparty/xxHash
+ url = https://github.com/Cyan4973/xxHash
diff --git a/.travis.yml b/.travis.yml
index 6796d453e3..952b33741f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -47,7 +47,7 @@ install:
fi;
before_script:
- - git submodule update --init asmjit 3rdparty/ffmpeg 3rdparty/pugixml 3rdparty/GSL 3rdparty/libpng Utilities/yaml-cpp 3rdparty/cereal 3rdparty/hidapi 3rdparty/Optional Vulkan/glslang Vulkan/Vulkan-LoaderAndValidationLayers
+ - git submodule update --init asmjit 3rdparty/ffmpeg 3rdparty/pugixml 3rdparty/GSL 3rdparty/libpng Utilities/yaml-cpp 3rdparty/cereal 3rdparty/hidapi 3rdparty/Optional 3rdparty/xxHash Vulkan/glslang Vulkan/Vulkan-LoaderAndValidationLayers
- mkdir build ; cd build
- export CMAKE_PREFIX_PATH=~/Qt/${QTVER}/gcc_64/lib/cmake
- if [ "$TRAVIS_PULL_REQUEST" = false ]; then
diff --git a/3rdparty/xxHash b/3rdparty/xxHash
new file mode 160000
index 0000000000..3064d42e7d
--- /dev/null
+++ b/3rdparty/xxHash
@@ -0,0 +1 @@
+Subproject commit 3064d42e7d74b0921bdd1818395d9cb37bb8976a
diff --git a/3rdparty/xxhash.vcxproj b/3rdparty/xxhash.vcxproj
new file mode 100644
index 0000000000..e0a8ca3064
--- /dev/null
+++ b/3rdparty/xxhash.vcxproj
@@ -0,0 +1,59 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+ {939FE206-1182-ABC3-1234-FEAB88E98404}
+ 8.1
+
+
+
+ StaticLibrary
+ v140
+ Unicode
+
+
+ true
+
+
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1ff45f2ea9..aafee50393 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,6 +44,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${PROJECT_BINARY_DIR}/bin")
add_subdirectory( Vulkan )
add_subdirectory( rpcs3 )
+add_subdirectory(3rdparty/xxHash/cmake_unofficial)
include_directories(3rdparty/hidapi/hidapi)
if(APPLE)
diff --git a/appveyor.yml b/appveyor.yml
index ec50e036e2..04272acae9 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -46,6 +46,7 @@ install:
3rdparty/libpng `
3rdparty/Optional `
3rdparty/pugixml `
+ 3rdparty/xxHash `
3rdparty/zlib `
asmjit `
Utilities/yaml-cpp `
diff --git a/rpcs3.sln b/rpcs3.sln
index 1c17a89b66..e7b81d3bf2 100644
--- a/rpcs3.sln
+++ b/rpcs3.sln
@@ -74,6 +74,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "hidapi", "hidapi", "{FA1E6C
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hidapi", "3rdparty\hidapi\windows\hidapi.vcxproj", "{A107C21C-418A-4697-BB10-20C3AA60E2E4}"
EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "xxhash", "xxhash", "{D16E245C-CC5A-4B9A-8BAB-1176F02C1631}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "xxhash", "3rdparty\xxhash.vcxproj", "{939FE206-1182-ABC3-1234-FEAB88E98404}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug - LLVM|x64 = Debug - LLVM|x64
@@ -223,6 +227,16 @@ Global
{A107C21C-418A-4697-BB10-20C3AA60E2E4}.Release - LLVM|x64.Build.0 = Release|x64
{A107C21C-418A-4697-BB10-20C3AA60E2E4}.Release|x64.ActiveCfg = Release|x64
{A107C21C-418A-4697-BB10-20C3AA60E2E4}.Release|x64.Build.0 = Release|x64
+ {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug - LLVM|x64.ActiveCfg = Debug|x64
+ {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug - LLVM|x64.Build.0 = Debug|x64
+ {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug - MemLeak|x64.ActiveCfg = Debug|x64
+ {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug - MemLeak|x64.Build.0 = Debug|x64
+ {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug|x64.ActiveCfg = Debug|x64
+ {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug|x64.Build.0 = Debug|x64
+ {939FE206-1182-ABC3-1234-FEAB88E98404}.Release - LLVM|x64.ActiveCfg = Release|x64
+ {939FE206-1182-ABC3-1234-FEAB88E98404}.Release - LLVM|x64.Build.0 = Release|x64
+ {939FE206-1182-ABC3-1234-FEAB88E98404}.Release|x64.ActiveCfg = Release|x64
+ {939FE206-1182-ABC3-1234-FEAB88E98404}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -242,5 +256,6 @@ Global
{3EE5F075-B546-42C4-B6A8-E3CCEF38B78D} = {10FBF193-D532-4CCF-B875-4C7091A7F6C2}
{FDC361C5-7734-493B-8CFB-037308B35122} = {DDF904CA-2771-441A-8629-5DF2EB922A79}
{A107C21C-418A-4697-BB10-20C3AA60E2E4} = {FA1E6C16-CA63-45F8-8D52-E21DF396BE36}
+ {939FE206-1182-ABC3-1234-FEAB88E98404} = {D16E245C-CC5A-4B9A-8BAB-1176F02C1631}
EndGlobalSection
EndGlobal
diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt
index 7bc7aa1f9b..0e4c7630b7 100644
--- a/rpcs3/CMakeLists.txt
+++ b/rpcs3/CMakeLists.txt
@@ -300,6 +300,7 @@ ${LLVM_INCLUDE_DIRS}
"${RPCS3_SRC_DIR}/../3rdparty/cereal/include"
"${RPCS3_SRC_DIR}/../3rdparty/Optional"
"${RPCS3_SRC_DIR}/../3rdparty/discord-rpc/include"
+"${RPCS3_SRC_DIR}/../3rdparty/xxHash"
)
if(WIN32)
@@ -408,6 +409,8 @@ if(UNIX)
target_link_libraries(rpcs3 ${X11_LIBRARIES})
endif()
+target_link_libraries(rpcs3 xxhash)
+
if(WIN32)
target_link_libraries(rpcs3 ws2_32.lib Winmm.lib Psapi.lib gdi32.lib VKstatic.1 glslang OSDependent OGLCompiler SPIRV HLSL setupapi.lib hidapi-hid Shlwapi.lib)
if(NOT MSVC)
diff --git a/rpcs3/Emu/RSX/Capture/rsx_capture.cpp b/rpcs3/Emu/RSX/Capture/rsx_capture.cpp
new file mode 100644
index 0000000000..f782c02bbe
--- /dev/null
+++ b/rpcs3/Emu/RSX/Capture/rsx_capture.cpp
@@ -0,0 +1,627 @@
+#include "stdafx.h"
+#include "rsx_capture.h"
+#include "Emu/RSX/Common/BufferUtils.h"
+#include "Emu/RSX/Common/TextureUtils.h"
+#include "Emu/RSX/Common/surface_store.h"
+#include "Emu/RSX/GCM.h"
+#include "Emu/RSX/RSXThread.h"
+#include "Emu/Memory/Memory.h"
+
+#include "xxhash.h"
+
+namespace rsx
+{
+ namespace capture
+ {
+ // Translates an (offset, location) pair into the RSX IO offset that backs it.
+ // Returns 0xFFFFFFFF when the location is not one of the handled ones or when
+ // RSXIOMem has nothing mapped at the probed offset.
+ u32 get_io_offset(u32 offset, u32 location)
+ {
+     const bool main_memory = location == CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER || location == CELL_GCM_LOCATION_MAIN;
+     const bool report_main = location == CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN;
+
+     if (main_memory && RSXIOMem.RealAddr(offset))
+         return offset;
+
+     // NOTE(review): an unmapped main-memory offset is retried inside the 0x0e000000
+     // window, exactly like the original case fallthrough did - confirm this is intended.
+     if (main_memory || report_main)
+     {
+         const u32 report_offset = 0x0e000000 + offset;
+         if (RSXIOMem.RealAddr(report_offset))
+             return report_offset;
+     }
+
+     return 0xFFFFFFFFu;
+ }
+
+ // Registers `block` (and its payload `data`, when non-empty) in the global frame capture
+ // and records the block's hash in `mem_changes`.
+ // - A non-empty payload is stored once, keyed by its XXH64 hash; the hash is written to
+ //   block.data_state and the payload size to block.size before the block itself is hashed.
+ // - An empty payload leaves block.size / block.data_state as the caller set them.
+ // Throws when a payload hashes to 0 (reserved for "no data") or collides with a different payload.
+ void insert_mem_block_in_map(std::unordered_set& mem_changes, frame_capture_data::memory_block&& block, frame_capture_data::memory_block_data&& data)
+ {
+     if (const auto payload_size = data.data.size())
+     {
+         const u64 payload_hash = XXH64(data.data.data(), payload_size, 0);
+
+         // using 0 to signify no block in use, so this one is 'reserved'
+         if (payload_hash == 0)
+             fmt::throw_exception("Memory block data hash equal to 0");
+
+         block.size = payload_size;
+         block.data_state = payload_hash;
+
+         auto& payloads = frame_capture.memory_data_map;
+         const auto known = payloads.find(payload_hash);
+         if (known == payloads.end())
+         {
+             payloads.insert(std::make_pair(payload_hash, std::move(data)));
+         }
+         else if (known->second.data != data.data)
+         {
+             // same hash, different bytes: the capture cannot be trusted
+             fmt::throw_exception("Memory map hash collision detected...cant capture");
+         }
+     }
+
+     // the block descriptor is deduplicated by the hash of its raw bytes
+     const u64 descriptor_hash = XXH64(&block, sizeof(frame_capture_data::memory_block), 0);
+     mem_changes.insert(descriptor_hash);
+
+     auto& descriptors = frame_capture.memory_map;
+     if (descriptors.find(descriptor_hash) == descriptors.end())
+         descriptors.insert(std::make_pair(descriptor_hash, std::move(block)));
+ }
+
+ // Records every guest memory range the current draw call reads (fragment program,
+ // fragment and vertex textures, vertex buffers and - for indexed draws - the index buffer)
+ // and attaches the resulting block hashes to the most recently recorded replay command.
+ // Display/tile state and surface state are captured for that same command at the end.
+ // Throws when no replay command has been recorded yet.
+ void capture_draw_memory(thread* rsx)
+ {
+     // todo:
+     // - tile / zcull state changing during other commands
+     // - track memory that is rendered into and ignore saving it later, this one will be tough
+
+     if (frame_capture.replay_commands.empty())
+         fmt::throw_exception("no replay commands to attach memory state to");
+
+     // every block captured below is attached to the last issued command
+     auto& mem_changes = frame_capture.replay_commands.back().memory_state;
+
+     // capture fragment shader mem
+     const u32 shader_program = method_registers.shader_program_address();
+     if (shader_program != 0)
+     {
+         // the low two bits hold (location + 1), the remaining bits the program offset
+         const u32 program_location = (shader_program & 0x3) - 1;
+         const u32 program_offset = (shader_program & ~0x3);
+
+         const u32 addr = get_address(program_offset, program_location);
+         const u32 program_start = program_hash_util::fragment_program_utils::get_fragment_program_start(vm::base(addr));
+         const u32 ucode_size = program_hash_util::fragment_program_utils::get_fragment_program_ucode_size(vm::base(addr + program_start));
+
+         frame_capture_data::memory_block block;
+         block.addr = addr;
+         block.ioOffset = get_io_offset(program_offset, program_location);
+
+         // copy everything from addr up to the end of the ucode
+         frame_capture_data::memory_block_data block_data;
+         block_data.data.resize(ucode_size + program_start);
+         std::memcpy(block_data.data.data(), vm::base(addr), ucode_size + program_start);
+         insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data));
+     }
+
+     // vertex shader is passed in registers, so it can be ignored
+
+     // save fragment tex mem
+     for (const auto& tex : method_registers.fragment_textures)
+     {
+         if (!tex.enabled())
+             continue;
+
+         const u32 texaddr = get_address(tex.offset(), tex.location());
+         auto layout = get_subresources_layout(tex);
+
+         // todo: dont use this function and just get size somehow
+         size_t texSize = 0;
+         for (const auto& l : layout)
+             texSize += l.data.size();
+
+         if (!texaddr || !texSize)
+             continue;
+
+         frame_capture_data::memory_block block;
+         block.addr = texaddr;
+         block.ioOffset = get_io_offset(tex.offset(), tex.location());
+
+         frame_capture_data::memory_block_data block_data;
+         block_data.data.resize(texSize);
+         std::memcpy(block_data.data.data(), vm::base(texaddr), texSize);
+         insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data));
+     }
+
+     // save vertex texture mem
+     for (const auto& tex : method_registers.vertex_textures)
+     {
+         if (!tex.enabled())
+             continue;
+
+         const u32 texaddr = get_address(tex.offset(), tex.location());
+         auto layout = get_subresources_layout(tex);
+
+         // todo: dont use this function and just get size somehow
+         size_t texSize = 0;
+         for (const auto& l : layout)
+             texSize += l.data.size();
+
+         if (!texaddr || !texSize)
+             continue;
+
+         frame_capture_data::memory_block block;
+         block.addr = texaddr;
+         block.ioOffset = get_io_offset(tex.offset(), tex.location());
+
+         frame_capture_data::memory_block_data block_data;
+         block_data.data.resize(texSize);
+         std::memcpy(block_data.data.data(), vm::base(texaddr), texSize);
+         insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data));
+     }
+
+     // save vertex buffer memory
+     if (method_registers.current_draw_clause.command == draw_command::array)
+     {
+         const u32 input_mask = method_registers.vertex_attrib_input_mask();
+         for (u8 index = 0; index < limits::vertex_count; ++index)
+         {
+             const bool enabled = !!(input_mask & (1 << index));
+             if (!enabled)
+                 continue;
+
+             const auto& info = method_registers.vertex_arrays_info[index];
+             if (!info.size())
+                 continue;
+
+             // vert buffer: bit 31 of the array offset selects the memory location
+             const u32 base_address = get_vertex_offset_from_base(method_registers.vertex_data_base_offset(), info.offset() & 0x7fffffff);
+             const u32 memory_location = info.offset() >> 31;
+
+             const u32 addr = get_address(base_address, memory_location);
+             const u32 vertSize = get_vertex_type_size_on_host(info.type(), info.size());
+             const u32 vertStride = info.stride();
+
+             // one block per (first, count) range of the draw clause
+             for (const auto& count : method_registers.current_draw_clause.first_count_commands)
+             {
+                 const u32 vertCount = count.second;
+                 const size_t bufferSize = vertCount * vertStride + vertSize;
+
+                 frame_capture_data::memory_block block;
+                 block.addr = addr;
+                 block.ioOffset = get_io_offset(base_address, memory_location);
+                 block.offset = (count.first * vertStride);
+
+                 frame_capture_data::memory_block_data block_data;
+                 block_data.data.resize(bufferSize);
+                 std::memcpy(block_data.data.data(), vm::base(addr + block.offset), bufferSize);
+                 insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data));
+             }
+         }
+     }
+     // save index buffer if used
+     else if (method_registers.current_draw_clause.command == draw_command::indexed)
+     {
+         const u32 input_mask = method_registers.vertex_attrib_input_mask();
+
+         const u32 base_address = method_registers.index_array_address();
+         const u32 memory_location = method_registers.index_array_location();
+
+         const u32 base_addr = get_address(base_address, memory_location);
+         const u32 type_size = get_index_type_size(method_registers.index_type());
+         const auto index_type = method_registers.index_type();
+
+         // manually parse index buffer and copy vertex buffer
+         // min_index starts at the largest value of the index type so the first real index replaces it
+         u32 min_index = 0xFFFF, max_index = 0;
+         if (index_type == index_array_type::u32)
+             min_index = 0xFFFFFFFF;
+
+         const bool is_primitive_restart_enabled = method_registers.restart_index_enabled();
+         const u32 primitive_restart_index = method_registers.restart_index();
+
+         for (const auto& count : method_registers.current_draw_clause.first_count_commands)
+         {
+             const u32 idxFirst = count.first;
+             const u32 idxCount = count.second;
+             const u32 idxAddr = base_addr + (idxFirst * type_size);
+
+             const size_t bufferSize = idxCount * type_size;
+
+             // the index range itself
+             frame_capture_data::memory_block block;
+             block.addr = base_addr;
+             block.ioOffset = get_io_offset(base_address, memory_location);
+             block.offset = (idxFirst * type_size);
+
+             frame_capture_data::memory_block_data block_data;
+             block_data.data.resize(bufferSize);
+             std::memcpy(block_data.data.data(), vm::base(idxAddr), bufferSize);
+             insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data));
+
+             // scan the indices to find the vertex range they reference, skipping restart markers
+             switch (index_type)
+             {
+             case index_array_type::u16:
+             {
+                 auto fifo = vm::ptr::make(idxAddr);
+                 for (u32 i = 0; i < idxCount; ++i)
+                 {
+                     u16 index = fifo[i];
+                     if (is_primitive_restart_enabled && index == (u16)primitive_restart_index)
+                         continue;
+                     index = (u16)get_index_from_base(index, method_registers.vertex_data_base_index());
+                     min_index = (u16)std::min(index, (u16)min_index);
+                     max_index = (u16)std::max(index, (u16)max_index);
+                 }
+                 break;
+             }
+             case index_array_type::u32:
+             {
+                 auto fifo = vm::ptr::make(idxAddr);
+                 for (u32 i = 0; i < idxCount; ++i)
+                 {
+                     u32 index = fifo[i];
+                     if (is_primitive_restart_enabled && index == primitive_restart_index)
+                         continue;
+                     index = get_index_from_base(index, method_registers.vertex_data_base_index());
+                     min_index = std::min(index, min_index);
+                     max_index = std::max(index, max_index);
+                 }
+                 break;
+             }
+             }
+         }
+
+         // If no index was seen (no ranges, or every index was a restart marker) min_index is
+         // still above max_index. `max_index - min_index + 1` would then wrap around in u32 and
+         // the copy below would read far past the vertex buffer, so skip vertex capture entirely.
+         const bool has_vertex_range = min_index <= max_index;
+
+         for (u8 index = 0; has_vertex_range && index < limits::vertex_count; ++index)
+         {
+             const bool enabled = !!(input_mask & (1 << index));
+             if (!enabled)
+                 continue;
+
+             const auto& info = method_registers.vertex_arrays_info[index];
+             if (!info.size())
+                 continue;
+
+             // vert buffer: bit 31 of the array offset selects the memory location
+             const u32 vertStride = info.stride();
+             const u32 base_address = get_vertex_offset_from_base(method_registers.vertex_data_base_offset(), info.offset() & 0x7fffffff);
+             const u32 memory_location = info.offset() >> 31;
+
+             const u32 addr = get_address(base_address, memory_location);
+             const u32 vertSize = get_vertex_type_size_on_host(info.type(), info.size());
+
+             // covers the vertices [min_index, max_index] referenced by the index buffer
+             const u32 bufferSize = vertStride * (max_index - min_index + 1) + vertSize;
+
+             frame_capture_data::memory_block block;
+             block.addr = addr;
+             block.ioOffset = get_io_offset(base_address, memory_location);
+             block.offset = (min_index * vertStride);
+
+             frame_capture_data::memory_block_data block_data;
+             block_data.data.resize(bufferSize);
+             std::memcpy(block_data.data.data(), vm::base(addr + block.offset), bufferSize);
+             insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data));
+         }
+     }
+
+     capture_display_tile_state(rsx, frame_capture.replay_commands.back());
+     capture_surface_state(rsx, frame_capture.replay_commands.back());
+ }
+
+ // The capture_* helpers below intentionally repeat the register decoding done by the matching
+ // rsx_method implementations; that duplication is currently unavoidable.
+ //
+ // Captures the source pixels of a blit-engine image-in transfer into `replay_command` and
+ // registers the destination range (without data) so it can be allocated on replay.
+ // Nothing is captured when any of the input/output dimensions is zero. When the context
+ // surface is neither surface2d nor swizzle2d only the source is captured.
+ void capture_image_in(thread* rsx, frame_capture_data::replay_command& replay_command)
+ {
+     const u16 out_x = method_registers.blit_engine_output_x();
+     const u16 out_y = method_registers.blit_engine_output_y();
+     const u16 out_w = method_registers.blit_engine_output_width();
+     const u16 out_h = method_registers.blit_engine_output_height();
+
+     const u16 in_w = method_registers.blit_engine_input_width();
+     const u16 in_h = method_registers.blit_engine_input_height();
+
+     const rsx::blit_engine::transfer_source_format src_color_format = method_registers.blit_engine_src_color_format();
+
+     const f32 in_x = std::ceil(method_registers.blit_engine_in_x());
+     const f32 in_y = std::ceil(method_registers.blit_engine_in_y());
+
+     u16 in_pitch = method_registers.blit_engine_input_pitch();
+
+     if (in_w == 0 || in_h == 0 || out_w == 0 || out_h == 0)
+     {
+         return;
+     }
+
+     const u32 src_offset = method_registers.blit_engine_input_offset();
+     const u32 src_dma = method_registers.blit_engine_input_location();
+
+     const u32 in_bpp = (src_color_format == rsx::blit_engine::transfer_source_format::r5g6b5) ? 2 : 4; // bytes per pixel
+     const u32 in_offset = u32(in_x * in_bpp + in_pitch * in_y);
+     const tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf);
+
+     frame_capture_data::memory_block block;
+     block.addr = src_region.address;
+     block.ioOffset = get_io_offset(src_region.tile ? src_region.base : src_offset + in_offset, src_dma & 0xf);
+
+     u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr;
+
+     // a zero pitch means tightly packed rows
+     if (in_pitch == 0)
+     {
+         in_pitch = in_bpp * in_w;
+     }
+
+     rsx->read_barrier(src_region.address, in_pitch * in_h);
+
+     frame_capture_data::memory_block_data block_data;
+     block_data.data.resize(in_pitch * in_h);
+     std::memcpy(block_data.data.data(), pixels_src, in_pitch * in_h);
+     insert_mem_block_in_map(replay_command.memory_state, std::move(block), std::move(block_data));
+
+     // 'capture' destination to ensure memory is alloc'd and usable in replay
+     u32 dst_offset = 0;
+     u32 dst_dma = 0;
+     rsx::blit_engine::transfer_destination_format dst_color_format{};
+     u32 out_pitch = 0;
+     bool has_destination = true;
+
+     switch (method_registers.blit_engine_context_surface())
+     {
+     case blit_engine::context_surface::surface2d:
+         dst_dma = method_registers.blit_engine_output_location_nv3062();
+         dst_offset = method_registers.blit_engine_output_offset_nv3062();
+         dst_color_format = method_registers.blit_engine_nv3062_color_format();
+         out_pitch = method_registers.blit_engine_output_pitch_nv3062();
+         break;
+
+     case blit_engine::context_surface::swizzle2d:
+         dst_dma = method_registers.blit_engine_nv309E_location();
+         dst_offset = method_registers.blit_engine_nv309E_offset();
+         dst_color_format = method_registers.blit_engine_output_format_nv309E();
+         break;
+
+     default:
+         // No destination registers are decoded for other surfaces. The destination format
+         // used to be read uninitialized here; skip the destination instead.
+         has_destination = false;
+         break;
+     }
+
+     if (has_destination)
+     {
+         const u32 out_bpp = (dst_color_format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? 2 : 4;
+         const s32 out_offset = out_x * out_bpp + out_pitch * out_y;
+         const tiled_region dst_region = rsx->get_tiled_address(dst_offset + out_offset, dst_dma & 0xf);
+
+         frame_capture_data::memory_block blockDst;
+         blockDst.addr = dst_region.address;
+         blockDst.ioOffset = get_io_offset(dst_region.tile ? dst_region.base : dst_offset + out_offset, dst_dma & 0xf);
+
+         // only destinations that resolve to an io offset are recorded
+         if (blockDst.ioOffset != -1)
+         {
+             // swizzled surfaces are sized from power-of-two dimensions, linear ones from the pitch
+             u32 blockSize = method_registers.blit_engine_context_surface() != blit_engine::context_surface::swizzle2d ? out_pitch * out_h : out_bpp * next_pow2(out_w) * next_pow2(out_h);
+
+             blockDst.size = blockSize;
+             frame_capture_data::memory_block_data dst_data; // intentionally empty: allocation only
+             insert_mem_block_in_map(replay_command.memory_state, std::move(blockDst), std::move(dst_data));
+         }
+     }
+
+     capture_display_tile_state(rsx, replay_command);
+ }
+
+ // Captures the source rows of an nv0039 buffer transfer into `replay_command` and registers
+ // the destination range (without data) so the location is allocated when replaying.
+ // A zero input/output pitch falls back to the line length.
+ void capture_buffer_notify(thread* rsx, frame_capture_data::replay_command& replay_command)
+ {
+     s32 in_pitch = method_registers.nv0039_input_pitch();
+     const u32 line_length = method_registers.nv0039_line_length();
+     const u32 line_count = method_registers.nv0039_line_count();
+
+     if (!in_pitch)
+     {
+         in_pitch = line_length;
+     }
+
+     u32 src_offset = method_registers.nv0039_input_offset();
+     u32 src_dma = method_registers.nv0039_input_location();
+     u32 src_addr = get_address(src_offset, src_dma);
+
+     rsx->read_barrier(src_addr, in_pitch * line_count);
+
+     const u8* src = (u8*)vm::base(src_addr);
+
+     frame_capture_data::memory_block block;
+     block.addr = src_addr;
+     block.ioOffset = get_io_offset(src_offset, src_dma);
+
+     // Rows are stored at the same stride they have in guest memory, so the snapshot is a
+     // byte-for-byte image of the range starting at block.addr. The previous code sized the
+     // buffer by the pitch but packed rows at line_length, which wrote past the end of the
+     // buffer whenever line_length > in_pitch and misplaced rows whenever in_pitch > line_length.
+     // NOTE(review): a negative pitch keeps the old packed layout - confirm how replay should treat it.
+     u32 row_stride = line_length;
+     u32 required_size = line_length * line_count;
+     if (in_pitch > 0)
+     {
+         row_stride = in_pitch;
+         required_size = line_count ? in_pitch * (line_count - 1) + line_length : 0;
+     }
+
+     frame_capture_data::memory_block_data block_data;
+     // never smaller than the original in_pitch * line_count, never smaller than what is written
+     block_data.data.resize(std::max<u32>(in_pitch * line_count, required_size));
+
+     for (u32 i = 0; i < line_count; ++i)
+     {
+         std::memcpy(block_data.data.data() + (row_stride * i), src, line_length);
+         src += in_pitch;
+     }
+
+     insert_mem_block_in_map(replay_command.memory_state, std::move(block), std::move(block_data));
+
+     // we 'capture' destination mostly to ensure that the location is allocated when replaying
+     u32 dst_offset = method_registers.nv0039_output_offset();
+     u32 dst_dma = method_registers.nv0039_output_location();
+     u32 dst_addr = get_address(dst_offset, dst_dma);
+
+     s32 out_pitch = method_registers.nv0039_output_pitch();
+     if (!out_pitch)
+     {
+         out_pitch = line_length;
+     }
+
+     frame_capture_data::memory_block blockDst;
+     blockDst.addr = dst_addr;
+     blockDst.ioOffset = get_io_offset(dst_offset, dst_dma);
+
+     // only check for iooffset'd data
+     if (blockDst.ioOffset != -1)
+     {
+         frame_capture_data::memory_block_data dst_data; // intentionally empty: allocation only
+         blockDst.size = out_pitch * line_count;
+         insert_mem_block_in_map(replay_command.memory_state, std::move(blockDst), std::move(dst_data));
+     }
+
+     capture_display_tile_state(rsx, replay_command);
+ }
+
+ // Snapshots the display buffer list and the tile / zcull tables of `rsx`, stores each
+ // snapshot in the frame capture keyed by the XXH64 hash of its raw bytes (identical
+ // snapshots are stored once) and writes both hashes into `replay_command`.
+ void capture_display_tile_state(thread* rsx, frame_capture_data::replay_command& replay_command)
+ {
+     // display buffers
+     // should this only happen on flip?
+     frame_capture_data::display_buffers_state display_snapshot;
+     display_snapshot.count = rsx->display_buffers_count;
+     for (u32 slot = 0; slot < rsx->display_buffers_count; ++slot)
+     {
+         const auto& source = rsx->display_buffers[slot];
+         auto& target = display_snapshot.buffers[slot];
+         target.height = source.height;
+         target.width = source.width;
+         target.offset = source.offset;
+         target.pitch = source.pitch;
+     }
+
+     const u64 display_hash = XXH64(&display_snapshot, sizeof(frame_capture_data::display_buffers_state), 0);
+     auto& display_map = frame_capture.display_buffers_map;
+     if (display_map.find(display_hash) == display_map.end())
+         display_map.insert(std::make_pair(display_hash, std::move(display_snapshot)));
+
+     // tiles and zculls
+     // todo: hook tile call sys_rsx call or something
+     frame_capture_data::tile_state tile_snapshot;
+     for (u32 slot = 0; slot < limits::tiles_count; ++slot)
+     {
+         const auto& source = rsx->tiles[slot];
+         auto& target = tile_snapshot.tiles[slot];
+         target.bank = source.bank;
+         target.base = source.base;
+         target.binded = source.binded;
+         target.comp = source.comp;
+         target.location = source.location;
+         target.offset = source.offset;
+         target.pitch = source.pitch;
+         target.size = source.size;
+     }
+
+     for (u32 slot = 0; slot < limits::zculls_count; ++slot)
+     {
+         const auto& source = rsx->zculls[slot];
+         auto& target = tile_snapshot.zculls[slot];
+         target.aaFormat = source.aaFormat;
+         target.binded = source.binded;
+         target.cullStart = source.cullStart;
+         target.height = source.height;
+         target.offset = source.offset;
+         target.sFunc = source.sFunc;
+         target.sMask = source.sMask;
+         target.sRef = source.sRef;
+         target.width = source.width;
+         target.zcullDir = source.zcullDir;
+         target.zcullFormat = source.zcullFormat;
+         target.zFormat = source.zFormat;
+     }
+
+     const u64 tile_hash = XXH64(&tile_snapshot, sizeof(frame_capture_data::tile_state), 0);
+     auto& tile_map = frame_capture.tile_map;
+     if (tile_map.find(tile_hash) == tile_map.end())
+         tile_map.insert(std::make_pair(tile_hash, std::move(tile_snapshot)));
+
+     // the command only references the snapshots by hash
+     replay_command.display_buffer_state = display_hash;
+     replay_command.tile_state = tile_hash;
+ }
+
+ // Capturing a report write mostly exists so that the io memory it targets is
+ // recorded/allocated and accounted for in replay.
+ // Registers a 16-byte, data-less block at the report address when the report DMA context is
+ // report_location_main or memory_host_buffer; every other context (including the regular
+ // to_memory_get_report location) is ignored.
+ void capture_get_report(thread* rsx, frame_capture_data::replay_command& replay_command, u32 arg)
+ {
+     const u32 offset = arg & 0xffffff;
+     const blit_engine::context_dma report_dma = method_registers.context_dma_report();
+
+     u32 location = 0;
+     if (report_dma == blit_engine::context_dma::report_location_main)
+     {
+         location = CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN;
+     }
+     else if (report_dma == blit_engine::context_dma::memory_host_buffer)
+     {
+         location = CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER;
+     }
+     else
+     {
+         // regular report location and anything unknown: nothing to record
+         return;
+     }
+
+     const u32 addr = get_address(offset, location);
+
+     frame_capture_data::memory_block report_block;
+     report_block.addr = addr;
+     report_block.ioOffset = get_io_offset(offset, location);
+     report_block.size = 16;
+
+     frame_capture_data::memory_block_data no_data; // allocation only
+     insert_mem_block_in_map(replay_command.memory_state, std::move(report_block), std::move(no_data));
+ }
+
+ // Makes sure the active color targets and the depth surface are accounted for in io
+ // allocations: each surface address that resolves to an io offset is registered as a
+ // 64-byte, data-less block. The real surface size is deliberately not computed.
+ void capture_surface_state(thread* rsx, frame_capture_data::replay_command& replay_command)
+ {
+     const auto target = rsx::method_registers.surface_color_target();
+
+     u32 color_offsets[] =
+     {
+         rsx::method_registers.surface_a_offset(),
+         rsx::method_registers.surface_b_offset(),
+         rsx::method_registers.surface_c_offset(),
+         rsx::method_registers.surface_d_offset(),
+     };
+
+     u32 color_dmas[] =
+     {
+         rsx::method_registers.surface_a_dma(),
+         rsx::method_registers.surface_b_dma(),
+         rsx::method_registers.surface_c_dma(),
+         rsx::method_registers.surface_d_dma(),
+     };
+
+     // registers one surface, unless it has no io offset
+     auto register_surface = [&replay_command](u32 offset, u32 dma) -> void
+     {
+         const u32 io_offset = get_io_offset(offset, dma);
+         if (io_offset != 0xFFFFFFFFu)
+         {
+             frame_capture_data::memory_block surface_block;
+             surface_block.addr = get_address(offset, dma);
+             surface_block.ioOffset = io_offset;
+             surface_block.size = 64;
+
+             frame_capture_data::memory_block_data no_data; // allocation only
+             insert_mem_block_in_map(replay_command.memory_state, std::move(surface_block), std::move(no_data));
+         }
+     };
+
+     for (const auto& rtt : utility::get_rtt_indexes(target))
+     {
+         register_surface(color_offsets[rtt], color_dmas[rtt]);
+     }
+
+     register_surface(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
+ }
+
+ // Registers the 4-byte destination word of an inline (nv308a) transfer so it can be
+ // allocated on replay. Only the destination matters here, and only when it lies in io memory.
+ // `idx` selects the word within the transfer; `arg` is not used.
+ void capture_inline_transfer(thread* rsx, frame_capture_data::replay_command& replay_command, u32 idx, u32 arg)
+ {
+     const u16 x = method_registers.nv308a_x();
+     const u16 y = method_registers.nv308a_y();
+
+     // byte position of pixel (x, y) in the output surface, then the idx-th word from there
+     const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x << 2);
+     const u32 addr_offset = method_registers.blit_engine_output_offset_nv3062() + pixel_offset + idx * 4;
+
+     const u32 dst_location = method_registers.blit_engine_output_location_nv3062();
+     const u32 io_offset = get_io_offset(addr_offset, dst_location);
+     if (io_offset == 0xFFFFFFFFu)
+         return;
+
+     frame_capture_data::memory_block word_block;
+     word_block.addr = get_address(addr_offset, dst_location);
+     word_block.ioOffset = io_offset;
+     word_block.size = 4;
+
+     frame_capture_data::memory_block_data no_data; // allocation only
+     insert_mem_block_in_map(replay_command.memory_state, std::move(word_block), std::move(no_data));
+ }
+ }
+}
diff --git a/rpcs3/Emu/RSX/Capture/rsx_capture.h b/rpcs3/Emu/RSX/Capture/rsx_capture.h
new file mode 100644
index 0000000000..ad2a047eda
--- /dev/null
+++ b/rpcs3/Emu/RSX/Capture/rsx_capture.h
@@ -0,0 +1,17 @@
+#pragma once
+#include "rsx_replay.h"
+
+namespace rsx
+{
+ class thread;
+ // Helpers that record the guest memory and RSX state a command depends on into the
+ // global frame capture, so the command can be replayed later.
+ namespace capture
+ {
+ // Captures the memory read by the current draw call (fragment program, textures,
+ // vertex/index buffers) and attaches it to the last recorded replay command.
+ void capture_draw_memory(thread* rsx);
+ // Captures the source pixels of a blit-engine image-in transfer and registers its destination range.
+ void capture_image_in(thread* rsx, frame_capture_data::replay_command& replay_command);
+ // Captures the source rows of an nv0039 buffer transfer and registers its destination range.
+ void capture_buffer_notify(thread* rsx, frame_capture_data::replay_command& replay_command);
+ // Stores the display buffer and tile/zcull state and references both from the command by hash.
+ void capture_display_tile_state(thread* rsx, frame_capture_data::replay_command& replay_command);
+ // Registers the active color and depth surface addresses that resolve to an io offset.
+ void capture_surface_state(thread* rsx, frame_capture_data::replay_command& replay_command);
+ // Registers the 16-byte report target for main-memory / host-buffer report contexts.
+ void capture_get_report(thread* rsx, frame_capture_data::replay_command& replay_command, u32 arg);
+ // Registers the 4-byte destination word of an inline transfer when it resolves to an io offset.
+ void capture_inline_transfer(thread* rsx, frame_capture_data::replay_command& replay_command, u32 idx, u32 arg);
+ }
+}
diff --git a/rpcs3/Emu/RSX/Capture/rsx_replay.cpp b/rpcs3/Emu/RSX/Capture/rsx_replay.cpp
new file mode 100644
index 0000000000..fd10bd1959
--- /dev/null
+++ b/rpcs3/Emu/RSX/Capture/rsx_replay.cpp
@@ -0,0 +1,340 @@
+#include "stdafx.h"
+#include "rsx_replay.h"
+
+#include "Emu/System.h"
+#include "Emu/Cell/lv2/sys_rsx.h"
+#include "Emu/Memory/Memory.h"
+#include "Emu/RSX/GSRender.h"
+
+#include