diff --git a/.gitmodules b/.gitmodules index 0ac7381b73..84eabb87dd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -42,3 +42,6 @@ [submodule "3rdparty/pugixml"] path = 3rdparty/pugixml url = https://github.com/zeux/pugixml +[submodule "3rdparty/xxHash"] + path = 3rdparty/xxHash + url = https://github.com/Cyan4973/xxHash diff --git a/.travis.yml b/.travis.yml index 6796d453e3..952b33741f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -47,7 +47,7 @@ install: fi; before_script: - - git submodule update --init asmjit 3rdparty/ffmpeg 3rdparty/pugixml 3rdparty/GSL 3rdparty/libpng Utilities/yaml-cpp 3rdparty/cereal 3rdparty/hidapi 3rdparty/Optional Vulkan/glslang Vulkan/Vulkan-LoaderAndValidationLayers + - git submodule update --init asmjit 3rdparty/ffmpeg 3rdparty/pugixml 3rdparty/GSL 3rdparty/libpng Utilities/yaml-cpp 3rdparty/cereal 3rdparty/hidapi 3rdparty/Optional 3rdparty/xxHash Vulkan/glslang Vulkan/Vulkan-LoaderAndValidationLayers - mkdir build ; cd build - export CMAKE_PREFIX_PATH=~/Qt/${QTVER}/gcc_64/lib/cmake - if [ "$TRAVIS_PULL_REQUEST" = false ]; then diff --git a/3rdparty/xxHash b/3rdparty/xxHash new file mode 160000 index 0000000000..3064d42e7d --- /dev/null +++ b/3rdparty/xxHash @@ -0,0 +1 @@ +Subproject commit 3064d42e7d74b0921bdd1818395d9cb37bb8976a diff --git a/3rdparty/xxhash.vcxproj b/3rdparty/xxhash.vcxproj new file mode 100644 index 0000000000..e0a8ca3064 --- /dev/null +++ b/3rdparty/xxhash.vcxproj @@ -0,0 +1,59 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {939FE206-1182-ABC3-1234-FEAB88E98404} + 8.1 + + + + StaticLibrary + v140 + Unicode + + + true + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ff45f2ea9..aafee50393 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${PROJECT_BINARY_DIR}/bin") add_subdirectory( Vulkan ) add_subdirectory( rpcs3 ) 
+add_subdirectory(3rdparty/xxHash/cmake_unofficial) include_directories(3rdparty/hidapi/hidapi) if(APPLE) diff --git a/appveyor.yml b/appveyor.yml index ec50e036e2..04272acae9 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -46,6 +46,7 @@ install: 3rdparty/libpng ` 3rdparty/Optional ` 3rdparty/pugixml ` + 3rdparty/xxHash ` 3rdparty/zlib ` asmjit ` Utilities/yaml-cpp ` diff --git a/rpcs3.sln b/rpcs3.sln index 1c17a89b66..e7b81d3bf2 100644 --- a/rpcs3.sln +++ b/rpcs3.sln @@ -74,6 +74,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "hidapi", "hidapi", "{FA1E6C EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hidapi", "3rdparty\hidapi\windows\hidapi.vcxproj", "{A107C21C-418A-4697-BB10-20C3AA60E2E4}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "xxhash", "xxhash", "{D16E245C-CC5A-4B9A-8BAB-1176F02C1631}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "xxhash", "3rdparty\xxhash.vcxproj", "{939FE206-1182-ABC3-1234-FEAB88E98404}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug - LLVM|x64 = Debug - LLVM|x64 @@ -223,6 +227,16 @@ Global {A107C21C-418A-4697-BB10-20C3AA60E2E4}.Release - LLVM|x64.Build.0 = Release|x64 {A107C21C-418A-4697-BB10-20C3AA60E2E4}.Release|x64.ActiveCfg = Release|x64 {A107C21C-418A-4697-BB10-20C3AA60E2E4}.Release|x64.Build.0 = Release|x64 + {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug - LLVM|x64.ActiveCfg = Debug|x64 + {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug - LLVM|x64.Build.0 = Debug|x64 + {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug - MemLeak|x64.Build.0 = Debug|x64 + {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug|x64.ActiveCfg = Debug|x64 + {939FE206-1182-ABC3-1234-FEAB88E98404}.Debug|x64.Build.0 = Debug|x64 + {939FE206-1182-ABC3-1234-FEAB88E98404}.Release - LLVM|x64.ActiveCfg = Release|x64 + {939FE206-1182-ABC3-1234-FEAB88E98404}.Release - LLVM|x64.Build.0 = 
Release|x64 + {939FE206-1182-ABC3-1234-FEAB88E98404}.Release|x64.ActiveCfg = Release|x64 + {939FE206-1182-ABC3-1234-FEAB88E98404}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -242,5 +256,6 @@ Global {3EE5F075-B546-42C4-B6A8-E3CCEF38B78D} = {10FBF193-D532-4CCF-B875-4C7091A7F6C2} {FDC361C5-7734-493B-8CFB-037308B35122} = {DDF904CA-2771-441A-8629-5DF2EB922A79} {A107C21C-418A-4697-BB10-20C3AA60E2E4} = {FA1E6C16-CA63-45F8-8D52-E21DF396BE36} + {939FE206-1182-ABC3-1234-FEAB88E98404} = {D16E245C-CC5A-4B9A-8BAB-1176F02C1631} EndGlobalSection EndGlobal diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 7bc7aa1f9b..0e4c7630b7 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -300,6 +300,7 @@ ${LLVM_INCLUDE_DIRS} "${RPCS3_SRC_DIR}/../3rdparty/cereal/include" "${RPCS3_SRC_DIR}/../3rdparty/Optional" "${RPCS3_SRC_DIR}/../3rdparty/discord-rpc/include" +"${RPCS3_SRC_DIR}/../3rdparty/xxHash" ) if(WIN32) @@ -408,6 +409,8 @@ if(UNIX) target_link_libraries(rpcs3 ${X11_LIBRARIES}) endif() +target_link_libraries(rpcs3 xxhash) + if(WIN32) target_link_libraries(rpcs3 ws2_32.lib Winmm.lib Psapi.lib gdi32.lib VKstatic.1 glslang OSDependent OGLCompiler SPIRV HLSL setupapi.lib hidapi-hid Shlwapi.lib) if(NOT MSVC) diff --git a/rpcs3/Emu/RSX/Capture/rsx_capture.cpp b/rpcs3/Emu/RSX/Capture/rsx_capture.cpp new file mode 100644 index 0000000000..f782c02bbe --- /dev/null +++ b/rpcs3/Emu/RSX/Capture/rsx_capture.cpp @@ -0,0 +1,627 @@ +#include "stdafx.h" +#include "rsx_capture.h" +#include "Emu/RSX/Common/BufferUtils.h" +#include "Emu/RSX/Common/TextureUtils.h" +#include "Emu/RSX/Common/surface_store.h" +#include "Emu/RSX/GCM.h" +#include "Emu/RSX/RSXThread.h" +#include "Emu/Memory/Memory.h" + +#include "xxhash.h" + +namespace rsx +{ + namespace capture + { + u32 get_io_offset(u32 offset, u32 location) + { + switch (location) + { + case CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER: + case 
CELL_GCM_LOCATION_MAIN: + { + if (u32 result = RSXIOMem.RealAddr(offset)) + { + return offset; + } + } + case CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN: + { + if (u32 result = RSXIOMem.RealAddr(0x0e000000 + offset)) + { + return 0x0e000000 + offset; + } + } + default: return 0xFFFFFFFFu; + } + } + + void insert_mem_block_in_map(std::unordered_set& mem_changes, frame_capture_data::memory_block&& block, frame_capture_data::memory_block_data&& data) + { + u64 data_hash = 0; + if (data.data.size() > 0) + { + data_hash = XXH64(data.data.data(), data.data.size(), 0); + // using 0 to signify no block in use, so this one is 'reserved' + if (data_hash == 0) + fmt::throw_exception("Memory block data hash equal to 0"); + + block.size = data.data.size(); + block.data_state = data_hash; + + auto it = frame_capture.memory_data_map.find(data_hash); + if (it != frame_capture.memory_data_map.end()) + { + if (it->second.data != data.data) + // screw this + fmt::throw_exception("Memory map hash collision detected...cant capture"); + } + else + frame_capture.memory_data_map.insert(std::make_pair(data_hash, std::move(data))); + } + + u64 block_hash = XXH64(&block, sizeof(frame_capture_data::memory_block), 0); + mem_changes.insert(block_hash); + if (frame_capture.memory_map.find(block_hash) == frame_capture.memory_map.end()) + frame_capture.memory_map.insert(std::make_pair(block_hash, std::move(block))); + } + + void capture_draw_memory(thread* rsx) + { + // the idea here is to copy any memory that is needed to make the calls work + // todo: + // - tile / zcull state changing during other commands + // - track memory that is rendered into and ignore saving it later, this one will be tough + + if (frame_capture.replay_commands.empty()) + fmt::throw_exception("no replay commands to attach memory state to"); + + // shove the mem_changes onto the last issued command + std::unordered_set& mem_changes = frame_capture.replay_commands.back().memory_state; + + // capture fragment shader mem + 
const u32 shader_program = method_registers.shader_program_address(); + if (shader_program != 0) + { + const u32 program_location = (shader_program & 0x3) - 1; + const u32 program_offset = (shader_program & ~0x3); + + const u32 addr = get_address(program_offset, program_location); + const u32 program_start = program_hash_util::fragment_program_utils::get_fragment_program_start(vm::base(addr)); + const u32 ucode_size = program_hash_util::fragment_program_utils::get_fragment_program_ucode_size(vm::base(addr + program_start)); + + frame_capture_data::memory_block block; + block.addr = addr; + block.ioOffset = get_io_offset(program_offset, program_location); + frame_capture_data::memory_block_data block_data; + block_data.data.resize(ucode_size + program_start); + std::memcpy(block_data.data.data(), vm::base(addr), ucode_size + program_start); + insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data)); + } + + // vertex shader is passed in registers, so it can be ignored + + // save fragment tex mem + for (const auto& tex : method_registers.fragment_textures) + { + if (!tex.enabled()) + continue; + + const u32 texaddr = get_address(tex.offset(), tex.location()); + auto layout = get_subresources_layout(tex); + + // todo: dont use this function and just get size somehow + size_t texSize = 0; + for (const auto& l : layout) + texSize += l.data.size(); + + if (!texaddr || !texSize) + continue; + + frame_capture_data::memory_block block; + block.addr = texaddr; + block.ioOffset = get_io_offset(tex.offset(), tex.location()); + + frame_capture_data::memory_block_data block_data; + block_data.data.resize(texSize); + std::memcpy(block_data.data.data(), vm::base(texaddr), texSize); + insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data)); + } + + // save vertex texture mem + for (const auto& tex : method_registers.vertex_textures) + { + if (!tex.enabled()) + continue; + + const u32 texaddr = get_address(tex.offset(), tex.location()); + 
auto layout = get_subresources_layout(tex); + + // todo: dont use this function and just get size somehow + size_t texSize = 0; + for (const auto& l : layout) + texSize += l.data.size(); + + if (!texaddr || !texSize) + continue; + + frame_capture_data::memory_block block; + block.addr = texaddr; + block.ioOffset = get_io_offset(tex.offset(), tex.location()); + frame_capture_data::memory_block_data block_data; + block_data.data.resize(texSize); + std::memcpy(block_data.data.data(), vm::base(texaddr), texSize); + insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data)); + } + + // save vertex buffer memory + if (method_registers.current_draw_clause.command == draw_command::array) + { + const u32 input_mask = method_registers.vertex_attrib_input_mask(); + for (u8 index = 0; index < limits::vertex_count; ++index) + { + const bool enabled = !!(input_mask & (1 << index)); + if (!enabled) + continue; + + const auto& info = method_registers.vertex_arrays_info[index]; + if (!info.size()) + continue; + + // vert buffer + const u32 base_address = get_vertex_offset_from_base(method_registers.vertex_data_base_offset(), info.offset() & 0x7fffffff); + const u32 memory_location = info.offset() >> 31; + + const u32 addr = get_address(base_address, memory_location); + const u32 vertSize = get_vertex_type_size_on_host(info.type(), info.size()); + const u32 vertStride = info.stride(); + + for (const auto& count : method_registers.current_draw_clause.first_count_commands) + { + const u32 vertCount = count.second; + const size_t bufferSize = vertCount * vertStride + vertSize; + + frame_capture_data::memory_block block; + block.addr = addr; + block.ioOffset = get_io_offset(base_address, memory_location); + block.offset = (count.first * vertStride); + + frame_capture_data::memory_block_data block_data; + block_data.data.resize(bufferSize); + std::memcpy(block_data.data.data(), vm::base(addr + block.offset), bufferSize); + insert_mem_block_in_map(mem_changes, 
std::move(block), std::move(block_data)); + } + } + } + // save index buffer if used + else if (method_registers.current_draw_clause.command == draw_command::indexed) + { + const u32 input_mask = method_registers.vertex_attrib_input_mask(); + + const u32 base_address = method_registers.index_array_address(); + const u32 memory_location = method_registers.index_array_location(); + + const u32 base_addr = get_address(base_address, memory_location); + const u32 type_size = get_index_type_size(method_registers.index_type()); + const auto index_type = method_registers.index_type(); + + // manually parse index buffer and copy vertex buffer + u32 min_index = 0xFFFF, max_index = 0; + if (index_type == index_array_type::u32) + min_index = 0xFFFFFFFF; + + const bool is_primitive_restart_enabled = method_registers.restart_index_enabled(); + const u32 primitive_restart_index = method_registers.restart_index(); + + for (const auto& count : method_registers.current_draw_clause.first_count_commands) + { + const u32 idxFirst = count.first; + const u32 idxCount = count.second; + const u32 idxAddr = base_addr + (idxFirst * type_size); + + const size_t bufferSize = idxCount * type_size; + + frame_capture_data::memory_block block; + block.addr = base_addr; + block.ioOffset = get_io_offset(base_address, memory_location); + block.offset = (idxFirst * type_size); + + frame_capture_data::memory_block_data block_data; + block_data.data.resize(bufferSize); + std::memcpy(block_data.data.data(), vm::base(idxAddr), bufferSize); + insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data)); + + switch (index_type) + { + case index_array_type::u16: + { + auto fifo = vm::ptr::make(idxAddr); + for (u32 i = 0; i < idxCount; ++i) + { + u16 index = fifo[i]; + if (is_primitive_restart_enabled && index == (u16)primitive_restart_index) + continue; + index = (u16)get_index_from_base(index, method_registers.vertex_data_base_index()); + min_index = (u16)std::min(index, (u16)min_index); + 
max_index = (u16)std::max(index, (u16)max_index);
+						}
+						break;
+					}
+					case index_array_type::u32:
+					{
+						auto fifo = vm::ptr::make(idxAddr);
+						for (u32 i = 0; i < idxCount; ++i)
+						{
+							u32 index = fifo[i];
+							// restart markers are not real vertex indices, keep them out of the range
+							if (is_primitive_restart_enabled && index == primitive_restart_index)
+								continue;
+							index = get_index_from_base(index, method_registers.vertex_data_base_index());
+							min_index = std::min(index, min_index);
+							max_index = std::max(index, max_index);
+						}
+						break;
+					}
+					}
+				}
+
+				// min_index only stays above max_index when the loops above never saw a usable
+				// index (no index ranges, or nothing but restart markers). In that case
+				// (max_index - min_index + 1) wraps around in u32 and the copy below would use a
+				// bogus size and offset, so vertex buffers are captured only for a real range.
+				if (min_index <= max_index)
+				{
+					for (u8 index = 0; index < limits::vertex_count; ++index)
+					{
+						const bool enabled = !!(input_mask & (1 << index));
+						if (!enabled)
+							continue;
+
+						const auto& info = method_registers.vertex_arrays_info[index];
+						if (!info.size())
+							continue;
+
+						// vert buffer
+						const u32 vertStride = info.stride();
+						const u32 base_address = get_vertex_offset_from_base(method_registers.vertex_data_base_offset(), info.offset() & 0x7fffffff);
+						const u32 memory_location = info.offset() >> 31;
+
+						const u32 addr = get_address(base_address, memory_location);
+						const u32 vertSize = get_vertex_type_size_on_host(info.type(), info.size());
+
+						// covers the vertices from min_index through max_index of this attribute
+						const u32 bufferSize = vertStride * (max_index - min_index + 1) + vertSize;
+
+						frame_capture_data::memory_block block;
+						block.addr = addr;
+						block.ioOffset = get_io_offset(base_address, memory_location);
+						block.offset = (min_index * vertStride);
+
+						frame_capture_data::memory_block_data block_data;
+						block_data.data.resize(bufferSize);
+						std::memcpy(block_data.data.data(), vm::base(addr + block.offset), bufferSize);
+						insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data));
+					}
+				}
+			}
+
+			// display/tile and surface state are attached to the draw regardless of its type
+			capture_display_tile_state(rsx, frame_capture.replay_commands.back());
+			capture_surface_state(rsx, frame_capture.replay_commands.back());
+		}
+
+		// i realize these are a slight copy pasta of the rsx_method implementations but its kinda unavoidable currently
+		void capture_image_in(thread* rsx, 
frame_capture_data::replay_command& replay_command) + { + const rsx::blit_engine::transfer_operation operation = method_registers.blit_engine_operation(); + + const u16 out_x = method_registers.blit_engine_output_x(); + const u16 out_y = method_registers.blit_engine_output_y(); + const u16 out_w = method_registers.blit_engine_output_width(); + const u16 out_h = method_registers.blit_engine_output_height(); + + const u16 in_w = method_registers.blit_engine_input_width(); + const u16 in_h = method_registers.blit_engine_input_height(); + + const blit_engine::transfer_origin in_origin = method_registers.blit_engine_input_origin(); + const blit_engine::transfer_interpolator in_inter = method_registers.blit_engine_input_inter(); + const rsx::blit_engine::transfer_source_format src_color_format = method_registers.blit_engine_src_color_format(); + + const f32 in_x = std::ceil(method_registers.blit_engine_in_x()); + const f32 in_y = std::ceil(method_registers.blit_engine_in_y()); + + u16 in_pitch = method_registers.blit_engine_input_pitch(); + + if (in_w == 0 || in_h == 0 || out_w == 0 || out_h == 0) + { + return; + } + + const u32 src_offset = method_registers.blit_engine_input_offset(); + const u32 src_dma = method_registers.blit_engine_input_location(); + + const u32 in_bpp = (src_color_format == rsx::blit_engine::transfer_source_format::r5g6b5) ? 2 : 4; // bytes per pixel + const u32 in_offset = u32(in_x * in_bpp + in_pitch * in_y); + const tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf); + + frame_capture_data::memory_block block; + block.addr = src_region.address; + block.ioOffset = get_io_offset(src_region.tile ? src_region.base : src_offset + in_offset, src_dma & 0xf); + + u8* pixels_src = src_region.tile ? 
src_region.ptr + src_region.base : src_region.ptr;
+
+			// a pitch of 0 is treated as tightly packed rows
+			if (in_pitch == 0)
+			{
+				in_pitch = in_bpp * in_w;
+			}
+
+			rsx->read_barrier(src_region.address, in_pitch * in_h);
+
+			// snapshot the source pixels so the replay can restore them before the blit
+			frame_capture_data::memory_block_data block_data;
+			block_data.data.resize(in_pitch * in_h);
+			std::memcpy(block_data.data.data(), pixels_src, in_pitch * in_h);
+			insert_mem_block_in_map(replay_command.memory_state, std::move(block), std::move(block_data));
+
+			// 'capture' destination to ensure memory is alloc'd and usable in replay
+			u32 dst_offset = 0;
+			u32 dst_dma = 0;
+			// Value-initialised on purpose: the switch below leaves it untouched for any
+			// context surface other than surface2d / swizzle2d, and it is compared right
+			// after the switch. Previously that path read an indeterminate value.
+			rsx::blit_engine::transfer_destination_format dst_color_format{};
+			u32 out_pitch = 0;
+			u32 out_aligment = 64;
+
+			switch (method_registers.blit_engine_context_surface())
+			{
+			case blit_engine::context_surface::surface2d:
+				dst_dma = method_registers.blit_engine_output_location_nv3062();
+				dst_offset = method_registers.blit_engine_output_offset_nv3062();
+				dst_color_format = method_registers.blit_engine_nv3062_color_format();
+				out_pitch = method_registers.blit_engine_output_pitch_nv3062();
+				out_aligment = method_registers.blit_engine_output_alignment_nv3062();
+				break;
+
+			case blit_engine::context_surface::swizzle2d:
+				dst_dma = method_registers.blit_engine_nv309E_location();
+				dst_offset = method_registers.blit_engine_nv309E_offset();
+				dst_color_format = method_registers.blit_engine_output_format_nv309E();
+				break;
+
+			default: break;
+			}
+
+			const u32 out_bpp = (dst_color_format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? 2 : 4;
+			const s32 out_offset = out_x * out_bpp + out_pitch * out_y;
+			const tiled_region dst_region = rsx->get_tiled_address(dst_offset + out_offset, dst_dma & 0xf);
+
+			frame_capture_data::memory_block blockDst;
+			blockDst.addr = dst_region.address;
+			blockDst.ioOffset = get_io_offset(dst_region.tile ? 
dst_region.base : dst_offset + out_offset, dst_dma & 0xf); + if (blockDst.ioOffset != -1) + { + u32 blockSize = method_registers.blit_engine_context_surface() != blit_engine::context_surface::swizzle2d ? out_pitch * out_h : out_bpp * next_pow2(out_w) * next_pow2(out_h); + + blockDst.size = blockSize; + frame_capture_data::memory_block_data block_data; + insert_mem_block_in_map(replay_command.memory_state, std::move(blockDst), std::move(block_data)); + } + + capture_display_tile_state(rsx, replay_command); + } + + void capture_buffer_notify(thread* rsx, frame_capture_data::replay_command& replay_command) + { + s32 in_pitch = method_registers.nv0039_input_pitch(); + const u32 line_length = method_registers.nv0039_line_length(); + const u32 line_count = method_registers.nv0039_line_count(); + const u8 in_format = method_registers.nv0039_input_format(); + + if (!in_pitch) + { + in_pitch = line_length; + } + + u32 src_offset = method_registers.nv0039_input_offset(); + u32 src_dma = method_registers.nv0039_input_location(); + u32 src_addr = get_address(src_offset, src_dma); + + rsx->read_barrier(src_addr, in_pitch * line_count); + + const u8* src = (u8*)vm::base(src_addr); + + frame_capture_data::memory_block block; + block.addr = src_addr; + block.ioOffset = get_io_offset(src_offset, src_dma); + + frame_capture_data::memory_block_data block_data; + block_data.data.resize(in_pitch * line_count); + + for (u32 i = 0; i < line_count; ++i) + { + std::memcpy(block_data.data.data() + (line_length * i), src, line_length); + src += in_pitch; + } + + insert_mem_block_in_map(replay_command.memory_state, std::move(block), std::move(block_data)); + + // we 'capture' destination mostly to ensure that the location is allocated when replaying + u32 dst_offset = method_registers.nv0039_output_offset(); + u32 dst_dma = method_registers.nv0039_output_location(); + u32 dst_addr = get_address(dst_offset, dst_dma); + + s32 out_pitch = method_registers.nv0039_output_pitch(); + if (!out_pitch) 
+ { + out_pitch = line_length; + } + + frame_capture_data::memory_block blockDst; + blockDst.addr = dst_addr; + blockDst.ioOffset = get_io_offset(dst_offset, dst_dma); + + // only check for iooffset'd data + if (blockDst.ioOffset != -1) + { + frame_capture_data::memory_block_data block_data; + blockDst.size = out_pitch * line_count; + insert_mem_block_in_map(replay_command.memory_state, std::move(blockDst), std::move(block_data)); + } + + capture_display_tile_state(rsx, replay_command); + } + + void capture_display_tile_state(thread* rsx, frame_capture_data::replay_command& replay_command) + { + frame_capture_data::display_buffers_state dbstate; + dbstate.count = rsx->display_buffers_count; + // should this only happen on flip? + for (u32 i = 0; i < rsx->display_buffers_count; ++i) + { + const auto& db = rsx->display_buffers[i]; + dbstate.buffers[i].height = db.height; + dbstate.buffers[i].width = db.width; + dbstate.buffers[i].offset = db.offset; + dbstate.buffers[i].pitch = db.pitch; + } + + const u64 dbnum = XXH64(&dbstate, sizeof(frame_capture_data::display_buffers_state), 0); + if (frame_capture.display_buffers_map.find(dbnum) == frame_capture.display_buffers_map.end()) + frame_capture.display_buffers_map.insert(std::make_pair(dbnum, std::move(dbstate))); + + // todo: hook tile call sys_rsx call or something + frame_capture_data::tile_state tilestate; + for (u32 i = 0; i < limits::tiles_count; ++i) + { + const auto& tile = rsx->tiles[i]; + auto& tstate = tilestate.tiles[i]; + tstate.bank = tile.bank; + tstate.base = tile.base; + tstate.binded = tile.binded; + tstate.comp = tile.comp; + tstate.location = tile.location; + tstate.offset = tile.offset; + tstate.pitch = tile.pitch; + tstate.size = tile.size; + } + + for (u32 i = 0; i < limits::zculls_count; ++i) + { + const auto& zc = rsx->zculls[i]; + auto& zcstate = tilestate.zculls[i]; + zcstate.aaFormat = zc.aaFormat; + zcstate.binded = zc.binded; + zcstate.cullStart = zc.cullStart; + zcstate.height = 
zc.height; + zcstate.offset = zc.offset; + zcstate.sFunc = zc.sFunc; + zcstate.sMask = zc.sMask; + zcstate.sRef = zc.sRef; + zcstate.width = zc.width; + zcstate.zcullDir = zc.zcullDir; + zcstate.zcullFormat = zc.zcullFormat; + zcstate.zFormat = zc.zFormat; + } + + const u64 tsnum = XXH64(&tilestate, sizeof(frame_capture_data::tile_state), 0); + + if (frame_capture.tile_map.find(tsnum) == frame_capture.tile_map.end()) + frame_capture.tile_map.insert(std::make_pair(tsnum, std::move(tilestate))); + + replay_command.display_buffer_state = dbnum; + replay_command.tile_state = tsnum; + } + + // for the most part capturing this is just to make sure the iomemory is recorded/allocated and accounted for in replay + void capture_get_report(thread* rsx, frame_capture_data::replay_command& replay_command, u32 arg) + { + u32 location = 0; + u32 offset = arg & 0xffffff; + + blit_engine::context_dma report_dma = method_registers.context_dma_report(); + + switch (report_dma) + { + // ignore regular report location + case blit_engine::context_dma::to_memory_get_report: location = CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_LOCAL; return; + case blit_engine::context_dma::report_location_main: location = CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN; break; + case blit_engine::context_dma::memory_host_buffer: location = CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER; break; + default: return; + } + + u32 addr = get_address(offset, location); + + frame_capture_data::memory_block block; + block.addr = addr; + block.ioOffset = get_io_offset(offset, location); + block.size = 16; + + frame_capture_data::memory_block_data block_data; + insert_mem_block_in_map(replay_command.memory_state, std::move(block), std::move(block_data)); + } + + // This just checks the color/depth addresses and makes sure they are accounted for in io allocations + // Hopefully this works without fully having to calculate actual size + void capture_surface_state(thread* rsx, frame_capture_data::replay_command& replay_command) + { + 
const auto target = rsx::method_registers.surface_color_target(); + + u32 offset_color[] = + { + rsx::method_registers.surface_a_offset(), + rsx::method_registers.surface_b_offset(), + rsx::method_registers.surface_c_offset(), + rsx::method_registers.surface_d_offset(), + }; + + u32 context_dma_color[] = + { + rsx::method_registers.surface_a_dma(), + rsx::method_registers.surface_b_dma(), + rsx::method_registers.surface_c_dma(), + rsx::method_registers.surface_d_dma(), + }; + + auto check_add = [&replay_command](u32 offset, u32 dma) -> void + { + u32 ioOffset = get_io_offset(offset, dma); + if (ioOffset == -1) + return; + + u32 addr = get_address(offset, dma); + + frame_capture_data::memory_block block; + block.addr = addr; + block.ioOffset = ioOffset; + block.size = 64; + + frame_capture_data::memory_block_data block_data; + insert_mem_block_in_map(replay_command.memory_state, std::move(block), std::move(block_data)); + }; + + for (const auto& index : utility::get_rtt_indexes(target)) + { + check_add(offset_color[index], context_dma_color[index]); + } + + check_add(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma()); + } + + void capture_inline_transfer(thread* rsx, frame_capture_data::replay_command& replay_command, u32 idx, u32 arg) + { + const u16 x = method_registers.nv308a_x(); + const u16 y = method_registers.nv308a_y(); + + const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x << 2); + const u32 addr_offset = method_registers.blit_engine_output_offset_nv3062() + pixel_offset + idx * 4; + + // just need to capture dst for allocation later if in iomem + + u32 ioOffset = get_io_offset(addr_offset, method_registers.blit_engine_output_location_nv3062()); + if (ioOffset == -1) + return; + + u32 addr = get_address(addr_offset, method_registers.blit_engine_output_location_nv3062()); + + frame_capture_data::memory_block block; + block.addr = addr; + block.ioOffset = ioOffset; + block.size = 4; + + 
frame_capture_data::memory_block_data block_data; + insert_mem_block_in_map(replay_command.memory_state, std::move(block), std::move(block_data)); + } + } +} diff --git a/rpcs3/Emu/RSX/Capture/rsx_capture.h b/rpcs3/Emu/RSX/Capture/rsx_capture.h new file mode 100644 index 0000000000..ad2a047eda --- /dev/null +++ b/rpcs3/Emu/RSX/Capture/rsx_capture.h @@ -0,0 +1,17 @@ +#pragma once +#include "rsx_replay.h" + +namespace rsx +{ + class thread; + namespace capture + { + void capture_draw_memory(thread* rsx); + void capture_image_in(thread* rsx, frame_capture_data::replay_command& replay_command); + void capture_buffer_notify(thread* rsx, frame_capture_data::replay_command& replay_command); + void capture_display_tile_state(thread* rsx, frame_capture_data::replay_command& replay_command); + void capture_surface_state(thread* rsx, frame_capture_data::replay_command& replay_command); + void capture_get_report(thread* rsx, frame_capture_data::replay_command& replay_command, u32 arg); + void capture_inline_transfer(thread* rsx, frame_capture_data::replay_command& replay_command, u32 idx, u32 arg); + } +} diff --git a/rpcs3/Emu/RSX/Capture/rsx_replay.cpp b/rpcs3/Emu/RSX/Capture/rsx_replay.cpp new file mode 100644 index 0000000000..fd10bd1959 --- /dev/null +++ b/rpcs3/Emu/RSX/Capture/rsx_replay.cpp @@ -0,0 +1,340 @@ +#include "stdafx.h" +#include "rsx_replay.h" + +#include "Emu/System.h" +#include "Emu/Cell/lv2/sys_rsx.h" +#include "Emu/Memory/Memory.h" +#include "Emu/RSX/GSRender.h" + +#include + +namespace rsx +{ + be_t rsx_replay_thread::allocate_context() + { + const u32 contextAddr = vm::alloc(sizeof(rsx_context), vm::main); + if (contextAddr == 0) + fmt::throw_exception("Capture Replay: context alloc failed"); + const auto& contextInfo = vm::_ref(contextAddr); + + if (sys_rsx_memory_allocate(vm::get_addr(&contextInfo.mem_handle), vm::get_addr(&contextInfo.mem_addr), 0x0F900000, 0, 0, 0, 0) != CELL_OK) + fmt::throw_exception("Capture Replay: sys_rsx_memory_allocate 
failed!"); + + if (sys_rsx_context_allocate(vm::get_addr(&contextInfo.context_id), vm::get_addr(&contextInfo.dma_addr), vm::get_addr(&contextInfo.driver_info), vm::get_addr(&contextInfo.reports_addr), contextInfo.mem_handle, 0) != CELL_OK) + fmt::throw_exception("Capture Replay: sys_rsx_context_allocate failed!"); + + // 'fake' initialize usermemory + // todo: seriously, need to probly watch the replay memory map and just make sure its mapped before we copy rather than do this + vm::falloc(0x20000000, 0x10000000, vm::user_space); + + return contextInfo.context_id; + } + + std::tuple rsx_replay_thread::get_usable_fifo_range() + { + u32 fifo_size = 4; + + // run through replay commands to figure out how big command buffer needs to be + // technically we could do this in batches if it gets too big, but we should be fine + // as we aren't allocating anything on main memory, although it may make issues with iooffset later + for (const auto& rc : frame->replay_commands) + { + const u32 count = (rc.rsx_command.first >> 18) & 0x7ff; + // allocate for register plus w/e number of arguments it has + fifo_size += (count + 1) * 4; + } + + // safety check for now + // since we are allocating iobuffer, we need to make sure that any memory we use is not being used by the replay + std::map ioOffsets; + u32 lowest_iooffset = 0xFFFFFFFF; + for (const auto& mm : frame->memory_map) + { + u32 offset = mm.second.ioOffset; + lowest_iooffset = std::min(lowest_iooffset, offset); + if (offset != 0xFFFFFFFF) + { + u32 iosize = mm.second.size + mm.second.offset; + auto it = ioOffsets.find(offset); + if (it == ioOffsets.end()) + ioOffsets[offset] = iosize; + else + ioOffsets[offset] = std::max(ioOffsets[offset], iosize); + } + } + + // if we cant use fifo starting at 0, try to find a block between them + u32 fifo_start_addr = 0; + if (fifo_size >= lowest_iooffset) + { + u32 largest_free_block = 0; + u32 largest_cur_end = 0; // this keeps track of largest end, in case allocations 'overlap' + for 
(auto io = ioOffsets.begin(); io != ioOffsets.end(); ++io) + { + auto next = std::next(io); + + // last 'offset' is just end of memory + u32 nextOffset = 0x0F900000; + if (next != ioOffsets.end()) + nextOffset = next->first - 4; + + largest_cur_end = std::max(largest_cur_end, io->first + io->second + 4); + if (largest_cur_end < nextOffset) + { + u32 freeSize = nextOffset - largest_cur_end; + if (freeSize > largest_free_block) + { + fifo_start_addr = largest_cur_end; + largest_free_block = freeSize; + } + + if (largest_free_block > fifo_size) + break; + } + + if (next == ioOffsets.end()) + break; + } + + // todo: figure out *another* way to inject fifo if both ideas above fail + if (largest_free_block < fifo_size) + fmt::throw_exception("Capture Replay: no space in io for fifo commands! size: 0x%x, lowest in capture: 0x%x, largest_free_block: 0x%x", fifo_size, lowest_iooffset, largest_free_block); + } + + return std::make_tuple(fifo_start_addr, fifo_size); + } + + std::vector rsx_replay_thread::alloc_write_fifo(be_t context_id, u32 fifo_start_addr, u32 fifo_size) + { + const u32 fifo_mem = vm::alloc(fifo_size, vm::main); + if (fifo_mem == 0) + fmt::throw_exception("Capture Replay: fifo alloc failed! 
size: 0x%x", fifo_size); + + // copy commands into fifo buffer + // todo: could change rsx_command to just be values to avoid this loop, + auto fifo_addr = vm::ptr::make(fifo_mem); + u32 count = 0; + std::vector fifo_stops; + u32 currentOffset = fifo_start_addr; + for (const auto& rc : frame->replay_commands) + { + bool hasState = (rc.memory_state.size() > 0) || (rc.display_buffer_state != 0) || (rc.tile_state != 0); + if (hasState) + { + if (count != 0) + { + // todo: support memory state in the middle of incrememented command + // This shouldn't ever happen as long as captures stay in 'strict' aka non-multidraw mode + fmt::throw_exception("capture replay: state change not supported between increment commands"); + } + + fifo_stops.emplace_back(currentOffset); + } + + // spit out command + if (count == 0) + { + count = (rc.rsx_command.first >> 18) & 0x7ff; + *fifo_addr = rc.rsx_command.first; + fifo_addr++; + currentOffset += 4; + } + + if (count != 0) + { + *fifo_addr = rc.rsx_command.second; + fifo_addr++; + count--; + currentOffset += 4; + } + } + + fifo_stops.emplace_back(currentOffset); + + if (sys_rsx_context_iomap(context_id, fifo_start_addr, fifo_mem, fifo_size, 0) != CELL_OK) + fmt::throw_exception("Capture Replay: fifo mapping failed"); + + return fifo_stops; + } + + void rsx_replay_thread::apply_frame_state(be_t context_id, const frame_capture_data::replay_command& replay_cmd) + { + // apply memory needed for command + for (const auto& state : replay_cmd.memory_state) + { + auto it = frame->memory_map.find(state); + if (it == frame->memory_map.end()) + fmt::throw_exception("requested memory state for command not found in memory_map"); + + if (it->second.data_state != 0) + { + const auto& memblock = it->second; + auto it_data = frame->memory_data_map.find(it->second.data_state); + if (it_data == frame->memory_data_map.end()) + fmt::throw_exception("requested memory data state for command not found in memory_data_map"); + + const auto& data_block = 
it_data->second; + std::memcpy(vm::base(memblock.addr + memblock.offset), data_block.data.data(), data_block.data.size()); + } + } + + if (replay_cmd.display_buffer_state != 0 && replay_cmd.display_buffer_state != cs.display_buffer_hash) + { + auto it = frame->display_buffers_map.find(replay_cmd.display_buffer_state); + if (it == frame->display_buffers_map.end()) + fmt::throw_exception("requested display buffer for command not found"); + + const auto& dbstate = it->second; + for (u32 i = 0; i < dbstate.count; ++i) + { + const auto& buf = dbstate.buffers[i]; + if (cs.display_buffer_hash != 0 && memcmp(&cs.buffer_state.buffers[i], &buf, sizeof(rsx::frame_capture_data::buffer_state)) == 0) + continue; + + cs.buffer_state.buffers[i] = buf; + sys_rsx_context_attribute(context_id, 0x104, i, + (u64)dbstate.buffers[i].width << 32 | dbstate.buffers[i].height, (u64)dbstate.buffers[i].pitch << 32 | dbstate.buffers[i].offset, 0); + } + cs.display_buffer_hash = replay_cmd.display_buffer_state; + } + + if (replay_cmd.tile_state != 0 && replay_cmd.tile_state != cs.tile_hash) + { + auto it = frame->tile_map.find(replay_cmd.tile_state); + if (it == frame->tile_map.end()) + fmt::throw_exception("requested tile state command not found"); + + const auto& tstate = it->second; + for (u32 i = 0; i < limits::tiles_count; ++i) + { + const auto& tstile = tstate.tiles[i]; + if (cs.tile_hash != 0 && memcmp(&cs.tile_state.tiles[i], &tstile, sizeof(rsx::frame_capture_data::tile_info)) == 0) + continue; + + cs.tile_state.tiles[i] = tstile; + + GcmTileInfo t; + t.bank = tstile.bank; + t.base = tstile.base; + t.binded = tstile.binded; + t.comp = tstile.comp; + t.location = tstile.location; + t.offset = tstile.offset; + t.pitch = tstile.pitch; + t.size = tstile.size; + + const auto& ti = t.pack(); + sys_rsx_context_attribute(context_id, 0x300, i, (u64)ti.tile << 32 | ti.limit, (u64)ti.pitch << 32 | ti.format, 0); + } + + for (u32 i = 0; i < limits::zculls_count; ++i) + { + const auto& zctile = 
tstate.zculls[i]; + if (cs.tile_hash != 0 && memcmp(&cs.tile_state.zculls[i], &zctile, sizeof(rsx::frame_capture_data::zcull_info)) == 0) + continue; + + cs.tile_state.zculls[i] = zctile; + + GcmZcullInfo zc; + zc.aaFormat = zctile.aaFormat; + zc.binded = zctile.binded; + zc.cullStart = zctile.cullStart; + zc.height = zctile.height; + zc.offset = zctile.offset; + zc.sFunc = zctile.sFunc; + zc.sMask = zctile.sMask; + zc.sRef = zctile.sRef; + zc.width = zctile.width; + zc.zcullDir = zctile.zcullDir; + zc.zcullFormat = zctile.zcullFormat; + zc.zFormat = zctile.zFormat; + + const auto& zci = zc.pack(); + sys_rsx_context_attribute(context_id, 0x301, i, (u64)zci.region << 32 | zci.size, (u64)zci.start << 32 | zci.offset, (u64)zci.status0 << 32 | zci.status1); + } + + cs.tile_hash = replay_cmd.tile_state; + } + } + + void rsx_replay_thread::cpu_task() + { + be_t context_id = allocate_context(); + + auto fifo_info = get_usable_fifo_range(); + + const u32 fifo_start_addr = std::get<0>(fifo_info); + const u32 fifo_size = std::get<1>(fifo_info); + + auto fifo_stops = alloc_write_fifo(context_id, fifo_start_addr, fifo_size); + + // map game io + for (const auto it : frame->memory_map) + { + const auto& memblock = it.second; + if (memblock.ioOffset == 0xFFFFFFFF) + continue; + + // sanity check + if (memblock.ioOffset <= fifo_start_addr + fifo_size && fifo_start_addr <= memblock.size + memblock.offset) + fmt::throw_exception("Capture Replay: overlap detected between game io allocs and fifo alloc, algorithms botched."); + + if (sys_rsx_context_iomap(context_id, memblock.ioOffset, memblock.addr, memblock.size + memblock.offset, 0) != CELL_OK) + fmt::throw_exception("rsx io map failed for block"); + } + + while (!Emu.IsStopped()) + { + // start up fifo buffer by dumping the put ptr to first stop + sys_rsx_context_attribute(context_id, 0x001, fifo_start_addr, fifo_stops[0], 0, 0); + + auto renderer = fxm::get(); + size_t stopIdx = 0; + for (const auto& replay_cmd : 
frame->replay_commands) + { + while (Emu.IsPaused()) + std::this_thread::sleep_for(10ms); + + if (Emu.IsStopped()) + break; + + // Loop and hunt down our next state change that needs to be done + if (!((replay_cmd.memory_state.size() > 0) || (replay_cmd.display_buffer_state != 0) || (replay_cmd.tile_state != 0))) + continue; + + // wait until rsx idle and at our first 'stop' to apply state + while (!Emu.IsStopped() && (renderer->ctrl->get != renderer->ctrl->put) && (renderer->ctrl->get != fifo_stops[stopIdx])) + { + while (Emu.IsPaused()) + std::this_thread::sleep_for(10ms); + std::this_thread::yield(); + } + + stopIdx++; + + apply_frame_state(context_id, replay_cmd); + + // move put ptr to next stop + if (stopIdx >= fifo_stops.size()) + fmt::throw_exception("Capture Replay: StopIdx greater than size of fifo_stops"); + + renderer->ctrl->put = fifo_stops[stopIdx]; + } + + // dump put to end of stops, which should have actual end + u32 end = fifo_stops.back(); + renderer->ctrl->put = end; + + while (renderer->ctrl->get != end && !Emu.IsStopped()) + { + while (Emu.IsPaused()) + std::this_thread::sleep_for(10ms); + } + + // random pause to not destroy gpu + std::this_thread::sleep_for(10ms); + } + + state += cpu_flag::exit; + } +} diff --git a/rpcs3/Emu/RSX/Capture/rsx_replay.h b/rpcs3/Emu/RSX/Capture/rsx_replay.h new file mode 100644 index 0000000000..eb0977ebf0 --- /dev/null +++ b/rpcs3/Emu/RSX/Capture/rsx_replay.h @@ -0,0 +1,241 @@ +#pragma once + +#include "Emu/System.h" +#include "Emu/IdManager.h" +#include "Emu/Cell/PPUModule.h" +#include "Emu/Cell/lv2/sys_sync.h" +#include "Emu/Cell/lv2/sys_ppu_thread.h" + +#include +#include +#include +#include +#include + +namespace rsx +{ + constexpr u32 FRAME_CAPTURE_MAGIC = 0x52524300; // ascii 'RRC/0' + constexpr u32 FRAME_CAPTURE_VERSION = 0x1; + struct frame_capture_data + { + + struct memory_block_data + { + std::vector data; + template + void serialize(Archive& ar) + { + ar(data); + } + }; + + // simple block to hold 
ps3 address and data + struct memory_block + { + u32 addr{0}; + u32 ioOffset{0xFFFFFFFF}; // rsx ioOffset, -1 signifies unused + u32 offset{0}; // offset into addr/ioOffset to copy state into + u32 size{0}; // size of block needed + u64 data_state{0}; // this can be 0, in which case its just needed as an alloc + template + void serialize(Archive & ar) + { + ar(addr); + ar(ioOffset); + ar(offset); + ar(size); + ar(data_state); + } + }; + + struct replay_command + { + std::pair rsx_command; // fifo command + std::unordered_set memory_state; // index into memory_map for the various memory blocks that need applying before this command can run + u64 tile_state{0}; // tile state for this command + u64 display_buffer_state{0}; + + template + void serialize(Archive & ar) + { + ar(rsx_command); + ar(memory_state); + ar(tile_state); + ar(display_buffer_state); + } + }; + + // same thing as gcmtileinfo + struct tile_info + { + u32 location{0}; + u32 offset{0}; + u32 size{0}; + u32 pitch{0}; + u32 comp{0}; + u32 base{0}; + u32 bank{0}; + bool binded{false}; + + template + void serialize(Archive & ar) + { + ar(location); + ar(offset); + ar(size); + ar(pitch); + ar(comp); + ar(base); + ar(bank); + ar(binded); + } + }; + + // same thing as gcmzcullinfo + struct zcull_info + { + u32 offset{0}; + u32 width{0}; + u32 height{0}; + u32 cullStart{0}; + u32 zFormat{0}; + u32 aaFormat{0}; + u32 zcullDir{0}; + u32 zcullFormat{0}; + u32 sFunc{0}; + u32 sRef{0}; + u32 sMask{0}; + bool binded{false}; + + template + void serialize(Archive & ar) + { + ar(offset); + ar(width); + ar(cullStart); + ar(zFormat); + ar(aaFormat); + ar(zcullDir); + ar(zcullFormat); + ar(sFunc); + ar(sRef); + ar(sMask); + ar(binded); + } + }; + + // bleh, may need to break these out, might be unneccessary to do both always + struct tile_state + { + tile_info tiles[15]; + zcull_info zculls[8]; + + template + void serialize(Archive & ar) + { + ar(tiles); + ar(zculls); + } + }; + + struct buffer_state + { + u32 width{0}; 
+ u32 height{0}; + u32 pitch{0}; + u32 offset{0}; + + template + void serialize(Archive & ar) + { + ar(width); + ar(height); + ar(pitch); + ar(offset); + } + }; + + struct display_buffers_state + { + std::array buffers; + u32 count{0}; + + template + void serialize(Archive & ar) + { + ar(buffers); + ar(count); + } + }; + + u32 magic; + u32 version; + // hashmap of holding various states for tile + std::unordered_map tile_map; + // hashmap of various memory 'changes' that can be applied to ps3 memory + std::unordered_map memory_map; + // hashmap of memory blocks that can be applied, this is split from above for size decrease + std::unordered_map memory_data_map; + // display buffer state map + std::unordered_map display_buffers_map; + // actual command queue to hold everything above + std::vector replay_commands; + + template + void serialize(Archive & ar) + { + ar(magic); + ar(version); + ar(tile_map); + ar(memory_map); + ar(memory_data_map); + ar(display_buffers_map); + ar(replay_commands); + } + + void reset() + { + magic = FRAME_CAPTURE_MAGIC; + version = FRAME_CAPTURE_VERSION; + tile_map.clear(); + memory_map.clear(); + replay_commands.clear(); + } + }; + + + class rsx_replay_thread : public ppu_thread + { + struct rsx_context + { + be_t mem_handle; + be_t context_id; + be_t mem_addr; + be_t dma_addr; + be_t reports_addr; + be_t driver_info; + }; + + struct current_state + { + u64 tile_hash{0}; + u64 display_buffer_hash{0}; + frame_capture_data::display_buffers_state buffer_state; + frame_capture_data::tile_state tile_state; + }; + + current_state cs; + std::unique_ptr frame; + + public: + rsx_replay_thread(std::unique_ptr&& frame_data) + : ppu_thread("Rsx Capture Replay Thread"), frame(std::move(frame_data)) {}; + + virtual void cpu_task() override; + private: + be_t allocate_context(); + std::tuple get_usable_fifo_range(); + std::vector alloc_write_fifo(be_t context_id, u32 fifo_start_addr, u32 fifo_size); + void apply_frame_state(be_t context_id, const 
frame_capture_data::replay_command& replay_cmd); + }; +} diff --git a/rpcs3/Emu/RSX/Capture/rsx_trace.h b/rpcs3/Emu/RSX/Capture/rsx_trace.h new file mode 100644 index 0000000000..1b13ca2781 --- /dev/null +++ b/rpcs3/Emu/RSX/Capture/rsx_trace.h @@ -0,0 +1,33 @@ +#pragma once +#include +#include +#include +#include "Utilities/types.h" +#include "Emu/RSX/rsx_methods.h" + +namespace rsx +{ +struct frame_trace_data +{ + struct draw_state + { + std::string name; + std::pair programs; + rsx::rsx_state state; + std::array, 4> color_buffer; + std::array, 2> depth_stencil; + std::vector index; + u32 vertex_count; + + }; + + std::vector> command_queue; + std::vector draw_calls; + + void reset() + { + command_queue.clear(); + draw_calls.clear(); + } +}; +} diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index d643bff0f1..b0bc2b6755 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -8,6 +8,7 @@ #include "Common/BufferUtils.h" #include "Common/texture_cache.h" +#include "Capture/rsx_capture.h" #include "rsx_methods.h" #include "rsx_utils.h" @@ -15,6 +16,7 @@ #include "Utilities/StrUtil.h" #include +#include #include class GSRender; @@ -22,9 +24,8 @@ class GSRender; #define CMD_DEBUG 0 bool user_asked_for_frame_capture = false; -rsx::frame_capture_data frame_debug; - - +rsx::frame_trace_data frame_debug; +rsx::frame_capture_data frame_capture; namespace rsx { @@ -255,22 +256,7 @@ namespace rsx void thread::capture_frame(const std::string &name) { - frame_capture_data::draw_state draw_state = {}; - - int clip_w = rsx::method_registers.surface_clip_width(); - int clip_h = rsx::method_registers.surface_clip_height(); - draw_state.state = rsx::method_registers; - draw_state.color_buffer = std::move(copy_render_targets_to_memory()); - draw_state.depth_stencil = std::move(copy_depth_stencil_buffer_to_memory()); - - if (draw_state.state.current_draw_clause.command == rsx::draw_command::indexed) - { - draw_state.vertex_count = 0; - 
draw_state.vertex_count = draw_state.state.current_draw_clause.get_elements_count(); - auto index_raw_data_ptr = get_raw_index_array(draw_state.state.current_draw_clause.first_count_commands); - draw_state.index.resize(index_raw_data_ptr.size_bytes()); - std::copy(index_raw_data_ptr.begin(), index_raw_data_ptr.end(), draw_state.index.begin()); - } + frame_trace_data::draw_state draw_state = {}; draw_state.programs = get_programs(); draw_state.name = name; @@ -330,6 +316,9 @@ namespace rsx void thread::end() { + if (capture_current_frame) + capture::capture_draw_memory(this); + in_begin_end = false; for (u8 index = 0; index < rsx::limits::vertex_count; ++index) @@ -847,13 +836,45 @@ namespace rsx } } - method_registers.decode(reg, value); - if (capture_current_frame) { frame_debug.command_queue.push_back(std::make_pair(reg, value)); + + if (!(reg == NV406E_SET_REFERENCE || reg == NV406E_SEMAPHORE_RELEASE || reg == NV406E_SEMAPHORE_ACQUIRE)) + { + // todo: handle nv406e methods better?, do we care about call/jumps? + rsx::frame_capture_data::replay_command replay_cmd; + replay_cmd.rsx_command = std::make_pair(i == 0 ? 
cmd : 0, value); + + frame_capture.replay_commands.push_back(replay_cmd); + + // to make this easier, use the replay command 'i' positions back + auto it = std::prev(frame_capture.replay_commands.end(), i + 1); + + switch (reg) + { + case NV4097_GET_REPORT: + capture::capture_get_report(this, *it, value); + break; + case NV3089_IMAGE_IN: + capture::capture_image_in(this, *it); + break; + case NV0039_BUFFER_NOTIFY: + capture::capture_buffer_notify(this, *it); + break; + case NV4097_CLEAR_SURFACE: + capture::capture_surface_state(this, *it); + break; + default: + if (reg >= NV308A_COLOR && reg < NV3089_SET_OBJECT) + capture::capture_inline_transfer(this, *it, reg - NV308A_COLOR, value); + break; + } + } } + method_registers.decode(reg, value); + if (execute_method_call) { if (auto method = methods[reg]) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 73cab9c067..5b663b3d06 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -17,7 +17,8 @@ #include "Utilities/Thread.h" #include "Utilities/geometry.h" -#include "rsx_trace.h" +#include "Capture/rsx_trace.h" +#include "Capture/rsx_replay.h" #include "restore_new.h" #include "Utilities/variant.hpp" #include "define_new_memleakdetect.h" @@ -27,7 +28,8 @@ extern u64 get_system_time(); extern bool user_asked_for_frame_capture; -extern rsx::frame_capture_data frame_debug; +extern rsx::frame_trace_data frame_debug; +extern rsx::frame_capture_data frame_capture; namespace rsx { diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index c84c584f62..5c72b64537 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -301,7 +301,7 @@ namespace rsx template void load(progress_dialog_helper* dlg, Args&& ...args) { - if (g_cfg.video.disable_on_disk_shader_cache) + if (g_cfg.video.disable_on_disk_shader_cache || Emu.GetCachePath() == "") { return; } @@ -382,7 +382,7 @@ namespace rsx void store(pipeline_storage_type &pipeline, RSXVertexProgram &vp, 
RSXFragmentProgram &fp) { - if (g_cfg.video.disable_on_disk_shader_cache) + if (g_cfg.video.disable_on_disk_shader_cache || Emu.GetCachePath() == "") { return; } diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 34239ff352..ef0264d42c 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -7,6 +7,7 @@ #include "rsx_decode.h" #include "Emu/Cell/PPUCallback.h" #include "Emu/Cell/lv2/sys_rsx.h" +#include "Capture/rsx_capture.h" #include #include @@ -990,22 +991,44 @@ namespace rsx void flip_command(thread* rsx, u32, u32 arg) { - if (user_asked_for_frame_capture) + if (user_asked_for_frame_capture && !g_cfg.video.strict_rendering_mode) + { + // not dealing with non-strict rendering capture for now + user_asked_for_frame_capture = false; + LOG_FATAL(RSX, "RSX Capture: Capture only supported when ran with strict rendering mode."); + } + else if (user_asked_for_frame_capture && !rsx->capture_current_frame) { rsx->capture_current_frame = true; user_asked_for_frame_capture = false; frame_debug.reset(); + frame_capture.reset(); + + // random number just to jumpstart the size + frame_capture.replay_commands.reserve(8000); + + // capture first tile state with nop cmd + rsx::frame_capture_data::replay_command replay_cmd; + replay_cmd.rsx_command = std::make_pair(NV4097_NO_OPERATION, 0); + frame_capture.replay_commands.push_back(replay_cmd); + capture::capture_display_tile_state(rsx, frame_capture.replay_commands.back()); } else if (rsx->capture_current_frame) { rsx->capture_current_frame = false; std::stringstream os; cereal::BinaryOutputArchive archive(os); - archive(frame_debug); + const std::string& filePath = fs::get_config_dir() + "capture.rrc"; + archive(frame_capture); { - fs::file f(fs::get_config_dir() + "capture.txt", fs::rewrite); + // todo: 'dynamicly' create capture filename, also may want to compress this data? 
+ fs::file f(filePath, fs::rewrite); f.write(os.str()); } + + LOG_SUCCESS(RSX, "capture successful: %s", filePath.c_str()); + + frame_capture.reset(); Emu.Pause(); } diff --git a/rpcs3/Emu/RSX/rsx_trace.h b/rpcs3/Emu/RSX/rsx_trace.h deleted file mode 100644 index 8d5c424228..0000000000 --- a/rpcs3/Emu/RSX/rsx_trace.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once -#include -#include -#include -#include "Utilities/types.h" -#include "rsx_methods.h" - -#include -#include -#include -#include - -namespace rsx -{ -struct frame_capture_data -{ - struct draw_state - { - std::string name; - std::pair programs; - rsx::rsx_state state; - std::array, 4> color_buffer; - std::array, 2> depth_stencil; - std::vector index; - u32 vertex_count; - - template - void serialize(Archive & ar) - { - ar(name); - ar(programs); - ar(state); - ar(color_buffer); - ar(depth_stencil); - ar(index); - } - - }; - std::vector > command_queue; - std::vector draw_calls; - - template - void serialize(Archive & ar) - { - ar(command_queue); - ar(draw_calls); - } - - void reset() - { - command_queue.clear(); - draw_calls.clear(); - } -}; -} diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 5f4f0b0d2f..34168e60bc 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -13,9 +13,11 @@ #include "Emu/Cell/RawSPUThread.h" #include "Emu/Cell/lv2/sys_sync.h" #include "Emu/Cell/lv2/sys_prx.h" +#include "Emu/Cell/lv2/sys_rsx.h" #include "Emu/IdManager.h" #include "Emu/RSX/GSRender.h" +#include "Emu/RSX/Capture/rsx_replay.h" #include "Loader/PSF.h" #include "Loader/ELF.h" @@ -26,9 +28,13 @@ #include "../Crypto/unpkg.h" #include "yaml-cpp/yaml.h" +#include "cereal/archives/binary.hpp" + #include #include #include +#include +#include #include "Utilities/GDBDebugServer.h" @@ -233,6 +239,50 @@ void Emulator::Init() fxm::make_always()->append(fs::get_config_dir() + "/patch.yml"); } +bool Emulator::BootRsxCapture(const std::string& path) +{ + if (!fs::is_file(path)) + return false; + + std::fstream 
f(path, std::ios::in | std::ios::binary); + + cereal::BinaryInputArchive archive(f); + std::unique_ptr frame = std::make_unique(); + archive(*frame); + + if (frame->magic != rsx::FRAME_CAPTURE_MAGIC) + { + LOG_ERROR(LOADER, "Invalid rsx capture file!"); + return false; + } + + if (frame->version != rsx::FRAME_CAPTURE_VERSION) + { + LOG_ERROR(LOADER, "Rsx capture file version not supported! Expected %d, found %d", rsx::FRAME_CAPTURE_VERSION, frame->version); + return false; + } + + Init(); + + vm::init(); + + // PS3 'executable' + m_state = system_state::ready; + GetCallbacks().on_ready(); + + auto gsrender = fxm::import(Emu.GetCallbacks().get_gs_render); + if (gsrender.get() == nullptr) + return false; + + GetCallbacks().on_run(); + m_state = system_state::running; + + auto&& rsxcapture = idm::make_ptr(std::move(frame)); + rsxcapture->run(); + + return true; +} + bool Emulator::BootGame(const std::string& path, bool direct, bool add_only) { static const char* boot_list[] = diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 8f264d1a16..203073a013 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -260,6 +260,7 @@ public: } bool BootGame(const std::string& path, bool direct = false, bool add_only = false); + bool BootRsxCapture(const std::string& path); bool InstallPkg(const std::string& path); static std::string GetEmuDir(); diff --git a/rpcs3/VKGSRender.vcxproj b/rpcs3/VKGSRender.vcxproj index 9eb0d7dcd4..b27dccc791 100644 --- a/rpcs3/VKGSRender.vcxproj +++ b/rpcs3/VKGSRender.vcxproj @@ -97,6 +97,7 @@ ..\Vulkan\Vulkan-LoaderAndValidationLayers\include;..\Vulkan\glslang\glslang\Public;..\Vulkan\glslang;%(AdditionalIncludeDirectories) + /bigobj %(AdditionalOptions) diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index e83d31fc05..b8ad54d9a6 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -277,6 +277,8 @@ + + @@ -523,6 +525,9 @@ + + + @@ -532,7 +537,6 @@ - diff --git a/rpcs3/emucore.vcxproj.filters 
b/rpcs3/emucore.vcxproj.filters index 176552147f..fda167a3cd 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -63,6 +63,9 @@ {4317ac27-38e4-4f8d-9bac-496f9b00f615} + + {3f233c0b-4ee1-4c02-963f-0109d3d9c705} + @@ -743,6 +746,12 @@ Emu\Cell\lv2 + + Emu\GPU\RSX\Capture + + + Emu\GPU\RSX\Capture + @@ -1336,9 +1345,6 @@ Emu\GPU\RSX - - Emu\GPU\RSX - Emu\GPU\RSX @@ -1435,5 +1441,14 @@ Emu\Cell\Modules + + Emu\GPU\RSX\Capture + + + Emu\GPU\RSX\Capture + + + Emu\GPU\RSX\Capture + \ No newline at end of file diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index 2523f86638..d1a5134abd 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -143,7 +143,7 @@ Level3 - ..\hidapi.lib;winmm.lib;OpenAL.lib;XAudio.lib;D3D12GSRender.lib;GLGSRender.lib;shlwapi.lib;VKGSRender.lib;VKstatic.1.lib;glslang.lib;OSDependent.lib;OGLCompiler.lib;SPIRV.lib;HLSL.lib;Advapi32.lib;user32.lib;zlib.lib;..\libpng.lib;asmjit.lib;yaml-cpp.lib;discord-rpc.lib;emucore.lib;dxgi.lib;$(QTDIR)\lib\qtmain.lib;shell32.lib;opengl32.lib;$(QTDIR)\lib\Qt5OpenGL.lib;$(QTDIR)\lib\Qt5Widgets.lib;$(QTDIR)\lib\Qt5Gui.lib;$(QTDIR)\lib\Qt5Qml.lib;$(QTDIR)\lib\Qt5Network.lib;$(QTDIR)\lib\Qt5Core.lib;Qt5Core.lib;Qt5Gui.lib;Qt5Widgets.lib;Qt5WinExtras.lib;%(AdditionalDependencies) + gdi32.lib;..\hidapi.lib;winmm.lib;OpenAL.lib;XAudio.lib;D3D12GSRender.lib;GLGSRender.lib;shlwapi.lib;VKGSRender.lib;VKstatic.1.lib;glslang.lib;OSDependent.lib;OGLCompiler.lib;SPIRV.lib;HLSL.lib;Advapi32.lib;user32.lib;zlib.lib;..\libpng.lib;asmjit.lib;yaml-cpp.lib;discord-rpc.lib;emucore.lib;dxgi.lib;$(QTDIR)\lib\qtmain.lib;shell32.lib;opengl32.lib;$(QTDIR)\lib\Qt5OpenGL.lib;$(QTDIR)\lib\Qt5Widgets.lib;$(QTDIR)\lib\Qt5Gui.lib;$(QTDIR)\lib\Qt5Qml.lib;$(QTDIR)\lib\Qt5Network.lib;$(QTDIR)\lib\Qt5Core.lib;Qt5Core.lib;Qt5Gui.lib;Qt5Widgets.lib;Qt5WinExtras.lib;%(AdditionalDependencies) 
..\3rdparty\OpenAL\libs\Win64;..\Vulkan\glslang-build\hlsl\Release;..\Vulkan\glslang-build\SPIRV\Release;..\Vulkan\glslang-build\OGLCompilersDLL\Release;..\Vulkan\glslang-build\glslang\OSDependent\Windows\Release;..\Vulkan\Vulkan-build\loader\Release;..\Vulkan\glslang-build\glslang\Release;..\lib\$(CONFIGURATION)-$(PLATFORM);..\3rdparty\minidx12\Lib;..\3rdparty\discord-rpc\lib;$(QTDIR)\lib;%(AdditionalLibraryDirectories) "/MANIFESTDEPENDENCY:type='win32' name='Microsoft.Windows.Common-Controls' version='6.0.0.0' publicKeyToken='6595b64144ccf1df' language='*' processorArchitecture='*'" %(AdditionalOptions) true diff --git a/rpcs3/rpcs3qt/debugger_frame.cpp b/rpcs3/rpcs3qt/debugger_frame.cpp index 21f1b54adf..f5781632dc 100644 --- a/rpcs3/rpcs3qt/debugger_frame.cpp +++ b/rpcs3/rpcs3qt/debugger_frame.cpp @@ -45,7 +45,7 @@ debugger_frame::debugger_frame(std::shared_ptr settings, QWidget * m_go_to_addr = new QPushButton(tr("Go To Address"), this); m_go_to_pc = new QPushButton(tr("Go To PC"), this); - m_btn_capture = new QPushButton(tr("Capture"), this); + m_btn_capture = new QPushButton(tr("RSX Capture"), this); m_btn_step = new QPushButton(tr("Step"), this); m_btn_step_over = new QPushButton(tr("Step Over"), this); m_btn_run = new QPushButton(RunString, this); diff --git a/rpcs3/rpcs3qt/main_window.cpp b/rpcs3/rpcs3qt/main_window.cpp index e8baa7af70..91b7fdd8e4 100644 --- a/rpcs3/rpcs3qt/main_window.cpp +++ b/rpcs3/rpcs3qt/main_window.cpp @@ -331,6 +331,32 @@ void main_window::BootGame() Boot(path); } +void main_window::BootRsxCapture() +{ + bool stopped = false; + if (Emu.IsRunning()) + { + Emu.Pause(); + stopped = true; + } + + QString filePath = QFileDialog::getOpenFileName(this, tr("Select RSX Capture"), "", tr("RRC files (*.rrc);;All files (*.*)")); + if (filePath.isEmpty()) + { + if (stopped) Emu.Resume(); + return; + } + Emu.SetForceBoot(true); + Emu.Stop(); + + const std::string path = sstr(filePath); + + if (!Emu.BootRsxCapture(path)) + LOG_ERROR(GENERAL, 
"Capture Boot Failed"); + else + LOG_SUCCESS(LOADER, "Capture Boot Success"); +} + void main_window::InstallPkg(const QString& dropPath) { QString filePath = dropPath; @@ -1068,6 +1094,7 @@ void main_window::CreateConnects() { connect(ui->bootElfAct, &QAction::triggered, this, &main_window::BootElf); connect(ui->bootGameAct, &QAction::triggered, this, &main_window::BootGame); + connect(ui->actionopen_rsx_capture, &QAction::triggered, this, &main_window::BootRsxCapture); connect(ui->bootRecentMenu, &QMenu::aboutToShow, [=] { diff --git a/rpcs3/rpcs3qt/main_window.h b/rpcs3/rpcs3qt/main_window.h index 2aec1b60d8..0d90e66b4f 100644 --- a/rpcs3/rpcs3qt/main_window.h +++ b/rpcs3/rpcs3qt/main_window.h @@ -90,6 +90,7 @@ private Q_SLOTS: void Boot(const std::string& path, bool direct = false, bool add_only = false); void BootElf(); void BootGame(); + void BootRsxCapture(); void DecryptSPRXLibraries(); void SaveWindowState(); diff --git a/rpcs3/rpcs3qt/main_window.ui b/rpcs3/rpcs3qt/main_window.ui index 029aab5786..737e1f4646 100644 --- a/rpcs3/rpcs3qt/main_window.ui +++ b/rpcs3/rpcs3qt/main_window.ui @@ -221,6 +221,7 @@ + @@ -260,6 +261,7 @@ + @@ -951,6 +953,11 @@ Show Hidden Entries + + + Open RSX Capture + + diff --git a/rpcs3_default.props b/rpcs3_default.props index 9affed40d8..3dbcf77a13 100644 --- a/rpcs3_default.props +++ b/rpcs3_default.props @@ -3,7 +3,7 @@ - .\;..\;..\asmjit\src\asmjit;..\Utilities\yaml-cpp\include;..\3rdparty\ffmpeg\WindowsInclude;..\3rdparty\cereal\include;..\3rdparty\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);$(UniversalCRT_IncludePath);..\3rdparty\minidx12\Include;..\3rdparty\GSL\include;..\3rdparty\libpng;..\3rdparty\GL;..\3rdparty\stblib;..\3rdparty\OpenAL\include;..\3rdparty\pugixml\src;..\3rdparty\hidapi\hidapi;..\3rdparty\Optional + 
.\;..\;..\asmjit\src\asmjit;..\Utilities\yaml-cpp\include;..\3rdparty\ffmpeg\WindowsInclude;..\3rdparty\cereal\include;..\3rdparty\ffmpeg\Windows\x86_64\Include;$(VC_IncludePath);$(WindowsSDK_IncludePath);$(UniversalCRT_IncludePath);..\3rdparty\minidx12\Include;..\3rdparty\GSL\include;..\3rdparty\libpng;..\3rdparty\GL;..\3rdparty\stblib;..\3rdparty\OpenAL\include;..\3rdparty\pugixml\src;..\3rdparty\hidapi\hidapi;..\3rdparty\Optional;..\3rdparty\xxhash $(SolutionDir)lib\$(Configuration)-$(Platform)\ $(SolutionDir)lib\$(Configuration)-$(Platform)\;$(UniversalCRT_LibraryPath_x64);$(LibraryPath) $(SolutionDir)tmp\$(ProjectName)-$(Configuration)-$(Platform)\ @@ -21,7 +21,7 @@ /Zc:throwingNew - ws2_32.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib + xxhash.lib;ws2_32.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib ..\3rdparty\ffmpeg\Windows\x86_64\lib