vk: Reimplement DMA synchronization

commit 212ac19c11 (parent f06559412e)
@@ -358,6 +358,7 @@ target_sources(rpcs3_emu PRIVATE

if(TARGET 3rdparty_vulkan)
    target_sources(rpcs3_emu PRIVATE
        RSX/VK/VKCommonDecompiler.cpp
        RSX/VK/VKDMA.cpp
        RSX/VK/VKFormats.cpp
        RSX/VK/VKFragmentProgram.cpp
        RSX/VK/VKFramebuffer.cpp
@@ -1401,7 +1401,7 @@ namespace rsx
            }
        }

        void imp_flush()
        virtual void imp_flush()
        {
            AUDIT(synchronized);
rpcs3/Emu/RSX/VK/VKDMA.cpp (new file, 372 lines)
@@ -0,0 +1,372 @@
#include "stdafx.h"
|
||||
#include "VKHelpers.h"
|
||||
#include "VKResourceManager.h"
|
||||
#include "VKDMA.h"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
static constexpr size_t s_dma_block_length = 0x01000000;
|
||||
static constexpr u32 s_dma_block_mask = 0xFF000000;
|
||||
static constexpr u32 s_dma_offset_mask = 0x00FFFFFF;
|
||||
|
||||
static constexpr u32 s_page_size = 16384;
|
||||
static constexpr u32 s_page_align = s_page_size - 1;
|
||||
static constexpr u32 s_pages_per_entry = 32;
|
||||
static constexpr u32 s_bits_per_page = 2;
|
||||
static constexpr u32 s_bytes_per_entry = (s_page_size * s_pages_per_entry);
|
||||
|
||||
std::unordered_map<u32, dma_block> g_dma_pool;
|
||||
|
||||
void* dma_block::map_range(const utils::address_range& range)
|
||||
{
|
||||
if (inheritance_info.parent)
|
||||
{
|
||||
return inheritance_info.parent->map_range(range);
|
||||
}
|
||||
|
||||
verify(HERE), range.start >= base_address;
|
||||
u32 start = range.start;
|
||||
start -= base_address;
|
||||
return allocated_memory->map(start, range.length());
|
||||
}
|
||||
|
||||
void dma_block::unmap()
|
||||
{
|
||||
if (inheritance_info.parent)
|
||||
{
|
||||
inheritance_info.parent->unmap();
|
||||
}
|
||||
else
|
||||
{
|
||||
allocated_memory->unmap();
|
||||
}
|
||||
}
|
||||
|
||||
    void dma_block::init(const render_device& dev, u32 addr, size_t size)
    {
        verify(HERE), size, !(size % s_dma_block_length);
        base_address = addr;

        allocated_memory = std::make_unique<vk::buffer>(dev, size,
            dev.get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
            VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);

        page_info.resize(size / s_bytes_per_entry, ~0ull);
    }

    void dma_block::init(dma_block* parent, u32 addr, size_t size)
    {
        base_address = addr;
        inheritance_info.parent = parent;
        inheritance_info.block_offset = (addr - parent->base_address);
    }

    void dma_block::set_page_bit(u32 offset, u64 bits)
    {
        const auto entry = (offset / s_bytes_per_entry);
        const auto word = entry / s_pages_per_entry;
        const auto shift = (entry % s_pages_per_entry) * s_bits_per_page;

        page_info[word] &= ~(3ull << shift);
        page_info[word] |= (bits << shift);
    }

    bool dma_block::test_page_bit(u32 offset, u64 bits)
    {
        const auto entry = (offset / s_bytes_per_entry);
        const auto word = entry / s_pages_per_entry;
        const auto shift = (entry % s_pages_per_entry) * s_bits_per_page;

        return !!(page_info[word] & (bits << shift));
    }

    void dma_block::mark_dirty(const utils::address_range& range)
    {
        if (!inheritance_info.parent)
        {
            const u32 start = align(range.start, s_page_size);
            const u32 end = ((range.end + 1) & s_page_align);

            for (u32 page = start; page < end; page += s_page_size)
            {
                set_page_bit(page - base_address, page_bits::dirty);
            }

            if (UNLIKELY(start > range.start))
            {
                set_page_bit(start - s_page_size, page_bits::nocache);
            }

            if (UNLIKELY(end < range.end))
            {
                set_page_bit(end + s_page_size, page_bits::nocache);
            }
        }
        else
        {
            inheritance_info.parent->mark_dirty(range);
        }
    }

    void dma_block::set_page_info(u32 page_offset, const std::vector<u64>& bits)
    {
        if (!inheritance_info.parent)
        {
            auto bit_offset = page_offset / s_bytes_per_entry;
            verify(HERE), (bit_offset + bits.size()) <= page_info.size();
            std::memcpy(page_info.data() + bit_offset, bits.data(), bits.size() * sizeof(u64));
        }
        else
        {
            inheritance_info.parent->set_page_info(page_offset + inheritance_info.block_offset, bits);
        }
    }

    void dma_block::flush(const utils::address_range& range)
    {
        auto src = map_range(range);
        auto dst = vm::get_super_ptr(range.start);
        std::memcpy(dst, src, range.length());

        // TODO: Clear page bits
        unmap();
    }

    void dma_block::load(const utils::address_range& range)
    {
        auto src = vm::get_super_ptr(range.start);
        auto dst = map_range(range);
        std::memcpy(dst, src, range.length());

        // TODO: Clear page bits to synchronized
        unmap();
    }

    std::pair<u32, buffer*> dma_block::get(const utils::address_range& range)
    {
        if (inheritance_info.parent)
        {
            return inheritance_info.parent->get(range);
        }

        verify(HERE), range.start >= base_address, range.end <= end();

        // mark_dirty(range);
        return { (range.start - base_address), allocated_memory.get() };
    }

    dma_block* dma_block::head()
    {
        if (!inheritance_info.parent)
            return this;

        return inheritance_info.parent->head();
    }

    const dma_block* dma_block::head() const
    {
        if (!inheritance_info.parent)
            return this;

        return inheritance_info.parent->head();
    }

    void dma_block::set_parent(command_buffer& cmd, dma_block* parent)
    {
        verify(HERE), parent;
        if (inheritance_info.parent == parent)
        {
            // Nothing to do
            return;
        }

        inheritance_info.parent = parent;
        inheritance_info.block_offset = (base_address - parent->base_address);

        if (allocated_memory)
        {
            VkBufferCopy copy{};
            copy.srcOffset = 0;
            copy.dstOffset = inheritance_info.block_offset;
            copy.size = allocated_memory->size();
            vkCmdCopyBuffer(cmd, allocated_memory->value, parent->allocated_memory->value, 1, &copy);

            auto gc = vk::get_resource_manager();
            gc->dispose(allocated_memory);

            parent->set_page_info(inheritance_info.block_offset, page_info);
            page_info.clear();
        }
    }

    void dma_block::extend(command_buffer& cmd, const render_device &dev, size_t new_size)
    {
        verify(HERE), allocated_memory;
        if (new_size <= allocated_memory->size())
            return;

        const auto required_entries = new_size / s_bytes_per_entry;
        page_info.resize(required_entries, ~0ull);

        auto new_allocation = std::make_unique<vk::buffer>(dev, new_size,
            dev.get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
            VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);

        VkBufferCopy copy{};
        copy.size = allocated_memory->size();
        vkCmdCopyBuffer(cmd, allocated_memory->value, new_allocation->value, 1, &copy);

        auto gc = vk::get_resource_manager();
        gc->dispose(allocated_memory);
        allocated_memory = std::move(new_allocation);
    }

    u32 dma_block::start() const
    {
        return base_address;
    }

    u32 dma_block::end() const
    {
        auto source = head();
        return (source->base_address + source->allocated_memory->size() - 1);
    }

    u32 dma_block::size() const
    {
        return (allocated_memory) ? allocated_memory->size() : 0;
    }

std::pair<u32, vk::buffer*> map_dma(command_buffer& cmd, u32 local_address, u32 length)
|
||||
{
|
||||
const auto map_range = utils::address_range::start_length(local_address, length);
|
||||
const auto first_block = (local_address & s_dma_block_mask);
|
||||
const auto limit = local_address + length - 1;
|
||||
auto last_block = (limit & s_dma_block_mask);
|
||||
|
||||
if (LIKELY(first_block == last_block))
|
||||
{
|
||||
if (auto found = g_dma_pool.find(first_block); found != g_dma_pool.end())
|
||||
{
|
||||
return found->second.get(map_range);
|
||||
}
|
||||
|
||||
auto &block_info = g_dma_pool[first_block];
|
||||
block_info.init(*vk::get_current_renderer(), first_block, s_dma_block_length);
|
||||
return block_info.get(map_range);
|
||||
}
|
||||
|
||||
dma_block* block_head = nullptr;
|
||||
auto block_end = align(limit, s_dma_block_length);
|
||||
|
||||
// Reverse scan to try and find the minimum required length in case of other chaining
|
||||
for (auto block = last_block; block != first_block; block -= s_dma_block_length)
|
||||
{
|
||||
if (auto found = g_dma_pool.find(block); found != g_dma_pool.end())
|
||||
{
|
||||
const auto end = found->second.end();
|
||||
last_block = std::max(last_block, end & s_dma_block_mask);
|
||||
block_end = std::max(block_end, end + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto block = first_block; block <= last_block; block += s_dma_block_length)
|
||||
{
|
||||
auto found = g_dma_pool.find(block);
|
||||
const bool exists = (found != g_dma_pool.end());
|
||||
auto entry = exists ? &found->second : &g_dma_pool[block];
|
||||
|
||||
if (block == first_block)
|
||||
{
|
||||
block_head = entry->head();
|
||||
|
||||
if (exists)
|
||||
{
|
||||
if (entry->end() < limit)
|
||||
{
|
||||
auto new_length = block_end - block_head->start();
|
||||
block_head->extend(cmd, *vk::get_current_renderer(), new_length);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto required_size = (block_end - block);
|
||||
block_head->init(*vk::get_current_renderer(), block, required_size);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (exists)
|
||||
{
|
||||
entry->set_parent(cmd, block_head);
|
||||
}
|
||||
else
|
||||
{
|
||||
entry->init(block_head, block, s_dma_block_length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
verify(HERE), block_head;
|
||||
return block_head->get(map_range);
|
||||
}
|
||||
|
||||
    template<bool load>
    void sync_dma_impl(u32 local_address, u32 length)
    {
        const auto limit = local_address + length - 1;
        while (length)
        {
            u32 block = (local_address & s_dma_block_mask);
            if (auto found = g_dma_pool.find(block); found != g_dma_pool.end())
            {
                const auto sync_end = std::min(limit, found->second.end());
                const auto range = utils::address_range::start_end(local_address, sync_end);

                if constexpr (load)
                {
                    found->second.load(range);
                }
                else
                {
                    found->second.flush(range);
                }

                if (UNLIKELY(sync_end < limit))
                {
                    // Technically legal but assuming a map->flush usage, this should not happen
                    // Optimizations could in theory batch together multiple transfers though
                    LOG_ERROR(RSX, "Sink request spans multiple allocated blocks!");
                    const auto write_end = (sync_end + 1u);
                    const auto written = (write_end - local_address);
                    length -= written;
                    local_address = write_end;
                    continue;
                }

                break;
            }
            else
            {
                LOG_ERROR(RSX, "Sync command on range not mapped!");
                return;
            }
        }
    }

    void load_dma(u32 local_address, u32 length)
    {
        sync_dma_impl<true>(local_address, length);
    }

    void flush_dma(u32 local_address, u32 length)
    {
        sync_dma_impl<false>(local_address, length);
    }

    void clear_dma_resources()
    {
        g_dma_pool.clear();
    }
}
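
The pooling granularity above is worth spelling out: map_dma and sync_dma_impl resolve a guest address to a 16 MiB block by masking with s_dma_block_mask, and a request that crosses a block boundary takes the chaining path. The following sketch is not part of the commit; it only mirrors that arithmetic with hypothetical addresses.

    #include <cstdint>
    using u32 = std::uint32_t;

    // Illustrative only: reproduces the block arithmetic used by map_dma above.
    constexpr u32 s_dma_block_mask = 0xFF000000;                   // one block spans 16 MiB
    constexpr u32 local_address    = 0x00FFF000;                   // near the end of block 0 (hypothetical)
    constexpr u32 length           = 0x2000;                       // 8 KiB, spills into the next block

    constexpr u32 first_block = local_address & s_dma_block_mask;  // 0x00000000
    constexpr u32 limit       = local_address + length - 1;        // 0x01000FFF
    constexpr u32 last_block  = limit & s_dma_block_mask;          // 0x01000000

    // first_block != last_block, so map_dma takes the chaining path: the block at
    // 0x01000000 is linked to the head block at 0x00000000 (via set_parent, or the
    // parent-taking init above), and the head allocation is extended to cover the range.
    static_assert(first_block != last_block, "request crosses a DMA block boundary");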

rpcs3/Emu/RSX/VK/VKDMA.h (new file, 57 lines)
@@ -0,0 +1,57 @@
#pragma once
#include "VKHelpers.h"

namespace vk
{
    std::pair<u32, vk::buffer*> map_dma(command_buffer& cmd, u32 local_address, u32 length);
    void load_dma(u32 local_address, u32 length);
    void flush_dma(u32 local_address, u32 length);

    void clear_dma_resources();

    class dma_block
    {
        enum page_bits
        {
            synchronized = 0,
            dirty = 1,
            nocache = 3
        };

        struct
        {
            dma_block* parent = nullptr;
            u32 block_offset = 0;
        }
        inheritance_info;

        u32 base_address = 0;
        std::unique_ptr<buffer> allocated_memory;
        std::vector<u64> page_info;

        void* map_range(const utils::address_range& range);
        void unmap();

        void set_page_bit(u32 page, u64 bits);
        bool test_page_bit(u32 page, u64 bits);
        void mark_dirty(const utils::address_range& range);
        void set_page_info(u32 page_offset, const std::vector<u64>& bits);

    public:

        void init(const render_device& dev, u32 addr, size_t size);
        void init(dma_block* parent, u32 addr, size_t size);
        void flush(const utils::address_range& range);
        void load(const utils::address_range& range);
        std::pair<u32, buffer*> get(const utils::address_range& range);

        u32 start() const;
        u32 end() const;
        u32 size() const;

        dma_block* head();
        const dma_block* head() const;
        void set_parent(command_buffer& cmd, dma_block* parent);
        void extend(command_buffer& cmd, const render_device& dev, size_t new_size);
    };
}
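
To make the interplay of these entry points concrete, here is a minimal, hypothetical caller in the spirit of the texture-cache changes further below: map the guest range to an offset inside a pooled DMA buffer, record a GPU copy into it, and, once the GPU work has completed, write the staged data back to guest memory. The function name and the width/height parameters are assumptions for illustration only; everything else is taken from the declarations and call sites in this commit.

    // Hypothetical usage sketch (not part of the commit). Assumes 'cmd' is a
    // recording vk::command_buffer and 'target' is an image already in a
    // transfer-source layout, as in the texture cache below.
    static void example_readback(vk::command_buffer& cmd, vk::image* target,
                                 const utils::address_range& range, u32 width, u32 height)
    {
        // Resolve the guest range to { offset, buffer } inside a pooled DMA block.
        const auto mapping = vk::map_dma(cmd, range.start, range.length());

        // Record the GPU-side copy into the DMA buffer at the returned offset.
        VkBufferImageCopy region = {};
        region.bufferOffset = mapping.first;
        region.imageSubresource = { target->aspect(), 0, 0, 1 };
        region.imageExtent = { width, height, 1 };
        vkCmdCopyImageToBuffer(cmd, target->value, target->current_layout, mapping.second->value, 1, &region);

        // ... submit 'cmd' and wait for the copy to complete ...

        // Copy the staged contents back into guest memory (see dma_block::flush).
        vk::flush_dma(range.start, range.length());
    }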

@@ -6,6 +6,7 @@
#include "VKFramebuffer.h"
#include "VKResolveHelper.h"
#include "VKResourceManager.h"
#include "VKDMA.h"
#include "Utilities/mutex.h"

namespace vk

@@ -265,6 +266,7 @@ namespace vk
    vk::clear_renderpass_cache(dev);
    vk::clear_framebuffer_cache();
    vk::clear_resolve_helpers();
    vk::clear_dma_resources();
    vk::get_resource_manager()->destroy();

    g_null_texture.reset();
@@ -4,6 +4,7 @@
#include "VKGSRender.h"
#include "VKCompute.h"
#include "VKResourceManager.h"
#include "VKDMA.h"
#include "Emu/System.h"
#include "../Common/TextureUtils.h"
#include "Utilities/mutex.h"

@@ -39,7 +40,6 @@ namespace vk
        VkEvent dma_fence = VK_NULL_HANDLE;
        vk::render_device* m_device = nullptr;
        vk::viewable_image *vram_texture = nullptr;
        std::unique_ptr<vk::buffer> dma_buffer;

    public:
        using baseclass::cached_texture_section;

@@ -73,7 +73,7 @@ namespace vk
            if (!flushed)
            {
                // Reset fence
                verify(HERE), m_device, dma_buffer, dma_fence;
                verify(HERE), m_device, dma_fence;
                vk::get_resource_manager()->dispose(dma_fence);
            }

@@ -88,10 +88,9 @@ namespace vk

        void release_dma_resources()
        {
            if (dma_buffer)
            if (dma_fence)
            {
                auto gc = vk::get_resource_manager();
                gc->dispose(dma_buffer);
                gc->dispose(dma_fence);
            }
        }

@@ -187,12 +186,6 @@ namespace vk
                vkCreateEvent(*m_device, &createInfo, nullptr, &dma_fence);
            }

            if (!dma_buffer)
            {
                auto memory_type = m_device->get_memory_mapping().host_visible_coherent;
                dma_buffer = std::make_unique<vk::buffer>(*m_device, align(get_section_size(), 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
            }

            vk::image *locked_resource = vram_texture;
            u32 transfer_width = width;
            u32 transfer_height = height;

@@ -230,21 +223,52 @@ namespace vk

            verify(HERE), target->current_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;

            // Handle any format conversions using compute tasks
            vk::cs_shuffle_base *shuffle_kernel = nullptr;
            // TODO: Read back stencil values (is this really necessary?)
            const auto internal_bpp = vk::get_format_texel_width(vram_texture->format());
            const auto valid_range = get_confirmed_range();
            real_pitch = internal_bpp * transfer_width;

            if (vram_texture->format() == VK_FORMAT_D24_UNORM_S8_UINT)
            u32 transfer_x = 0, transfer_y = 0;
            if (const auto section_range = get_section_range(); section_range != valid_range)
            {
                shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_se_d24x8>();
                if (const auto offset = (valid_range.start - get_section_base()))
                {
                    transfer_y = offset / rsx_pitch;
                    transfer_x = (offset % rsx_pitch) / internal_bpp;

                    verify(HERE), transfer_width >= transfer_x, transfer_height >= transfer_y;
                    transfer_width -= transfer_x;
                    transfer_height -= transfer_y;
                }

                if (const auto tail = (section_range.end - valid_range.end))
                {
                    const auto row_count = tail / rsx_pitch;

                    verify(HERE), transfer_height >= row_count;
                    transfer_height -= row_count;
                }
            }
            else if (vram_texture->format() == VK_FORMAT_D32_SFLOAT_S8_UINT)
            {
                shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_se_f32_d24x8>();
            }
            else if (pack_unpack_swap_bytes)

            if ((vram_texture->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) ||
                pack_unpack_swap_bytes)
            {
                const auto section_length = valid_range.length();
                const auto transfer_pitch = transfer_width * internal_bpp;
                const auto task_length = transfer_pitch * transfer_height;

                auto working_buffer = vk::get_scratch_buffer();
                auto final_mapping = vk::map_dma(cmd, valid_range.start, section_length);

                VkBufferImageCopy region = {};
                region.imageSubresource = { vram_texture->aspect(), 0, 0, 1 };
                region.imageOffset = { (s32)transfer_x, (s32)transfer_y, 0 };
                region.imageExtent = { transfer_width, transfer_height, 1 };
                vk::copy_image_to_buffer(cmd, target, working_buffer, region);

                const auto texel_layout = vk::get_format_element_size(vram_texture->format());
                const auto elem_size = texel_layout.first;
                vk::cs_shuffle_base *shuffle_kernel;

                if (elem_size == 2)
                {

@@ -254,38 +278,60 @@ namespace vk
                {
                    shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_32>();
                }
            }
            else
            {
                fmt::throw_exception("Unreachable" HERE);
            }

            // Do not run the compute task on host visible memory
            vk::buffer* mem_target = shuffle_kernel ? vk::get_scratch_buffer() : dma_buffer.get();

            // TODO: Read back stencil values (is this really necessary?)
            VkBufferImageCopy region = {};
            region.imageSubresource = {vram_texture->aspect() & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1};
            region.imageExtent = {transfer_width, transfer_height, 1};
            vkCmdCopyImageToBuffer(cmd, target->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, mem_target->value, 1, &region);

            locked_resource->pop_layout(cmd);
            real_pitch = vk::get_format_texel_width(vram_texture->format()) * transfer_width;

                if (shuffle_kernel)
                {
                    verify (HERE), mem_target->value != dma_buffer->value;

                    vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, get_section_size(),
                    vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
                        VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                        VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

                    shuffle_kernel->run(cmd, mem_target, get_section_size());
                    shuffle_kernel->run(cmd, working_buffer, task_length);

                    vk::insert_buffer_memory_barrier(cmd, mem_target->value, 0, get_section_size(),
                    vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
                        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);

                VkBufferCopy copy = {};
                copy.size = get_section_size();
                vkCmdCopyBuffer(cmd, mem_target->value, dma_buffer->value, 1, &copy);
                if (LIKELY(rsx_pitch == real_pitch))
                {
                    VkBufferCopy copy = {};
                    copy.dstOffset = final_mapping.first;
                    copy.size = section_length;
                    vkCmdCopyBuffer(cmd, working_buffer->value, final_mapping.second->value, 1, &copy);
                }
                else
                {
                    std::vector<VkBufferCopy> copy;
                    copy.reserve(transfer_height);

                    u32 dst_offset = final_mapping.first;
                    u32 src_offset = 0;

                    for (unsigned row = 0; row < transfer_height; ++row)
                    {
                        copy.push_back({src_offset, dst_offset, transfer_pitch});
                        src_offset += real_pitch;
                        dst_offset += rsx_pitch;
                    }

                    vkCmdCopyBuffer(cmd, working_buffer->value, final_mapping.second->value, transfer_height, copy.data());
                }
            }
            else
            {
                VkBufferImageCopy region = {};
                region.bufferRowLength = (rsx_pitch / internal_bpp);
                region.imageSubresource = { vram_texture->aspect(), 0, 0, 1 };
                region.imageOffset = { (s32)transfer_x, (s32)transfer_y, 0 };
                region.imageExtent = { transfer_width, transfer_height, 1 };

                auto mapping = vk::map_dma(cmd, valid_range.start, valid_range.length());
                region.bufferOffset = mapping.first;
                vkCmdCopyImageToBuffer(cmd, target->value, target->current_layout, mapping.second->value, 1, &region);
            }

            locked_resource->pop_layout(cmd);

            if (UNLIKELY(synchronized))
            {

@@ -314,7 +360,7 @@ namespace vk
        /**
         * Flush
         */
        void* map_synchronized(u32 offset, u32 size)
        void imp_flush() override
        {
            AUDIT(synchronized);

@@ -322,12 +368,8 @@ namespace vk
            vk::wait_for_event(dma_fence, GENERAL_WAIT_TIMEOUT);
            vkResetEvent(*m_device, dma_fence);

            return dma_buffer->map(offset, size);
        }

        void finish_flush()
        {
            dma_buffer->unmap();
            const auto range = get_confirmed_range();
            vk::flush_dma(range.start, range.length());

            if (context == rsx::texture_upload_context::framebuffer_storage)
            {

@@ -336,6 +378,11 @@ namespace vk
            }
        }

        void *map_synchronized(u32, u32)
        { return nullptr; }

        void finish_flush()
        {}

        /**
         * Misc
@@ -46,7 +46,7 @@ namespace rsx
        u8* buf = buffer;

        // Read the whole buffer from source
        for (u32 y = 0; y < clip_h; ++y)
        for (int y = 0; y < clip_h; ++y)
        {
            std::memcpy(buf, src, buffer_pitch);
            src += src_pitch;

@@ -56,7 +56,7 @@ namespace rsx
        buf = buffer;

        // Write to destination
        for (u32 y = 0; y < clip_h; ++y)
        for (int y = 0; y < clip_h; ++y)
        {
            std::memcpy(dst, buf, buffer_pitch);
            dst += dst_pitch;
@@ -25,6 +25,7 @@
  <ItemGroup>
    <ClInclude Include="Emu\RSX\VK\VKCommonDecompiler.h" />
    <ClInclude Include="Emu\RSX\VK\VKCompute.h" />
    <ClInclude Include="Emu\RSX\VK\VKDMA.h" />
    <ClInclude Include="Emu\RSX\VK\VKFormats.h" />
    <ClInclude Include="Emu\RSX\VK\VKFragmentProgram.h" />
    <ClInclude Include="Emu\RSX\VK\VKFramebuffer.h" />

@@ -43,6 +44,7 @@
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="Emu\RSX\VK\VKCommonDecompiler.cpp" />
    <ClCompile Include="Emu\RSX\VK\VKDMA.cpp" />
    <ClCompile Include="Emu\RSX\VK\VKFormats.cpp" />
    <ClCompile Include="Emu\RSX\VK\VKFragmentProgram.cpp" />
    <ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" />
@@ -58,6 +58,9 @@
    <ClInclude Include="Emu\RSX\VK\VKFramebuffer.h">
      <Filter>Source Files</Filter>
    </ClInclude>
    <ClInclude Include="Emu\RSX\VK\VKDMA.h">
      <Filter>Source Files</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="Emu\RSX\VK\VKGSRender.cpp">

@@ -105,5 +108,8 @@
    <ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="Emu\RSX\VK\VKDMA.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
</Project>