Mirror of https://github.com/RPCS3/rpcs3.git — synced 2024-11-25 20:22:30 +01:00

vk: Add host-imported DMA buffers

This commit is contained in:
kd-11 2021-01-10 20:25:02 +03:00 committed by kd-11
parent a1ab6c28c1
commit 58d367d704
8 changed files with 245 additions and 50 deletions

View File

@ -10,17 +10,17 @@
namespace vk
{
static constexpr usz s_dma_block_length = 0x01000000;
static constexpr u32 s_dma_block_mask = 0xFF000000;
//static constexpr u32 s_dma_offset_mask = 0x00FFFFFF;
static constexpr usz s_dma_block_length = 0x00001000;//0x01000000;
static constexpr u32 s_dma_block_mask = 0xFFFFF000;//0xFF000000;
//static constexpr u32 s_dma_offset_mask = 0x00000FFF;//0x00FFFFFF;
static constexpr u32 s_page_size = 16384;
static constexpr u32 s_page_align = s_page_size - 1;
static constexpr u32 s_pages_per_entry = 32;
static constexpr u32 s_bits_per_page = 2;
static constexpr u32 s_bytes_per_entry = (s_page_size * s_pages_per_entry);
static constexpr u32 s_page_size = 16384;
static constexpr u32 s_page_align = s_page_size - 1;
static constexpr u32 s_pages_per_entry = 32;
static constexpr u32 s_bits_per_page = 2;
static constexpr u32 s_bytes_per_entry = (s_page_size * s_pages_per_entry);
std::unordered_map<u32, dma_block> g_dma_pool;
std::unordered_map<u32, std::unique_ptr<dma_block>> g_dma_pool;
void* dma_block::map_range(const utils::address_range& range)
{
@ -47,16 +47,28 @@ namespace vk
}
}
// Allocates (or re-allocates) the host-visible staging buffer backing this DMA block.
// A previously held buffer is not destroyed inline; it is handed to the resource
// manager for deferred disposal so in-flight GPU work referencing it stays valid.
void dma_block::allocate(const render_device& dev, usz size)
{
	if (allocated_memory)
	{
		// Acquired blocks are always to be assumed dirty. It is not possible to synchronize host access and inline
		// buffer copies without causing weird issues. Overlapped incomplete data ends up overwriting host-uploaded data.
		auto gc = vk::get_resource_manager();
		gc->dispose(allocated_memory);
	}

	// Host-coherent memory: CPU writes are visible without explicit flushes.
	// Usage covers both directions (upload source and readback destination).
	allocated_memory = std::make_unique<vk::buffer>(dev, size,
		dev.get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
}
void dma_block::init(const render_device& dev, u32 addr, usz size)
{
ensure(size);
ensure(!(size % s_dma_block_length));
base_address = addr;
allocated_memory = std::make_unique<vk::buffer>(dev, size,
dev.get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
allocate(dev, size);
page_info.resize(size / s_bytes_per_entry, ~0ull);
}
@ -70,7 +82,7 @@ namespace vk
void dma_block::set_page_bit(u32 offset, u64 bits)
{
const auto entry = (offset / s_bytes_per_entry);
const auto word = entry / s_pages_per_entry;
const auto word = entry / s_pages_per_entry;
const auto shift = (entry % s_pages_per_entry) * s_bits_per_page;
page_info[word] &= ~(3 << shift);
@ -202,24 +214,16 @@ namespace vk
}
}
void dma_block::extend(const command_buffer& cmd, const render_device &dev, usz new_size)
void dma_block::extend(const command_buffer& cmd, const render_device& dev, usz new_size)
{
ensure(allocated_memory);
if (new_size <= allocated_memory->size())
return;
allocate(dev, new_size);
const auto required_entries = new_size / s_bytes_per_entry;
page_info.resize(required_entries, ~0ull);
auto new_allocation = std::make_unique<vk::buffer>(dev, new_size,
dev.get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
// Acquired blocks are always to be assumed dirty. It is not possible to synchronize host access and inline
// buffer copies without causing weird issues. Overlapped incomplete data ends up overwriting host-uploaded data.
auto gc = vk::get_resource_manager();
gc->dispose(allocated_memory);
allocated_memory = std::move(new_allocation);
}
u32 dma_block::start() const
@ -238,6 +242,48 @@ namespace vk
return (allocated_memory) ? allocated_memory->size() : 0;
}
// Host-imported variant: instead of allocating fresh device memory, imports the
// emulated memory at base_address directly as a VkBuffer (external-memory-host path).
// The superseded buffer, if any, is disposed through the resource manager.
void dma_block_EXT::allocate(const render_device& dev, usz size)
{
	if (allocated_memory)
	{
		// Acquired blocks are always to be assumed dirty. It is not possible to synchronize host access and inline
		// buffer copies without causing weird issues. Overlapped incomplete data ends up overwriting host-uploaded data.
		auto gc = vk::get_resource_manager();
		gc->dispose(allocated_memory);
	}

	// Uses the host-pointer-importing buffer constructor; the pointer is the
	// superblock view of guest memory at base_address.
	allocated_memory = std::make_unique<vk::buffer>(dev,
		VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
		vm::get_super_ptr<void>(base_address),
		size);
}
// For host-imported blocks the CPU already sees the buffer contents; "mapping"
// a range reduces to translating the guest address into its superblock pointer.
void* dma_block_EXT::map_range(const utils::address_range& range)
{
	const auto host_ptr = vm::get_super_ptr<void>(range.start);
	return host_ptr;
}
// Nothing to undo: map_range() returns a persistent host pointer, so there is
// no vkUnmapMemory-style teardown for host-imported blocks.
void dma_block_EXT::unmap()
{
	// NOP
}
// No explicit flush needed: the buffer aliases host memory directly, so CPU
// writes are already the backing store (range is intentionally unused).
void dma_block_EXT::flush(const utils::address_range& range)
{
	// NOP
}
// No explicit load needed either: reads from the buffer observe guest memory
// directly for host-imported blocks (range is intentionally unused).
void dma_block_EXT::load(const utils::address_range& range)
{
	// NOP
}
void create_dma_block(std::unique_ptr<dma_block>& block)
{
// TODO
block.reset(new dma_block_EXT());
}
std::pair<u32, vk::buffer*> map_dma(const command_buffer& cmd, u32 local_address, u32 length)
{
const auto map_range = utils::address_range::start_length(local_address, length);
@ -247,17 +293,19 @@ namespace vk
if (auto found = g_dma_pool.find(first_block); found != g_dma_pool.end())
{
if (found->second.end() >= limit)
if (found->second->end() >= limit)
{
return found->second.get(map_range);
return found->second->get(map_range);
}
}
if (first_block == last_block) [[likely]]
{
auto &block_info = g_dma_pool[first_block];
block_info.init(*g_render_device, first_block, s_dma_block_length);
return block_info.get(map_range);
if (!block_info) create_dma_block(block_info);
block_info->init(*g_render_device, first_block, s_dma_block_length);
return block_info->get(map_range);
}
dma_block* block_head = nullptr;
@ -268,7 +316,7 @@ namespace vk
{
if (auto found = g_dma_pool.find(block); found != g_dma_pool.end())
{
const auto end = found->second.end();
const auto end = found->second->end();
last_block = std::max(last_block, end & s_dma_block_mask);
block_end = std::max(block_end, end + 1);
@ -279,8 +327,10 @@ namespace vk
for (auto block = first_block; block <= last_block; block += s_dma_block_length)
{
auto found = g_dma_pool.find(block);
const bool exists = (found != g_dma_pool.end());
auto entry = exists ? &found->second : &g_dma_pool[block];
auto &entry = g_dma_pool[block];
const bool exists = !!entry;
if (!exists) create_dma_block(entry);
if (block == first_block)
{
@ -326,16 +376,16 @@ namespace vk
u32 block = (local_address & s_dma_block_mask);
if (auto found = g_dma_pool.find(block); found != g_dma_pool.end())
{
const auto sync_end = std::min(limit, found->second.end());
const auto sync_end = std::min(limit, found->second->end());
const auto range = utils::address_range::start_end(local_address, sync_end);
if constexpr (load)
{
found->second.load(range);
found->second->load(range);
}
else
{
found->second.flush(range);
found->second->flush(range);
}
if (sync_end < limit) [[unlikely]]

View File

@ -12,6 +12,7 @@ namespace vk
class dma_block
{
protected:
enum page_bits
{
synchronized = 0,
@ -30,8 +31,9 @@ namespace vk
std::unique_ptr<buffer> allocated_memory;
std::vector<u64> page_info;
void* map_range(const utils::address_range& range);
void unmap();
virtual void allocate(const render_device& dev, usz size);
virtual void* map_range(const utils::address_range& range);
virtual void unmap();
void set_page_bit(u32 page, u64 bits);
bool test_page_bit(u32 page, u64 bits);
@ -40,10 +42,10 @@ namespace vk
public:
void init(const render_device& dev, u32 addr, usz size);
void init(dma_block* parent, u32 addr, usz size);
void flush(const utils::address_range& range);
void load(const utils::address_range& range);
virtual void init(const render_device& dev, u32 addr, usz size);
virtual void init(dma_block* parent, u32 addr, usz size);
virtual void flush(const utils::address_range& range);
virtual void load(const utils::address_range& range);
std::pair<u32, buffer*> get(const utils::address_range& range);
u32 start() const;
@ -52,7 +54,19 @@ namespace vk
dma_block* head();
const dma_block* head() const;
void set_parent(const command_buffer& cmd, dma_block* parent);
void extend(const command_buffer& cmd, const render_device& dev, usz new_size);
virtual void set_parent(const command_buffer& cmd, dma_block* parent);
virtual void extend(const command_buffer& cmd, const render_device& dev, usz new_size);
};
// DMA block backed by host-imported memory (VK_EXT_external_memory_host):
// the guest memory itself is imported as the VkBuffer, so map/flush/load
// become no-ops or simple pointer translations.
// NOTE(review): instances are deleted through std::unique_ptr<dma_block>;
// confirm that dma_block declares a virtual destructor (not visible here).
class dma_block_EXT: public dma_block
{
private:
	// Imports guest memory instead of allocating device memory.
	void allocate(const render_device& dev, usz size) override;
	// Translates guest address to host pointer; no real mapping occurs.
	void* map_range(const utils::address_range& range) override;
	void unmap() override;

public:
	// Both are NOPs: the buffer aliases host memory directly.
	void flush(const utils::address_range& range) override;
	void load(const utils::address_range& range) override;
};
}

View File

@ -66,6 +66,53 @@ namespace vk
vkBindBufferMemory(dev, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset());
}
// Constructs a VkBuffer that imports an existing host allocation
// (VK_EXT_external_memory_host) instead of binding freshly allocated memory.
// host_pointer/size must satisfy the implementation's alignment rules
// (minImportedHostPointerAlignment) — assumed to be guaranteed by the caller.
buffer::buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size)
	: m_device(dev)
{
	info.size = size;
	info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	info.flags = 0;
	info.usage = usage;

	// Chain the external-memory declaration so the driver knows this buffer
	// will be bound to imported host-allocation memory.
	VkExternalMemoryBufferCreateInfoKHR ex_info;
	ex_info.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR;
	ex_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
	ex_info.pNext = nullptr;
	info.pNext = &ex_info;

	CHECK_RESULT(vkCreateBuffer(m_device, &info, nullptr, &value));

	auto& memory_map = dev.get_memory_mapping();
	u32 memory_type_index = memory_map.host_visible_coherent;
	VkFlags access_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;

	// The extension entrypoint must have been loaded (device advertised
	// VK_EXT_external_memory_host support).
	ensure(memory_map.getMemoryHostPointerPropertiesEXT);
	VkMemoryHostPointerPropertiesEXT memory_properties{};
	memory_properties.sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT;
	// NOTE(review): the VkResult of this query is not checked; on failure
	// memory_properties.memoryTypeBits stays 0 and we fall into the
	// mismatch path below — confirm this is the intended handling.
	memory_map.getMemoryHostPointerPropertiesEXT(dev, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, host_pointer, &memory_properties);

	VkMemoryRequirements memory_reqs;
	vkGetBufferMemoryRequirements(m_device, value, &memory_reqs);

	// Memory type must satisfy both the buffer's requirements and the host
	// pointer's import capabilities.
	auto required_memory_type_bits = memory_reqs.memoryTypeBits & memory_properties.memoryTypeBits;
	if (!required_memory_type_bits)
	{
		// AMD driver bug. Buffers created with external memory extension return type bits of 0
		rsx_log.warning("Could not match buffer requirements and host pointer properties.");
		required_memory_type_bits = memory_properties.memoryTypeBits;
	}

	if (!dev.get_compatible_memory_type(required_memory_type_bits, access_flags, &memory_type_index))
	{
		fmt::throw_exception("No compatible memory type was found!");
	}

	// Import the host allocation as device memory and bind it to the buffer.
	memory = std::make_unique<memory_block_host>(m_device, host_pointer, size, memory_type_index);
	vkBindBufferMemory(dev, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset());
}
buffer::~buffer()
{
vkDestroyBuffer(m_device, value, nullptr);

View File

@ -30,6 +30,7 @@ namespace vk
std::unique_ptr<vk::memory_block> memory;
buffer(const vk::render_device& dev, u64 size, u32 memory_type_index, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags);
buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size);
~buffer();
void* map(u64 offset, u64 size);

View File

@ -56,6 +56,7 @@ namespace vk
stencil_export_support = device_extensions.is_supported(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
conditional_render_support = device_extensions.is_supported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
external_memory_host_support = device_extensions.is_supported(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
unrestricted_depth_range_support = device_extensions.is_supported(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
}
@ -262,6 +263,11 @@ namespace vk
requested_extensions.push_back(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
}
if (pgpu->external_memory_host_support)
{
requested_extensions.push_back(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
}
enabled_features.robustBufferAccess = VK_TRUE;
enabled_features.fullDrawIndexUint32 = VK_TRUE;
enabled_features.independentBlend = VK_TRUE;
@ -362,6 +368,11 @@ namespace vk
m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);
if (pgpu->external_memory_host_support)
{
memory_map.getMemoryHostPointerPropertiesEXT = reinterpret_cast<PFN_vkGetMemoryHostPointerPropertiesEXT>(vkGetDeviceProcAddr(dev, "vkGetMemoryHostPointerPropertiesEXT"));
}
if (g_cfg.video.disable_vulkan_mem_allocator)
m_allocator = std::make_unique<vk::mem_allocator_vk>(dev, pdev);
else

View File

@ -30,6 +30,8 @@ namespace vk
{
u32 host_visible_coherent;
u32 device_local;
PFN_vkGetMemoryHostPointerPropertiesEXT getMemoryHostPointerPropertiesEXT;
};
class physical_device
@ -47,6 +49,7 @@ namespace vk
bool stencil_export_support = false;
bool conditional_render_support = false;
bool external_memory_host_support = false;
bool unrestricted_depth_range_support = false;
friend class render_device;

View File

@ -189,7 +189,53 @@ namespace vk
memory_block::~memory_block()
{
m_mem_allocator->free(m_mem_handle);
if (m_mem_allocator)
{
m_mem_allocator->free(m_mem_handle);
}
}
// Wraps an existing host allocation as VkDeviceMemory by importing the pointer
// via VK_EXT_external_memory_host. memory_type_index must come from a type
// compatible with vkGetMemoryHostPointerPropertiesEXT for this pointer.
memory_block_host::memory_block_host(VkDevice dev, void* host_pointer, u64 size, u32 memory_type_index) :
	m_device(dev), m_mem_handle(VK_NULL_HANDLE), m_host_pointer(host_pointer)
{
	VkMemoryAllocateInfo alloc_info{};
	VkImportMemoryHostPointerInfoEXT import_info{};

	alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
	alloc_info.memoryTypeIndex = memory_type_index;
	alloc_info.allocationSize = size;
	// Chain the import descriptor; the "allocation" reuses host_pointer
	// instead of carving out new device memory.
	alloc_info.pNext = &import_info;

	import_info.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT;
	import_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
	import_info.pHostPointer = host_pointer;

	CHECK_RESULT(vkAllocateMemory(m_device, &alloc_info, nullptr, &m_mem_handle));
}
// Releases the imported memory handle. The underlying host allocation itself
// is owned elsewhere and is not freed here.
memory_block_host::~memory_block_host()
{
	vkFreeMemory(m_device, m_mem_handle, nullptr);
}
// Returns the imported VkDeviceMemory handle (bypasses the allocator path
// used by the base memory_block).
VkDeviceMemory memory_block_host::get_vk_device_memory()
{
	return m_mem_handle;
}
// Imported host allocations always start at offset 0 within their
// VkDeviceMemory — the whole object maps the caller's pointer.
u64 memory_block_host::get_vk_device_memory_offset()
{
	return 0ull;
}
// "Maps" the imported memory by offsetting the original host pointer; no
// vkMapMemory call is required since the memory is host memory to begin with.
// 'size' is accepted for interface parity with memory_block::map and unused.
void* memory_block_host::map(u64 offset, u64 size)
{
	// static_cast is the correct named cast for void* -> char* adjustment;
	// reinterpret_cast is not needed here.
	return static_cast<char*>(m_host_pointer) + offset;
}
// Nothing was mapped through Vulkan, so there is nothing to unmap.
void memory_block_host::unmap()
{
	// NOP
}
VkDeviceMemory memory_block::get_vk_device_memory()

View File

@ -82,23 +82,46 @@ namespace vk
struct memory_block
{
memory_block(VkDevice dev, u64 block_sz, u64 alignment, u32 memory_type_index);
~memory_block();
virtual ~memory_block();
VkDeviceMemory get_vk_device_memory();
u64 get_vk_device_memory_offset();
virtual VkDeviceMemory get_vk_device_memory();
virtual u64 get_vk_device_memory_offset();
void* map(u64 offset, u64 size);
void unmap();
virtual void* map(u64 offset, u64 size);
virtual void unmap();
memory_block(const memory_block&) = delete;
memory_block(memory_block&&) = delete;
protected:
memory_block() = default;
private:
VkDevice m_device;
vk::mem_allocator_base* m_mem_allocator;
vk::mem_allocator_base* m_mem_allocator = nullptr;
mem_allocator_base::mem_handle_t m_mem_handle;
};
// memory_block specialization whose VkDeviceMemory is imported from an
// existing host pointer (VK_EXT_external_memory_host) rather than allocated
// through the memory allocator. Non-copyable, non-movable; must be
// constructed with a valid pointer/size pair.
struct memory_block_host : public memory_block
{
	memory_block_host(VkDevice dev, void* host_pointer, u64 size, u32 memory_type_index);
	~memory_block_host();

	VkDeviceMemory get_vk_device_memory() override;
	u64 get_vk_device_memory_offset() override;
	// map() offsets the stored host pointer; unmap() is a NOP.
	void* map(u64 offset, u64 size) override;
	void unmap() override;

	memory_block_host(const memory_block_host&) = delete;
	memory_block_host(memory_block_host&&) = delete;
	memory_block_host() = delete;

private:
	VkDevice m_device;              // owning device, needed for vkFreeMemory
	VkDeviceMemory m_mem_handle;    // imported memory handle
	void* m_host_pointer;           // original host allocation (not owned)
};
void vmm_notify_memory_allocated(void* handle, u32 memory_type, u64 memory_size);
void vmm_notify_memory_freed(void* handle);
void vmm_reset();