mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-25 20:22:30 +01:00
vk: Add host-imported DMA buffers
This commit is contained in:
parent
a1ab6c28c1
commit
58d367d704
@ -10,17 +10,17 @@
|
||||
|
||||
namespace vk
|
||||
{
|
||||
static constexpr usz s_dma_block_length = 0x01000000;
|
||||
static constexpr u32 s_dma_block_mask = 0xFF000000;
|
||||
//static constexpr u32 s_dma_offset_mask = 0x00FFFFFF;
|
||||
static constexpr usz s_dma_block_length = 0x00001000;//0x01000000;
|
||||
static constexpr u32 s_dma_block_mask = 0xFFFFF000;//0xFF000000;
|
||||
//static constexpr u32 s_dma_offset_mask = 0x00000FFF;//0x00FFFFFF;
|
||||
|
||||
static constexpr u32 s_page_size = 16384;
|
||||
static constexpr u32 s_page_align = s_page_size - 1;
|
||||
static constexpr u32 s_pages_per_entry = 32;
|
||||
static constexpr u32 s_bits_per_page = 2;
|
||||
static constexpr u32 s_bytes_per_entry = (s_page_size * s_pages_per_entry);
|
||||
static constexpr u32 s_page_size = 16384;
|
||||
static constexpr u32 s_page_align = s_page_size - 1;
|
||||
static constexpr u32 s_pages_per_entry = 32;
|
||||
static constexpr u32 s_bits_per_page = 2;
|
||||
static constexpr u32 s_bytes_per_entry = (s_page_size * s_pages_per_entry);
|
||||
|
||||
std::unordered_map<u32, dma_block> g_dma_pool;
|
||||
std::unordered_map<u32, std::unique_ptr<dma_block>> g_dma_pool;
|
||||
|
||||
void* dma_block::map_range(const utils::address_range& range)
|
||||
{
|
||||
@ -47,16 +47,28 @@ namespace vk
|
||||
}
|
||||
}
|
||||
|
||||
void dma_block::allocate(const render_device& dev, usz size)
|
||||
{
|
||||
if (allocated_memory)
|
||||
{
|
||||
// Acquired blocks are always to be assumed dirty. It is not possible to synchronize host access and inline
|
||||
// buffer copies without causing weird issues. Overlapped incomplete data ends up overwriting host-uploaded data.
|
||||
auto gc = vk::get_resource_manager();
|
||||
gc->dispose(allocated_memory);
|
||||
}
|
||||
|
||||
allocated_memory = std::make_unique<vk::buffer>(dev, size,
|
||||
dev.get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
|
||||
}
|
||||
|
||||
void dma_block::init(const render_device& dev, u32 addr, usz size)
|
||||
{
|
||||
ensure(size);
|
||||
ensure(!(size % s_dma_block_length));
|
||||
base_address = addr;
|
||||
|
||||
allocated_memory = std::make_unique<vk::buffer>(dev, size,
|
||||
dev.get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
|
||||
|
||||
allocate(dev, size);
|
||||
page_info.resize(size / s_bytes_per_entry, ~0ull);
|
||||
}
|
||||
|
||||
@ -70,7 +82,7 @@ namespace vk
|
||||
void dma_block::set_page_bit(u32 offset, u64 bits)
|
||||
{
|
||||
const auto entry = (offset / s_bytes_per_entry);
|
||||
const auto word = entry / s_pages_per_entry;
|
||||
const auto word = entry / s_pages_per_entry;
|
||||
const auto shift = (entry % s_pages_per_entry) * s_bits_per_page;
|
||||
|
||||
page_info[word] &= ~(3 << shift);
|
||||
@ -202,24 +214,16 @@ namespace vk
|
||||
}
|
||||
}
|
||||
|
||||
void dma_block::extend(const command_buffer& cmd, const render_device &dev, usz new_size)
|
||||
void dma_block::extend(const command_buffer& cmd, const render_device& dev, usz new_size)
|
||||
{
|
||||
ensure(allocated_memory);
|
||||
if (new_size <= allocated_memory->size())
|
||||
return;
|
||||
|
||||
allocate(dev, new_size);
|
||||
|
||||
const auto required_entries = new_size / s_bytes_per_entry;
|
||||
page_info.resize(required_entries, ~0ull);
|
||||
|
||||
auto new_allocation = std::make_unique<vk::buffer>(dev, new_size,
|
||||
dev.get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
|
||||
|
||||
// Acquired blocks are always to be assumed dirty. It is not possible to synchronize host access and inline
|
||||
// buffer copies without causing weird issues. Overlapped incomplete data ends up overwriting host-uploaded data.
|
||||
auto gc = vk::get_resource_manager();
|
||||
gc->dispose(allocated_memory);
|
||||
allocated_memory = std::move(new_allocation);
|
||||
}
|
||||
|
||||
u32 dma_block::start() const
|
||||
@ -238,6 +242,48 @@ namespace vk
|
||||
return (allocated_memory) ? allocated_memory->size() : 0;
|
||||
}
|
||||
|
||||
void dma_block_EXT::allocate(const render_device& dev, usz size)
|
||||
{
|
||||
if (allocated_memory)
|
||||
{
|
||||
// Acquired blocks are always to be assumed dirty. It is not possible to synchronize host access and inline
|
||||
// buffer copies without causing weird issues. Overlapped incomplete data ends up overwriting host-uploaded data.
|
||||
auto gc = vk::get_resource_manager();
|
||||
gc->dispose(allocated_memory);
|
||||
}
|
||||
|
||||
allocated_memory = std::make_unique<vk::buffer>(dev,
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
vm::get_super_ptr<void>(base_address),
|
||||
size);
|
||||
}
|
||||
|
||||
// Returns a host pointer for the start of the given range.
// No buffer mapping is performed; the pointer is obtained directly from vm::get_super_ptr.
void* dma_block_EXT::map_range(const utils::address_range& range)
{
	const auto first_address = range.start;
	return vm::get_super_ptr<void>(first_address);
}
|
||||
|
||||
// Intentionally empty: map_range() does not map anything that would need to be released.
void dma_block_EXT::unmap()
{
	// NOP
}
|
||||
|
||||
void dma_block_EXT::flush(const utils::address_range& range)
|
||||
{
|
||||
// NOP
|
||||
}
|
||||
|
||||
void dma_block_EXT::load(const utils::address_range& range)
|
||||
{
|
||||
// NOP
|
||||
}
|
||||
|
||||
void create_dma_block(std::unique_ptr<dma_block>& block)
|
||||
{
|
||||
// TODO
|
||||
block.reset(new dma_block_EXT());
|
||||
}
|
||||
|
||||
std::pair<u32, vk::buffer*> map_dma(const command_buffer& cmd, u32 local_address, u32 length)
|
||||
{
|
||||
const auto map_range = utils::address_range::start_length(local_address, length);
|
||||
@ -247,17 +293,19 @@ namespace vk
|
||||
|
||||
if (auto found = g_dma_pool.find(first_block); found != g_dma_pool.end())
|
||||
{
|
||||
if (found->second.end() >= limit)
|
||||
if (found->second->end() >= limit)
|
||||
{
|
||||
return found->second.get(map_range);
|
||||
return found->second->get(map_range);
|
||||
}
|
||||
}
|
||||
|
||||
if (first_block == last_block) [[likely]]
|
||||
{
|
||||
auto &block_info = g_dma_pool[first_block];
|
||||
block_info.init(*g_render_device, first_block, s_dma_block_length);
|
||||
return block_info.get(map_range);
|
||||
if (!block_info) create_dma_block(block_info);
|
||||
|
||||
block_info->init(*g_render_device, first_block, s_dma_block_length);
|
||||
return block_info->get(map_range);
|
||||
}
|
||||
|
||||
dma_block* block_head = nullptr;
|
||||
@ -268,7 +316,7 @@ namespace vk
|
||||
{
|
||||
if (auto found = g_dma_pool.find(block); found != g_dma_pool.end())
|
||||
{
|
||||
const auto end = found->second.end();
|
||||
const auto end = found->second->end();
|
||||
last_block = std::max(last_block, end & s_dma_block_mask);
|
||||
block_end = std::max(block_end, end + 1);
|
||||
|
||||
@ -279,8 +327,10 @@ namespace vk
|
||||
for (auto block = first_block; block <= last_block; block += s_dma_block_length)
|
||||
{
|
||||
auto found = g_dma_pool.find(block);
|
||||
const bool exists = (found != g_dma_pool.end());
|
||||
auto entry = exists ? &found->second : &g_dma_pool[block];
|
||||
auto &entry = g_dma_pool[block];
|
||||
|
||||
const bool exists = !!entry;
|
||||
if (!exists) create_dma_block(entry);
|
||||
|
||||
if (block == first_block)
|
||||
{
|
||||
@ -326,16 +376,16 @@ namespace vk
|
||||
u32 block = (local_address & s_dma_block_mask);
|
||||
if (auto found = g_dma_pool.find(block); found != g_dma_pool.end())
|
||||
{
|
||||
const auto sync_end = std::min(limit, found->second.end());
|
||||
const auto sync_end = std::min(limit, found->second->end());
|
||||
const auto range = utils::address_range::start_end(local_address, sync_end);
|
||||
|
||||
if constexpr (load)
|
||||
{
|
||||
found->second.load(range);
|
||||
found->second->load(range);
|
||||
}
|
||||
else
|
||||
{
|
||||
found->second.flush(range);
|
||||
found->second->flush(range);
|
||||
}
|
||||
|
||||
if (sync_end < limit) [[unlikely]]
|
||||
|
@ -12,6 +12,7 @@ namespace vk
|
||||
|
||||
class dma_block
|
||||
{
|
||||
protected:
|
||||
enum page_bits
|
||||
{
|
||||
synchronized = 0,
|
||||
@ -30,8 +31,9 @@ namespace vk
|
||||
std::unique_ptr<buffer> allocated_memory;
|
||||
std::vector<u64> page_info;
|
||||
|
||||
void* map_range(const utils::address_range& range);
|
||||
void unmap();
|
||||
virtual void allocate(const render_device& dev, usz size);
|
||||
virtual void* map_range(const utils::address_range& range);
|
||||
virtual void unmap();
|
||||
|
||||
void set_page_bit(u32 page, u64 bits);
|
||||
bool test_page_bit(u32 page, u64 bits);
|
||||
@ -40,10 +42,10 @@ namespace vk
|
||||
|
||||
public:
|
||||
|
||||
void init(const render_device& dev, u32 addr, usz size);
|
||||
void init(dma_block* parent, u32 addr, usz size);
|
||||
void flush(const utils::address_range& range);
|
||||
void load(const utils::address_range& range);
|
||||
virtual void init(const render_device& dev, u32 addr, usz size);
|
||||
virtual void init(dma_block* parent, u32 addr, usz size);
|
||||
virtual void flush(const utils::address_range& range);
|
||||
virtual void load(const utils::address_range& range);
|
||||
std::pair<u32, buffer*> get(const utils::address_range& range);
|
||||
|
||||
u32 start() const;
|
||||
@ -52,7 +54,19 @@ namespace vk
|
||||
|
||||
dma_block* head();
|
||||
const dma_block* head() const;
|
||||
void set_parent(const command_buffer& cmd, dma_block* parent);
|
||||
void extend(const command_buffer& cmd, const render_device& dev, usz new_size);
|
||||
virtual void set_parent(const command_buffer& cmd, dma_block* parent);
|
||||
virtual void extend(const command_buffer& cmd, const render_device& dev, usz new_size);
|
||||
};
|
||||
|
||||
class dma_block_EXT: public dma_block
|
||||
{
|
||||
private:
|
||||
void allocate(const render_device& dev, usz size) override;
|
||||
void* map_range(const utils::address_range& range) override;
|
||||
void unmap() override;
|
||||
|
||||
public:
|
||||
void flush(const utils::address_range& range) override;
|
||||
void load(const utils::address_range& range) override;
|
||||
};
|
||||
}
|
||||
|
@ -66,6 +66,53 @@ namespace vk
|
||||
vkBindBufferMemory(dev, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset());
|
||||
}
|
||||
|
||||
// Creates a buffer whose memory is imported from an existing host allocation.
// dev:          render device that owns the buffer.
// usage:        Vulkan buffer usage flags.
// host_pointer: start of the host memory to import.
// size:         size of the buffer and of the imported memory, in bytes.
// Requires getMemoryHostPointerPropertiesEXT to have been resolved on the device's memory map.
// Raises through fmt::throw_exception when no compatible memory type exists; Vulkan calls
// are checked with CHECK_RESULT.
buffer::buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size)
	: m_device(dev)
{
	info.size = size;
	info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	info.flags = 0;
	info.usage = usage;

	// Declare at creation time that the buffer will be bound to imported host memory.
	VkExternalMemoryBufferCreateInfoKHR ex_info{};
	ex_info.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR;
	ex_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
	ex_info.pNext = nullptr;

	info.pNext = &ex_info;
	CHECK_RESULT(vkCreateBuffer(m_device, &info, nullptr, &value));

	// `info` is not local to this constructor while ex_info is; do not leave a pointer to a
	// dead stack object behind.
	info.pNext = nullptr;

	auto& memory_map = dev.get_memory_mapping();
	u32 memory_type_index = memory_map.host_visible_coherent;
	VkFlags access_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;

	ensure(memory_map.getMemoryHostPointerPropertiesEXT);

	// Ask the driver which memory types can import this host pointer. A failed query must not
	// be mistaken for the "type bits of 0" driver quirk handled below.
	VkMemoryHostPointerPropertiesEXT memory_properties{};
	memory_properties.sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT;
	CHECK_RESULT(memory_map.getMemoryHostPointerPropertiesEXT(dev, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, host_pointer, &memory_properties));

	VkMemoryRequirements memory_reqs;
	vkGetBufferMemoryRequirements(m_device, value, &memory_reqs);

	// Only memory types acceptable to both the buffer and the host pointer are candidates.
	auto required_memory_type_bits = memory_reqs.memoryTypeBits & memory_properties.memoryTypeBits;
	if (!required_memory_type_bits)
	{
		// AMD driver bug. Buffers created with external memory extension return type bits of 0
		rsx_log.warning("Could not match buffer requirements and host pointer properties.");
		required_memory_type_bits = memory_properties.memoryTypeBits;
	}

	if (!dev.get_compatible_memory_type(required_memory_type_bits, access_flags, &memory_type_index))
	{
		fmt::throw_exception("No compatible memory type was found!");
	}

	memory = std::make_unique<memory_block_host>(m_device, host_pointer, size, memory_type_index);

	// A buffer that failed to bind must not be used; surface the error here.
	CHECK_RESULT(vkBindBufferMemory(dev, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset()));
}
|
||||
|
||||
buffer::~buffer()
|
||||
{
|
||||
vkDestroyBuffer(m_device, value, nullptr);
|
||||
|
@ -30,6 +30,7 @@ namespace vk
|
||||
std::unique_ptr<vk::memory_block> memory;
|
||||
|
||||
buffer(const vk::render_device& dev, u64 size, u32 memory_type_index, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags);
|
||||
buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size);
|
||||
~buffer();
|
||||
|
||||
void* map(u64 offset, u64 size);
|
||||
|
@ -56,6 +56,7 @@ namespace vk
|
||||
|
||||
stencil_export_support = device_extensions.is_supported(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
|
||||
conditional_render_support = device_extensions.is_supported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
|
||||
external_memory_host_support = device_extensions.is_supported(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
|
||||
unrestricted_depth_range_support = device_extensions.is_supported(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
@ -262,6 +263,11 @@ namespace vk
|
||||
requested_extensions.push_back(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
if (pgpu->external_memory_host_support)
|
||||
{
|
||||
requested_extensions.push_back(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
enabled_features.robustBufferAccess = VK_TRUE;
|
||||
enabled_features.fullDrawIndexUint32 = VK_TRUE;
|
||||
enabled_features.independentBlend = VK_TRUE;
|
||||
@ -362,6 +368,11 @@ namespace vk
|
||||
m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
|
||||
m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);
|
||||
|
||||
if (pgpu->external_memory_host_support)
|
||||
{
|
||||
memory_map.getMemoryHostPointerPropertiesEXT = reinterpret_cast<PFN_vkGetMemoryHostPointerPropertiesEXT>(vkGetDeviceProcAddr(dev, "vkGetMemoryHostPointerPropertiesEXT"));
|
||||
}
|
||||
|
||||
if (g_cfg.video.disable_vulkan_mem_allocator)
|
||||
m_allocator = std::make_unique<vk::mem_allocator_vk>(dev, pdev);
|
||||
else
|
||||
|
@ -30,6 +30,8 @@ namespace vk
|
||||
{
|
||||
u32 host_visible_coherent;
|
||||
u32 device_local;
|
||||
|
||||
// Entry point for vkGetMemoryHostPointerPropertiesEXT. It is only resolved when the device
// reports external host memory support, and users check it with ensure(), so it must start
// out null rather than indeterminate.
PFN_vkGetMemoryHostPointerPropertiesEXT getMemoryHostPointerPropertiesEXT = nullptr;
|
||||
};
|
||||
|
||||
class physical_device
|
||||
@ -47,6 +49,7 @@ namespace vk
|
||||
|
||||
bool stencil_export_support = false;
|
||||
bool conditional_render_support = false;
|
||||
bool external_memory_host_support = false;
|
||||
bool unrestricted_depth_range_support = false;
|
||||
|
||||
friend class render_device;
|
||||
|
@ -189,7 +189,53 @@ namespace vk
|
||||
|
||||
// Returns the allocation to the allocator it came from, if there is one.
// m_mem_allocator stays null for objects built through the protected default constructor
// (used by subclasses that manage their own memory), so the free must be guarded and must
// happen exactly once.
memory_block::~memory_block()
{
	if (m_mem_allocator)
	{
		m_mem_allocator->free(m_mem_handle);
	}
}
|
||||
|
||||
// Imports `size` bytes of host memory starting at host_pointer as a VkDeviceMemory object.
// dev:               device the memory object is created on.
// host_pointer:      start of the host allocation to import; also kept for map().
// size:              allocation size in bytes.
// memory_type_index: Vulkan memory type used for the import.
memory_block_host::memory_block_host(VkDevice dev, void* host_pointer, u64 size, u32 memory_type_index)
	: m_device(dev)
	, m_mem_handle(VK_NULL_HANDLE)
	, m_host_pointer(host_pointer)
{
	// Extension structure describing the host pointer to import.
	VkImportMemoryHostPointerInfoEXT host_import{};
	host_import.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT;
	host_import.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
	host_import.pHostPointer = host_pointer;

	// Regular allocation request with the import structure chained in.
	VkMemoryAllocateInfo allocation{};
	allocation.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
	allocation.pNext = &host_import;
	allocation.allocationSize = size;
	allocation.memoryTypeIndex = memory_type_index;

	CHECK_RESULT(vkAllocateMemory(m_device, &allocation, nullptr, &m_mem_handle));
}
|
||||
|
||||
// Frees the VkDeviceMemory object created by the constructor.
memory_block_host::~memory_block_host()
{
	// No allocation callbacks were given to vkAllocateMemory, so none are passed here either.
	vkFreeMemory(m_device, m_mem_handle, nullptr);
}
|
||||
|
||||
// Returns the VkDeviceMemory handle obtained from vkAllocateMemory in the constructor.
VkDeviceMemory memory_block_host::get_vk_device_memory()
{
	const VkDeviceMemory imported_memory = m_mem_handle;
	return imported_memory;
}
|
||||
|
||||
// Offset to bind at within the device memory object; always zero for this block type.
u64 memory_block_host::get_vk_device_memory_offset()
{
	constexpr u64 bind_offset = 0ull;
	return bind_offset;
}
|
||||
|
||||
// Returns the host pointer advanced by `offset` bytes. No Vulkan mapping call is made.
// offset: byte offset from the start of the imported host memory.
// size:   ignored.
void* memory_block_host::map(u64 offset, u64 /*size*/)
{
	char* const host_bytes = static_cast<char*>(m_host_pointer);
	return host_bytes + offset;
}
|
||||
|
||||
// Intentionally empty: map() only computes a pointer, so there is nothing to undo.
void memory_block_host::unmap()
{
	// NOP
}
|
||||
|
||||
VkDeviceMemory memory_block::get_vk_device_memory()
|
||||
|
@ -82,23 +82,46 @@ namespace vk
|
||||
struct memory_block
|
||||
{
|
||||
memory_block(VkDevice dev, u64 block_sz, u64 alignment, u32 memory_type_index);
|
||||
~memory_block();
|
||||
virtual ~memory_block();
|
||||
|
||||
VkDeviceMemory get_vk_device_memory();
|
||||
u64 get_vk_device_memory_offset();
|
||||
virtual VkDeviceMemory get_vk_device_memory();
|
||||
virtual u64 get_vk_device_memory_offset();
|
||||
|
||||
void* map(u64 offset, u64 size);
|
||||
void unmap();
|
||||
virtual void* map(u64 offset, u64 size);
|
||||
virtual void unmap();
|
||||
|
||||
memory_block(const memory_block&) = delete;
|
||||
memory_block(memory_block&&) = delete;
|
||||
|
||||
protected:
|
||||
memory_block() = default;
|
||||
|
||||
private:
|
||||
VkDevice m_device;
|
||||
vk::mem_allocator_base* m_mem_allocator;
|
||||
vk::mem_allocator_base* m_mem_allocator = nullptr;
|
||||
mem_allocator_base::mem_handle_t m_mem_handle;
|
||||
};
|
||||
|
||||
struct memory_block_host : public memory_block
|
||||
{
|
||||
memory_block_host(VkDevice dev, void* host_pointer, u64 size, u32 memory_type_index);
|
||||
~memory_block_host();
|
||||
|
||||
VkDeviceMemory get_vk_device_memory() override;
|
||||
u64 get_vk_device_memory_offset() override;
|
||||
void* map(u64 offset, u64 size) override;
|
||||
void unmap() override;
|
||||
|
||||
memory_block_host(const memory_block_host&) = delete;
|
||||
memory_block_host(memory_block_host&&) = delete;
|
||||
memory_block_host() = delete;
|
||||
|
||||
private:
|
||||
VkDevice m_device;
|
||||
VkDeviceMemory m_mem_handle;
|
||||
void* m_host_pointer;
|
||||
};
|
||||
|
||||
void vmm_notify_memory_allocated(void* handle, u32 memory_type, u64 memory_size);
|
||||
void vmm_notify_memory_freed(void* handle);
|
||||
void vmm_reset();
|
||||
|
Loading…
Reference in New Issue
Block a user