1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2025-01-31 12:31:45 +01:00

rsx: Preliminary support for format conversions using typeless resolve

This commit is contained in:
kd-11 2019-03-29 22:04:54 +03:00 committed by kd-11
parent b7470cfc1a
commit 366e4c2422
6 changed files with 481 additions and 342 deletions

View File

@ -951,14 +951,14 @@ namespace rsx
}
protected:
inline bool is_hw_blit_engine_compatible(u32 format) const
inline bool is_gcm_depth_format(u32 format) const
{
switch (format)
{
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
return true;
default:
return false;
@ -976,12 +976,12 @@ namespace rsx
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
case CELL_GCM_TEXTURE_X16:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_G8B8:
case CELL_GCM_TEXTURE_A1R5G5B5:
case CELL_GCM_TEXTURE_R5G5B5A1:
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_R6G5B5:
//case CELL_GCM_TEXTURE_A4R4G4B4:
//case CELL_GCM_TEXTURE_G8B8:
//case CELL_GCM_TEXTURE_A1R5G5B5:
//case CELL_GCM_TEXTURE_R5G5B5A1:
//case CELL_GCM_TEXTURE_R5G6B5:
//case CELL_GCM_TEXTURE_R6G5B5:
return CELL_GCM_TEXTURE_DEPTH16;
}
@ -1731,8 +1731,9 @@ namespace rsx
// Intersect this resource with the original one
const auto section_bpp = get_format_block_size_in_bytes(section->get_gcm_format());
const auto normalized_width = (section->get_width() * section_bpp) / bpp;
const auto clipped = rsx::intersect_region(address, slice_w, slice_h, bpp,
section->get_section_base(), section->get_width(), section->get_height(), section_bpp, pitch);
section->get_section_base(), normalized_width, section->get_height(), section_bpp, pitch);
const auto slice_begin = u32(slice * src_slice_h);
const auto slice_end = u32(slice_begin + slice_h);
@ -1747,6 +1748,7 @@ namespace rsx
return;
}
const u16 internal_clip_width = u16(std::get<2>(clipped).width * bpp) / section_bpp;
if (scaling)
{
// Since output is upscaled, also upscale on dst
@ -1759,15 +1761,15 @@ namespace rsx
rsx::apply_resolution_scale((u16)std::get<1>(clipped).x, true),
rsx::apply_resolution_scale((u16)std::get<1>(clipped).y, true),
slice,
(u16)std::get<2>(clipped).width,
internal_clip_width,
(u16)std::get<2>(clipped).height,
rsx::apply_resolution_scale((u16)std::get<2>(clipped).width, true),
rsx::apply_resolution_scale(internal_clip_width, true),
rsx::apply_resolution_scale((u16)std::get<2>(clipped).height, true),
});
}
else
{
const auto src_width = (u16)std::get<2>(clipped).width, dst_width = src_width;
const auto src_width = internal_clip_width, dst_width = src_width;
const auto src_height = (u16)std::get<2>(clipped).height, dst_height = src_height;
surfaces.push_back
({
@ -1893,7 +1895,6 @@ namespace rsx
{
texptr->read_barrier(cmd);
const bool is_depth = texptr->is_depth_surface();
const auto surface_width = texptr->get_surface_width();
const auto surface_height = texptr->get_surface_height();
@ -1901,6 +1902,25 @@ namespace rsx
u32 internal_height = tex_height;
get_native_dimensions(internal_width, internal_height, texptr);
bool is_depth = texptr->is_depth_surface();
const bool force_convert = !render_target_format_is_compatible(texptr, format);
if (const bool gcm_format_is_depth = is_gcm_depth_format(format);
gcm_format_is_depth != is_depth)
{
if (force_convert)
{
is_depth = gcm_format_is_depth;
}
else
{
format = get_compatible_depth_format(format);
}
// Always make sure the conflict is resolved!
verify(HERE), is_gcm_depth_format(format) == is_depth;
}
if (LIKELY(extended_dimension == rsx::texture_dimension_extended::texture_dimension_2d ||
extended_dimension == rsx::texture_dimension_extended::texture_dimension_1d))
{
@ -1912,12 +1932,12 @@ namespace rsx
if ((assume_bound && g_cfg.video.strict_rendering_mode) ||
internal_width < surface_width ||
internal_height < surface_height ||
!render_target_format_is_compatible(texptr, format))
force_convert)
{
const auto scaled_w = rsx::apply_resolution_scale(internal_width, true);
const auto scaled_h = rsx::apply_resolution_scale(internal_height, true);
auto command = assume_bound ? deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static;
const auto command = assume_bound ? deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static;
return { texptr->get_surface(), command, texaddr, format, 0, 0, scaled_w, scaled_h, 1,
texture_upload_context::framebuffer_storage, is_depth, scale_x, scale_y,
extended_dimension, decoded_remap };
@ -1965,7 +1985,16 @@ namespace rsx
if (is_depth = (select_hint == 0) ? fbos.back().is_depth : local.back()->is_depth_texture();
is_depth)
{
format = get_compatible_depth_format(format);
if (const auto suggested_format = get_compatible_depth_format(format);
!is_gcm_depth_format(suggested_format))
{
// Failed!
is_depth = false;
}
else
{
format = suggested_format;
}
}
// If this method was called, there is no easy solution, likely means atlas gather is needed
@ -2147,13 +2176,14 @@ namespace rsx
{
// Surface cache data is newer, check if this thing fits our search parameters
const auto& last = overlapping_fbos.back();
if (last.src_x == 0 && last.src_y == 0 && last.surface->get_bpp() == bpp)
if (last.src_x == 0 && last.src_y == 0)
{
u16 internal_width = tex_width;
u16 internal_height = required_surface_height;
get_native_dimensions(internal_width, internal_height, last.surface);
if (last.width >= internal_width && last.height >= internal_height)
u16 normalized_width = u16(last.width * last.surface->get_bpp()) / bpp;
if (normalized_width >= internal_width && last.height >= internal_height)
{
return process_framebuffer_resource_fast(cmd, last.surface, texaddr, format, tex_width, tex_height, depth,
scale_x, scale_y, extended_dimension, tex.remap(), tex.decoded_remap(), false);
@ -2163,12 +2193,66 @@ namespace rsx
else if (extended_dimension <= rsx::texture_dimension_extended::texture_dimension_2d)
{
const auto last = overlapping_locals.back();
const auto normalized_width = u16(last->get_width() * get_format_block_size_in_bytes(last->get_gcm_format())) / bpp;
if (last->get_section_base() == texaddr &&
get_format_block_size_in_bytes(last->get_gcm_format()) == bpp &&
last->get_width() >= tex_width && last->get_height() >= tex_height)
normalized_width >= tex_width && last->get_height() >= tex_height)
{
return { last->get_raw_texture(), deferred_request_command::copy_image_static, texaddr, format, 0, 0,
tex_width, tex_height, 1, last->get_context(), last->is_depth_texture(),
bool is_depth = last->is_depth_texture();
u32 gcm_format = format;
if (const auto gcm_format_is_depth = is_gcm_depth_format(format);
is_depth != gcm_format_is_depth)
{
// Conflict, resolve
if (gcm_format_is_depth)
{
is_depth = true;
}
else
{
const auto actual_format = last->get_gcm_format();
bool resolved = false;
switch (format)
{
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_D8R8G8B8:
{
// Compatible with D24S8_UINT
if (actual_format == CELL_GCM_TEXTURE_DEPTH24_D8)
{
gcm_format = CELL_GCM_TEXTURE_DEPTH24_D8;
resolved = true;
is_depth = true;
}
break;
}
case CELL_GCM_TEXTURE_X16:
{
// Compatible with DEPTH16_UNORM
if (actual_format == CELL_GCM_TEXTURE_DEPTH16)
{
gcm_format = CELL_GCM_TEXTURE_DEPTH16;
resolved = true;
is_depth = true;
}
break;
}
}
if (!resolved)
{
LOG_ERROR(RSX, "Reading texture with gcm format 0x%x as unexpected cast with format 0x%x",
actual_format, format);
is_depth = gcm_format_is_depth;
}
}
}
return { last->get_raw_texture(), deferred_request_command::copy_image_static, texaddr, gcm_format, 0, 0,
tex_width, tex_height, 1, last->get_context(), is_depth,
scale_x, scale_y, extended_dimension, tex.decoded_remap() };
}
}

View File

@ -1322,6 +1322,13 @@ namespace gl
}
};
// Bit flags naming the aspects of an image (color / depth / stencil).
// Each enumerator is a distinct bit so several aspects can be OR-ed together.
enum image_aspect : u32
{
	color   = 1 << 0,
	depth   = 1 << 1,
	stencil = 1 << 2
};
class texture
{
public:
@ -1479,6 +1486,7 @@ namespace gl
GLuint m_mipmaps = 0;
GLuint m_pitch = 0;
GLuint m_compressed = GL_FALSE;
GLuint m_aspect_flags = 0;
target m_target = target::texture2D;
internal_format m_internal_format = internal_format::rgba8;
@ -1563,18 +1571,21 @@ namespace gl
m_height = height;
m_depth = depth;
m_mipmaps = mipmaps;
m_aspect_flags = image_aspect::color;
switch (sized_format)
{
case GL_DEPTH_COMPONENT16:
{
m_pitch = width * 2;
m_aspect_flags = image_aspect::depth;
break;
}
case GL_DEPTH24_STENCIL8:
case GL_DEPTH32F_STENCIL8:
{
m_pitch = width * 4;
m_aspect_flags = image_aspect::depth | image_aspect::stencil;
break;
}
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
@ -1688,6 +1699,11 @@ namespace gl
return m_compressed;
}
// Returns the image_aspect bit mask stored in m_aspect_flags for this texture.
GLuint aspect() const
{
return m_aspect_flags;
}
sizei size2D() const
{
return{ (int)m_width, (int)m_height };
@ -1800,13 +1816,6 @@ namespace gl
}
};
// Bit flags naming the aspects of an image (color / depth / stencil).
// Each enumerator is a distinct bit so several aspects can be OR-ed together.
enum image_aspect : u32
{
	color   = 1 << 0,
	depth   = 1 << 1,
	stencil = 1 << 2
};
class texture_view
{
GLuint m_id = 0;
@ -1950,6 +1959,7 @@ public:
}
}
verify(HERE), aspect() & aspect_flags;
auto mapping = apply_swizzle_remap(get_native_component_layout(), remap);
auto view = std::make_unique<texture_view>(this, mapping.data(), aspect_flags);
auto result = view.get();

View File

@ -592,43 +592,26 @@ namespace gl
m_temporary_surfaces.resize(0);
}
gl::texture_view* create_temporary_subresource_impl(gl::texture* src, GLenum sized_internal_fmt, GLenum dst_type, u32 gcm_format,
gl::texture_view* create_temporary_subresource_impl(gl::command_context& cmd, gl::texture* src, GLenum sized_internal_fmt, GLenum dst_type, u32 gcm_format,
u16 x, u16 y, u16 width, u16 height, const texture_channel_remap_t& remap, bool copy)
{
if (sized_internal_fmt == GL_NONE)
sized_internal_fmt = gl::get_sized_internal_format(gcm_format);
gl::texture::internal_format ifmt = static_cast<gl::texture::internal_format>(sized_internal_fmt);
if (src)
{
ifmt = src->get_internal_format();
switch (ifmt)
{
case gl::texture::internal_format::depth16:
case gl::texture::internal_format::depth24_stencil8:
case gl::texture::internal_format::depth32f_stencil8:
//HACK! Should use typeless transfer instead
sized_internal_fmt = (GLenum)ifmt;
break;
}
}
const auto ifmt = static_cast<gl::texture::internal_format>(sized_internal_fmt);
std::unique_ptr<gl::texture> dst = std::make_unique<gl::viewable_image>(dst_type, width, height, 1, 1, sized_internal_fmt);
if (copy)
{
//Empty GL_ERROR
glGetError();
std::vector<copy_region_descriptor> region =
{{
src,
surface_transform::identity,
x, y, 0, 0, 0,
width, height, width, height
}};
glCopyImageSubData(src->id(), GL_TEXTURE_2D, 0, x, y, 0,
dst->id(), dst_type, 0, 0, 0, 0, width, height, 1);
//Check for error
if (GLenum err = glGetError())
{
LOG_WARNING(RSX, "Failed to copy image subresource with GL error 0x%X", err);
return nullptr;
}
copy_transfer_regions_impl(cmd, dst.get(), region);
}
std::array<GLenum, 4> swizzle;
@ -694,37 +677,56 @@ namespace gl
void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector<copy_region_descriptor>& sources) const
{
const auto dst_bpp = dst_image->pitch() / dst_image->width();
const auto dst_aspect = dst_image->aspect();
for (const auto &slice : sources)
{
if (!slice.src)
continue;
const auto src_bpp = slice.src->pitch() / slice.src->width();
const bool typeless = dst_bpp != src_bpp || dst_aspect != slice.src->aspect();
auto src_image = slice.src;
auto src_x = slice.src_x;
std::unique_ptr<gl::texture> tmp;
if (UNLIKELY(typeless))
{
const u16 convert_w = u16(slice.src->width() * src_bpp) / dst_bpp;
tmp = std::make_unique<texture>(GL_TEXTURE_2D, convert_w, slice.src->height(), 1, 1, (GLenum)dst_image->get_internal_format());
src_image = tmp.get();
src_x = u16(src_x * src_bpp) / dst_bpp;
gl::copy_typeless(src_image, slice.src);
}
if (slice.src_w == slice.dst_w && slice.src_h == slice.dst_h)
{
glCopyImageSubData(slice.src->id(), GL_TEXTURE_2D, 0, slice.src_x, slice.src_y, 0,
glCopyImageSubData(src_image->id(), GL_TEXTURE_2D, 0, src_x, slice.src_y, 0,
dst_image->id(), (GLenum)dst_image->get_target(), 0, slice.dst_x, slice.dst_y, slice.dst_z, slice.src_w, slice.src_h, 1);
}
else
{
verify(HERE), dst_image->get_target() == gl::texture::target::texture2D;
std::unique_ptr<gl::texture> tmp;
auto _dst = dst_image;
auto _blitter = gl::g_hw_blitter;
const areai src_rect = { slice.src_x, slice.src_y, slice.src_x + slice.src_w, slice.src_y + slice.src_h };
const areai src_rect = { src_x, slice.src_y, src_x + slice.src_w, slice.src_y + slice.src_h };
const areai dst_rect = { slice.dst_x, slice.dst_y, slice.dst_x + slice.dst_w, slice.dst_y + slice.dst_h };
if (UNLIKELY(slice.src->get_internal_format() != dst_image->get_internal_format()))
auto _dst = dst_image;
if (UNLIKELY(src_image->get_internal_format() != dst_image->get_internal_format()))
{
verify(HERE), !typeless;
tmp = std::make_unique<texture>(GL_TEXTURE_2D, dst_rect.x2, dst_rect.y2, 1, 1, (GLenum)slice.src->get_internal_format());
_dst = tmp.get();
}
_blitter->scale_image(cmd, slice.src, _dst,
_blitter->scale_image(cmd, src_image, _dst,
src_rect, dst_rect, false, false, {});
if (tmp)
if (_dst != dst_image)
{
// Data cast comes after scaling
glCopyImageSubData(tmp->id(), GL_TEXTURE_2D, 0, slice.dst_x, slice.dst_y, 0,
@ -773,16 +775,16 @@ namespace gl
protected:
gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
gl::texture_view* create_temporary_subresource_view(gl::command_context &cmd, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
const texture_channel_remap_t& remap_vector) override
{
return create_temporary_subresource_impl(*src, GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true);
return create_temporary_subresource_impl(cmd, *src, GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true);
}
gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
gl::texture_view* create_temporary_subresource_view(gl::command_context &cmd, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
const texture_channel_remap_t& remap_vector) override
{
return create_temporary_subresource_impl(src, (GLenum)src->get_internal_format(),
return create_temporary_subresource_impl(cmd, src, (GLenum)src->get_internal_format(),
GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true);
}
@ -834,7 +836,7 @@ namespace gl
const texture_channel_remap_t& remap_vector) override
{
auto _template = get_template_from_collection_impl(sections_to_copy);
auto result = create_temporary_subresource_impl(_template, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false);
auto result = create_temporary_subresource_impl(cmd, _template, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false);
copy_transfer_regions_impl(cmd, result->image(), sections_to_copy);
return result;

View File

@ -532,8 +532,7 @@ namespace vk
{
if (image->current_layout == new_layout) return;
VkImageAspectFlags flags = get_aspect_flags(image->info.format);
change_image_layout(cmd, image->value, image->current_layout, new_layout, { flags, 0, 1, 0, 1 });
change_image_layout(cmd, image->value, image->current_layout, new_layout, { image->aspect(), 0, 1, 0, 1 });
image->current_layout = new_layout;
}

View File

@ -86,7 +86,7 @@ namespace vk
class swap_chain_image;
class physical_device;
class command_buffer;
struct image;
class image;
struct buffer;
struct data_heap;
class mem_allocator_base;
@ -642,8 +642,203 @@ namespace vk
}
};
struct image
class command_pool
{
vk::render_device *owner = nullptr;
VkCommandPool pool = nullptr;
public:
command_pool() {}
~command_pool() {}
void create(vk::render_device &dev)
{
owner = &dev;
VkCommandPoolCreateInfo infos = {};
infos.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
CHECK_RESULT(vkCreateCommandPool(dev, &infos, nullptr, &pool));
}
void destroy()
{
if (!pool)
return;
vkDestroyCommandPool((*owner), pool, nullptr);
pool = nullptr;
}
vk::render_device& get_owner()
{
return (*owner);
}
operator VkCommandPool()
{
return pool;
}
};
/**
 * Wrapper around a primary VkCommandBuffer allocated from a vk::command_pool.
 *
 * Tracks whether the buffer is currently recording (is_open). When created with
 * auto_reset it also owns a fence (m_submit_fence) that is used for submissions
 * made without an explicit fence; begin() waits on that fence and resets the
 * buffer before it is recorded again.
 */
class command_buffer
{
private:
	bool is_open = false;      // Set by begin(), cleared by end()
	bool is_pending = false;   // Set by submit() when the internal fence was used, cleared by begin()
	VkFence m_submit_fence = VK_NULL_HANDLE;

protected:
	vk::command_pool *pool = nullptr;
	VkCommandBuffer commands = nullptr;

public:
	enum access_type_hint
	{
		flush_only, //Only to be submitted/opened/closed via command flush
		all //Auxiliary, can be submitted/opened/closed at any time
	}
	access_hint = flush_only;

	enum command_buffer_data_flag : u32
	{
		cb_has_occlusion_task = 1,
		cb_has_blit_transfer = 2,
		cb_has_dma_transfer = 4
	};
	u32 flags = 0;

public:
	command_buffer() {}
	~command_buffer() {}

	// Allocates one primary command buffer from 'cmd_pool'.
	// With auto_reset a private fence is also created for use by submit()/begin().
	void create(vk::command_pool &cmd_pool, bool auto_reset = false)
	{
		VkCommandBufferAllocateInfo infos = {};
		infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
		infos.commandBufferCount = 1;
		infos.commandPool = (VkCommandPool)cmd_pool;
		infos.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
		CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &infos, &commands));

		if (auto_reset)
		{
			VkFenceCreateInfo info = {};
			info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
			CHECK_RESULT(vkCreateFence(cmd_pool.get_owner(), &info, nullptr, &m_submit_fence));
		}

		pool = &cmd_pool;
	}

	// Frees the command buffer and, if present, the private submit fence.
	void destroy()
	{
		vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands);

		if (m_submit_fence)
		{
			vkDestroyFence(pool->get_owner(), m_submit_fence, nullptr);
		}
	}

	vk::command_pool& get_command_pool() const
	{
		return *pool;
	}

	void clear_flags()
	{
		flags = 0;
	}

	void set_flag(command_buffer_data_flag flag)
	{
		flags |= flag;
	}

	operator VkCommandBuffer() const
	{
		return commands;
	}

	bool is_recording() const
	{
		return is_open;
	}

	// Puts the buffer into the recording state.
	// If the last submit used the private fence, this first waits for it and
	// resets both the fence and the command buffer. Does nothing more if the
	// buffer is already recording.
	void begin()
	{
		if (m_submit_fence && is_pending)
		{
			wait_for_fence(m_submit_fence);
			is_pending = false;

			CHECK_RESULT(vkResetFences(pool->get_owner(), 1, &m_submit_fence));
			CHECK_RESULT(vkResetCommandBuffer(commands, 0));
		}

		if (is_open)
			return;

		VkCommandBufferInheritanceInfo inheritance_info = {};
		inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;

		VkCommandBufferBeginInfo begin_infos = {};
		begin_infos.pInheritanceInfo = &inheritance_info;
		begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
		begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
		CHECK_RESULT(vkBeginCommandBuffer(commands, &begin_infos));
		is_open = true;
	}

	// Finishes recording. Logs an error and does nothing if the buffer is not recording.
	void end()
	{
		if (!is_open)
		{
			LOG_ERROR(RSX, "commandbuffer->end was called but commandbuffer is not in a recording state");
			return;
		}

		CHECK_RESULT(vkEndCommandBuffer(commands));
		is_open = false;
	}

	// Submits the buffer to 'queue', waiting on 'semaphores' at 'pipeline_stage_flags'.
	// If 'fence' is VK_NULL_HANDLE the private fence (when one exists) is signalled
	// instead and the buffer is marked pending. Logs an error and returns without
	// submitting if the buffer is still recording. Clears 'flags' after the submit.
	void submit(VkQueue queue, const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
	{
		if (is_open)
		{
			LOG_ERROR(RSX, "commandbuffer->submit was called whilst the command buffer is in a recording state");
			return;
		}

		if (fence == VK_NULL_HANDLE)
		{
			fence = m_submit_fence;
			is_pending = (fence != VK_NULL_HANDLE);
		}

		// VkSubmitInfo reads one destination stage mask per wait semaphore, so the
		// single mask is replicated; pointing at the lone parameter would be read
		// out of bounds whenever more than one semaphore is passed.
		const std::vector<VkPipelineStageFlags> wait_stages(semaphores.size(), pipeline_stage_flags);

		VkSubmitInfo infos = {};
		infos.commandBufferCount = 1;
		infos.pCommandBuffers = &commands;
		infos.pWaitDstStageMask = wait_stages.data();
		infos.pWaitSemaphores = semaphores.data();
		infos.waitSemaphoreCount = static_cast<uint32_t>(semaphores.size());
		infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;

		acquire_global_submit_lock();
		CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence));
		release_global_submit_lock();

		clear_flags();
	}
};
class image
{
std::stack<VkImageLayout> m_layout_stack;
VkImageAspectFlags m_storage_aspect = 0;
public:
VkImage value = VK_NULL_HANDLE;
VkComponentMapping native_component_map = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A};
VkImageLayout current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@ -692,6 +887,8 @@ namespace vk
memory = std::make_shared<vk::memory_block>(m_device, memory_req.size, memory_req.alignment, memory_type_index);
CHECK_RESULT(vkBindImageMemory(m_device, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset()));
m_storage_aspect = get_aspect_flags(format);
}
// TODO: Ctor that uses a provided memory heap
@ -719,6 +916,40 @@ namespace vk
return info.extent.depth;
}
// Returns the format field of this image's 'info' structure.
VkFormat format() const
{
return info.format;
}
// Returns the aspect mask cached in m_storage_aspect.
VkImageAspectFlags aspect() const
{
return m_storage_aspect;
}
// Saves the layout the image is currently in on the layout stack, then
// transitions the image to 'layout'. pop_layout() restores the saved layout.
void push_layout(command_buffer& cmd, VkImageLayout layout)
{
	const VkImageLayout saved_layout = current_layout;
	m_layout_stack.push(saved_layout);

	change_image_layout(cmd, this, layout);
}
// Transitions the image back to the layout most recently saved by push_layout().
// The layout stack must not be empty.
void pop_layout(command_buffer& cmd)
{
	verify(HERE), !m_layout_stack.empty();

	const VkImageLayout restored_layout = m_layout_stack.top();
	m_layout_stack.pop();

	change_image_layout(cmd, this, restored_layout);
}
// Transitions the image to 'new_layout' unless it is already in that layout.
// An actual transition is only permitted while no pushed layout is outstanding.
void change_layout(command_buffer& cmd, VkImageLayout new_layout)
{
	if (current_layout != new_layout)
	{
		verify(HERE), m_layout_stack.empty();
		change_image_layout(cmd, this, new_layout);
	}
}
private:
VkDevice m_device;
};
@ -851,7 +1082,9 @@ namespace vk
remap
);
const auto range = vk::get_image_subresource_range(0, 0, info.arrayLayers, info.mipLevels, get_aspect_flags(info.format) & mask);
const auto range = vk::get_image_subresource_range(0, 0, info.arrayLayers, info.mipLevels, aspect() & mask);
verify(HERE), range.aspectMask;
auto view = std::make_unique<vk::image_view>(*get_current_renderer(), this, real_mapping, range);
auto result = view.get();
@ -1110,197 +1343,6 @@ namespace vk
VkDevice m_device;
};
class command_pool
{
vk::render_device *owner = nullptr;
VkCommandPool pool = nullptr;
public:
command_pool() {}
~command_pool() {}
void create(vk::render_device &dev)
{
owner = &dev;
VkCommandPoolCreateInfo infos = {};
infos.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
CHECK_RESULT(vkCreateCommandPool(dev, &infos, nullptr, &pool));
}
void destroy()
{
if (!pool)
return;
vkDestroyCommandPool((*owner), pool, nullptr);
pool = nullptr;
}
vk::render_device& get_owner()
{
return (*owner);
}
operator VkCommandPool()
{
return pool;
}
};
/**
 * Wrapper around a primary VkCommandBuffer allocated from a vk::command_pool.
 *
 * Tracks whether the buffer is currently recording (is_open). When created with
 * auto_reset it also owns a fence (m_submit_fence) that is used for submissions
 * made without an explicit fence; begin() waits on that fence and resets the
 * buffer before it is recorded again.
 */
class command_buffer
{
private:
	bool is_open = false;      // Set by begin(), cleared by end()
	bool is_pending = false;   // Set by submit() when the internal fence was used, cleared by begin()
	VkFence m_submit_fence = VK_NULL_HANDLE;

protected:
	vk::command_pool *pool = nullptr;
	VkCommandBuffer commands = nullptr;

public:
	enum access_type_hint
	{
		flush_only, //Only to be submitted/opened/closed via command flush
		all //Auxiliary, can be submitted/opened/closed at any time
	}
	access_hint = flush_only;

	enum command_buffer_data_flag : u32
	{
		cb_has_occlusion_task = 1,
		cb_has_blit_transfer = 2,
		cb_has_dma_transfer = 4
	};
	u32 flags = 0;

public:
	command_buffer() {}
	~command_buffer() {}

	// Allocates one primary command buffer from 'cmd_pool'.
	// With auto_reset a private fence is also created for use by submit()/begin().
	void create(vk::command_pool &cmd_pool, bool auto_reset = false)
	{
		VkCommandBufferAllocateInfo infos = {};
		infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
		infos.commandBufferCount = 1;
		infos.commandPool = (VkCommandPool)cmd_pool;
		infos.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
		CHECK_RESULT(vkAllocateCommandBuffers(cmd_pool.get_owner(), &infos, &commands));

		if (auto_reset)
		{
			VkFenceCreateInfo info = {};
			info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
			CHECK_RESULT(vkCreateFence(cmd_pool.get_owner(), &info, nullptr, &m_submit_fence));
		}

		pool = &cmd_pool;
	}

	// Frees the command buffer and, if present, the private submit fence.
	void destroy()
	{
		vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands);

		if (m_submit_fence)
		{
			vkDestroyFence(pool->get_owner(), m_submit_fence, nullptr);
		}
	}

	vk::command_pool& get_command_pool() const
	{
		return *pool;
	}

	void clear_flags()
	{
		flags = 0;
	}

	void set_flag(command_buffer_data_flag flag)
	{
		flags |= flag;
	}

	operator VkCommandBuffer() const
	{
		return commands;
	}

	bool is_recording() const
	{
		return is_open;
	}

	// Puts the buffer into the recording state.
	// If the last submit used the private fence, this first waits for it and
	// resets both the fence and the command buffer. Does nothing more if the
	// buffer is already recording.
	void begin()
	{
		if (m_submit_fence && is_pending)
		{
			wait_for_fence(m_submit_fence);
			is_pending = false;

			CHECK_RESULT(vkResetFences(pool->get_owner(), 1, &m_submit_fence));
			CHECK_RESULT(vkResetCommandBuffer(commands, 0));
		}

		if (is_open)
			return;

		VkCommandBufferInheritanceInfo inheritance_info = {};
		inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;

		VkCommandBufferBeginInfo begin_infos = {};
		begin_infos.pInheritanceInfo = &inheritance_info;
		begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
		begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
		CHECK_RESULT(vkBeginCommandBuffer(commands, &begin_infos));
		is_open = true;
	}

	// Finishes recording. Logs an error and does nothing if the buffer is not recording.
	void end()
	{
		if (!is_open)
		{
			LOG_ERROR(RSX, "commandbuffer->end was called but commandbuffer is not in a recording state");
			return;
		}

		CHECK_RESULT(vkEndCommandBuffer(commands));
		is_open = false;
	}

	// Submits the buffer to 'queue', waiting on 'semaphores' at 'pipeline_stage_flags'.
	// If 'fence' is VK_NULL_HANDLE the private fence (when one exists) is signalled
	// instead and the buffer is marked pending. Logs an error and returns without
	// submitting if the buffer is still recording. Clears 'flags' after the submit.
	void submit(VkQueue queue, const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
	{
		if (is_open)
		{
			LOG_ERROR(RSX, "commandbuffer->submit was called whilst the command buffer is in a recording state");
			return;
		}

		if (fence == VK_NULL_HANDLE)
		{
			fence = m_submit_fence;
			is_pending = (fence != VK_NULL_HANDLE);
		}

		// VkSubmitInfo reads one destination stage mask per wait semaphore, so the
		// single mask is replicated; pointing at the lone parameter would be read
		// out of bounds whenever more than one semaphore is passed.
		const std::vector<VkPipelineStageFlags> wait_stages(semaphores.size(), pipeline_stage_flags);

		VkSubmitInfo infos = {};
		infos.commandBufferCount = 1;
		infos.pCommandBuffers = &commands;
		infos.pWaitDstStageMask = wait_stages.data();
		infos.pWaitSemaphores = semaphores.data();
		infos.waitSemaphoreCount = static_cast<uint32_t>(semaphores.size());
		infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;

		acquire_global_submit_lock();
		CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence));
		release_global_submit_lock();

		clear_flags();
	}
};
class swapchain_image_WSI
{
VkImageView view = nullptr;

View File

@ -498,25 +498,44 @@ namespace vk
void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector<copy_region_descriptor>& sections_to_transfer) const
{
const auto dst_aspect = dst->aspect();
const auto dst_bpp = vk::get_format_texel_width(dst->format());
for (const auto &section : sections_to_transfer)
{
if (!section.src)
continue;
VkImageAspectFlags dst_aspect = vk::get_aspect_flags(dst->info.format);
VkImageAspectFlags src_aspect = vk::get_aspect_flags(section.src->info.format);
VkImageSubresourceRange src_range = { src_aspect, 0, 1, 0, 1 };
const auto src_bpp = vk::get_format_texel_width(section.src->format());
const bool typeless = section.src->aspect() != dst_aspect || src_bpp != dst_bpp;
if (section.src_w == section.dst_w && section.src_h == section.dst_h &&
section.xform == surface_transform::identity)
section.src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
auto src_image = section.src;
if (UNLIKELY(typeless))
{
VkImageLayout old_src_layout = section.src->current_layout;
VkImageCopy copy_rgn;
src_image = vk::get_typeless_helper(dst->info.format, section.src_x + section.src_w, section.src_y + section.src_h);
src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
const u16 convert_w = u16(section.src_w * dst_bpp) / src_bpp;
const areai src_rect = coordi{{ section.src_x, section.src_y }, { convert_w, section.src_h }};
const areai dst_rect = coordi{{ section.src_x, section.src_y }, { section.src_w, section.src_h }};
vk::copy_image_typeless(cmd, section.src, src_image, src_rect, dst_rect, 1, section.src->aspect(), dst_aspect);
src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
}
verify(HERE), src_image->current_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
// Final aspect mask of the 'final' transfer source
const auto new_src_aspect = src_image->aspect();
if (LIKELY(section.src_w == section.dst_w && section.src_h == section.dst_h && section.xform == surface_transform::identity))
{
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { section.src_x, section.src_y, 0 };
copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 };
copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.dstSubresource = { dst_aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { new_src_aspect, 0, 0, 1 };
copy_rgn.extent = { section.src_w, section.src_h, 1 };
if (dst->info.imageType == VK_IMAGE_TYPE_3D)
@ -528,77 +547,79 @@ namespace vk
copy_rgn.dstSubresource.baseArrayLayer = section.dst_z;
}
vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range);
vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, dst->value, dst->current_layout, 1, &copy_rgn);
vk::change_image_layout(cmd, section.src, old_src_layout, src_range);
vkCmdCopyImage(cmd, src_image->value, src_image->current_layout, dst->value, dst->current_layout, 1, &copy_rgn);
}
else
{
verify(HERE), section.dst_z == 0;
u16 dst_x = section.dst_x, dst_y = section.dst_y;
auto xform = section.xform;
vk::image* _dst;
if (LIKELY(section.src->info.format == dst->info.format))
if (LIKELY(src_image->info.format == dst->info.format))
{
_dst = dst;
}
else
{
_dst = vk::get_typeless_helper(section.src->info.format, dst->width(), dst->height() * 2);
vk::change_image_layout(cmd, _dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, src_range);
verify(HERE), !typeless;
_dst = vk::get_typeless_helper(src_image->info.format, dst->width(), dst->height() * 2);
_dst->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
if (section.xform == surface_transform::identity)
{
vk::copy_scaled_image(cmd, section.src->value, _dst->value, section.src->current_layout, _dst->current_layout,
vk::copy_scaled_image(cmd, src_image->value, _dst->value, section.src->current_layout, _dst->current_layout,
coordi{ { section.src_x, section.src_y }, { section.src_w, section.src_h } },
coordi{ { section.dst_x, section.dst_y }, { section.dst_w, section.dst_h } },
1, src_aspect, section.src->info.format == _dst->info.format,
VK_FILTER_NEAREST, section.src->info.format, _dst->info.format);
1, src_image->aspect(), src_image->info.format == _dst->info.format,
VK_FILTER_NEAREST, src_image->info.format, _dst->info.format);
}
else if (section.xform == surface_transform::argb_to_bgra)
{
VkImageLayout old_src_layout = section.src->current_layout;
VkBufferImageCopy copy{};
copy.imageExtent = { section.src_w, section.src_h, 1 };
copy.imageOffset = { section.src_x, section.src_y, 0 };
copy.imageSubresource = { src_aspect, 0, 0, 1 };
copy.imageSubresource = { src_image->aspect(), 0, 0, 1 };
auto scratch_buf = vk::get_scratch_buffer();
vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range);
vkCmdCopyImageToBuffer(cmd, section.src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, scratch_buf->value, 1, &copy);
vkCmdCopyImageToBuffer(cmd, src_image->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, scratch_buf->value, 1, &copy);
const auto length = section.src->width() * section.src->width() * 4;
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
const auto mem_length = section.src_w * section.src_h * dst_bpp;
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
auto shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_32>();
shuffle_kernel->run(cmd, scratch_buf, length);
shuffle_kernel->run(cmd, scratch_buf, mem_length);
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
auto tmp = vk::get_typeless_helper(section.src->info.format, section.dst_x + section.dst_w, section.dst_y + section.dst_h);
vk::change_image_layout(cmd, tmp, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { src_aspect, 0, 1, 0, 1 });
copy.imageOffset = { 0, 0, 0 };
auto tmp = vk::get_typeless_helper(src_image->info.format, section.dst_x + section.dst_w, section.dst_y + section.dst_h);
tmp->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
copy.imageOffset = { 0, 0, 0 };
vkCmdCopyBufferToImage(cmd, scratch_buf->value, tmp->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);
if (UNLIKELY(tmp == _dst))
dst_x = 0;
dst_y = 0;
if (section.src_w != section.dst_w || section.src_h != section.dst_h)
{
dst_x = 0;
dst_y = section.src_h;
// Optionally scale if needed
if (UNLIKELY(tmp == _dst))
{
dst_y = section.src_h;
}
vk::copy_scaled_image(cmd, tmp->value, _dst->value, tmp->current_layout, _dst->current_layout,
areai{ 0, 0, section.src_w, (s32)section.src_h },
coordi{ { dst_x, dst_y }, { section.dst_w, section.dst_h } },
1, new_src_aspect, tmp->info.format == _dst->info.format,
VK_FILTER_NEAREST, tmp->info.format, _dst->info.format);
}
vk::copy_scaled_image(cmd, tmp->value, _dst->value, tmp->current_layout, _dst->current_layout,
areai{ 0, 0, (s32)section.src_w, (s32)section.src_h },
coordi{ {dst_x, dst_y}, {section.dst_w, section.dst_h} },
1, src_aspect, section.src->info.format == _dst->info.format,
VK_FILTER_NEAREST, tmp->info.format, _dst->info.format);
vk::change_image_layout(cmd, section.src, old_src_layout, src_range);
}
else
{
@ -608,18 +629,19 @@ namespace vk
if (UNLIKELY(_dst != dst))
{
// Casting comes after the scaling!
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { s32(dst_x), s32(dst_y), 0 };
copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 };
copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.dstSubresource = { dst_aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { _dst->aspect(), 0, 0, 1 };
copy_rgn.extent = { section.dst_w, section.dst_h, 1 };
vk::change_image_layout(cmd, _dst, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range);
_dst->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vkCmdCopyImage(cmd, _dst->value, _dst->current_layout, dst->value, dst->current_layout, 1, &copy_rgn);
}
}
section.src->pop_layout(cmd);
}
}
@ -664,24 +686,9 @@ namespace vk
std::unique_ptr<vk::image> image;
std::unique_ptr<vk::image_view> view;
VkImageAspectFlags aspect;
VkImageCreateFlags image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE) ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0;
VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format);
if (source)
{
aspect = vk::get_aspect_flags(source->info.format);
if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT ||
vk::get_format_texel_width(dst_format) != vk::get_format_texel_width(source->info.format))
{
//HACK! Should use typeless transfer
dst_format = source->info.format;
}
}
else
{
aspect = vk::get_aspect_flags(dst_format);
}
VkImageAspectFlags aspect = vk::get_aspect_flags(dst_format);
image.reset(new vk::viewable_image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
image_type,
@ -714,22 +721,17 @@ namespace vk
if (copy)
{
VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 };
VkImageLayout old_src_layout = source->current_layout;
std::vector<copy_region_descriptor> region =
{{
source,
surface_transform::identity,
x, y, 0, 0, 0,
w, h, w, h
}};
vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vk::change_image_layout(cmd, source, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { (s32)x, (s32)y, 0 };
copy_rgn.dstOffset = { (s32)0, (s32)0, 0 };
copy_rgn.dstSubresource = { aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { aspect, 0, 0, 1 };
copy_rgn.extent = { w, h, 1 };
vkCmdCopyImage(cmd, source->value, source->current_layout, image->value, image->current_layout, 1, &copy_rgn);
vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
vk::change_image_layout(cmd, source, old_src_layout, subresource_range);
vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
copy_transfer_regions_impl(cmd, image.get(), region);
vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
}
const u32 resource_memory = w * h * 4; //Rough approximate