1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-23 11:13:19 +01:00

rsx: Fixups for data cast operations via typeless transfer

This commit is contained in:
kd-11 2019-04-05 14:39:43 +03:00 committed by kd-11
parent f04a0a2bb6
commit a5ed30a8c0
9 changed files with 207 additions and 86 deletions

View File

@ -627,7 +627,8 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
else
{
// Mem cast, generate typeless xfer info
if (src_bpp != dst_bpp || aspect() != src_texture->aspect())
if (!formats_are_bitcast_compatible((GLenum)get_internal_format(), (GLenum)src_texture->get_internal_format()) ||
aspect() != src_texture->aspect())
{
typeless_info.src_is_typeless = true;
typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage;

View File

@ -615,6 +615,97 @@ namespace gl
fill_texture(type, mipmaps, gcm_format, width, height, depth, subresources_layout, is_swizzled, gl_format, gl_type, data_upload_buf);
}
// Maps a GL sized internal format to the texel width (in bytes) used when
// comparing formats for typeless transfers.
// Throws via fmt::throw_exception for any format not listed below.
u32 get_format_texel_width(GLenum format)
{
	switch (format)
	{
	// Single byte per texel
	case GL_R8:
		return 1;

	// Two bytes per texel (includes 16-bit depth)
	case GL_R16:
	case GL_RG8:
	case GL_RGB565:
	case GL_DEPTH_COMPONENT16:
		return 2;

	// Four bytes per texel; the S3TC formats and both depth-stencil
	// formats are also reported as 4 here
	case GL_R32F:
	case GL_RG16:
	case GL_RG16F:
	case GL_RGBA8:
	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
	case GL_DEPTH24_STENCIL8:
	case GL_DEPTH32F_STENCIL8:
		return 4;

	// Wide floating point formats
	case GL_RGBA16F:
		return 8;
	case GL_RGBA32F:
		return 16;

	default:
		fmt::throw_exception("Unexpected internal format 0x%X" HERE, (u32)format);
	}
}
// Describes the transform associated with a GL sized internal format.
// Returns { needs_transform, element_size }:
//   first  - true when the format is not a plain memcpy (formats_are_bitcast_compatible
//            describes this as a byteswap on download/upload)
//   second - presumably the element size in bytes that the transform works on; TODO confirm against callers
// Throws via fmt::throw_exception for any format not listed below.
std::pair<bool, u32> get_format_convert_flags(GLenum format)
{
	switch (format)
	{
	// No transform needed
	case GL_R8:
	case GL_RG8:
	case GL_RGBA8:
		return { false, 1 };
	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
		return { false, 4 };

	// Transform on 2-byte elements (color and 16-bit depth)
	case GL_R16:
	case GL_RG16:
	case GL_RG16F:
	case GL_RGB565:
	case GL_RGBA16F:
	case GL_DEPTH_COMPONENT16:
		return { true, 2 };

	// Transform on 4-byte elements (color and depth-stencil)
	case GL_R32F:
	case GL_RGBA32F:
	case GL_DEPTH24_STENCIL8:
	case GL_DEPTH32F_STENCIL8:
		return { true, 4 };

	default:
		fmt::throw_exception("Unexpected internal format 0x%X" HERE, (u32)format);
	}
}
// Decides whether data of format1 can be reinterpreted as format2 without
// going through a typeless conversion pass.
//
// Two formats are considered compatible when:
//   1. Their texel sizes match, and
//   2. Neither format needs a transform (basic memcpy), or
//   3. Both need the same transform (e.g RG16_UNORM and RG16_SFLOAT are both
//      downloaded and uploaded with a 2-byte byteswap)
bool formats_are_bitcast_compatible(GLenum format1, GLenum format2)
{
	// Fast path: a format is always compatible with itself
	if (LIKELY(format1 == format2))
	{
		return true;
	}

	// Rule 1: mismatched texel sizes can never be bitcast
	const bool same_texel_size = (get_format_texel_width(format1) == get_format_texel_width(format2));
	if (!same_texel_size)
	{
		return false;
	}

	const auto flags_1 = get_format_convert_flags(format1);
	const auto flags_2 = get_format_convert_flags(format2);

	// One side needs a transform and the other does not
	if (flags_1.first != flags_2.first)
	{
		return false;
	}

	// Rule 2: neither side is transformed, element size is irrelevant
	if (!flags_1.first)
	{
		return true;
	}

	// Rule 3: both sides are transformed, the element size must agree
	return flags_1.second == flags_2.second;
}
void copy_typeless(texture * dst, const texture * src)
{
GLsizeiptr src_mem = src->width() * src->height();

View File

@ -21,6 +21,7 @@ namespace gl
viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type);
bool formats_are_bitcast_compatible(GLenum format1, GLenum format2);
void copy_typeless(texture* dst, const texture* src);
/**
* is_swizzled - determines whether input bytes are in morton order

View File

@ -596,9 +596,10 @@ namespace gl
u16 x, u16 y, u16 width, u16 height, const texture_channel_remap_t& remap, bool copy)
{
if (sized_internal_fmt == GL_NONE)
{
sized_internal_fmt = gl::get_sized_internal_format(gcm_format);
}
const auto ifmt = static_cast<gl::texture::internal_format>(sized_internal_fmt);
std::unique_ptr<gl::texture> dst = std::make_unique<gl::viewable_image>(dst_type, width, height, 1, 1, sized_internal_fmt);
if (copy)
@ -615,15 +616,9 @@ namespace gl
}
std::array<GLenum, 4> swizzle;
if (!src || (GLenum)ifmt != sized_internal_fmt)
if (!src || (GLenum)src->get_internal_format() != sized_internal_fmt)
{
if (src)
{
//Format mismatch
warn_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt);
}
//Apply base component map onto the new texture if a data cast has been done
// Apply base component map onto the new texture if a data cast has been done
swizzle = get_component_mapping(gcm_format, rsx::texture_create_flags::default_component_order);
}
else
@ -685,8 +680,8 @@ namespace gl
if (!slice.src)
continue;
const auto src_bpp = slice.src->pitch() / slice.src->width();
const bool typeless = dst_bpp != src_bpp || dst_aspect != slice.src->aspect();
const bool typeless = dst_aspect != slice.src->aspect() ||
!formats_are_bitcast_compatible((GLenum)slice.src->get_internal_format(), (GLenum)dst_image->get_internal_format());
auto src_image = slice.src;
auto src_x = slice.src_x;
@ -694,6 +689,7 @@ namespace gl
if (UNLIKELY(typeless))
{
const auto src_bpp = slice.src->pitch() / slice.src->width();
const u16 convert_w = u16(slice.src->width() * src_bpp) / dst_bpp;
tmp = std::make_unique<texture>(GL_TEXTURE_2D, convert_w, slice.src->height(), 1, 1, (GLenum)dst_image->get_internal_format());
@ -842,10 +838,17 @@ namespace gl
return result;
}
void update_image_contents(gl::command_context&, gl::texture_view* dst, gl::texture* src, u16 width, u16 height) override
void update_image_contents(gl::command_context& cmd, gl::texture_view* dst, gl::texture* src, u16 width, u16 height) override
{
glCopyImageSubData(src->id(), GL_TEXTURE_2D, 0, 0, 0, 0,
dst->image()->id(), GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
std::vector<copy_region_descriptor> region =
{{
src,
surface_transform::identity,
0, 0, 0, 0, 0,
width, height, width, height
}};
copy_transfer_regions_impl(cmd, dst->image(), region);
}
cached_texture_section* create_new_texture(gl::command_context&, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch,

View File

@ -368,4 +368,82 @@ namespace vk
fmt::throw_exception("Unexpected vkFormat 0x%X", (u32)format);
}
// Describes the transform associated with a Vulkan format.
// Returns { needs_transform, element_size }:
//   first  - true when the format is not a plain memcpy (formats_are_bitcast_compatible
//            describes this as a byteswap on download/upload)
//   second - presumably the element size in bytes that the transform works on; TODO confirm against callers
// Formats not handled by the switch fall through to the throw at the end.
std::pair<bool, u32> get_format_convert_flags(VkFormat format)
{
	switch (format)
	{
	// No transform: 8-bit per channel formats and DXT blocks
	case VK_FORMAT_R8_UNORM:
	case VK_FORMAT_R8G8_UNORM:
	case VK_FORMAT_R8G8_SNORM:
	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
	case VK_FORMAT_R8G8B8A8_UNORM:
	case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
	case VK_FORMAT_BC2_UNORM_BLOCK:
	case VK_FORMAT_BC3_UNORM_BLOCK:
	case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
	case VK_FORMAT_BC2_SRGB_BLOCK:
	case VK_FORMAT_BC3_SRGB_BLOCK:
		return { false, 1 };

	// Transform on 2-byte elements: 16-bit color, packed 16-bit and D16
	case VK_FORMAT_R16_UINT:
	case VK_FORMAT_R16_SFLOAT:
	case VK_FORMAT_R16_UNORM:
	case VK_FORMAT_R16G16_UNORM:
	case VK_FORMAT_R16G16_SFLOAT:
	case VK_FORMAT_R16G16B16A16_SFLOAT:
	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
	case VK_FORMAT_R5G6B5_UNORM_PACK16:
	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
	case VK_FORMAT_D16_UNORM:
		return { true, 2 };

	// Transform on 4-byte elements: BGRA8, 32-bit color and depth-stencil
	case VK_FORMAT_B8G8R8A8_UNORM:
	case VK_FORMAT_B8G8R8A8_SRGB:
	case VK_FORMAT_R32_UINT:
	case VK_FORMAT_R32_SFLOAT:
	case VK_FORMAT_R32G32B32A32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
	case VK_FORMAT_D24_UNORM_S8_UINT:
		return { true, 4 };
	}

	fmt::throw_exception("Unknown vkFormat 0x%x" HERE, (u32)format);
}
// Decides whether data of format1 can be reinterpreted as format2 without
// going through a typeless conversion pass.
//
// Two formats are considered compatible when:
//   1. Their texel sizes match, and
//   2. Neither format needs a transform (basic memcpy), or
//   3. Both need the same transform (e.g RG16_UNORM and RG16_SFLOAT are both
//      downloaded and uploaded with a 2-byte byteswap)
bool formats_are_bitcast_compatible(VkFormat format1, VkFormat format2)
{
	// Fast path: a format is always compatible with itself
	if (LIKELY(format1 == format2))
	{
		return true;
	}

	// Rule 1: mismatched texel sizes can never be bitcast
	const bool same_texel_size = (get_format_texel_width(format1) == get_format_texel_width(format2));
	if (!same_texel_size)
	{
		return false;
	}

	const auto flags_1 = get_format_convert_flags(format1);
	const auto flags_2 = get_format_convert_flags(format2);

	// One side needs a transform and the other does not
	if (flags_1.first != flags_2.first)
	{
		return false;
	}

	// Rule 2: neither side is transformed, element size is irrelevant
	if (!flags_1.first)
	{
		return true;
	}

	// Rule 3: both sides are transformed, the element size must agree
	return flags_1.second == flags_2.second;
}
}

View File

@ -11,6 +11,8 @@ namespace vk
VkFormat get_compatible_srgb_format(VkFormat rgb_format);
u8 get_format_texel_width(VkFormat format);
std::pair<u8, u8> get_format_element_size(VkFormat format);
std::pair<bool, u32> get_format_convert_flags(VkFormat format);
bool formats_are_bitcast_compatible(VkFormat format1, VkFormat format2);
std::tuple<VkFilter, VkSamplerMipmapMode> get_min_filter_and_mip(rsx::texture_minify_filter min_filter);
VkFilter get_mag_filter(rsx::texture_magnify_filter mag_filter);

View File

@ -118,7 +118,8 @@ namespace vk
}
else
{
if (src_bpp != dst_bpp || src_texture->attachment_aspect_flag != attachment_aspect_flag)
if (!formats_are_bitcast_compatible(format(), src_texture->format()) ||
src_texture->attachment_aspect_flag != attachment_aspect_flag)
{
typeless_info.src_is_typeless = true;
typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage;

View File

@ -56,56 +56,6 @@ namespace vk
}
}
// Describes the transform associated with a Vulkan format.
// Returns { needs_transform, element_size }:
//   first  - true when the format is not a plain memcpy
//   second - presumably the element size in bytes that the transform works on; TODO confirm against callers
// Formats not handled by the switch fall through to the throw at the end.
std::pair<bool, u32> get_format_convert_flags(VkFormat format)
{
	switch (format)
	{
	// No transform: 8-bit per channel formats and DXT blocks
	case VK_FORMAT_R8_UNORM:
	case VK_FORMAT_R8G8_UNORM:
	case VK_FORMAT_R8G8_SNORM:
	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
	case VK_FORMAT_R8G8B8A8_UNORM:
	case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
	case VK_FORMAT_BC2_UNORM_BLOCK:
	case VK_FORMAT_BC3_UNORM_BLOCK:
	case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
	case VK_FORMAT_BC2_SRGB_BLOCK:
	case VK_FORMAT_BC3_SRGB_BLOCK:
		return { false, 1 };

	// Transform on 2-byte elements: 16-bit color, packed 16-bit and D16
	case VK_FORMAT_R16_UINT:
	case VK_FORMAT_R16_SFLOAT:
	case VK_FORMAT_R16_UNORM:
	case VK_FORMAT_R16G16_UNORM:
	case VK_FORMAT_R16G16_SFLOAT:
	case VK_FORMAT_R16G16B16A16_SFLOAT:
	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
	case VK_FORMAT_R5G6B5_UNORM_PACK16:
	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
	case VK_FORMAT_D16_UNORM:
		return { true, 2 };

	// Transform on 4-byte elements: BGRA8, 32-bit color and depth-stencil
	case VK_FORMAT_B8G8R8A8_UNORM:
	case VK_FORMAT_B8G8R8A8_SRGB:
	case VK_FORMAT_R32_UINT:
	case VK_FORMAT_R32_SFLOAT:
	case VK_FORMAT_R32G32B32A32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
	case VK_FORMAT_D24_UNORM_S8_UINT:
		return { true, 4 };
	}

	fmt::throw_exception("Unknown vkFormat 0x%x" HERE, (u32)format);
}
void copy_image_to_buffer(VkCommandBuffer cmd, const vk::image* src, const vk::buffer* dst, const VkBufferImageCopy& region)
{
switch (src->format())

View File

@ -506,8 +506,8 @@ namespace vk
if (!section.src)
continue;
const auto src_bpp = vk::get_format_texel_width(section.src->format());
const bool typeless = section.src->aspect() != dst_aspect || src_bpp != dst_bpp;
const bool typeless = section.src->aspect() != dst_aspect ||
!formats_are_bitcast_compatible(dst->format(), section.src->format());
section.src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
@ -517,6 +517,7 @@ namespace vk
src_image = vk::get_typeless_helper(dst->info.format, section.src_x + section.src_w, section.src_y + section.src_h);
src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
const auto src_bpp = vk::get_format_texel_width(section.src->format());
const u16 convert_w = u16(section.src_w * dst_bpp) / src_bpp;
const areai src_rect = coordi{{ section.src_x, section.src_y }, { convert_w, section.src_h }};
const areai dst_rect = coordi{{ section.src_x, section.src_y }, { section.src_w, section.src_h }};
@ -874,25 +875,18 @@ namespace vk
void update_image_contents(vk::command_buffer& cmd, vk::image_view* dst_view, vk::image* src, u16 width, u16 height) override
{
VkImage dst = dst_view->info.image;
VkImageAspectFlags aspect = vk::get_aspect_flags(src->info.format);
VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 };
vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
std::vector<copy_region_descriptor> region =
{{
src,
surface_transform::identity,
0, 0, 0, 0, 0,
width, height, width, height
}};
VkImageLayout old_src_layout = src->current_layout;
vk::change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { 0, 0, 0 };
copy_rgn.dstOffset = { 0, 0, 0 };
copy_rgn.dstSubresource = { aspect & ~(VK_IMAGE_ASPECT_DEPTH_BIT), 0, 0, 1 };
copy_rgn.srcSubresource = { aspect & ~(VK_IMAGE_ASPECT_DEPTH_BIT), 0, 0, 1 };
copy_rgn.extent = { width, height, 1 };
vkCmdCopyImage(cmd, src->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_rgn);
vk::change_image_layout(cmd, src, old_src_layout, subresource_range);
vk::change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
auto dst = dst_view->image();
dst->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
copy_transfer_regions_impl(cmd, dst, region);
dst->pop_layout(cmd);
}
cached_texture_section* create_new_texture(vk::command_buffer& cmd, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch,