1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-26 04:32:35 +01:00

rsx: Fix decompression of RB_RG textures.

- Removes several subtle hacks that hid the real issue.
  A compressed texture has more than one texel per 'block'.
This commit is contained in:
kd-11 2021-04-10 23:11:26 +03:00 committed by kd-11
parent e4059dfe6a
commit 06dc99ab85
4 changed files with 129 additions and 84 deletions

View File

@ -200,33 +200,43 @@ struct copy_unmodified_block_vtc
struct copy_decoded_rb_rg_block
{
template<typename T, typename U>
static void copy_mipmap_level(gsl::span<T> dst, gsl::span<const U> src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block)
template <bool SwapWords = false, typename T>
static void copy_mipmap_level(gsl::span<u32> dst, gsl::span<const T> src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block)
{
static_assert(sizeof(T) == 4, "Type size doesn't match.");
static_assert(sizeof(U) == 2, "Type size doesn't match.");
u32 src_offset = 0;
u32 dst_offset = 0;
// Temporaries
u32 red0, red1, blue, green;
for (int row = 0; row < row_count * depth; ++row)
{
for (int col = 0; col < width_in_block; col += 2)
for (int col = 0; col < width_in_block; ++col)
{
// Process 2 pixels at a time and write in BGRA format
const u16 src0 = src[src_offset + col]; // R,B
const u16 src1 = src[src_offset + col + 1]; // R,G
const u32 blue = (src0 & 0xFF00) >> 8;
const u32 green = (src1 & 0xFF00);
const u32 data0 = blue | green | (src0 & 0xFF) << 16 | 0xFF << 24;
const u32 data1 = blue | green | (src1 & 0xFF) << 16 | 0xFF << 24;
// Decompress one block to 2 pixels at a time and write output in BGRA format
const auto data = src[src_offset + col];
dst[dst_offset + col] = data0;
if (!(width_in_block & 0x1))
if constexpr (SwapWords)
{
// If size is even, fill in the second pixel
dst[dst_offset + col + 1] = data1;
// BR_GR
blue = (data >> 0) & 0xFF;
red0 = (data >> 8) & 0xFF;
green = (data >> 16) & 0XFF;
red1 = (data >> 24) & 0xFF;
}
else
{
// RB_RG
red0 = (data >> 0) & 0xFF;
blue = (data >> 8) & 0xFF;
red1 = (data >> 16) & 0XFF;
green = (data >> 24) & 0xFF;
}
dst[dst_offset + (col * 2)] = blue | (green << 8) | (red0 << 16) | (0xFF << 24);
dst[dst_offset + (col * 2 + 1)] = blue | (green << 8) | (red1 << 16) | (0xFF << 24);
}
src_offset += src_pitch_in_block;
@ -394,11 +404,17 @@ namespace
current_subresource_layout.width_in_block = miplevel_width_in_texel;
current_subresource_layout.height_in_block = miplevel_height_in_texel;
}
else
else if constexpr (block_edge_in_texel == 4)
{
current_subresource_layout.width_in_block = utils::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
current_subresource_layout.height_in_block = utils::aligned_div(miplevel_height_in_texel, block_edge_in_texel);
}
else
{
// Only the width is compressed
current_subresource_layout.width_in_block = utils::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
current_subresource_layout.height_in_block = miplevel_height_in_texel;
}
if (padded_row)
{
@ -520,6 +536,7 @@ std::vector<rsx::subresource_layout> get_subresources_layout_impl(const RsxTextu
return get_subresources_layout_impl<1, u8>(pixels, w, h, depth, layer, texture.get_exact_mipmap_count(), pitch, !is_swizzled, has_border);
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
return get_subresources_layout_impl<2, u32>(pixels, w, h, depth, layer, texture.get_exact_mipmap_count(), pitch, !is_swizzled, has_border);
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
case CELL_GCM_TEXTURE_DEPTH16:
@ -610,13 +627,13 @@ namespace rsx
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
{
copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
copy_decoded_rb_rg_block::copy_mipmap_level<true>(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block);
break;
}
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
{
copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u16>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block);
break;
}
@ -809,6 +826,46 @@ namespace rsx
return result;
}
// Reports whether a GCM texture format is kept block-compressed on the host device.
// Only the DXT family qualifies; every other known format yields false.
// Throws (via fmt::throw_exception) when the format is not one of the listed values.
bool is_compressed_host_format(u32 texture_format)
{
	switch (texture_format)
	{
	// Formats that remain compressed on the host device
	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
		return true;

	// Compressed on the RSX/GCM side only.
	// These are decompressed in software before being uploaded
	case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
	case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
	case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
	case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:

	// Remaining known formats
	case CELL_GCM_TEXTURE_B8:
	case CELL_GCM_TEXTURE_A1R5G5B5:
	case CELL_GCM_TEXTURE_A4R4G4B4:
	case CELL_GCM_TEXTURE_R5G6B5:
	case CELL_GCM_TEXTURE_A8R8G8B8:
	case CELL_GCM_TEXTURE_G8B8:
	case CELL_GCM_TEXTURE_R6G5B5:
	case CELL_GCM_TEXTURE_DEPTH24_D8:
	case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
	case CELL_GCM_TEXTURE_DEPTH16:
	case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
	case CELL_GCM_TEXTURE_X16:
	case CELL_GCM_TEXTURE_Y16_X16:
	case CELL_GCM_TEXTURE_R5G5B5A1:
	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
	case CELL_GCM_TEXTURE_X32_FLOAT:
	case CELL_GCM_TEXTURE_D1R5G5B5:
	case CELL_GCM_TEXTURE_D8R8G8B8:
	case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
		return false;
	}

	// None of the labels matched
	fmt::throw_exception("Unknown format 0x%x", texture_format);
}
/**
* A texture is stored as an array of blocks, where a block is a pixel for standard texture
* but is a structure containing several pixels for compressed format
@ -875,9 +932,9 @@ namespace rsx
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
case CELL_GCM_TEXTURE_X32_FLOAT:
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return 1;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 1;
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 2;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return 4;

View File

@ -161,6 +161,7 @@ namespace rsx
u8 get_format_block_size_in_bytes(rsx::surface_color_format format);
u8 get_format_block_size_in_bytes(rsx::surface_depth_format2 format);
bool is_compressed_host_format(u32 format); // Returns true for host-compressed formats (DXT)
u8 get_format_sample_count(rsx::surface_antialiasing antialias);
u32 get_max_depth_value(rsx::surface_depth_format2 format);
bool is_depth_stencil_format(rsx::surface_depth_format2 format);

View File

@ -358,43 +358,6 @@ namespace gl
set_parameteri(GL_TEXTURE_COMPARE_MODE, GL_NONE);
}
// Classifies a GCM texture format: true for the DXT formats, false for every other
// listed format. Throws (via fmt::throw_exception) for values not listed below.
bool is_compressed_format(u32 texture_format)
{
	switch (texture_format)
	{
	// DXT family
	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
		return true;

	// GCM "COMPRESSED_*" formats that are not reported as compressed here
	case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
	case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
	case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
	case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:

	// Remaining known formats
	case CELL_GCM_TEXTURE_B8:
	case CELL_GCM_TEXTURE_A1R5G5B5:
	case CELL_GCM_TEXTURE_A4R4G4B4:
	case CELL_GCM_TEXTURE_R5G6B5:
	case CELL_GCM_TEXTURE_A8R8G8B8:
	case CELL_GCM_TEXTURE_G8B8:
	case CELL_GCM_TEXTURE_R6G5B5:
	case CELL_GCM_TEXTURE_DEPTH24_D8:
	case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
	case CELL_GCM_TEXTURE_DEPTH16:
	case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
	case CELL_GCM_TEXTURE_X16:
	case CELL_GCM_TEXTURE_Y16_X16:
	case CELL_GCM_TEXTURE_R5G5B5A1:
	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
	case CELL_GCM_TEXTURE_X32_FLOAT:
	case CELL_GCM_TEXTURE_D1R5G5B5:
	case CELL_GCM_TEXTURE_D8R8G8B8:
	case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
		return false;
	}

	// None of the labels matched
	fmt::throw_exception("Unknown format 0x%x", texture_format);
}
std::array<GLenum, 4> get_swizzle_remap(u32 texture_format)
{
// NOTE: This must be in ARGB order in all forms below.
@ -610,7 +573,7 @@ namespace gl
gl::viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps,
rsx::texture_dimension_extended type)
{
if (is_compressed_format(gcm_format))
if (rsx::is_compressed_host_format(gcm_format))
{
//Compressed formats have a 4-byte alignment
//TODO: Verify that samplers are not affected by the padding
@ -634,7 +597,7 @@ namespace gl
pixel_unpack_settings unpack_settings;
unpack_settings.row_length(0).alignment(4);
if (is_compressed_format(format)) [[likely]]
if (rsx::is_compressed_host_format(format)) [[likely]]
{
caps.supports_vtc_decoding = gl::get_driver_caps().vendor_NVIDIA;

View File

@ -832,21 +832,61 @@ namespace vk
return *pcmd;
}
static const std::pair<u32, u32> calculate_upload_pitch(int format, u32 heap_align, vk::image* dst_image, const rsx::subresource_layout& layout)
{
u32 block_in_pixel = rsx::get_format_block_size_in_texel(format);
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
u32 row_pitch, upload_pitch_in_texel;
if (!heap_align) [[likely]]
{
if (!layout.border) [[likely]]
{
row_pitch = (layout.pitch_in_block * block_size_in_bytes);
}
else
{
// Skip the border texels if possible. Padding is undesirable for GPU deswizzle
row_pitch = (layout.width_in_block * block_size_in_bytes);
}
// We have row_pitch in source coordinates. But some formats have a software decode step which can affect this packing!
// For such formats, the packed pitch on src does not match packed pitch on dst
if (!rsx::is_compressed_host_format(format))
{
const auto host_texel_width = vk::get_format_texel_width(dst_image->format());
const auto host_packed_pitch = host_texel_width * dst_image->width();
row_pitch = std::max(row_pitch, host_packed_pitch);
upload_pitch_in_texel = row_pitch / host_texel_width;
}
else
{
upload_pitch_in_texel = std::max<u32>(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel);
}
}
else
{
row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, heap_align);
upload_pitch_in_texel = std::max<u32>(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel);
ensure(row_pitch == heap_align);
}
return { row_pitch, upload_pitch_in_texel };
}
void upload_image(const vk::command_buffer& cmd, vk::image* dst_image,
const std::vector<rsx::subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 /*mipmap_count*/,
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags)
{
const bool requires_depth_processing = (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || (format == CELL_GCM_TEXTURE_DEPTH16_FLOAT);
u32 block_in_pixel = rsx::get_format_block_size_in_texel(format);
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
rsx::texture_uploader_capabilities caps{ .alignment = heap_align };
rsx::texture_memory_info opt{};
bool check_caps = true;
vk::buffer* scratch_buf = nullptr;
u32 scratch_offset = 0;
u32 row_pitch, image_linear_size;
u32 image_linear_size;
vk::buffer* upload_buffer = nullptr;
usz offset_in_upload_buffer = 0;
@ -858,26 +898,10 @@ namespace vk
for (const rsx::subresource_layout &layout : subresource_layout)
{
if (!heap_align) [[likely]]
{
if (!layout.border) [[likely]]
{
row_pitch = (layout.pitch_in_block * block_size_in_bytes);
}
else
{
// Skip the border texels if possible. Padding is undesirable for GPU deswizzle
row_pitch = (layout.width_in_block * block_size_in_bytes);
}
caps.alignment = row_pitch;
}
else
{
row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, heap_align);
ensure(row_pitch == heap_align);
}
const auto [row_pitch, upload_pitch_in_texel] = calculate_upload_pitch(format, heap_align, dst_image, layout);
caps.alignment = row_pitch;
// Calculate estimated memory utilization for this subresource
image_linear_size = row_pitch * layout.height_in_block * layout.depth;
// Map with extra padding bytes in case of realignment
@ -908,7 +932,7 @@ namespace vk
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.baseArrayLayer = layout.layer;
copy_info.imageSubresource.mipLevel = layout.level;
copy_info.bufferRowLength = std::max<u32>(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel);
copy_info.bufferRowLength = upload_pitch_in_texel;
upload_buffer = upload_heap.heap.get();
@ -993,7 +1017,7 @@ namespace vk
upload_commands.back().second++;
}
copy_info.bufferRowLength = std::max<u32>(block_in_pixel * layout.pitch_in_block, layout.width_in_texel);
copy_info.bufferRowLength = upload_pitch_in_texel;
}
}