mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-26 04:32:35 +01:00
rsx: Fix decompression of RB_RG textures.
- Removes several subtle hacks that hid the real issue. A compressed texture has more than one texel per 'block'.
This commit is contained in:
parent
e4059dfe6a
commit
06dc99ab85
@ -200,33 +200,43 @@ struct copy_unmodified_block_vtc
|
||||
|
||||
struct copy_decoded_rb_rg_block
|
||||
{
|
||||
template<typename T, typename U>
|
||||
static void copy_mipmap_level(gsl::span<T> dst, gsl::span<const U> src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block)
|
||||
template <bool SwapWords = false, typename T>
|
||||
static void copy_mipmap_level(gsl::span<u32> dst, gsl::span<const T> src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block)
|
||||
{
|
||||
static_assert(sizeof(T) == 4, "Type size doesn't match.");
|
||||
static_assert(sizeof(U) == 2, "Type size doesn't match.");
|
||||
|
||||
u32 src_offset = 0;
|
||||
u32 dst_offset = 0;
|
||||
|
||||
// Temporaries
|
||||
u32 red0, red1, blue, green;
|
||||
|
||||
for (int row = 0; row < row_count * depth; ++row)
|
||||
{
|
||||
for (int col = 0; col < width_in_block; col += 2)
|
||||
for (int col = 0; col < width_in_block; ++col)
|
||||
{
|
||||
// Process 2 pixels at a time and write in BGRA format
|
||||
const u16 src0 = src[src_offset + col]; // R,B
|
||||
const u16 src1 = src[src_offset + col + 1]; // R,G
|
||||
const u32 blue = (src0 & 0xFF00) >> 8;
|
||||
const u32 green = (src1 & 0xFF00);
|
||||
const u32 data0 = blue | green | (src0 & 0xFF) << 16 | 0xFF << 24;
|
||||
const u32 data1 = blue | green | (src1 & 0xFF) << 16 | 0xFF << 24;
|
||||
// Decompress one block to 2 pixels at a time and write output in BGRA format
|
||||
const auto data = src[src_offset + col];
|
||||
|
||||
dst[dst_offset + col] = data0;
|
||||
if (!(width_in_block & 0x1))
|
||||
if constexpr (SwapWords)
|
||||
{
|
||||
// If size is even, fill in the second pixel
|
||||
dst[dst_offset + col + 1] = data1;
|
||||
// BR_GR
|
||||
blue = (data >> 0) & 0xFF;
|
||||
red0 = (data >> 8) & 0xFF;
|
||||
green = (data >> 16) & 0XFF;
|
||||
red1 = (data >> 24) & 0xFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
// RB_RG
|
||||
red0 = (data >> 0) & 0xFF;
|
||||
blue = (data >> 8) & 0xFF;
|
||||
red1 = (data >> 16) & 0XFF;
|
||||
green = (data >> 24) & 0xFF;
|
||||
}
|
||||
|
||||
dst[dst_offset + (col * 2)] = blue | (green << 8) | (red0 << 16) | (0xFF << 24);
|
||||
dst[dst_offset + (col * 2 + 1)] = blue | (green << 8) | (red1 << 16) | (0xFF << 24);
|
||||
}
|
||||
|
||||
src_offset += src_pitch_in_block;
|
||||
@ -394,11 +404,17 @@ namespace
|
||||
current_subresource_layout.width_in_block = miplevel_width_in_texel;
|
||||
current_subresource_layout.height_in_block = miplevel_height_in_texel;
|
||||
}
|
||||
else
|
||||
else if constexpr (block_edge_in_texel == 4)
|
||||
{
|
||||
current_subresource_layout.width_in_block = utils::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
|
||||
current_subresource_layout.height_in_block = utils::aligned_div(miplevel_height_in_texel, block_edge_in_texel);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Only the width is compressed
|
||||
current_subresource_layout.width_in_block = utils::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
|
||||
current_subresource_layout.height_in_block = miplevel_height_in_texel;
|
||||
}
|
||||
|
||||
if (padded_row)
|
||||
{
|
||||
@ -520,6 +536,7 @@ std::vector<rsx::subresource_layout> get_subresources_layout_impl(const RsxTextu
|
||||
return get_subresources_layout_impl<1, u8>(pixels, w, h, depth, layer, texture.get_exact_mipmap_count(), pitch, !is_swizzled, has_border);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
|
||||
return get_subresources_layout_impl<2, u32>(pixels, w, h, depth, layer, texture.get_exact_mipmap_count(), pitch, !is_swizzled, has_border);
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
|
||||
case CELL_GCM_TEXTURE_DEPTH16:
|
||||
@ -610,13 +627,13 @@ namespace rsx
|
||||
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
|
||||
{
|
||||
copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const be_t<u16>>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
|
||||
copy_decoded_rb_rg_block::copy_mipmap_level<true>(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block);
|
||||
break;
|
||||
}
|
||||
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
|
||||
{
|
||||
copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u16>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(w, caps.alignment), src_layout.pitch_in_block);
|
||||
copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround<u32>(dst_buffer), as_const_span<const u32>(src_layout.data), w, h, depth, get_row_pitch_in_block<u32>(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -809,6 +826,46 @@ namespace rsx
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Tells whether a GCM texture format is stored compressed on the host device.
 *
 * @param texture_format One of the CELL_GCM_TEXTURE_* format identifiers handled below.
 * @return true for the DXT formats, false for every other known format.
 *
 * An unrecognized format is reported through fmt::throw_exception.
 */
bool is_compressed_host_format(u32 texture_format)
{
	switch (texture_format)
	{
	// Formats that stay compressed on the host device
	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
		return true;

	// Formats that are not compressed at all
	case CELL_GCM_TEXTURE_B8:
	case CELL_GCM_TEXTURE_A1R5G5B5:
	case CELL_GCM_TEXTURE_A4R4G4B4:
	case CELL_GCM_TEXTURE_R5G6B5:
	case CELL_GCM_TEXTURE_A8R8G8B8:
	case CELL_GCM_TEXTURE_G8B8:
	case CELL_GCM_TEXTURE_R6G5B5:
	case CELL_GCM_TEXTURE_DEPTH24_D8:
	case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
	case CELL_GCM_TEXTURE_DEPTH16:
	case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
	case CELL_GCM_TEXTURE_X16:
	case CELL_GCM_TEXTURE_Y16_X16:
	case CELL_GCM_TEXTURE_R5G5B5A1:
	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
	case CELL_GCM_TEXTURE_X32_FLOAT:
	case CELL_GCM_TEXTURE_D1R5G5B5:
	case CELL_GCM_TEXTURE_D8R8G8B8:
	case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
		return false;

	// Compressed on the RSX/GCM side only: these are decompressed in software
	// before uploading, so the host never sees them as compressed data.
	case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
	case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
	case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
	case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
		return false;

	default:
		break;
	}

	// Anything else is not a format this function knows how to classify
	fmt::throw_exception("Unknown format 0x%x", texture_format);
}
|
||||
|
||||
/**
|
||||
* A texture is stored as an array of blocks, where a block is a pixel for standard texture
|
||||
* but is a structure containing several pixels for compressed format
|
||||
@ -875,9 +932,9 @@ namespace rsx
|
||||
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
|
||||
case CELL_GCM_TEXTURE_X32_FLOAT:
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return 1;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 1;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 2;
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
|
||||
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return 4;
|
||||
|
@ -161,6 +161,7 @@ namespace rsx
|
||||
u8 get_format_block_size_in_bytes(rsx::surface_color_format format);
|
||||
u8 get_format_block_size_in_bytes(rsx::surface_depth_format2 format);
|
||||
|
||||
bool is_compressed_host_format(u32 format); // Returns true for host-compressed formats (DXT)
|
||||
u8 get_format_sample_count(rsx::surface_antialiasing antialias);
|
||||
u32 get_max_depth_value(rsx::surface_depth_format2 format);
|
||||
bool is_depth_stencil_format(rsx::surface_depth_format2 format);
|
||||
|
@ -358,43 +358,6 @@ namespace gl
|
||||
set_parameteri(GL_TEXTURE_COMPARE_MODE, GL_NONE);
|
||||
}
|
||||
|
||||
/**
 * Classifies a GCM texture format as compressed (DXT) or not.
 *
 * @param texture_format One of the CELL_GCM_TEXTURE_* format identifiers handled below.
 * @return true only for DXT1, DXT23 and DXT45; false for all other known formats.
 *
 * An unrecognized format is reported through fmt::throw_exception.
 */
bool is_compressed_format(u32 texture_format)
{
	switch (texture_format)
	{
	// DXT family
	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
	{
		return true;
	}

	// Every other known format, including the HILO and RB_RG/BR_GR variants
	case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8:
	case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8:
	case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
	case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
	case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
	case CELL_GCM_TEXTURE_D8R8G8B8:
	case CELL_GCM_TEXTURE_D1R5G5B5:
	case CELL_GCM_TEXTURE_X32_FLOAT:
	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
	case CELL_GCM_TEXTURE_R5G5B5A1:
	case CELL_GCM_TEXTURE_Y16_X16:
	case CELL_GCM_TEXTURE_X16:
	case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
	case CELL_GCM_TEXTURE_DEPTH16:
	case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
	case CELL_GCM_TEXTURE_DEPTH24_D8:
	case CELL_GCM_TEXTURE_R6G5B5:
	case CELL_GCM_TEXTURE_G8B8:
	case CELL_GCM_TEXTURE_A8R8G8B8:
	case CELL_GCM_TEXTURE_R5G6B5:
	case CELL_GCM_TEXTURE_A4R4G4B4:
	case CELL_GCM_TEXTURE_A1R5G5B5:
	case CELL_GCM_TEXTURE_B8:
	{
		return false;
	}

	default:
	{
		break;
	}
	}

	// Format was not matched by any case above
	fmt::throw_exception("Unknown format 0x%x", texture_format);
}
|
||||
|
||||
std::array<GLenum, 4> get_swizzle_remap(u32 texture_format)
|
||||
{
|
||||
// NOTE: This must be in ARGB order in all forms below.
|
||||
@ -610,7 +573,7 @@ namespace gl
|
||||
gl::viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps,
|
||||
rsx::texture_dimension_extended type)
|
||||
{
|
||||
if (is_compressed_format(gcm_format))
|
||||
if (rsx::is_compressed_host_format(gcm_format))
|
||||
{
|
||||
//Compressed formats have a 4-byte alignment
|
||||
//TODO: Verify that samplers are not affected by the padding
|
||||
@ -634,7 +597,7 @@ namespace gl
|
||||
pixel_unpack_settings unpack_settings;
|
||||
unpack_settings.row_length(0).alignment(4);
|
||||
|
||||
if (is_compressed_format(format)) [[likely]]
|
||||
if (rsx::is_compressed_host_format(format)) [[likely]]
|
||||
{
|
||||
caps.supports_vtc_decoding = gl::get_driver_caps().vendor_NVIDIA;
|
||||
|
||||
|
@ -832,21 +832,61 @@ namespace vk
|
||||
return *pcmd;
|
||||
}
|
||||
|
||||
static const std::pair<u32, u32> calculate_upload_pitch(int format, u32 heap_align, vk::image* dst_image, const rsx::subresource_layout& layout)
|
||||
{
|
||||
u32 block_in_pixel = rsx::get_format_block_size_in_texel(format);
|
||||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||
|
||||
u32 row_pitch, upload_pitch_in_texel;
|
||||
|
||||
if (!heap_align) [[likely]]
|
||||
{
|
||||
if (!layout.border) [[likely]]
|
||||
{
|
||||
row_pitch = (layout.pitch_in_block * block_size_in_bytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Skip the border texels if possible. Padding is undesirable for GPU deswizzle
|
||||
row_pitch = (layout.width_in_block * block_size_in_bytes);
|
||||
}
|
||||
|
||||
// We have row_pitch in source coordinates. But some formats have a software decode step which can affect this packing!
|
||||
// For such formats, the packed pitch on src does not match packed pitch on dst
|
||||
if (!rsx::is_compressed_host_format(format))
|
||||
{
|
||||
const auto host_texel_width = vk::get_format_texel_width(dst_image->format());
|
||||
const auto host_packed_pitch = host_texel_width * dst_image->width();
|
||||
row_pitch = std::max(row_pitch, host_packed_pitch);
|
||||
upload_pitch_in_texel = row_pitch / host_texel_width;
|
||||
}
|
||||
else
|
||||
{
|
||||
upload_pitch_in_texel = std::max<u32>(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, heap_align);
|
||||
upload_pitch_in_texel = std::max<u32>(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel);
|
||||
ensure(row_pitch == heap_align);
|
||||
}
|
||||
|
||||
return { row_pitch, upload_pitch_in_texel };
|
||||
}
|
||||
|
||||
void upload_image(const vk::command_buffer& cmd, vk::image* dst_image,
|
||||
const std::vector<rsx::subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 /*mipmap_count*/,
|
||||
VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags)
|
||||
{
|
||||
const bool requires_depth_processing = (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || (format == CELL_GCM_TEXTURE_DEPTH16_FLOAT);
|
||||
u32 block_in_pixel = rsx::get_format_block_size_in_texel(format);
|
||||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||
|
||||
rsx::texture_uploader_capabilities caps{ .alignment = heap_align };
|
||||
rsx::texture_memory_info opt{};
|
||||
bool check_caps = true;
|
||||
|
||||
vk::buffer* scratch_buf = nullptr;
|
||||
u32 scratch_offset = 0;
|
||||
u32 row_pitch, image_linear_size;
|
||||
u32 image_linear_size;
|
||||
|
||||
vk::buffer* upload_buffer = nullptr;
|
||||
usz offset_in_upload_buffer = 0;
|
||||
@ -858,26 +898,10 @@ namespace vk
|
||||
|
||||
for (const rsx::subresource_layout &layout : subresource_layout)
|
||||
{
|
||||
if (!heap_align) [[likely]]
|
||||
{
|
||||
if (!layout.border) [[likely]]
|
||||
{
|
||||
row_pitch = (layout.pitch_in_block * block_size_in_bytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Skip the border texels if possible. Padding is undesirable for GPU deswizzle
|
||||
row_pitch = (layout.width_in_block * block_size_in_bytes);
|
||||
}
|
||||
|
||||
caps.alignment = row_pitch;
|
||||
}
|
||||
else
|
||||
{
|
||||
row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, heap_align);
|
||||
ensure(row_pitch == heap_align);
|
||||
}
|
||||
const auto [row_pitch, upload_pitch_in_texel] = calculate_upload_pitch(format, heap_align, dst_image, layout);
|
||||
caps.alignment = row_pitch;
|
||||
|
||||
// Calculate estimated memory utilization for this subresource
|
||||
image_linear_size = row_pitch * layout.height_in_block * layout.depth;
|
||||
|
||||
// Map with extra padding bytes in case of realignment
|
||||
@ -908,7 +932,7 @@ namespace vk
|
||||
copy_info.imageSubresource.layerCount = 1;
|
||||
copy_info.imageSubresource.baseArrayLayer = layout.layer;
|
||||
copy_info.imageSubresource.mipLevel = layout.level;
|
||||
copy_info.bufferRowLength = std::max<u32>(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel);
|
||||
copy_info.bufferRowLength = upload_pitch_in_texel;
|
||||
|
||||
upload_buffer = upload_heap.heap.get();
|
||||
|
||||
@ -993,7 +1017,7 @@ namespace vk
|
||||
upload_commands.back().second++;
|
||||
}
|
||||
|
||||
copy_info.bufferRowLength = std::max<u32>(block_in_pixel * layout.pitch_in_block, layout.width_in_texel);
|
||||
copy_info.bufferRowLength = upload_pitch_in_texel;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user