diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index faabd301b3..e0064b4988 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -200,33 +200,43 @@ struct copy_unmodified_block_vtc struct copy_decoded_rb_rg_block { - template - static void copy_mipmap_level(gsl::span dst, gsl::span src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) + template + static void copy_mipmap_level(gsl::span dst, gsl::span src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) { static_assert(sizeof(T) == 4, "Type size doesn't match."); - static_assert(sizeof(U) == 2, "Type size doesn't match."); u32 src_offset = 0; u32 dst_offset = 0; + // Temporaries + u32 red0, red1, blue, green; + for (int row = 0; row < row_count * depth; ++row) { - for (int col = 0; col < width_in_block; col += 2) + for (int col = 0; col < width_in_block; ++col) { - // Process 2 pixels at a time and write in BGRA format - const u16 src0 = src[src_offset + col]; // R,B - const u16 src1 = src[src_offset + col + 1]; // R,G - const u32 blue = (src0 & 0xFF00) >> 8; - const u32 green = (src1 & 0xFF00); - const u32 data0 = blue | green | (src0 & 0xFF) << 16 | 0xFF << 24; - const u32 data1 = blue | green | (src1 & 0xFF) << 16 | 0xFF << 24; + // Decompress one block to 2 pixels at a time and write output in BGRA format + const auto data = src[src_offset + col]; - dst[dst_offset + col] = data0; - if (!(width_in_block & 0x1)) + if constexpr (SwapWords) { - // If size is even, fill in the second pixel - dst[dst_offset + col + 1] = data1; + // BR_GR + blue = (data >> 0) & 0xFF; + red0 = (data >> 8) & 0xFF; + green = (data >> 16) & 0XFF; + red1 = (data >> 24) & 0xFF; } + else + { + // RB_RG + red0 = (data >> 0) & 0xFF; + blue = (data >> 8) & 0xFF; + red1 = (data >> 16) & 0XFF; + green = (data >> 24) & 0xFF; + } + + dst[dst_offset + (col * 2)] = blue | (green << 8) | (red0 << 16) | (0xFF << 24); + dst[dst_offset + (col * 2 + 1)] = blue | (green << 8) | (red1 << 16) | (0xFF << 24); } src_offset += src_pitch_in_block; @@ -394,11 +404,17 @@ namespace current_subresource_layout.width_in_block = miplevel_width_in_texel; current_subresource_layout.height_in_block = miplevel_height_in_texel; } - else + else if constexpr (block_edge_in_texel == 4) { current_subresource_layout.width_in_block = utils::aligned_div(miplevel_width_in_texel, block_edge_in_texel); current_subresource_layout.height_in_block = utils::aligned_div(miplevel_height_in_texel, block_edge_in_texel); } + else + { + // Only the width is compressed + current_subresource_layout.width_in_block = utils::aligned_div(miplevel_width_in_texel, block_edge_in_texel); + current_subresource_layout.height_in_block = miplevel_height_in_texel; + } if (padded_row) { @@ -520,6 +536,7 @@ std::vector get_subresources_layout_impl(const RsxTextu return get_subresources_layout_impl<1, u8>(pixels, w, h, depth, layer, texture.get_exact_mipmap_count(), pitch, !is_swizzled, has_border); case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + return get_subresources_layout_impl<2, u32>(pixels, w, h, depth, layer, texture.get_exact_mipmap_count(), pitch, !is_swizzled, has_border); case CELL_GCM_TEXTURE_COMPRESSED_HILO8: case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: case CELL_GCM_TEXTURE_DEPTH16: @@ -610,13 +627,13 @@ namespace rsx case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: { - copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block); break; } case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: { - copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(src_layout.width_in_texel, caps.alignment), src_layout.pitch_in_block); break; } @@ -809,6 +826,46 @@ namespace rsx return result; } + bool is_compressed_host_format(u32 texture_format) + { + switch (texture_format) + { + case CELL_GCM_TEXTURE_B8: + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_G8B8: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_X16: + case CELL_GCM_TEXTURE_Y16_X16: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + case CELL_GCM_TEXTURE_X32_FLOAT: + case CELL_GCM_TEXTURE_D1R5G5B5: + case CELL_GCM_TEXTURE_D8R8G8B8: + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + // The following formats are compressed in RSX/GCM but not on the host device. + // They are decompressed in sw before uploading + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + return false; + // True compressed formats on the host device + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + return true; + } + fmt::throw_exception("Unknown format 0x%x", texture_format); + } + /** * A texture is stored as an array of blocks, where a block is a pixel for standard texture * but is a structure containing several pixels for compressed format @@ -875,9 +932,9 @@ namespace rsx case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: case CELL_GCM_TEXTURE_X32_FLOAT: case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return 1; case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 1; + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 2; case CELL_GCM_TEXTURE_COMPRESSED_DXT1: case CELL_GCM_TEXTURE_COMPRESSED_DXT23: case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return 4; diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 3c576ecc05..29727b28f9 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -161,6 +161,7 @@ namespace rsx u8 get_format_block_size_in_bytes(rsx::surface_color_format format); u8 get_format_block_size_in_bytes(rsx::surface_depth_format2 format); + bool is_compressed_host_format(u32 format); // Returns true for host-compressed formats (DXT) u8 get_format_sample_count(rsx::surface_antialiasing antialias); u32 get_max_depth_value(rsx::surface_depth_format2 format); bool is_depth_stencil_format(rsx::surface_depth_format2 format); diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 57022c0e15..fe204a2fc8 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -358,43 +358,6 @@ namespace gl set_parameteri(GL_TEXTURE_COMPARE_MODE, GL_NONE); } - bool is_compressed_format(u32 texture_format) - { - switch (texture_format) - { - case CELL_GCM_TEXTURE_B8: - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_A8R8G8B8: - case CELL_GCM_TEXTURE_G8B8: - case CELL_GCM_TEXTURE_R6G5B5: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_X16: - case CELL_GCM_TEXTURE_Y16_X16: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - case CELL_GCM_TEXTURE_X32_FLOAT: - case CELL_GCM_TEXTURE_D1R5G5B5: - case CELL_GCM_TEXTURE_D8R8G8B8: - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - return false; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - return true; - } - fmt::throw_exception("Unknown format 0x%x", texture_format); - } - std::array get_swizzle_remap(u32 texture_format) { // NOTE: This must be in ARGB order in all forms below. @@ -610,7 +573,7 @@ namespace gl gl::viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type) { - if (is_compressed_format(gcm_format)) + if (rsx::is_compressed_host_format(gcm_format)) { //Compressed formats have a 4-byte alignment //TODO: Verify that samplers are not affected by the padding @@ -634,7 +597,7 @@ namespace gl pixel_unpack_settings unpack_settings; unpack_settings.row_length(0).alignment(4); - if (is_compressed_format(format)) [[likely]] + if (rsx::is_compressed_host_format(format)) [[likely]] { caps.supports_vtc_decoding = gl::get_driver_caps().vendor_NVIDIA; diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 73319a5a43..386b635b74 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -832,21 +832,61 @@ namespace vk return *pcmd; } + static const std::pair calculate_upload_pitch(int format, u32 heap_align, vk::image* dst_image, const rsx::subresource_layout& layout) + { + u32 block_in_pixel = rsx::get_format_block_size_in_texel(format); + u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); + + u32 row_pitch, upload_pitch_in_texel; + + if (!heap_align) [[likely]] + { + if (!layout.border) [[likely]] + { + row_pitch = (layout.pitch_in_block * block_size_in_bytes); + } + else + { + // Skip the border texels if possible. Padding is undesirable for GPU deswizzle + row_pitch = (layout.width_in_block * block_size_in_bytes); + } + + // We have row_pitch in source coordinates. But some formats have a software decode step which can affect this packing! + // For such formats, the packed pitch on src does not match packed pitch on dst + if (!rsx::is_compressed_host_format(format)) + { + const auto host_texel_width = vk::get_format_texel_width(dst_image->format()); + const auto host_packed_pitch = host_texel_width * dst_image->width(); + row_pitch = std::max(row_pitch, host_packed_pitch); + upload_pitch_in_texel = row_pitch / host_texel_width; + } + else + { + upload_pitch_in_texel = std::max(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel); + } + } + else + { + row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, heap_align); + upload_pitch_in_texel = std::max(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel); + ensure(row_pitch == heap_align); + } + + return { row_pitch, upload_pitch_in_texel }; + } + void upload_image(const vk::command_buffer& cmd, vk::image* dst_image, const std::vector& subresource_layout, int format, bool is_swizzled, u16 /*mipmap_count*/, VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align, rsx::flags32_t image_setup_flags) { const bool requires_depth_processing = (dst_image->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || (format == CELL_GCM_TEXTURE_DEPTH16_FLOAT); - u32 block_in_pixel = rsx::get_format_block_size_in_texel(format); - u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); - rsx::texture_uploader_capabilities caps{ .alignment = heap_align }; rsx::texture_memory_info opt{}; bool check_caps = true; vk::buffer* scratch_buf = nullptr; u32 scratch_offset = 0; - u32 row_pitch, image_linear_size; + u32 image_linear_size; vk::buffer* upload_buffer = nullptr; usz offset_in_upload_buffer = 0; @@ -858,26 +898,10 @@ namespace vk for (const rsx::subresource_layout &layout : subresource_layout) { - if (!heap_align) [[likely]] - { - if (!layout.border) [[likely]] - { - row_pitch = (layout.pitch_in_block * block_size_in_bytes); - } - else - { - // Skip the border texels if possible. Padding is undesirable for GPU deswizzle - row_pitch = (layout.width_in_block * block_size_in_bytes); - } - - caps.alignment = row_pitch; - } - else - { - row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, heap_align); - ensure(row_pitch == heap_align); - } + const auto [row_pitch, upload_pitch_in_texel] = calculate_upload_pitch(format, heap_align, dst_image, layout); + caps.alignment = row_pitch; + // Calculate estimated memory utilization for this subresource image_linear_size = row_pitch * layout.height_in_block * layout.depth; // Map with extra padding bytes in case of realignment @@ -908,7 +932,7 @@ namespace vk copy_info.imageSubresource.layerCount = 1; copy_info.imageSubresource.baseArrayLayer = layout.layer; copy_info.imageSubresource.mipLevel = layout.level; - copy_info.bufferRowLength = std::max(block_in_pixel * row_pitch / block_size_in_bytes, layout.width_in_texel); + copy_info.bufferRowLength = upload_pitch_in_texel; upload_buffer = upload_heap.heap.get(); @@ -993,7 +1017,7 @@ namespace vk upload_commands.back().second++; } - copy_info.bufferRowLength = std::max(block_in_pixel * layout.pitch_in_block, layout.width_in_texel); + copy_info.bufferRowLength = upload_pitch_in_texel; } }