From 65ead0888087403a0c5caeefdad4b0c42ff2f27d Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 16 Aug 2020 00:33:34 +0300 Subject: [PATCH] rsx: Refactor and improve image memory manipulation routines --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 1058 +++++++++--------- rpcs3/Emu/RSX/Common/TextureUtils.h | 195 ++-- rpcs3/Emu/RSX/Common/surface_utils.h | 16 - rpcs3/Emu/RSX/Common/texture_cache.h | 22 +- rpcs3/Emu/RSX/Common/texture_cache_helpers.h | 25 +- rpcs3/Emu/RSX/Common/texture_cache_utils.h | 16 +- rpcs3/Emu/RSX/GL/GLHelpers.h | 57 +- rpcs3/Emu/RSX/GL/GLPresent.cpp | 2 +- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 2 +- rpcs3/Emu/RSX/GL/GLTexture.cpp | 12 +- rpcs3/Emu/RSX/GL/GLTexture.h | 2 +- rpcs3/Emu/RSX/GL/GLTextureCache.h | 2 +- rpcs3/Emu/RSX/RSXThread.cpp | 4 +- rpcs3/Emu/RSX/VK/VKDraw.cpp | 2 +- rpcs3/Emu/RSX/VK/VKFormats.cpp | 12 + rpcs3/Emu/RSX/VK/VKFormats.h | 1 + rpcs3/Emu/RSX/VK/VKHelpers.cpp | 6 +- rpcs3/Emu/RSX/VK/VKHelpers.h | 43 +- rpcs3/Emu/RSX/VK/VKPresent.cpp | 24 +- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 16 +- rpcs3/Emu/RSX/VK/VKTexture.cpp | 205 ++-- rpcs3/Emu/RSX/VK/VKTextureCache.h | 43 +- 22 files changed, 908 insertions(+), 857 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index a0fa8215f4..e0f5a97a11 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -4,24 +4,6 @@ #include "../RSXThread.h" #include "../rsx_utils.h" -namespace rsx -{ - void typeless_xfer::analyse() - { - // TODO: This method needs to be re-evaluated - // Check if scaling hints match, which likely means internal formats match as well - // Only possible when doing RTT->RTT transfer with non-base-type formats like WZYX16/32 - if (src_is_typeless && dst_is_typeless && src_gcm_format == dst_gcm_format) - { - if (fcmp(src_scaling_hint, dst_scaling_hint) && !fcmp(src_scaling_hint, 1.f)) - { - src_is_typeless = dst_is_typeless = false; - src_scaling_hint = dst_scaling_hint = 1.f; - } - } - } -} - namespace { // FIXME: GSL as_span break build if template parameter is non const with current revision. @@ -325,7 +307,7 @@ namespace * Sometimes texture provides a pitch even if texture is swizzled (and then packed) and in such case it's ignored. It's passed via suggested_pitch and is used only if padded_row is false. */ template - std::vector get_subresources_layout_impl(const std::byte *texture_data_pointer, u16 width_in_texel, u16 height_in_texel, u16 depth, u8 layer_count, u16 mipmap_count, u32 suggested_pitch_in_bytes, bool padded_row, bool border) + std::vector get_subresources_layout_impl(const std::byte *texture_data_pointer, u16 width_in_texel, u16 height_in_texel, u16 depth, u8 layer_count, u16 mipmap_count, u32 suggested_pitch_in_bytes, bool padded_row, bool border) { /** * Note about size type: RSX texture width is stored in a 16 bits int and pitch is stored in a 20 bits int. @@ -334,7 +316,7 @@ namespace // <= 128 so fits in u8 u8 block_size_in_bytes = sizeof(SRC_TYPE); - std::vector result; + std::vector result; size_t offset_in_src = 0; u8 border_size = border ? (padded_row ? 1 : 4) : 0; @@ -348,7 +330,7 @@ namespace for (unsigned mip_level = 0; mip_level < mipmap_count; mip_level++) { result.push_back({}); - rsx_subresource_layout& current_subresource_layout = result.back(); + rsx::subresource_layout& current_subresource_layout = result.back(); current_subresource_layout.width_in_texel = miplevel_width_in_texel; current_subresource_layout.height_in_texel = miplevel_height_in_texel; @@ -448,7 +430,7 @@ std::tuple get_height_depth_layer(const RsxTextureType &tex) } template -std::vector get_subresources_layout_impl(const RsxTextureType &texture) +std::vector get_subresources_layout_impl(const RsxTextureType &texture) { u16 w = texture.width(); u16 h; @@ -470,10 +452,10 @@ std::vector get_subresources_layout_impl(const RsxTextur { if (pitch) [[likely]] { - if (pitch < get_format_packed_pitch(format, w, has_border, false)) + if (pitch < rsx::get_format_packed_pitch(format, w, has_border, false)) { - const u32 real_width_in_block = pitch / get_format_block_size_in_bytes(format); - w = std::max(real_width_in_block * get_format_block_size_in_texel(format), 1); + const u32 real_width_in_block = pitch / rsx::get_format_block_size_in_bytes(format); + w = std::max(real_width_in_block * rsx::get_format_block_size_in_texel(format), 1); } } else @@ -522,339 +504,356 @@ std::vector get_subresources_layout_impl(const RsxTextur fmt::throw_exception("Wrong format 0x%x" HERE, format); } -std::vector get_subresources_layout(const rsx::fragment_texture &texture) +namespace rsx { - return get_subresources_layout_impl(texture); -} - -std::vector get_subresources_layout(const rsx::vertex_texture &texture) -{ - return get_subresources_layout_impl(texture); -} - -texture_memory_info upload_texture_subresource(gsl::span dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps) -{ - u16 w = src_layout.width_in_block; - u16 h = src_layout.height_in_block; - u16 depth = src_layout.depth; - u32 pitch = src_layout.pitch_in_block; - - texture_memory_info result{}; - - // Ignore when texture width > pitch - if (w > pitch) - return result; - - // Check if we can use a fast path - int word_size = 0; - int words_per_block; - u32 dst_pitch_in_block; - - switch (format) + void typeless_xfer::analyse() { - case CELL_GCM_TEXTURE_B8: - { - word_size = words_per_block = 1; - dst_pitch_in_block = get_row_pitch_in_block(w, caps.alignment); - break; - } - - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - { - copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); - break; - } - - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - { - copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); - break; - } - - case CELL_GCM_TEXTURE_R6G5B5: - { - if (is_swizzled) - copy_rgb655_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment)); - else - copy_rgb655_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); - break; - } - - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - // TODO: Test if the HILO compressed formats support swizzling (other compressed_* formats ignore this option) - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: // Untested - case CELL_GCM_TEXTURE_D1R5G5B5: - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_G8B8: - { - word_size = 2; - words_per_block = 1; - dst_pitch_in_block = get_row_pitch_in_block(w, caps.alignment); - break; - } - - case CELL_GCM_TEXTURE_A8R8G8B8: - case CELL_GCM_TEXTURE_D8R8G8B8: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: // Untested - { - word_size = 4; - words_per_block = 1; - dst_pitch_in_block = get_row_pitch_in_block(w, caps.alignment); - break; - } - - // NOTE: Textures with WZYX notations refer to arbitrary data and not color swizzles as in common GPU lang - // WZYX actually maps directly as a RGBA16 format in Cell memory! R=W, not R=X - - case CELL_GCM_TEXTURE_X16: - case CELL_GCM_TEXTURE_Y16_X16: - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - { - const u16 block_size = get_format_block_size_in_bytes(format); - word_size = 2; - words_per_block = block_size / 2; - dst_pitch_in_block = get_row_pitch_in_block(block_size, w, caps.alignment); - break; - } - - case CELL_GCM_TEXTURE_X32_FLOAT: - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - { - const u16 block_size = get_format_block_size_in_bytes(format); - word_size = 4; - words_per_block = block_size / 4; - dst_pitch_in_block = get_row_pitch_in_block(block_size, w, caps.alignment); - break; - } - - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - { - if (depth > 1 && !caps.supports_vtc_decoding) + // TODO: This method needs to be re-evaluated + // Check if scaling hints match, which likely means internal formats match as well + // Only possible when doing RTT->RTT transfer with non-base-type formats like WZYX16/32 + if (src_is_typeless && dst_is_typeless && src_gcm_format == dst_gcm_format) { - // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. - // This is only supported using Nvidia OpenGL. - // Remove the VTC tiling to support ATI and Vulkan. - copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + if (fcmp(src_scaling_hint, dst_scaling_hint) && !fcmp(src_scaling_hint, 1.f)) + { + src_is_typeless = dst_is_typeless = false; + src_scaling_hint = dst_scaling_hint = 1.f; + } } - else - { - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); - } - break; } - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + std::vector get_subresources_layout(const rsx::fragment_texture& texture) { - if (depth > 1 && !caps.supports_vtc_decoding) - { - // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. - // This is only supported using Nvidia OpenGL. - // Remove the VTC tiling to support ATI and Vulkan. - copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); - } - else - { - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); - } - break; + return get_subresources_layout_impl(texture); } - default: - fmt::throw_exception("Wrong format 0x%x" HERE, format); - } - - if (word_size) + std::vector get_subresources_layout(const rsx::vertex_texture& texture) { - if (word_size == 1) + return get_subresources_layout_impl(texture); + } + + texture_memory_info upload_texture_subresource(gsl::span dst_buffer, const rsx::subresource_layout& src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps) + { + u16 w = src_layout.width_in_block; + u16 h = src_layout.height_in_block; + u16 depth = src_layout.depth; + u32 pitch = src_layout.pitch_in_block; + + texture_memory_info result{}; + + // Ignore when texture width > pitch + if (w > pitch) + return result; + + // Check if we can use a fast path + int word_size = 0; + int words_per_block; + u32 dst_pitch_in_block; + + switch (format) + { + case CELL_GCM_TEXTURE_B8: + { + word_size = words_per_block = 1; + dst_pitch_in_block = get_row_pitch_in_block(w, caps.alignment); + break; + } + + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + { + copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; + } + + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: + { + copy_decoded_rb_rg_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; + } + + case CELL_GCM_TEXTURE_R6G5B5: { if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + copy_rgb655_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment)); else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + copy_rgb655_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), w, h, depth, src_layout.border, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else if (caps.supports_byteswap) - { - result.require_swap = true; - result.element_size = word_size; - result.block_length = words_per_block; - if (word_size == 2) + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + // TODO: Test if the HILO compressed formats support swizzling (other compressed_* formats ignore this option) + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: // Untested + case CELL_GCM_TEXTURE_D1R5G5B5: + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_G8B8: + { + word_size = 2; + words_per_block = 1; + dst_pitch_in_block = get_row_pitch_in_block(w, caps.alignment); + break; + } + + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_D8R8G8B8: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: // Untested + { + word_size = 4; + words_per_block = 1; + dst_pitch_in_block = get_row_pitch_in_block(w, caps.alignment); + break; + } + + // NOTE: Textures with WZYX notations refer to arbitrary data and not color swizzles as in common GPU lang + // WZYX actually maps directly as a RGBA16 format in Cell memory! R=W, not R=X + + case CELL_GCM_TEXTURE_X16: + case CELL_GCM_TEXTURE_Y16_X16: + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + { + const u16 block_size = get_format_block_size_in_bytes(format); + word_size = 2; + words_per_block = block_size / 2; + dst_pitch_in_block = get_row_pitch_in_block(block_size, w, caps.alignment); + break; + } + + case CELL_GCM_TEXTURE_X32_FLOAT: + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + { + const u16 block_size = get_format_block_size_in_bytes(format); + word_size = 4; + words_per_block = block_size / 4; + dst_pitch_in_block = get_row_pitch_in_block(block_size, w, caps.alignment); + break; + } + + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + { + if (depth > 1 && !caps.supports_vtc_decoding) + { + // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. + // This is only supported using Nvidia OpenGL. + // Remove the VTC tiling to support ATI and Vulkan. + copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + } + else + { + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + } + break; + } + + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + { + if (depth > 1 && !caps.supports_vtc_decoding) + { + // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. + // This is only supported using Nvidia OpenGL. + // Remove the VTC tiling to support ATI and Vulkan. + copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + } + else + { + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + } + break; + } + + default: + fmt::throw_exception("Wrong format 0x%x" HERE, format); + } + + if (word_size) + { + if (word_size == 1) { if (is_swizzled) + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + else + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } + else if (caps.supports_byteswap) + { + result.require_swap = true; + result.element_size = word_size; + result.block_length = words_per_block; + + if (word_size == 2) { - if (((word_size * words_per_block) & 3) == 0 && caps.supports_hw_deswizzle) + if (is_swizzled) { - result.require_deswizzle = true; + if (((word_size * words_per_block) & 3) == 0 && caps.supports_hw_deswizzle) + { + result.require_deswizzle = true; + } } + + if (is_swizzled && !result.require_deswizzle) + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + else + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); } + else if (word_size == 4) + { + result.require_deswizzle = (is_swizzled && caps.supports_hw_deswizzle); - if (is_swizzled && !result.require_deswizzle) - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); - else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + if (is_swizzled && !caps.supports_hw_deswizzle) + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + else + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } } - else if (word_size == 4) + else { - result.require_deswizzle = (is_swizzled && caps.supports_hw_deswizzle); - - if (is_swizzled && !caps.supports_hw_deswizzle) - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); - else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + if (word_size == 2) + { + if (is_swizzled) + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + else + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } + else if (word_size == 4) + { + if (is_swizzled) + copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + else + copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } } } - else + + return result; + } + + /** + * A texture is stored as an array of blocks, where a block is a pixel for standard texture + * but is a structure containing several pixels for compressed format + */ + u8 get_format_block_size_in_bytes(int format) + { + switch (format) { - if (word_size == 2) - { - if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); - else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - else if (word_size == 4) - { - if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); - else - copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), as_const_span>(src_layout.data), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } + case CELL_GCM_TEXTURE_B8: return 1; + case CELL_GCM_TEXTURE_X16: + case CELL_GCM_TEXTURE_G8B8: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_D1R5G5B5: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return 2; + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_D8R8G8B8: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_X32_FLOAT: + case CELL_GCM_TEXTURE_Y16_X16: + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 4; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return 8; + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return 16; + default: + rsx_log.error("Unimplemented block size in bytes for texture format: 0x%x", format); + return 1; } } - return result; -} - -/** - * A texture is stored as an array of blocks, where a block is a pixel for standard texture - * but is a structure containing several pixels for compressed format - */ -u8 get_format_block_size_in_bytes(int format) -{ - switch (format) + u8 get_format_block_size_in_texel(int format) { - case CELL_GCM_TEXTURE_B8: return 1; - case CELL_GCM_TEXTURE_X16: - case CELL_GCM_TEXTURE_G8B8: - case CELL_GCM_TEXTURE_R6G5B5: - case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_D1R5G5B5: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return 2; - case CELL_GCM_TEXTURE_A8R8G8B8: - case CELL_GCM_TEXTURE_D8R8G8B8: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_X32_FLOAT: - case CELL_GCM_TEXTURE_Y16_X16: - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 4; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return 8; - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return 16; - default: - rsx_log.error("Unimplemented block size in bytes for texture format: 0x%x", format); - return 1; + switch (format) + { + case CELL_GCM_TEXTURE_B8: + case CELL_GCM_TEXTURE_G8B8: + case CELL_GCM_TEXTURE_D8R8G8B8: + case CELL_GCM_TEXTURE_D1R5G5B5: + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_R6G5B5: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_X16: + case CELL_GCM_TEXTURE_Y16_X16: + case CELL_GCM_TEXTURE_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: + case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: + case CELL_GCM_TEXTURE_X32_FLOAT: + case CELL_GCM_TEXTURE_COMPRESSED_HILO8: + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 1; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return 4; + default: + rsx_log.error("Unimplemented block size in texels for texture format: 0x%x", format); + return 1; + } } -} -u8 get_format_block_size_in_texel(int format) -{ - switch (format) + u8 get_format_block_size_in_bytes(rsx::surface_color_format format) { - case CELL_GCM_TEXTURE_B8: - case CELL_GCM_TEXTURE_G8B8: - case CELL_GCM_TEXTURE_D8R8G8B8: - case CELL_GCM_TEXTURE_D1R5G5B5: - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_A8R8G8B8: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_R6G5B5: - case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_X16: - case CELL_GCM_TEXTURE_Y16_X16: - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - case CELL_GCM_TEXTURE_X32_FLOAT: - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return 1; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return 4; - default: - rsx_log.error("Unimplemented block size in texels for texture format: 0x%x", format); - return 1; + switch (format) + { + case rsx::surface_color_format::b8: + return 1; + case rsx::surface_color_format::g8b8: + case rsx::surface_color_format::r5g6b5: + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + return 2; + case rsx::surface_color_format::a8b8g8r8: + case rsx::surface_color_format::a8r8g8b8: + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::x32: + return 4; + case rsx::surface_color_format::w16z16y16x16: + return 8; + case rsx::surface_color_format::w32z32y32x32: + return 16; + default: + fmt::throw_exception("Invalid color format 0x%x" HERE, static_cast(format)); + } } -} -u8 get_format_block_size_in_bytes(rsx::surface_color_format format) -{ - switch (format) + u8 get_format_block_size_in_bytes(rsx::surface_depth_format2 format) { - case rsx::surface_color_format::b8: - return 1; - case rsx::surface_color_format::g8b8: - case rsx::surface_color_format::r5g6b5: - case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: - case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: - return 2; - case rsx::surface_color_format::a8b8g8r8: - case rsx::surface_color_format::a8r8g8b8: - case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: - case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: - case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: - case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: - case rsx::surface_color_format::x32: - return 4; - case rsx::surface_color_format::w16z16y16x16: - return 8; - case rsx::surface_color_format::w32z32y32x32: - return 16; - default: - fmt::throw_exception("Invalid color format 0x%x" HERE, static_cast(format)); + switch (format) + { + case rsx::surface_depth_format2::z24s8_uint: + case rsx::surface_depth_format2::z24s8_float: + return 4; + default: + return 2; + } } -} -u8 get_format_block_size_in_bytes(rsx::surface_depth_format2 format) -{ - switch (format) - { - case rsx::surface_depth_format2::z24s8_uint: - case rsx::surface_depth_format2::z24s8_float: - return 4; - default: - return 2; - } -} - -u8 get_format_sample_count(rsx::surface_antialiasing antialias) -{ - switch (antialias) + u8 get_format_sample_count(rsx::surface_antialiasing antialias) { + switch (antialias) + { case rsx::surface_antialiasing::center_1_sample: return 1; case rsx::surface_antialiasing::diagonal_centered_2_samples: @@ -865,230 +864,265 @@ u8 get_format_sample_count(rsx::surface_antialiasing antialias) default: ASSUME(0); return 0; - } -} - -bool is_depth_stencil_format(rsx::surface_depth_format2 format) -{ - switch (format) - { - case rsx::surface_depth_format2::z24s8_uint: - case rsx::surface_depth_format2::z24s8_float: - return true; - default: - return false; - } -} - -/** - * Returns number of texel lines decoded in one pitch-length number of bytes - */ -u8 get_format_texel_rows_per_line(u32 format) -{ - switch (format) - { - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - // Layout is 4x4 blocks, i.e one row of pitch bytes in length actually encodes 4 texel rows - return 4; - default: - return 1; - } -} - -u32 get_format_packed_pitch(u32 format, u16 width, bool border, bool swizzled) -{ - const auto texels_per_block = get_format_block_size_in_texel(format); - const auto bytes_per_block = get_format_block_size_in_bytes(format); - - auto width_in_block = ((width + texels_per_block - 1) / texels_per_block); - if (border) - { - width_in_block = swizzled ? rsx::next_pow2(width_in_block + 8): - width_in_block + 2; + } } - return width_in_block * bytes_per_block; -} - -size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignment, size_t mipmap_alignment) -{ - format &= ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - size_t block_edge = get_format_block_size_in_texel(format); - size_t block_size_in_byte = get_format_block_size_in_bytes(format); - - size_t height_in_blocks = (height + block_edge - 1) / block_edge; - size_t width_in_blocks = (width + block_edge - 1) / block_edge; - - size_t result = 0; - for (u16 i = 0; i < mipmap; ++i) + bool is_depth_stencil_format(rsx::surface_depth_format2 format) { - size_t rowPitch = align(block_size_in_byte * width_in_blocks, row_pitch_alignment); - result += align(rowPitch * height_in_blocks * depth, mipmap_alignment); - height_in_blocks = std::max(height_in_blocks / 2, 1); - width_in_blocks = std::max(width_in_blocks / 2, 1); - } - - // Mipmap, height and width aren't allowed to be zero - return verify("Texture params" HERE, result) * (cubemap ? 6 : 1); -} - -size_t get_placed_texture_storage_size(const rsx::fragment_texture &texture, size_t row_pitch_alignment, size_t mipmap_alignment) -{ - return get_placed_texture_storage_size(texture.width(), texture.height(), texture.depth(), texture.format(), texture.mipmap(), texture.cubemap(), - row_pitch_alignment, mipmap_alignment); -} - -size_t get_placed_texture_storage_size(const rsx::vertex_texture &texture, size_t row_pitch_alignment, size_t mipmap_alignment) -{ - return get_placed_texture_storage_size(texture.width(), texture.height(), texture.depth(), texture.format(), texture.mipmap(), texture.cubemap(), - row_pitch_alignment, mipmap_alignment); -} - -static size_t get_texture_size(u32 format, u16 width, u16 height, u16 depth, u32 pitch, u16 mipmaps, u16 layers, u8 border) -{ - const auto gcm_format = format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - const bool packed = !(format & CELL_GCM_TEXTURE_LN); - const auto texel_rows_per_line = get_format_texel_rows_per_line(gcm_format); - - if (!pitch && !packed) - { - if (width > 1 || height > 1) + switch (format) { - // If width == 1, the scanning just returns texel 0, so it is a valid setup - rsx_log.error("Invalid texture pitch setup, width=%d, height=%d, format=0x%x(0x%x)", - width, height, format, gcm_format); + case rsx::surface_depth_format2::z24s8_uint: + case rsx::surface_depth_format2::z24s8_float: + return true; + default: + return false; + } + } + + /** + * Returns number of texel lines decoded in one pitch-length number of bytes + */ + u8 get_format_texel_rows_per_line(u32 format) + { + switch (format) + { + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + // Layout is 4x4 blocks, i.e one row of pitch bytes in length actually encodes 4 texel rows + return 4; + default: + return 1; + } + } + + u32 get_format_packed_pitch(u32 format, u16 width, bool border, bool swizzled) + { + const auto texels_per_block = get_format_block_size_in_texel(format); + const auto bytes_per_block = get_format_block_size_in_bytes(format); + + auto width_in_block = ((width + texels_per_block - 1) / texels_per_block); + if (border) + { + width_in_block = swizzled ? rsx::next_pow2(width_in_block + 8) : + width_in_block + 2; } - pitch = get_format_packed_pitch(gcm_format, width, !!border, packed); + return width_in_block * bytes_per_block; } - u32 size = 0; - if (!packed) + size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignment, size_t mipmap_alignment) { - // Constant pitch layout, simple scanning - const u32 internal_height = (height + texel_rows_per_line - 1) / texel_rows_per_line; // Convert texels to blocks - for (u32 layer = 0; layer < layers; ++layer) + format &= ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + size_t block_edge = get_format_block_size_in_texel(format); + size_t block_size_in_byte = get_format_block_size_in_bytes(format); + + size_t height_in_blocks = (height + block_edge - 1) / block_edge; + size_t width_in_blocks = (width + block_edge - 1) / block_edge; + + size_t result = 0; + for (u16 i = 0; i < mipmap; ++i) { - u32 mip_height = internal_height; - for (u32 mipmap = 0; mipmap < mipmaps && mip_height > 0; ++mipmap) + size_t rowPitch = align(block_size_in_byte * width_in_blocks, row_pitch_alignment); + result += align(rowPitch * height_in_blocks * depth, mipmap_alignment); + height_in_blocks = std::max(height_in_blocks / 2, 1); + width_in_blocks = std::max(width_in_blocks / 2, 1); + } + + // Mipmap, height and width aren't allowed to be zero + return verify("Texture params" HERE, result) * (cubemap ? 6 : 1); + } + + size_t get_placed_texture_storage_size(const rsx::fragment_texture& texture, size_t row_pitch_alignment, size_t mipmap_alignment) + { + return get_placed_texture_storage_size(texture.width(), texture.height(), texture.depth(), texture.format(), texture.mipmap(), texture.cubemap(), + row_pitch_alignment, mipmap_alignment); + } + + size_t get_placed_texture_storage_size(const rsx::vertex_texture& texture, size_t row_pitch_alignment, size_t mipmap_alignment) + { + return get_placed_texture_storage_size(texture.width(), texture.height(), texture.depth(), texture.format(), texture.mipmap(), texture.cubemap(), + row_pitch_alignment, mipmap_alignment); + } + + static size_t get_texture_size(u32 format, u16 width, u16 height, u16 depth, u32 pitch, u16 mipmaps, u16 layers, u8 border) + { + const auto gcm_format = format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + const bool packed = !(format & CELL_GCM_TEXTURE_LN); + const auto texel_rows_per_line = get_format_texel_rows_per_line(gcm_format); + + if (!pitch && !packed) + { + if (width > 1 || height > 1) { - size += pitch * mip_height * depth; - mip_height = std::max(mip_height / 2u, 1u); + // If width == 1, the scanning just returns texel 0, so it is a valid setup + rsx_log.error("Invalid texture pitch setup, width=%d, height=%d, format=0x%x(0x%x)", + width, height, format, gcm_format); + } + + pitch = get_format_packed_pitch(gcm_format, width, !!border, packed); + } + + u32 size = 0; + if (!packed) + { + // Constant pitch layout, simple scanning + const u32 internal_height = (height + texel_rows_per_line - 1) / texel_rows_per_line; // Convert texels to blocks + for (u32 layer = 0; layer < layers; ++layer) + { + u32 mip_height = internal_height; + for (u32 mipmap = 0; mipmap < mipmaps && mip_height > 0; ++mipmap) + { + size += pitch * mip_height * depth; + mip_height = std::max(mip_height / 2u, 1u); + } } } - } - else - { - // Variable pitch per mipmap level - const auto texels_per_block = get_format_block_size_in_texel(gcm_format); - const auto bytes_per_block = get_format_block_size_in_bytes(gcm_format); - - const u32 internal_height = (height + texel_rows_per_line - 1) / texel_rows_per_line; // Convert texels to blocks - const u32 internal_width = (width + texels_per_block - 1) / texels_per_block; // Convert texels to blocks - for (u32 layer = 0; layer < layers; ++layer) + else { - u32 mip_height = internal_height; - u32 mip_width = internal_width; - for (u32 mipmap = 0; mipmap < mipmaps && mip_height > 0; ++mipmap) + // Variable pitch per mipmap level + const auto texels_per_block = get_format_block_size_in_texel(gcm_format); + const auto bytes_per_block = get_format_block_size_in_bytes(gcm_format); + + const u32 internal_height = (height + texel_rows_per_line - 1) / texel_rows_per_line; // Convert texels to blocks + const u32 internal_width = (width + texels_per_block - 1) / texels_per_block; // Convert texels to blocks + for (u32 layer = 0; layer < layers; ++layer) { - size += (mip_width * bytes_per_block * mip_height * depth); - mip_height = std::max(mip_height / 2u, 1u); - mip_width = std::max(mip_width / 2u, 1u); + u32 mip_height = internal_height; + u32 mip_width = internal_width; + for (u32 mipmap = 0; mipmap < mipmaps && mip_height > 0; ++mipmap) + { + size += (mip_width * bytes_per_block * mip_height * depth); + mip_height = std::max(mip_height / 2u, 1u); + mip_width = std::max(mip_width / 2u, 1u); + } } } + + return size; } - return size; -} - -size_t get_texture_size(const rsx::fragment_texture &texture) -{ - return get_texture_size(texture.format(), texture.width(), texture.height(), texture.depth(), + size_t get_texture_size(const rsx::fragment_texture& texture) + { + return get_texture_size(texture.format(), texture.width(), texture.height(), texture.depth(), texture.pitch(), texture.get_exact_mipmap_count(), texture.cubemap() ? 6 : 1, texture.border_type() ^ 1); -} + } -size_t get_texture_size(const rsx::vertex_texture &texture) -{ - return get_texture_size(texture.format(), texture.width(), texture.height(), texture.depth(), - texture.pitch(), texture.get_exact_mipmap_count(), texture.cubemap() ? 6 : 1, - texture.border_type() ^ 1); -} - -u32 get_remap_encoding(const std::pair, std::array>& remap) -{ - u32 encode = 0; - encode |= (remap.first[0] << 0); - encode |= (remap.first[1] << 2); - encode |= (remap.first[2] << 4); - encode |= (remap.first[3] << 6); - encode |= (remap.second[0] << 8); - encode |= (remap.second[1] << 10); - encode |= (remap.second[2] << 12); - encode |= (remap.second[3] << 14); - return encode; -} - -std::pair get_compatible_gcm_format(rsx::surface_color_format format) -{ - switch (format) + size_t get_texture_size(const rsx::vertex_texture& texture) { - case rsx::surface_color_format::r5g6b5: - return{ CELL_GCM_TEXTURE_R5G6B5, false }; + return get_texture_size(texture.format(), texture.width(), texture.height(), texture.depth(), + texture.pitch(), texture.get_exact_mipmap_count(), texture.cubemap() ? 6 : 1, + texture.border_type() ^ 1); + } - case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: - case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: - case rsx::surface_color_format::a8r8g8b8: - return{ CELL_GCM_TEXTURE_A8R8G8B8, true }; //verified + u32 get_remap_encoding(const std::pair, std::array>& remap) + { + u32 encode = 0; + encode |= (remap.first[0] << 0); + encode |= (remap.first[1] << 2); + encode |= (remap.first[2] << 4); + encode |= (remap.first[3] << 6); + encode |= (remap.second[0] << 8); + encode |= (remap.second[1] << 10); + encode |= (remap.second[2] << 12); + encode |= (remap.second[3] << 14); + return encode; + } - case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: - case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: - case rsx::surface_color_format::a8b8g8r8: - return{ CELL_GCM_TEXTURE_A8R8G8B8, true }; + std::pair get_compatible_gcm_format(rsx::surface_color_format format) + { + switch (format) + { + case rsx::surface_color_format::r5g6b5: + return{ CELL_GCM_TEXTURE_R5G6B5, false }; - case rsx::surface_color_format::w16z16y16x16: - return{ CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT, true }; + case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: + case rsx::surface_color_format::x8r8g8b8_o8r8g8b8: + case rsx::surface_color_format::a8r8g8b8: + return{ CELL_GCM_TEXTURE_A8R8G8B8, true }; //verified - case rsx::surface_color_format::w32z32y32x32: - return{ CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT, true }; + case rsx::surface_color_format::x8b8g8r8_o8b8g8r8: + case rsx::surface_color_format::x8b8g8r8_z8b8g8r8: + case rsx::surface_color_format::a8b8g8r8: + return{ CELL_GCM_TEXTURE_A8R8G8B8, true }; - case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: - case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: - return{ CELL_GCM_TEXTURE_A1R5G5B5, false }; + case rsx::surface_color_format::w16z16y16x16: + return{ CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT, true }; - case rsx::surface_color_format::b8: - return{ CELL_GCM_TEXTURE_B8, false }; + case rsx::surface_color_format::w32z32y32x32: + return{ CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT, true }; - case rsx::surface_color_format::g8b8: - return{ CELL_GCM_TEXTURE_G8B8, true }; + case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: + case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: + return{ CELL_GCM_TEXTURE_A1R5G5B5, false }; - case rsx::surface_color_format::x32: - return{ CELL_GCM_TEXTURE_X32_FLOAT, true }; //verified - default: - fmt::throw_exception("Unhandled surface format 0x%x", static_cast(format)); + case rsx::surface_color_format::b8: + return{ CELL_GCM_TEXTURE_B8, false }; + + case rsx::surface_color_format::g8b8: + return{ CELL_GCM_TEXTURE_G8B8, true }; + + case rsx::surface_color_format::x32: + return{ CELL_GCM_TEXTURE_X32_FLOAT, true }; //verified + default: + fmt::throw_exception("Unhandled surface format 0x%x", static_cast(format)); + } + } + + std::pair get_compatible_gcm_format(rsx::surface_depth_format2 format) + { + switch (format) + { + case rsx::surface_depth_format2::z16_uint: + return{ CELL_GCM_TEXTURE_DEPTH16, true }; + case rsx::surface_depth_format2::z24s8_uint: + return{ CELL_GCM_TEXTURE_DEPTH24_D8, true }; + case rsx::surface_depth_format2::z16_float: + return{ CELL_GCM_TEXTURE_DEPTH16_FLOAT, true }; + case rsx::surface_depth_format2::z24s8_float: + return{ CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT, true }; + default: + ASSUME(0); + } + } + + rsx::format_class classify_format(rsx::surface_depth_format2 format) + { + switch (format) + { + case rsx::surface_depth_format2::z16_uint: + return RSX_FORMAT_CLASS_DEPTH16_UNORM; + case rsx::surface_depth_format2::z24s8_uint: + return RSX_FORMAT_CLASS_DEPTH24_UNORM_X8_PACK32; + case rsx::surface_depth_format2::z16_float: + return RSX_FORMAT_CLASS_DEPTH16_FLOAT; + case rsx::surface_depth_format2::z24s8_float: + return RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32; + default: + return RSX_FORMAT_CLASS_COLOR; + } + } + + rsx::format_class classify_format(u32 gcm_format) + { + switch (gcm_format) + { + case CELL_GCM_TEXTURE_DEPTH16: + return RSX_FORMAT_CLASS_DEPTH16_UNORM; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + return RSX_FORMAT_CLASS_DEPTH16_FLOAT; + case CELL_GCM_TEXTURE_DEPTH24_D8: + return RSX_FORMAT_CLASS_DEPTH24_UNORM_X8_PACK32; + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + return RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32; + default: + return RSX_FORMAT_CLASS_COLOR; + } + } + + u32 get_max_depth_value(rsx::surface_depth_format2 format) + { + return get_format_block_size_in_bytes(format) == 2 ? 0xFFFF : 0xFFFFFF; } } - -std::pair get_compatible_gcm_format(rsx::surface_depth_format2 format) -{ - switch (format) - { - case rsx::surface_depth_format2::z16_uint: - return{ CELL_GCM_TEXTURE_DEPTH16, true }; - case rsx::surface_depth_format2::z24s8_uint: - return{ CELL_GCM_TEXTURE_DEPTH24_D8, true }; - case rsx::surface_depth_format2::z16_float : - return{ CELL_GCM_TEXTURE_DEPTH16_FLOAT, true }; - case rsx::surface_depth_format2::z24s8_float: - return{ CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT, true }; - default: - ASSUME(0); - } -} - -u32 get_max_depth_value(rsx::surface_depth_format2 format) -{ - return get_format_block_size_in_bytes(format) == 2 ? 0xFFFF : 0xFFFFFF; -} diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 78300de896..a92854aef1 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -43,19 +43,31 @@ namespace rsx transfer = 2 }; - enum format_type : u8 + // Defines how the underlying PS3-visible memory backed by a texture is accessed + namespace format_class_ { - color = 0, - depth_uint = 1, - depth_float = 2 - }; + // TODO: Remove when enum import is supported by GCC + enum format_class : u8 + { + RSX_FORMAT_CLASS_UNDEFINED = 0, + RSX_FORMAT_CLASS_COLOR = 1, + RSX_FORMAT_CLASS_DEPTH16_UNORM = 2, + RSX_FORMAT_CLASS_DEPTH16_FLOAT = 4, + RSX_FORMAT_CLASS_DEPTH24_UNORM_X8_PACK32 = 8, + RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32 = 16, + + RSX_FORMAT_CLASS_DEPTH_FLOAT_MASK = (RSX_FORMAT_CLASS_DEPTH16_FLOAT | RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32) + }; + } + + using namespace format_class_; //Sampled image descriptor struct sampled_image_descriptor_base { texture_upload_context upload_context = texture_upload_context::shader_read; rsx::texture_dimension_extended image_type = texture_dimension_extended::texture_dimension_2d; - rsx::format_type format_class = rsx::format_type::color; + rsx::format_class format_class = RSX_FORMAT_CLASS_UNDEFINED; bool is_cyclic_reference = false; f32 scale_x = 1.f; f32 scale_y = 1.f; @@ -82,88 +94,91 @@ namespace rsx void analyse(); }; + + struct subresource_layout + { + gsl::span data; + u16 width_in_texel; + u16 height_in_texel; + u16 width_in_block; + u16 height_in_block; + u16 depth; + u16 level; + u16 layer; + u8 border; + u8 reserved; + u32 pitch_in_block; + }; + + struct texture_memory_info + { + int element_size; + int block_length; + bool require_swap; + bool require_deswizzle; + }; + + struct texture_uploader_capabilities + { + bool supports_byteswap; + bool supports_vtc_decoding; + bool supports_hw_deswizzle; + size_t alignment; + }; + + /** + * Get size to store texture in a linear fashion. + * Storage is assumed to use a rowPitchAlignment boundary for every row of texture. + */ + size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignment, size_t mipmap_alignment); + size_t get_placed_texture_storage_size(const rsx::fragment_texture &texture, size_t row_pitch_alignment, size_t mipmap_alignment = 0x200); + size_t get_placed_texture_storage_size(const rsx::vertex_texture &texture, size_t row_pitch_alignment, size_t mipmap_alignment = 0x200); + + /** + * get all rsx::subresource_layout for texture. + * The subresources are ordered per layer then per mipmap level (as in rsx memory). + */ + std::vector get_subresources_layout(const rsx::fragment_texture &texture); + std::vector get_subresources_layout(const rsx::vertex_texture &texture); + + texture_memory_info upload_texture_subresource(gsl::span dst_buffer, const subresource_layout &src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps); + + u8 get_format_block_size_in_bytes(int format); + u8 get_format_block_size_in_texel(int format); + u8 get_format_block_size_in_bytes(rsx::surface_color_format format); + u8 get_format_block_size_in_bytes(rsx::surface_depth_format2 format); + + u8 get_format_sample_count(rsx::surface_antialiasing antialias); + u32 get_max_depth_value(rsx::surface_depth_format2 format); + bool is_depth_stencil_format(rsx::surface_depth_format2 format); + + /** + * Returns number of texel rows encoded in one pitch-length line of bytes + */ + u8 get_format_texel_rows_per_line(u32 format); + + /** + * Get number of bytes occupied by texture in RSX mem + */ + size_t get_texture_size(const rsx::fragment_texture &texture); + size_t get_texture_size(const rsx::vertex_texture &texture); + + /** + * Get packed pitch + */ + u32 get_format_packed_pitch(u32 format, u16 width, bool border = false, bool swizzled = false); + + /** + * Reverse encoding + */ + u32 get_remap_encoding(const std::pair, std::array>& remap); + + /** + * Get gcm texel layout. Returns + */ + std::pair get_compatible_gcm_format(rsx::surface_color_format format); + std::pair get_compatible_gcm_format(rsx::surface_depth_format2 format); + + format_class classify_format(rsx::surface_depth_format2 format); + format_class classify_format(u32 gcm_format); } - -struct rsx_subresource_layout -{ - gsl::span data; - u16 width_in_texel; - u16 height_in_texel; - u16 width_in_block; - u16 height_in_block; - u16 depth; - u16 level; - u16 layer; - u8 border; - u8 reserved; - u32 pitch_in_block; -}; - -struct texture_memory_info -{ - int element_size; - int block_length; - bool require_swap; - bool require_deswizzle; -}; - -struct texture_uploader_capabilities -{ - bool supports_byteswap; - bool supports_vtc_decoding; - bool supports_hw_deswizzle; - size_t alignment; -}; - -/** -* Get size to store texture in a linear fashion. -* Storage is assumed to use a rowPitchAlignment boundary for every row of texture. -*/ -size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignment, size_t mipmap_alignment); -size_t get_placed_texture_storage_size(const rsx::fragment_texture &texture, size_t row_pitch_alignment, size_t mipmap_alignment = 0x200); -size_t get_placed_texture_storage_size(const rsx::vertex_texture &texture, size_t row_pitch_alignment, size_t mipmap_alignment = 0x200); - -/** - * get all rsx_subresource_layout for texture. - * The subresources are ordered per layer then per mipmap level (as in rsx memory). - */ -std::vector get_subresources_layout(const rsx::fragment_texture &texture); -std::vector get_subresources_layout(const rsx::vertex_texture &texture); - -texture_memory_info upload_texture_subresource(gsl::span dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, const texture_uploader_capabilities& caps); - -u8 get_format_block_size_in_bytes(int format); -u8 get_format_block_size_in_texel(int format); -u8 get_format_block_size_in_bytes(rsx::surface_color_format format); -u8 get_format_block_size_in_bytes(rsx::surface_depth_format2 format); - -u8 get_format_sample_count(rsx::surface_antialiasing antialias); -u32 get_max_depth_value(rsx::surface_depth_format2 format); -bool is_depth_stencil_format(rsx::surface_depth_format2 format); - -/** - * Returns number of texel rows encoded in one pitch-length line of bytes - */ -u8 get_format_texel_rows_per_line(u32 format); - -/** -* Get number of bytes occupied by texture in RSX mem -*/ -size_t get_texture_size(const rsx::fragment_texture &texture); -size_t get_texture_size(const rsx::vertex_texture &texture); - -/** -* Get packed pitch -*/ -u32 get_format_packed_pitch(u32 format, u16 width, bool border = false, bool swizzled = false); - -/** -* Reverse encoding -*/ -u32 get_remap_encoding(const std::pair, std::array>& remap); - -/** - * Get gcm texel layout. Returns - */ -std::pair get_compatible_gcm_format(rsx::surface_color_format format); -std::pair get_compatible_gcm_format(rsx::surface_depth_format2 format); diff --git a/rpcs3/Emu/RSX/Common/surface_utils.h b/rpcs3/Emu/RSX/Common/surface_utils.h index cb13937bed..7d1ac4e1cb 100644 --- a/rpcs3/Emu/RSX/Common/surface_utils.h +++ b/rpcs3/Emu/RSX/Common/surface_utils.h @@ -135,8 +135,6 @@ namespace rsx u8 samples_x = 1; u8 samples_y = 1; - format_type format_class = format_type::color; - std::unique_ptr::type> resolve_surface; surface_sample_layout sample_layout = surface_sample_layout::null; surface_raster_type raster_type = surface_raster_type::linear; @@ -266,15 +264,6 @@ namespace rsx void set_format(rsx::surface_depth_format2 format) { format_info.gcm_depth_format = format; - - if (format >= rsx::surface_depth_format2::z16_float) - { - format_class = rsx::format_type::depth_float; - } - else - { - format_class = rsx::format_type::depth_uint; - } } rsx::surface_color_format get_surface_color_format() const @@ -287,11 +276,6 @@ namespace rsx return format_info.gcm_depth_format; } - rsx::format_type get_format_type() const - { - return format_class; - } - bool dirty() const { return (state_flags != rsx::surface_state_flags::ready) || !old_contents.empty(); diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 65a0f6fbc0..bdd64117a7 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -169,7 +169,7 @@ namespace rsx sampled_image_descriptor() = default; - sampled_image_descriptor(image_view_type handle, texture_upload_context ctx, format_type ftype, + sampled_image_descriptor(image_view_type handle, texture_upload_context ctx, rsx::format_class ftype, size2f scale, rsx::texture_dimension_extended type, bool cyclic_reference = false) { image_handle = handle; @@ -183,7 +183,7 @@ namespace rsx sampled_image_descriptor(image_resource_type external_handle, deferred_request_command reason, const image_section_attributes_t& attr, position2u src_offset, - texture_upload_context ctx, format_type ftype, size2f scale, + texture_upload_context ctx, rsx::format_class ftype, size2f scale, rsx::texture_dimension_extended type, const texture_channel_remap_t& remap) { external_subresource_desc = { external_handle, reason, attr, src_offset, remap }; @@ -320,7 +320,7 @@ namespace rsx virtual section_storage_type* create_new_texture(commandbuffer_type&, const address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, bool swizzled, texture_create_flags flags) = 0; virtual section_storage_type* upload_image_from_cpu(commandbuffer_type&, const address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, texture_upload_context context, - const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) = 0; + const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) = 0; virtual section_storage_type* create_nul_section(commandbuffer_type&, const address_range &rsx_range, bool memory_load) = 0; virtual void enforce_surface_creation_type(section_storage_type& section, u32 gcm_format, texture_create_flags expected) = 0; virtual void insert_texture_barrier(commandbuffer_type&, image_storage_type* tex) = 0; @@ -1516,7 +1516,7 @@ namespace rsx // Most mesh textures are stored as compressed to make the most of the limited memory if (auto cached_texture = find_texture_from_dimensions(attr.address, attr.gcm_format, attr.width, attr.height, attr.depth)) { - return{ cached_texture->get_view(encoded_remap, remap), cached_texture->get_context(), cached_texture->get_format_type(), scale, cached_texture->get_image_type() }; + return{ cached_texture->get_view(encoded_remap, remap), cached_texture->get_context(), cached_texture->get_format_class(), scale, cached_texture->get_image_type() }; } } else @@ -1605,7 +1605,7 @@ namespace rsx continue; } - return{ cached_texture->get_view(encoded_remap, remap), cached_texture->get_context(), cached_texture->get_format_type(), scale, cached_texture->get_image_type() }; + return{ cached_texture->get_view(encoded_remap, remap), cached_texture->get_context(), cached_texture->get_format_class(), scale, cached_texture->get_image_type() }; } } @@ -1677,7 +1677,7 @@ namespace rsx new_attr.gcm_format = gcm_format; return { last->get_raw_texture(), deferred_request_command::copy_image_static, new_attr, {}, - last->get_context(), helpers::get_format_class(gcm_format), scale, extended_dimension, remap }; + last->get_context(), classify_format(gcm_format), scale, extended_dimension, remap }; } } @@ -1936,7 +1936,7 @@ namespace rsx // Do direct upload from CPU as the last resort const auto subresources_layout = get_subresources_layout(tex); - const auto format_class = helpers::get_format_class(attributes.gcm_format); + const auto format_class = classify_format(attributes.gcm_format); if (!tex_size) { @@ -2544,8 +2544,8 @@ namespace rsx image_height = src_h; } - std::vector subresource_layout; - rsx_subresource_layout subres = {}; + std::vector subresource_layout; + rsx::subresource_layout subres = {}; subres.width_in_block = subres.width_in_texel = image_width; subres.height_in_block = subres.height_in_texel = image_height; subres.pitch_in_block = full_width; @@ -2676,8 +2676,8 @@ namespace rsx utils::memory_protect(vm::base(prot_range.start), prot_range.length(), utils::protection::no); const u16 pitch_in_block = dst.pitch / dst_bpp; - std::vector subresource_layout; - rsx_subresource_layout subres = {}; + std::vector subresource_layout; + rsx::subresource_layout subres = {}; subres.width_in_block = subres.width_in_texel = dst_dimensions.width; subres.height_in_block = subres.height_in_texel = dst_dimensions.height; subres.pitch_in_block = pitch_in_block; diff --git a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h index 47620e1ea1..4ba34aeaeb 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h @@ -164,21 +164,6 @@ namespace rsx } } - static inline format_type get_format_class(u32 gcm_format) - { - switch (gcm_format) - { - default: - return format_type::color; - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH24_D8: - return format_type::depth_uint; - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - return format_type::depth_float; - } - } - static blit_target_properties get_optimal_blit_target_properties( bool src_is_render_target, address_range dst_range, @@ -556,7 +541,7 @@ namespace rsx const auto scaled_w = rsx::apply_resolution_scale(attr2.width, true); const auto scaled_h = rsx::apply_resolution_scale(attr2.height, true); - const auto format_class = (force_convert) ? get_format_class(attr2.gcm_format) : texptr->get_format_type(); + const auto format_class = (force_convert) ? classify_format(attr2.gcm_format) : texptr->format_class(); const auto command = surface_is_rop_target ? deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static; attr2.width = scaled_w; @@ -568,7 +553,7 @@ namespace rsx } return{ texptr->get_view(encoded_remap, decoded_remap), texture_upload_context::framebuffer_storage, - texptr->get_format_type(), scale, rsx::texture_dimension_extended::texture_dimension_2d, surface_is_rop_target }; + texptr->format_class(), scale, rsx::texture_dimension_extended::texture_dimension_2d, surface_is_rop_target }; } const auto scaled_w = rsx::apply_resolution_scale(attr2.width, true); @@ -578,7 +563,7 @@ namespace rsx { return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::_3d_unwrap, attr2, {}, - texture_upload_context::framebuffer_storage, texptr->get_format_type(), scale, + texture_upload_context::framebuffer_storage, texptr->format_class(), scale, rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap }; } @@ -589,7 +574,7 @@ namespace rsx return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::cubemap_unwrap, attr2, {}, - texture_upload_context::framebuffer_storage, texptr->get_format_type(), scale, + texture_upload_context::framebuffer_storage, texptr->format_class(), scale, rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap }; } @@ -659,7 +644,7 @@ namespace rsx // If this method was called, there is no easy solution, likely means atlas gather is needed auto scaled_w = rsx::apply_resolution_scale(attr2.width, true); auto scaled_h = rsx::apply_resolution_scale(attr2.height, true); - const auto format_class = get_format_class(attr2.gcm_format); + const auto format_class = classify_format(attr2.gcm_format); if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_cubemap) { diff --git a/rpcs3/Emu/RSX/Common/texture_cache_utils.h b/rpcs3/Emu/RSX/Common/texture_cache_utils.h index ca99e7fea6..5a713c6526 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_utils.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h @@ -1673,21 +1673,9 @@ namespace rsx return sync_timestamp; } - format_type get_format_type() const + rsx::format_class get_format_class() const { - switch (gcm_format) - { - default: - return format_type::color; - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH24_D8: - return format_type::depth_uint; - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - return format_type::depth_float; - case 0: - fmt::throw_exception("Unreachable" HERE); - } + return classify_format(gcm_format); } /** diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 1424c5d0df..d6cf4cd9f4 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -40,6 +40,9 @@ inline static void _SelectTexture(int unit) { glActiveTexture(GL_TEXTURE0 + unit); } +//using enum rsx::format_class; +using namespace ::rsx::format_class_; + namespace gl { //Function call wrapped in ARB_DSA vs EXT_DSA compat check @@ -1516,18 +1519,6 @@ namespace gl ref_to_texture = GL_COMPARE_REF_TO_TEXTURE }; - enum class compare_func - { - never = GL_NEVER, - less = GL_LESS, - equal = GL_EQUAL, - lequal = GL_LEQUAL, - greater = GL_GREATER, - notequal = GL_NOTEQUAL, - gequal = GL_GEQUAL, - always = GL_ALWAYS - }; - enum class target { texture1D = GL_TEXTURE_1D, @@ -1537,25 +1528,6 @@ namespace gl textureBuffer = GL_TEXTURE_BUFFER }; - enum class channel_type - { - none = GL_NONE, - signed_normalized = GL_SIGNED_NORMALIZED, - unsigned_normalized = GL_UNSIGNED_NORMALIZED, - float_ = GL_FLOAT, - int_ = GL_INT, - uint_ = GL_UNSIGNED_INT - }; - - enum class channel_name - { - red = GL_TEXTURE_RED_TYPE, - green = GL_TEXTURE_GREEN_TYPE, - blue = GL_TEXTURE_BLUE_TYPE, - alpha = GL_TEXTURE_ALPHA_TYPE, - depth = GL_TEXTURE_DEPTH_TYPE - }; - protected: GLuint m_id = GL_NONE; GLuint m_width = 0; @@ -1570,6 +1542,8 @@ namespace gl internal_format m_internal_format = internal_format::rgba8; std::array m_component_layout; + rsx::format_class m_format_class = RSX_FORMAT_CLASS_UNDEFINED; + private: class save_binding_state { @@ -1612,7 +1586,8 @@ namespace gl texture(const texture&) = delete; texture(texture&& texture_) = delete; - texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLenum sized_format) + texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLenum sized_format, + rsx::format_class format_class = rsx::RSX_FORMAT_CLASS_UNDEFINED) { save_binding_state save(target); glGenTextures(1, &m_id); @@ -1705,9 +1680,22 @@ namespace gl } } + if (format_class == RSX_FORMAT_CLASS_UNDEFINED) + { + if (m_aspect_flags != image_aspect::color) + { + rsx_log.error("Undefined format class for depth texture is not allowed"); + } + else + { + format_class = RSX_FORMAT_CLASS_COLOR; + } + } + m_target = static_cast(target); m_internal_format = static_cast(sized_format); m_component_layout = { GL_ALPHA, GL_RED, GL_GREEN, GL_BLUE }; + m_format_class = format_class; } virtual ~texture() @@ -1791,6 +1779,11 @@ namespace gl return m_aspect_flags; } + rsx::format_class format_class() const + { + return m_format_class; + } + sizeu size2D() const { return{ m_width, m_height }; diff --git a/rpcs3/Emu/RSX/GL/GLPresent.cpp b/rpcs3/Emu/RSX/GL/GLPresent.cpp index 99c7c5d25f..4b81db2987 100644 --- a/rpcs3/Emu/RSX/GL/GLPresent.cpp +++ b/rpcs3/Emu/RSX/GL/GLPresent.cpp @@ -10,7 +10,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons // Check the surface store first gl::command_context cmd = { gl_state }; - const auto format_bpp = get_format_block_size_in_bytes(info->format); + const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format); const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 06be7cab72..27bacf47f6 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -458,7 +458,7 @@ void gl::render_target::load_memory(gl::command_context& cmd) get_compatible_gcm_format(format_info.gcm_depth_format).first : get_compatible_gcm_format(format_info.gcm_color_format).first; - rsx_subresource_layout subres{}; + rsx::subresource_layout subres{}; subres.width_in_block = subres.width_in_texel = surface_width * samples_x; subres.height_in_block = subres.height_in_texel = surface_height * samples_y; subres.pitch_in_block = rsx_pitch / get_bpp(); diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 00d674a504..ae173dfc78 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -480,9 +480,9 @@ namespace gl } void fill_texture(rsx::texture_dimension_extended dim, u16 mipmap_count, int format, u16 width, u16 height, u16 depth, - const std::vector &input_layouts, bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector& staging_buffer) + const std::vector &input_layouts, bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector& staging_buffer) { - texture_uploader_capabilities caps{ true, false, false, 4 }; + rsx::texture_uploader_capabilities caps{ true, false, false, 4 }; pixel_unpack_settings unpack_settings; unpack_settings.row_length(0).alignment(4); @@ -496,7 +496,7 @@ namespace gl const GLsizei format_block_size = (format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16; - for (const rsx_subresource_layout& layout : input_layouts) + for (const rsx::subresource_layout& layout : input_layouts) { upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps); const sizei image_size{ align(layout.width_in_texel, 4), align(layout.height_in_texel, 4) }; @@ -561,7 +561,7 @@ namespace gl unpack_settings.apply(); } - for (const rsx_subresource_layout& layout : input_layouts) + for (const rsx::subresource_layout& layout : input_layouts) { auto op = upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps); if (apply_settings) @@ -627,7 +627,7 @@ namespace gl } void upload_texture(GLuint id, u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, bool is_swizzled, rsx::texture_dimension_extended type, - const std::vector& subresources_layout) + const std::vector& subresources_layout) { GLenum target; switch (type) @@ -652,7 +652,7 @@ namespace gl // The rest of sampler state is now handled by sampler state objects // Calculate staging buffer size - const u32 aligned_pitch = align(width * get_format_block_size_in_bytes(gcm_format), 4); + const u32 aligned_pitch = align(width * rsx::get_format_block_size_in_bytes(gcm_format), 4); size_t texture_data_sz = depth * height * aligned_pitch; std::vector data_upload_buf(texture_data_sz); diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index d6a9224ee3..86c35e34b9 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -44,7 +44,7 @@ namespace gl * static_state - set up the texture without consideration for sampler state (useful for vertex textures which have no real sampler state on RSX) */ void upload_texture(GLuint id, u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, bool is_swizzled, rsx::texture_dimension_extended type, - const std::vector& subresources_layout); + const std::vector& subresources_layout); class sampler_state { diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index df7101e802..69dee1d630 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -895,7 +895,7 @@ namespace gl } cached_texture_section* upload_image_from_cpu(gl::command_context &cmd, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, - rsx::texture_upload_context context, const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool input_swizzled) override + rsx::texture_upload_context context, const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool input_swizzled) override { auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, input_swizzled, rsx::texture_create_flags::default_component_order); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 31ed7cc294..baa1f0774a 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1830,7 +1830,7 @@ namespace rsx if (raw_format & CELL_GCM_TEXTURE_UN) result.unnormalized_coords |= (1 << i); - if (sampler_descriptors[i]->format_class != format_type::color) + if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR) { switch (format) { @@ -1844,7 +1844,7 @@ namespace rsx { // Reading depth data as XRGB8 is supported with in-shader conversion // TODO: Optionally add support for 16-bit formats (not necessary since type casts are easy with that) - u32 control_bits = sampler_descriptors[i]->format_class == format_type::depth_float? (1u << 16) : 0u; + u32 control_bits = sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32? (1u << 16) : 0u; control_bits |= tex.remap() & 0xFFFF; result.redirected_textures |= (1 << i); result.texture_scale[i][2] = std::bit_cast(control_bits); diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 6416e92770..7a06736de7 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -198,7 +198,7 @@ void VKGSRender::load_texture_env() // Check if non-point filtering can even be used on this format bool can_sample_linear; - if (sampler_state->format_class == rsx::format_type::color) [[likely]] + if (sampler_state->format_class == RSX_FORMAT_CLASS_COLOR) [[likely]] { // Most PS3-like formats can be linearly filtered without problem can_sample_linear = true; diff --git a/rpcs3/Emu/RSX/VK/VKFormats.cpp b/rpcs3/Emu/RSX/VK/VKFormats.cpp index 42d58b9874..55f8e61eb3 100644 --- a/rpcs3/Emu/RSX/VK/VKFormats.cpp +++ b/rpcs3/Emu/RSX/VK/VKFormats.cpp @@ -487,4 +487,16 @@ namespace vk return false; } + + bool formats_are_bitcast_compatible(image* image1, image* image2) + { + if (const u32 transfer_class = image1->format_class() | image2->format_class(); + transfer_class & RSX_FORMAT_CLASS_DEPTH_FLOAT_MASK) + { + // If any one of the two images is a depth float, the other must match exactly or bust + return (image1->format_class() == image2->format_class()); + } + + return formats_are_bitcast_compatible(image1->format(), image2->format()); + } } diff --git a/rpcs3/Emu/RSX/VK/VKFormats.h b/rpcs3/Emu/RSX/VK/VKFormats.h index 43119e397a..3e78e8e8aa 100644 --- a/rpcs3/Emu/RSX/VK/VKFormats.h +++ b/rpcs3/Emu/RSX/VK/VKFormats.h @@ -20,6 +20,7 @@ namespace vk std::pair get_format_element_size(VkFormat format); std::pair get_format_convert_flags(VkFormat format); bool formats_are_bitcast_compatible(VkFormat format1, VkFormat format2); + bool formats_are_bitcast_compatible(image* image1, image* image2); minification_filter get_min_filter(rsx::texture_minify_filter min_filter); VkFilter get_mag_filter(rsx::texture_magnify_filter mag_filter); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index f63a709b69..2743d9ab3a 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -344,7 +344,7 @@ namespace vk return tex->get_view(VK_REMAP_IDENTITY, rsx::default_remap_vector); } - vk::image* get_typeless_helper(VkFormat format, u32 requested_width, u32 requested_height) + vk::image* get_typeless_helper(VkFormat format, rsx::format_class format_class, u32 requested_width, u32 requested_height) { auto create_texture = [&]() { @@ -356,7 +356,9 @@ namespace vk VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, 0); }; - auto& ptr = g_typeless_textures[+format]; + const u32 key = (format_class << 24u) | format; + auto& ptr = g_typeless_textures[key]; + if (!ptr || ptr->width() < requested_width || ptr->height() < requested_height) { if (ptr) diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 66560503c3..40762f6e39 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -36,6 +36,9 @@ #define FRAME_PRESENT_TIMEOUT 10000000ull // 10 seconds #define GENERAL_WAIT_TIMEOUT 2000000ull // 2 seconds +//using enum rsx::format_class; +using namespace ::rsx::format_class_; + namespace rsx { class fragment_texture; @@ -145,7 +148,7 @@ namespace vk VkSampler null_sampler(); image_view* null_image_view(vk::command_buffer&, VkImageViewType type); - image* get_typeless_helper(VkFormat format, u32 requested_width, u32 requested_height); + image* get_typeless_helper(VkFormat format, rsx::format_class format_class, u32 requested_width, u32 requested_height); buffer* get_scratch_buffer(u32 min_required_size = 0); data_heap* get_upload_heap(); @@ -180,7 +183,7 @@ namespace vk * dst_image must be in TRANSFER_DST_OPTIMAL layout and upload_buffer have TRANSFER_SRC_BIT usage flag. */ void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, - const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, + const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align = 0); //Other texture management helpers @@ -192,16 +195,15 @@ namespace vk void copy_buffer_to_image(VkCommandBuffer cmd, const vk::buffer* src, const vk::image* dst, const VkBufferImageCopy& region); void copy_image_typeless(const command_buffer &cmd, image *src, image *dst, const areai& src_rect, const areai& dst_rect, - u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect, - VkImageAspectFlags src_transfer_mask = 0xFF, VkImageAspectFlags dst_transfer_mask = 0xFF); + u32 mipmaps, VkImageAspectFlags src_transfer_mask = 0xFF, VkImageAspectFlags dst_transfer_mask = 0xFF); - void copy_image(VkCommandBuffer cmd, VkImage src, VkImage dst, VkImageLayout srcLayout, VkImageLayout dstLayout, - const areai& src_rect, const areai& dst_rect, u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect, + void copy_image(const vk::command_buffer& cmd, vk::image* src, vk::image* dst, + const areai& src_rect, const areai& dst_rect, u32 mipmaps, VkImageAspectFlags src_transfer_mask = 0xFF, VkImageAspectFlags dst_transfer_mask = 0xFF); - void copy_scaled_image(VkCommandBuffer cmd, VkImage src, VkImage dst, VkImageLayout srcLayout, VkImageLayout dstLayout, - const areai& src_rect, const areai& dst_rect, u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats, - VkFilter filter = VK_FILTER_LINEAR, VkFormat src_format = VK_FORMAT_UNDEFINED, VkFormat dst_format = VK_FORMAT_UNDEFINED); + void copy_scaled_image(const vk::command_buffer& cmd, vk::image* src, vk::image* dst, + const areai& src_rect, const areai& dst_rect, u32 mipmaps, + bool compatible_formats, VkFilter filter = VK_FILTER_LINEAR); std::pair get_compatible_surface_format(rsx::surface_color_format color_format); @@ -1385,6 +1387,7 @@ private: std::stack m_layout_stack; VkImageAspectFlags m_storage_aspect = 0; + rsx::format_class m_format_class = RSX_FORMAT_CLASS_UNDEFINED; public: VkImage value = VK_NULL_HANDLE; VkComponentMapping native_component_map = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A}; @@ -1403,7 +1406,8 @@ private: VkImageLayout initial_layout, VkImageTiling tiling, VkImageUsageFlags usage, - VkImageCreateFlags image_flags) + VkImageCreateFlags image_flags, + rsx::format_class format_class = RSX_FORMAT_CLASS_UNDEFINED) : current_layout(initial_layout) , m_device(dev) { @@ -1437,6 +1441,20 @@ private: CHECK_RESULT(vkBindImageMemory(m_device, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset())); m_storage_aspect = get_aspect_flags(format); + + if (format_class == RSX_FORMAT_CLASS_UNDEFINED) + { + if (m_storage_aspect != VK_IMAGE_ASPECT_COLOR_BIT) + { + rsx_log.error("Depth/stencil textures must have format class explicitly declared"); + } + else + { + format_class = RSX_FORMAT_CLASS_COLOR; + } + } + + m_format_class = format_class; } // TODO: Ctor that uses a provided memory heap @@ -1489,6 +1507,11 @@ private: return m_storage_aspect; } + rsx::format_class format_class() const + { + return m_format_class; + } + void push_layout(VkCommandBuffer cmd, VkImageLayout layout) { m_layout_stack.push(current_layout); diff --git a/rpcs3/Emu/RSX/VK/VKPresent.cpp b/rpcs3/Emu/RSX/VK/VKPresent.cpp index 2dd874f27e..8daabdeb06 100644 --- a/rpcs3/Emu/RSX/VK/VKPresent.cpp +++ b/rpcs3/Emu/RSX/VK/VKPresent.cpp @@ -275,7 +275,7 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const vk::image* image_to_flip = nullptr; // Check the surface store first - const auto format_bpp = get_format_block_size_in_bytes(info->format); + const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format); const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read); @@ -601,8 +601,26 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info) if (calibration_src.empty()) [[likely]] { - vk::copy_scaled_image(*m_current_command_buffer, image_to_flip->value, target_image, image_to_flip->current_layout, target_layout, - { 0, 0, static_cast(buffer_width), static_cast(buffer_height) }, aspect_ratio, 1, VK_IMAGE_ASPECT_COLOR_BIT, false); + // Do raw transfer here as there is no image object associated with textures owned by the driver (TODO) + const areai dst_rect = aspect_ratio; + VkImageBlit rgn = {}; + + rgn.srcSubresource = { image_to_flip->aspect(), 0, 0, 1 }; + rgn.dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 }; + rgn.srcOffsets[0] = { 0, 0, 0 }; + rgn.srcOffsets[1] = { s32(buffer_width), s32(buffer_height), 1 }; + rgn.dstOffsets[0] = { dst_rect.x1, dst_rect.y1, 0 }; + rgn.dstOffsets[1] = { dst_rect.x2, dst_rect.y2, 1 }; + + image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + if (target_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) + { + vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + } + + vkCmdBlitImage(*m_current_command_buffer, image_to_flip->value, image_to_flip->current_layout, target_image, target_layout, 1, &rgn, VK_FILTER_LINEAR); + image_to_flip->pop_layout(*m_current_command_buffer); } else { diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 6b7acd3d78..0cb87a4d3f 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -243,7 +243,7 @@ namespace vk gcm_format = get_compatible_gcm_format(format_info.gcm_color_format).first; } - rsx_subresource_layout subres{}; + rsx::subresource_layout subres{}; subres.width_in_block = subres.width_in_texel = surface_width * samples_x; subres.height_in_block = subres.height_in_texel = surface_height * samples_y; subres.pitch_in_block = rsx_pitch / get_bpp(); @@ -273,7 +273,7 @@ namespace vk } else { - content = vk::get_typeless_helper(format(), subres.width_in_block, subres.height_in_block); + content = vk::get_typeless_helper(format(), rsx::classify_format(gcm_format), subres.width_in_block, subres.height_in_block); content->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); } @@ -286,10 +286,10 @@ namespace vk // Avoid layout push/pop on scratch memory by setting explicit layout here content->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - vk::copy_scaled_image(cmd, content->value, final_dst->value, content->current_layout, final_dst->current_layout, - { 0, 0, subres.width_in_block, subres.height_in_block }, { 0, 0, static_cast(final_dst->width()), static_cast(final_dst->height()) }, - 1, aspect(), true, aspect() == VK_IMAGE_ASPECT_COLOR_BIT ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, - format(), format()); + vk::copy_scaled_image(cmd, content, final_dst, + { 0, 0, subres.width_in_block, subres.height_in_block }, + { 0, 0, static_cast(final_dst->width()), static_cast(final_dst->height()) }, + 1, true, aspect() == VK_IMAGE_ASPECT_COLOR_BIT ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); } final_dst->pop_layout(cmd); @@ -670,7 +670,7 @@ namespace rsx VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, usage_flags, - 0); + 0, RSX_FORMAT_CLASS_COLOR); rtt->change_layout(cmd, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); @@ -728,7 +728,7 @@ namespace rsx VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, usage_flags, - 0); + 0, rsx::classify_format(format)); ds->change_layout(cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 2225c5bc4d..d2a1f815fa 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -118,8 +118,7 @@ namespace vk { job = vk::get_compute_task>(); } - else if (auto dsv = dynamic_cast(src); - dsv && dsv->get_format_type() == rsx::format_type::depth_float) + else if (src->format_class() == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32) { job = vk::get_compute_task>(); } @@ -134,8 +133,7 @@ namespace vk { job = vk::get_compute_task>(); } - else if (auto dsv = dynamic_cast(src); - dsv && dsv->get_format_type() == rsx::format_type::depth_float) + else if (src->format_class() == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32) { job = vk::get_compute_task>(); } @@ -211,8 +209,7 @@ namespace vk { job = vk::get_compute_task(); } - else if (auto dsv = dynamic_cast(dst); - dsv && dsv->get_format_type() == rsx::format_type::depth_float) + else if (dst->format_class() == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32) { job = vk::get_compute_task>(); } @@ -241,11 +238,11 @@ namespace vk } void copy_image_typeless(const vk::command_buffer& cmd, vk::image* src, vk::image* dst, const areai& src_rect, const areai& dst_rect, - u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect, VkImageAspectFlags src_transfer_mask, VkImageAspectFlags dst_transfer_mask) + u32 mipmaps, VkImageAspectFlags src_transfer_mask, VkImageAspectFlags dst_transfer_mask) { if (src->info.format == dst->info.format) { - copy_image(cmd, src->value, dst->value, src->current_layout, dst->current_layout, src_rect, dst_rect, mipmaps, src_aspect, dst_aspect, src_transfer_mask, dst_transfer_mask); + copy_image(cmd, src, dst, src_rect, dst_rect, mipmaps, src_transfer_mask, dst_transfer_mask); return; } @@ -267,11 +264,11 @@ namespace vk VkBufferImageCopy src_copy{}, dst_copy{}; src_copy.imageExtent = { u32(src_rect.x2 - src_rect.x1), u32(src_rect.y2 - src_rect.y1), 1 }; src_copy.imageOffset = { src_rect.x1, src_rect.y1, 0 }; - src_copy.imageSubresource = { src_aspect & src_transfer_mask, 0, 0, 1 }; + src_copy.imageSubresource = { src->aspect() & src_transfer_mask, 0, 0, 1 }; dst_copy.imageExtent = { u32(dst_rect.x2 - dst_rect.x1), u32(dst_rect.y2 - dst_rect.y1), 1 }; dst_copy.imageOffset = { dst_rect.x1, dst_rect.y1, 0 }; - dst_copy.imageSubresource = { dst_aspect & dst_transfer_mask, 0, 0, 1 }; + dst_copy.imageSubresource = { dst->aspect() & dst_transfer_mask, 0, 0, 1 }; const auto src_texel_size = vk::get_format_texel_width(src->info.format); const auto src_length = src_texel_size * src_copy.imageExtent.width * src_copy.imageExtent.height; @@ -346,21 +343,28 @@ namespace vk if (src != dst) dst->pop_layout(cmd); } - void copy_image(VkCommandBuffer cmd, VkImage src, VkImage dst, VkImageLayout srcLayout, VkImageLayout dstLayout, - const areai& src_rect, const areai& dst_rect, u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect, + void copy_image(const vk::command_buffer& cmd, vk::image* src, vk::image* dst, + const areai& src_rect, const areai& dst_rect, u32 mipmaps, VkImageAspectFlags src_transfer_mask, VkImageAspectFlags dst_transfer_mask) { // NOTE: src_aspect should match dst_aspect according to spec but drivers seem to work just fine with the mismatch // TODO: Implement separate pixel transfer for drivers that refuse this workaround + if ((src->aspect() & VK_IMAGE_ASPECT_DEPTH_BIT) != 0 && + src->format() != dst->format()) + { + // Copying between depth formats must match + copy_image_typeless(cmd, src, dst, src_rect, dst_rect, mipmaps); + return; + } VkImageSubresourceLayers a_src = {}, a_dst = {}; - a_src.aspectMask = src_aspect & src_transfer_mask; + a_src.aspectMask = src->aspect() & src_transfer_mask; a_src.baseArrayLayer = 0; a_src.layerCount = 1; a_src.mipLevel = 0; a_dst = a_src; - a_dst.aspectMask = dst_aspect & dst_transfer_mask; + a_dst.aspectMask = dst->aspect() & dst_transfer_mask; VkImageCopy rgn = {}; rgn.extent.depth = 1; @@ -371,64 +375,61 @@ namespace vk rgn.srcSubresource = a_src; rgn.dstSubresource = a_dst; - auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - if (vk::is_renderpass_open(cmd)) { vk::end_renderpass(cmd); } - if (srcLayout != preferred_src_format) - change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); - - if (dstLayout != preferred_dst_format && src != dst) - change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); + if (src != dst) + { + src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + dst->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + } + else + { + src->push_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); + } for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level) { - vkCmdCopyImage(cmd, src, preferred_src_format, dst, preferred_dst_format, 1, &rgn); + vkCmdCopyImage(cmd, src->value, src->current_layout, dst->value, dst->current_layout, 1, &rgn); rgn.srcSubresource.mipLevel++; rgn.dstSubresource.mipLevel++; } - if (srcLayout != preferred_src_format) - change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); - - if (dstLayout != preferred_dst_format && src != dst) - change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); + src->pop_layout(cmd); + if (src != dst) dst->pop_layout(cmd); } - void copy_scaled_image(VkCommandBuffer cmd, - VkImage src, VkImage dst, - VkImageLayout srcLayout, VkImageLayout dstLayout, - const areai& src_rect, const areai& dst_rect, - u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats, - VkFilter filter, VkFormat src_format, VkFormat dst_format) + void copy_scaled_image(const vk::command_buffer& cmd, + vk::image* src, vk::image* dst, + const areai& src_rect, const areai& dst_rect, u32 mipmaps, + bool compatible_formats, VkFilter filter) { VkImageSubresourceLayers a_src = {}, a_dst = {}; - a_src.aspectMask = aspect; + a_src.aspectMask = src->aspect(); a_src.baseArrayLayer = 0; a_src.layerCount = 1; a_src.mipLevel = 0; a_dst = a_src; - auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - if (vk::is_renderpass_open(cmd)) { vk::end_renderpass(cmd); } //TODO: Use an array of offsets/dimensions for mipmapped blits (mipmap count > 1) since subimages will have different dimensions - if (srcLayout != preferred_src_format) - change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); - - if (dstLayout != preferred_dst_format && src != dst) - change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); + if (src != dst) + { + src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + dst->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + } + else + { + src->push_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); + } if (compatible_formats && !src_rect.is_flipped() && !dst_rect.is_flipped() && src_rect.width() == dst_rect.width() && src_rect.height() == dst_rect.height()) @@ -436,24 +437,27 @@ namespace vk VkImageCopy copy_rgn; copy_rgn.srcOffset = { src_rect.x1, src_rect.y1, 0 }; copy_rgn.dstOffset = { dst_rect.x1, dst_rect.y1, 0 }; - copy_rgn.dstSubresource = { static_cast(aspect), 0, 0, 1 }; - copy_rgn.srcSubresource = { static_cast(aspect), 0, 0, 1 }; + copy_rgn.dstSubresource = { dst->aspect(), 0, 0, 1 }; + copy_rgn.srcSubresource = { src->aspect(), 0, 0, 1 }; copy_rgn.extent = { static_cast(src_rect.width()), static_cast(src_rect.height()), 1 }; - vkCmdCopyImage(cmd, src, preferred_src_format, dst, preferred_dst_format, 1, ©_rgn); + vkCmdCopyImage(cmd, src->value, src->current_layout, dst->value, dst->current_layout, 1, ©_rgn); } - else if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) + else if ((src->aspect() & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) { //Most depth/stencil formats cannot be scaled using hw blit - if (src_format == VK_FORMAT_UNDEFINED) + if (src->format() != dst->format()) { - rsx_log.error("Could not blit depth/stencil image. src_fmt=0x%x", static_cast(src_format)); + // Can happen because of depth float mismatch. Format width should be equal RSX-side + auto typeless = vk::get_typeless_helper(dst->format(), dst->format_class(), src_rect.width(), src_rect.height()); + copy_image_typeless(cmd, src, typeless, src_rect, src_rect, mipmaps); + copy_scaled_image(cmd, typeless, dst, src_rect, dst_rect, mipmaps, true, filter); } else { verify(HERE), !dst_rect.is_flipped(); - auto stretch_image_typeless_unsafe = [&cmd, preferred_src_format, preferred_dst_format, filter](VkImage src, VkImage dst, VkImage typeless, + auto stretch_image_typeless_unsafe = [&cmd, filter](vk::image* src, vk::image* dst, vk::image* typeless, const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF) { const auto src_w = src_rect.width(); @@ -466,19 +470,16 @@ namespace vk // NOTE: Tranfers of single aspect D/S from Nvidia's D24S8 is very slow //1. Copy unscaled to typeless surface - copy_image(cmd, src, typeless, preferred_src_format, VK_IMAGE_LAYOUT_GENERAL, - src_rect, { 0, 0, src_w, src_h }, 1, aspect, VK_IMAGE_ASPECT_COLOR_BIT, transfer_flags, 0xFF); + copy_image(cmd, src, typeless, src_rect, { 0, 0, src_w, src_h }, 1, transfer_flags, 0xFF); //2. Blit typeless surface to self - copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, - { 0, 0, src_w, src_h }, { 0, src_h, dst_w, (src_h + dst_h) }, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, filter); + copy_scaled_image(cmd, typeless, typeless, { 0, 0, src_w, src_h }, { 0, src_h, dst_w, (src_h + dst_h) }, 1, true, filter); //3. Copy back the aspect bits - copy_image(cmd, typeless, dst, VK_IMAGE_LAYOUT_GENERAL, preferred_dst_format, - {0, src_h, dst_w, (src_h + dst_h) }, dst_rect, 1, VK_IMAGE_ASPECT_COLOR_BIT, aspect, 0xFF, transfer_flags); + copy_image(cmd, typeless, dst, {0, src_h, dst_w, (src_h + dst_h) }, dst_rect, 1, 0xFF, transfer_flags); }; - auto stretch_image_typeless_safe = [&cmd, preferred_src_format, preferred_dst_format, filter](VkImage src, VkImage dst, VkImage typeless, + auto stretch_image_typeless_safe = [&cmd, filter](vk::image* src, vk::image* dst, vk::image* typeless, const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF) { const auto src_w = src_rect.width(); @@ -494,27 +495,26 @@ namespace vk info.imageExtent = { static_cast(src_w), static_cast(src_h), 1 }; info.imageSubresource = { aspect & transfer_flags, 0, 0, 1 }; - vkCmdCopyImageToBuffer(cmd, src, preferred_src_format, scratch_buf->value, 1, &info); + vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, scratch_buf->value, 1, &info); insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); info.imageOffset = {}; info.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 }; - vkCmdCopyBufferToImage(cmd, scratch_buf->value, typeless, VK_IMAGE_LAYOUT_GENERAL, 1, &info); + vkCmdCopyBufferToImage(cmd, scratch_buf->value, typeless->value, VK_IMAGE_LAYOUT_GENERAL, 1, &info); //2. Blit typeless surface to self and apply transform if necessary areai src_rect2 = { 0, 0, src_w, src_h }; if (src_rect.x1 > src_rect.x2) src_rect2.flip_horizontal(); if (src_rect.y1 > src_rect.y2) src_rect2.flip_vertical(); - insert_image_memory_barrier(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, + insert_image_memory_barrier(cmd, typeless->value, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); - copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, - src_rect2, { 0, src_h, dst_w, (src_h + dst_h) }, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, filter); + copy_scaled_image(cmd, typeless, typeless, src_rect2, { 0, src_h, dst_w, (src_h + dst_h) }, 1, true, filter); - insert_image_memory_barrier(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, + insert_image_memory_barrier(cmd, typeless->value, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); @@ -523,31 +523,31 @@ namespace vk info.imageExtent = { static_cast(dst_w), static_cast(dst_h), 1 }; info.imageOffset = { 0, src_h, 0 }; - vkCmdCopyImageToBuffer(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL, scratch_buf->value, 1, &info); + vkCmdCopyImageToBuffer(cmd, typeless->value, VK_IMAGE_LAYOUT_GENERAL, scratch_buf->value, 1, &info); insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); info.imageOffset = { dst_rect.x1, dst_rect.y1, 0 }; info.imageSubresource = { aspect & transfer_flags, 0, 0, 1 }; - vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst, preferred_dst_format, 1, &info); + vkCmdCopyBufferToImage(cmd, scratch_buf->value, dst->value, dst->current_layout, 1, &info); }; const u32 typeless_w = std::max(dst_rect.width(), src_rect.width()); const u32 typeless_h = src_rect.height() + dst_rect.height(); - switch (src_format) + switch (src->format()) { case VK_FORMAT_D16_UNORM: { - auto typeless = vk::get_typeless_helper(VK_FORMAT_R16_UNORM, typeless_w, typeless_h); + auto typeless = vk::get_typeless_helper(VK_FORMAT_R16_UNORM, RSX_FORMAT_CLASS_COLOR, typeless_w, typeless_h); change_image_layout(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL); - stretch_image_typeless_unsafe(src, dst, typeless->value, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT); + stretch_image_typeless_unsafe(src, dst, typeless, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT); break; } case VK_FORMAT_D32_SFLOAT: { - auto typeless = vk::get_typeless_helper(VK_FORMAT_R32_SFLOAT, typeless_w, typeless_h); + auto typeless = vk::get_typeless_helper(VK_FORMAT_R32_SFLOAT, RSX_FORMAT_CLASS_COLOR, typeless_w, typeless_h); change_image_layout(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL); - stretch_image_typeless_unsafe(src, dst, typeless->value, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT); + stretch_image_typeless_unsafe(src, dst, typeless, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT); break; } case VK_FORMAT_D24_UNORM_S8_UINT: @@ -556,19 +556,19 @@ namespace vk if (const auto chip_family = vk::get_chip_family(); chip_family > vk::chip_class::NV_generic && chip_family < vk::chip_class::NV_turing) { - auto typeless = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM, typeless_w, typeless_h); + auto typeless = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM, RSX_FORMAT_CLASS_COLOR, typeless_w, typeless_h); change_image_layout(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL); - stretch_image_typeless_unsafe(src, dst, typeless->value, src_rect, dst_rect, depth_stencil); + stretch_image_typeless_unsafe(src, dst, typeless, src_rect, dst_rect, depth_stencil); } else { - auto typeless_depth = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM, typeless_w, typeless_h); - auto typeless_stencil = vk::get_typeless_helper(VK_FORMAT_R8_UNORM, typeless_w, typeless_h); + auto typeless_depth = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM, RSX_FORMAT_CLASS_COLOR, typeless_w, typeless_h); + auto typeless_stencil = vk::get_typeless_helper(VK_FORMAT_R8_UNORM, RSX_FORMAT_CLASS_COLOR, typeless_w, typeless_h); change_image_layout(cmd, typeless_depth, VK_IMAGE_LAYOUT_GENERAL); change_image_layout(cmd, typeless_stencil, VK_IMAGE_LAYOUT_GENERAL); - stretch_image_typeless_safe(src, dst, typeless_depth->value, src_rect, dst_rect, depth_stencil, VK_IMAGE_ASPECT_DEPTH_BIT); - stretch_image_typeless_safe(src, dst, typeless_stencil->value, src_rect, dst_rect, depth_stencil, VK_IMAGE_ASPECT_STENCIL_BIT); + stretch_image_typeless_safe(src, dst, typeless_depth, src_rect, dst_rect, depth_stencil, VK_IMAGE_ASPECT_DEPTH_BIT); + stretch_image_typeless_safe(src, dst, typeless_stencil, src_rect, dst_rect, depth_stencil, VK_IMAGE_ASPECT_STENCIL_BIT); } break; } @@ -581,14 +581,14 @@ namespace vk // NOTE: While it may seem intuitive to use R32_SFLOAT as the carrier for the depth aspect, this does not work properly // Floating point interpolation is non-linear from a bit-by-bit perspective and generates undesirable effects - auto typeless_depth = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM, typeless_w, typeless_h); - auto typeless_stencil = vk::get_typeless_helper(VK_FORMAT_R8_UNORM, typeless_w, typeless_h); + auto typeless_depth = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM, RSX_FORMAT_CLASS_COLOR, typeless_w, typeless_h); + auto typeless_stencil = vk::get_typeless_helper(VK_FORMAT_R8_UNORM, RSX_FORMAT_CLASS_COLOR, typeless_w, typeless_h); change_image_layout(cmd, typeless_depth, VK_IMAGE_LAYOUT_GENERAL); change_image_layout(cmd, typeless_stencil, VK_IMAGE_LAYOUT_GENERAL); const VkImageAspectFlags depth_stencil = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - stretch_image_typeless_safe(src, dst, typeless_depth->value, src_rect, dst_rect, depth_stencil, VK_IMAGE_ASPECT_DEPTH_BIT); - stretch_image_typeless_safe(src, dst, typeless_stencil->value, src_rect, dst_rect, depth_stencil, VK_IMAGE_ASPECT_STENCIL_BIT); + stretch_image_typeless_safe(src, dst, typeless_depth, src_rect, dst_rect, depth_stencil, VK_IMAGE_ASPECT_DEPTH_BIT); + stretch_image_typeless_safe(src, dst, typeless_stencil, src_rect, dst_rect, depth_stencil, VK_IMAGE_ASPECT_STENCIL_BIT); break; } default: @@ -609,18 +609,15 @@ namespace vk for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level) { - vkCmdBlitImage(cmd, src, preferred_src_format, dst, preferred_dst_format, 1, &rgn, filter); + vkCmdBlitImage(cmd, src->value, src->current_layout, dst->value, dst->current_layout, 1, &rgn, filter); rgn.srcSubresource.mipLevel++; rgn.dstSubresource.mipLevel++; } } - if (srcLayout != preferred_src_format) - change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); - - if (dstLayout != preferred_dst_format && src != dst) - change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); + src->pop_layout(cmd); + if (src != dst) dst->pop_layout(cmd); } void gpu_deswizzle_sections_impl(VkCommandBuffer cmd, vk::buffer* scratch_buf, u32 dst_offset, int word_size, int word_count, bool swap_bytes, std::vector& sections) @@ -756,14 +753,14 @@ namespace vk } void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, - const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, + const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, VkImageAspectFlags flags, vk::data_heap &upload_heap, u32 heap_align) { - u32 block_in_pixel = get_format_block_size_in_texel(format); - u8 block_size_in_bytes = get_format_block_size_in_bytes(format); + u32 block_in_pixel = rsx::get_format_block_size_in_texel(format); + u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); - texture_uploader_capabilities caps{ true, false, true, heap_align }; - texture_memory_info opt{}; + rsx::texture_uploader_capabilities caps{ true, false, true, heap_align }; + rsx::texture_memory_info opt{}; bool check_caps = true; vk::buffer* scratch_buf = nullptr; @@ -779,7 +776,7 @@ namespace vk vk::end_renderpass(cmd); } - for (const rsx_subresource_layout &layout : subresource_layout) + for (const rsx::subresource_layout &layout : subresource_layout) { if (!heap_align) [[likely]] { @@ -959,12 +956,11 @@ namespace vk // Final dimensions are a match if (xfer_info.src_is_typeless || xfer_info.dst_is_typeless) { - vk::copy_image_typeless(cmd, src, dst, src_area, dst_area, 1, src->aspect(), dst->aspect()); + vk::copy_image_typeless(cmd, src, dst, src_area, dst_area, 1); } else { - copy_image(cmd, src->value, dst->value, src->current_layout, dst->current_layout, - src_area, dst_area, 1, src->aspect(), dst->aspect()); + copy_image(cmd, src, dst, src_area, dst_area, 1); } return; @@ -983,11 +979,10 @@ namespace vk const auto aspect = vk::get_aspect_flags(format); // Transfer bits from src to typeless src - real_src = vk::get_typeless_helper(format, static_cast(internal_width), src->height()); + real_src = vk::get_typeless_helper(format, rsx::classify_format(xfer_info.src_gcm_format), static_cast(internal_width), src->height()); vk::change_image_layout(cmd, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect, 0, 1, 0, 1 }); - vk::copy_image_typeless(cmd, src, real_src, { 0, 0, static_cast(src->width()), static_cast(src->height()) }, { 0, 0, static_cast(internal_width), static_cast(src->height()) }, 1, - vk::get_aspect_flags(src->info.format), aspect); + vk::copy_image_typeless(cmd, src, real_src, { 0, 0, static_cast(src->width()), static_cast(src->height()) }, { 0, 0, static_cast(internal_width), static_cast(src->height()) }, 1); src_area.x1 = static_cast(src_area.x1 * xfer_info.src_scaling_hint); src_area.x2 = static_cast(src_area.x2 * xfer_info.src_scaling_hint); @@ -1006,11 +1001,10 @@ namespace vk const auto aspect = vk::get_aspect_flags(format); // Transfer bits from dst to typeless dst - real_dst = vk::get_typeless_helper(format, static_cast(internal_width), dst->height()); + real_dst = vk::get_typeless_helper(format, rsx::classify_format(xfer_info.dst_gcm_format), static_cast(internal_width), dst->height()); vk::change_image_layout(cmd, real_dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect, 0, 1, 0, 1 }); - vk::copy_image_typeless(cmd, dst, real_dst, { 0, 0, static_cast(dst->width()), static_cast(dst->height()) }, { 0, 0, static_cast(internal_width), static_cast(dst->height()) }, 1, - vk::get_aspect_flags(dst->info.format), aspect); + vk::copy_image_typeless(cmd, dst, real_dst, { 0, 0, static_cast(dst->width()), static_cast(dst->height()) }, { 0, 0, static_cast(internal_width), static_cast(dst->height()) }, 1); dst_area.x1 = static_cast(dst_area.x1 * xfer_info.dst_scaling_hint); dst_area.x2 = static_cast(dst_area.x2 * xfer_info.dst_scaling_hint); @@ -1048,15 +1042,14 @@ namespace vk verify("Incompatible source and destination format!" HERE), real_src->aspect() == real_dst->aspect(); - copy_scaled_image(cmd, real_src->value, real_dst->value, real_src->current_layout, real_dst->current_layout, - src_area, dst_area, 1, real_src->aspect(), real_src->info.format == real_dst->info.format, - interpolate ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, real_src->info.format, real_dst->info.format); + copy_scaled_image(cmd, real_src, real_dst, src_area, dst_area, 1, + formats_are_bitcast_compatible(real_src, real_dst), + interpolate ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); if (real_dst != dst) { auto internal_width = dst->width() * xfer_info.dst_scaling_hint; - vk::copy_image_typeless(cmd, real_dst, dst, { 0, 0, static_cast(internal_width), static_cast(dst->height()) }, { 0, 0, static_cast(dst->width()), static_cast(dst->height()) }, 1, - vk::get_aspect_flags(real_dst->info.format), vk::get_aspect_flags(dst->info.format)); + vk::copy_image_typeless(cmd, real_dst, dst, { 0, 0, static_cast(internal_width), static_cast(dst->height()) }, { 0, 0, static_cast(dst->width()), static_cast(dst->height()) }, 1); } } } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index cb1b795c74..c52890e2a0 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -353,15 +353,16 @@ namespace vk if (transfer_width != locked_resource->width() || transfer_height != locked_resource->height()) { // TODO: Synchronize access to typeles textures - target = vk::get_typeless_helper(vram_texture->info.format, transfer_width, transfer_height); + target = vk::get_typeless_helper(vram_texture->format(), vram_texture->format_class(), transfer_width, transfer_height); target->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); // Allow bilinear filtering on color textures where compatibility is likely const auto filter = (target->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; - vk::copy_scaled_image(cmd, locked_resource->value, target->value, locked_resource->current_layout, target->current_layout, - { 0, 0, static_cast(locked_resource->width()), static_cast(locked_resource->height()) }, { 0, 0, static_cast(transfer_width), static_cast(transfer_height) }, - 1, target->aspect(), true, filter, vram_texture->format(), target->format()); + vk::copy_scaled_image(cmd, locked_resource, target, + { 0, 0, static_cast(locked_resource->width()), static_cast(locked_resource->height()) }, + { 0, 0, static_cast(transfer_width), static_cast(transfer_height) }, + 1, true, filter); target->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); } @@ -612,7 +613,7 @@ namespace vk continue; const bool typeless = section.src->aspect() != dst_aspect || - !formats_are_bitcast_compatible(dst->format(), section.src->format()); + !formats_are_bitcast_compatible(dst, section.src); // Avoid inserting unnecessary barrier GENERAL->TRANSFER_SRC->GENERAL in active render targets const auto preferred_layout = (section.src->current_layout != VK_IMAGE_LAYOUT_GENERAL) ? @@ -656,18 +657,18 @@ namespace vk // TODO: Handle level and layer offsets const areai src_rect = coordi{{ src_x, src_y }, { src_w, src_h }}; const areai dst_rect = coordi{{ section.dst_x, section.dst_y }, { section.dst_w, section.dst_h }}; - vk::copy_image_typeless(cmd, section.src, dst, src_rect, dst_rect, 1, section.src->aspect(), dst_aspect); + vk::copy_image_typeless(cmd, section.src, dst, src_rect, dst_rect, 1); section.src->pop_layout(cmd); continue; } - src_image = vk::get_typeless_helper(dst->info.format, convert_x + convert_w, src_y + src_h); + src_image = vk::get_typeless_helper(dst->format(), dst->format_class(), convert_x + convert_w, src_y + src_h); src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); const areai src_rect = coordi{{ src_x, src_y }, { src_w, src_h }}; const areai dst_rect = coordi{{ convert_x, src_y }, { convert_w, src_h }}; - vk::copy_image_typeless(cmd, section.src, src_image, src_rect, dst_rect, 1, section.src->aspect(), dst_aspect); + vk::copy_image_typeless(cmd, section.src, src_image, src_rect, dst_rect, 1); src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); src_x = convert_x; @@ -714,17 +715,17 @@ namespace vk else { // Either a bitcast is required or a scale+copy to mipmap level - _dst = vk::get_typeless_helper(src_image->info.format, dst->width(), dst->height() * 2); + _dst = vk::get_typeless_helper(src_image->format(), src_image->format_class(), dst->width(), dst->height() * 2); _dst->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); } if (transform == rsx::surface_transform::identity) { - vk::copy_scaled_image(cmd, src_image->value, _dst->value, section.src->current_layout, _dst->current_layout, + vk::copy_scaled_image(cmd, src_image, _dst, coordi{ { src_x, src_y }, { src_w, src_h } }, coordi{ { section.dst_x, section.dst_y }, { section.dst_w, section.dst_h } }, - 1, src_image->aspect(), src_image->info.format == _dst->info.format, - VK_FILTER_NEAREST, src_image->info.format, _dst->info.format); + 1, src_image->format() == _dst->format(), + VK_FILTER_NEAREST); } else if (transform == rsx::surface_transform::argb_to_bgra) { @@ -746,7 +747,7 @@ namespace vk vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); - auto tmp = vk::get_typeless_helper(src_image->info.format, section.dst_x + section.dst_w, section.dst_y + section.dst_h); + auto tmp = vk::get_typeless_helper(src_image->format(), src_image->format_class(), section.dst_x + section.dst_w, section.dst_y + section.dst_h); tmp->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); copy.imageOffset = { 0, 0, 0 }; @@ -763,11 +764,11 @@ namespace vk dst_y = src_h; } - vk::copy_scaled_image(cmd, tmp->value, _dst->value, tmp->current_layout, _dst->current_layout, + vk::copy_scaled_image(cmd, tmp, _dst, areai{ 0, 0, src_w, static_cast(src_h) }, coordi{ { dst_x, dst_y }, { section.dst_w, section.dst_h } }, - 1, new_src_aspect, tmp->info.format == _dst->info.format, - VK_FILTER_NEAREST, tmp->info.format, _dst->info.format); + 1, tmp->info.format == _dst->info.format, + VK_FILTER_NEAREST); } else { @@ -890,7 +891,8 @@ namespace vk image_type, dst_format, w, h, d, mips, layers, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags); + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, image_flags, + rsx::classify_format(gcm_format)); } //This method is almost exclusively used to work on framebuffer resources @@ -910,7 +912,7 @@ namespace vk } image->set_native_component_layout(view_swizzle); - auto view = image->get_view(get_remap_encoding(remap_vector), remap_vector); + auto view = image->get_view(rsx::get_remap_encoding(remap_vector), remap_vector); if (copy) { @@ -1144,7 +1146,8 @@ namespace vk image_type, vk_format, width, height, depth, mipmaps, layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, usage_flags, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0); + VK_IMAGE_TILING_OPTIMAL, usage_flags, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0, + rsx::classify_format(gcm_format)); image->native_component_map = apply_component_mapping_flags(gcm_format, flags, rsx::default_remap_vector); @@ -1209,7 +1212,7 @@ namespace vk } cached_texture_section* upload_image_from_cpu(vk::command_buffer& cmd, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format, - rsx::texture_upload_context context, const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) override + rsx::texture_upload_context context, const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) override { auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, swizzled, rsx::texture_create_flags::default_component_order);