mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-31 12:31:45 +01:00
rsx: Fix blit transfers when texel sizes mismatch
- Also refactor some bpp handling code
- Simplify the texture intersection test to use a normalized/uniform coordinate space
- Fix broken bounds checking as well
This commit is contained in:
parent
b879b32271
commit
adc59f9810
@ -38,6 +38,16 @@ namespace rsx
|
||||
u16 dst_y = 0;
|
||||
u16 width = 0;
|
||||
u16 height = 0;
|
||||
|
||||
// Returns the source rectangle as an areai, converted from a coordi whose
// origin is (src_x, src_y) and whose size is (width, height).
areai get_src_area() const
|
||||
{
|
||||
return coordi{ {src_x, src_y}, {width, height} };
|
||||
}
|
||||
|
||||
// Returns the destination rectangle as an areai, converted from a coordi whose
// origin is (dst_x, dst_y) and whose size is (width, height) -- the same
// width/height fields used for the source rectangle.
areai get_dst_area() const
|
||||
{
|
||||
return coordi{ {dst_x, dst_y}, {width, height} };
|
||||
}
|
||||
};
|
||||
|
||||
struct surface_format_info
|
||||
@ -89,6 +99,11 @@ namespace rsx
|
||||
virtual u16 get_native_pitch() const = 0;
|
||||
virtual bool is_depth_surface() const = 0;
|
||||
|
||||
// Returns the per-texel size of this surface, computed as
// get_native_pitch() / get_surface_width() and narrowed to u8.
// NOTE(review): divides by get_surface_width(); presumably callers only use
// this on surfaces with a non-zero width -- confirm.
u8 get_bpp() const
|
||||
{
|
||||
return u8(get_native_pitch() / get_surface_width());
|
||||
}
|
||||
|
||||
void save_aa_mode()
|
||||
{
|
||||
read_aa_mode = write_aa_mode;
|
||||
@ -890,7 +905,7 @@ namespace rsx
|
||||
}
|
||||
|
||||
template <typename commandbuffer_type>
|
||||
std::vector<surface_overlap_info> get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch)
|
||||
std::vector<surface_overlap_info> get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u8 required_bpp)
|
||||
{
|
||||
std::vector<surface_overlap_info> result;
|
||||
std::vector<std::pair<u32, bool>> dirty;
|
||||
@ -930,16 +945,26 @@ namespace rsx
|
||||
surface_format_info surface_info{};
|
||||
Traits::get_surface_info(surface, &surface_info);
|
||||
|
||||
if (this_address < texaddr)
|
||||
const auto normalized_surface_width = (surface_info.surface_width * scale_x * surface_info.bpp) / required_bpp;
|
||||
const auto normalized_surface_height = surface_info.surface_height * scale_y;
|
||||
|
||||
if (LIKELY(this_address >= texaddr))
|
||||
{
|
||||
const auto offset = this_address - texaddr;
|
||||
info.src_x = 0;
|
||||
info.src_y = 0;
|
||||
info.dst_y = (offset / required_pitch);
|
||||
info.dst_x = (offset % required_pitch) / required_bpp;
|
||||
info.width = std::min<u32>(normalized_surface_width, required_width - info.dst_x);
|
||||
info.height = std::min<u32>(normalized_surface_height, required_height - info.dst_y);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto int_required_width = required_width / scale_x;
|
||||
const auto int_required_height = required_height / scale_y;
|
||||
|
||||
const auto offset = texaddr - this_address;
|
||||
info.src_y = (offset / required_pitch) / scale_y;
|
||||
info.src_x = (offset % required_pitch) / surface_info.bpp / scale_x;
|
||||
info.src_y = (offset / required_pitch);
|
||||
info.src_x = (offset % required_pitch) / required_bpp;
|
||||
|
||||
if (UNLIKELY(info.src_x >= surface_info.surface_width || info.src_y >= surface_info.surface_height))
|
||||
if (UNLIKELY(info.src_x >= normalized_surface_width || info.src_y >= normalized_surface_height))
|
||||
{
|
||||
// Region lies outside the actual texture area, but inside the 'tile'
|
||||
// In this case, a small region lies to the top-left corner, partially occupying the target
|
||||
@ -948,31 +973,26 @@ namespace rsx
|
||||
|
||||
info.dst_x = 0;
|
||||
info.dst_y = 0;
|
||||
info.width = std::min<u32>(int_required_width, surface_info.surface_width - info.src_x);
|
||||
info.height = std::min<u32>(int_required_height, surface_info.surface_height - info.src_y);
|
||||
info.is_clipped = (info.width < int_required_width || info.height < int_required_height);
|
||||
info.width = std::min<u32>(required_width, normalized_surface_width - info.src_x);
|
||||
info.height = std::min<u32>(required_height, normalized_surface_height - info.src_y);
|
||||
}
|
||||
else
|
||||
|
||||
info.is_clipped = (info.width < required_width || info.height < required_height);
|
||||
|
||||
if (UNLIKELY(surface_info.bpp != required_bpp))
|
||||
{
|
||||
const auto int_surface_width = surface_info.surface_width * scale_x;
|
||||
const auto int_surface_height = surface_info.surface_height * scale_y;
|
||||
// Width is calculated in the coordinate-space of the requester; normalize
|
||||
info.src_x = (info.src_x * required_bpp) / surface_info.bpp;
|
||||
info.width = (info.width * required_bpp) / surface_info.bpp;
|
||||
}
|
||||
|
||||
const auto offset = this_address - texaddr;
|
||||
info.dst_y = (offset / required_pitch);
|
||||
info.dst_x = (offset % required_pitch) / surface_info.bpp;
|
||||
|
||||
if (UNLIKELY(info.dst_x >= int_surface_width || info.dst_y >= int_surface_height))
|
||||
{
|
||||
// False positive
|
||||
continue;
|
||||
}
|
||||
|
||||
info.src_x = 0;
|
||||
info.src_y = 0;
|
||||
info.width = std::min<u32>(int_surface_width, required_width - info.dst_x);
|
||||
info.height = std::min<u32>(int_surface_height, required_height - info.dst_y);
|
||||
info.is_clipped = (info.width < required_width || info.height < required_height);
|
||||
if (UNLIKELY(scale_x > 1))
|
||||
{
|
||||
info.src_x /= scale_x;
|
||||
info.dst_x /= scale_x;
|
||||
info.width /= scale_x;
|
||||
info.src_y /= scale_y;
|
||||
info.dst_y /= scale_y;
|
||||
info.height /= scale_y;
|
||||
}
|
||||
|
||||
|
@ -2124,7 +2124,8 @@ namespace rsx
|
||||
break;
|
||||
}
|
||||
|
||||
const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch);
|
||||
const auto bpp = get_format_block_size_in_bytes(format);
|
||||
const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch, bpp);
|
||||
|
||||
if (!overlapping_fbos.empty() || !overlapping_locals.empty())
|
||||
{
|
||||
@ -2266,9 +2267,9 @@ namespace rsx
|
||||
src_address += (src.width - src_w) * src_bpp;
|
||||
}
|
||||
|
||||
auto rtt_lookup = [&m_rtts, &cmd](u32 address, u32 width, u32 height, u32 pitch, bool allow_clipped) -> typename surface_store_type::surface_overlap_info
|
||||
auto rtt_lookup = [&m_rtts, &cmd](u32 address, u32 width, u32 height, u32 pitch, u32 bpp, bool allow_clipped) -> typename surface_store_type::surface_overlap_info
|
||||
{
|
||||
const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch);
|
||||
const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp);
|
||||
if (list.empty() || (list.back().is_clipped && !allow_clipped))
|
||||
{
|
||||
return {};
|
||||
@ -2278,11 +2279,11 @@ namespace rsx
|
||||
};
|
||||
|
||||
// Check if src/dst are parts of render targets
|
||||
auto dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, false);
|
||||
auto dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, false);
|
||||
dst_is_render_target = dst_subres.surface != nullptr;
|
||||
|
||||
// TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
|
||||
auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, true);
|
||||
auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, true);
|
||||
src_is_render_target = src_subres.surface != nullptr;
|
||||
|
||||
// Always use GPU blit if src or dst is in the surface store
|
||||
@ -2319,7 +2320,7 @@ namespace rsx
|
||||
src_subres.surface->read_barrier(cmd);
|
||||
|
||||
const auto surf = src_subres.surface;
|
||||
auto bpp = surf->get_native_pitch() / surf->get_surface_width();
|
||||
const auto bpp = surf->get_bpp();
|
||||
if (bpp != src_bpp)
|
||||
{
|
||||
//Enable type scaling in src
|
||||
@ -2327,14 +2328,6 @@ namespace rsx
|
||||
typeless_info.src_is_depth = src_subres.is_depth;
|
||||
typeless_info.src_scaling_hint = (f32)bpp / src_bpp;
|
||||
typeless_info.src_gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
|
||||
|
||||
src_w = (u16)(src_w / typeless_info.src_scaling_hint);
|
||||
if (!src_subres.is_clipped)
|
||||
src_subres.width = (u16)(src_subres.width / typeless_info.src_scaling_hint);
|
||||
else
|
||||
src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, true);
|
||||
|
||||
verify(HERE), src_subres.surface != nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2343,7 +2336,7 @@ namespace rsx
|
||||
// Full barrier is required in case of partial transfers
|
||||
dst_subres.surface->read_barrier(cmd);
|
||||
|
||||
auto bpp = dst_subres.surface->get_native_pitch() / dst_subres.surface->get_surface_width();
|
||||
auto bpp = dst_subres.surface->get_bpp();
|
||||
if (bpp != dst_bpp)
|
||||
{
|
||||
//Enable type scaling in dst
|
||||
@ -2351,14 +2344,6 @@ namespace rsx
|
||||
typeless_info.dst_is_depth = dst_subres.is_depth;
|
||||
typeless_info.dst_scaling_hint = (f32)bpp / dst_bpp;
|
||||
typeless_info.dst_gcm_format = dst_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5;
|
||||
|
||||
dst_w = (u16)(dst_w / typeless_info.dst_scaling_hint);
|
||||
if (!dst_subres.is_clipped)
|
||||
dst_subres.width = (u16)(dst_subres.width / typeless_info.dst_scaling_hint);
|
||||
else
|
||||
dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, false);
|
||||
|
||||
verify(HERE), dst_subres.surface != nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2379,7 +2364,7 @@ namespace rsx
|
||||
{
|
||||
// Optimizations table based on common width/height pairings. If we guess wrong, the upload resolver will fix it anyway
|
||||
// TODO: Add more entries based on empirical data
|
||||
if (LIKELY(dst.width == 1280))
|
||||
if (LIKELY(dst_dimensions.width == 1280))
|
||||
{
|
||||
dst_dimensions.height = std::max<s32>(dst.height, 720);
|
||||
}
|
||||
@ -2450,18 +2435,7 @@ namespace rsx
|
||||
else
|
||||
{
|
||||
// Destination dimensions are relaxed (true)
|
||||
dst_area.x1 = dst_subres.src_x;
|
||||
dst_area.y1 = dst_subres.src_y;
|
||||
dst_area.x2 += dst_subres.src_x;
|
||||
dst_area.y2 += dst_subres.src_y;
|
||||
|
||||
f32 scale_x = get_internal_scaling_x(dst_subres.surface);
|
||||
f32 scale_y = get_internal_scaling_y(dst_subres.surface);
|
||||
|
||||
dst_area.x1 = s32(scale_x * dst_area.x1);
|
||||
dst_area.x2 = s32(scale_x * dst_area.x2);
|
||||
dst_area.y1 = s32(scale_y * dst_area.y1);
|
||||
dst_area.y2 = s32(scale_y * dst_area.y2);
|
||||
dst_area = dst_subres.get_src_area();
|
||||
|
||||
dest_texture = dst_subres.surface->get_surface();
|
||||
typeless_info.dst_context = texture_upload_context::framebuffer_storage;
|
||||
@ -2585,27 +2559,7 @@ namespace rsx
|
||||
}
|
||||
else
|
||||
{
|
||||
if (LIKELY(!dst_is_render_target))
|
||||
{
|
||||
u16 src_subres_w = src_subres.width;
|
||||
u16 src_subres_h = src_subres.height;
|
||||
get_rsx_dimensions(src_subres_w, src_subres_h, src_subres.surface);
|
||||
|
||||
const int dst_width = (int)(src_subres_w * scale_x * typeless_info.src_scaling_hint);
|
||||
const int dst_height = (int)(src_subres_h * scale_y);
|
||||
|
||||
dst_area.x2 = dst_area.x1 + dst_width;
|
||||
dst_area.y2 = dst_area.y1 + dst_height;
|
||||
}
|
||||
|
||||
src_area.x2 = src_subres.width;
|
||||
src_area.y2 = src_subres.height;
|
||||
|
||||
src_area.x1 = src_subres.src_x;
|
||||
src_area.y1 = src_subres.src_y;
|
||||
src_area.x2 += src_subres.src_x;
|
||||
src_area.y2 += src_subres.src_y;
|
||||
|
||||
src_area = src_subres.get_src_area();
|
||||
vram_texture = src_subres.surface->get_surface();
|
||||
typeless_info.src_context = texture_upload_context::framebuffer_storage;
|
||||
}
|
||||
|
@ -1645,7 +1645,7 @@ void GLGSRender::flip(int buffer)
|
||||
else
|
||||
{
|
||||
gl::command_context cmd = { gl_state };
|
||||
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch);
|
||||
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp());
|
||||
|
||||
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
|
||||
{
|
||||
|
@ -613,8 +613,8 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
|
||||
return;
|
||||
}
|
||||
|
||||
auto src_bpp = src_texture->get_native_pitch() / src_texture->get_surface_width();
|
||||
auto dst_bpp = get_native_pitch() / get_surface_width();
|
||||
const auto src_bpp = src_texture->get_bpp();
|
||||
const auto dst_bpp = get_bpp();
|
||||
rsx::typeless_xfer typeless_info{};
|
||||
|
||||
const bool dst_is_depth = is_depth(get_internal_format());
|
||||
|
@ -217,7 +217,7 @@ struct gl_render_target_traits
|
||||
info->native_pitch = surface->get_native_pitch();
|
||||
info->surface_width = surface->get_surface_width();
|
||||
info->surface_height = surface->get_surface_height();
|
||||
info->bpp = static_cast<u8>(info->native_pitch / info->surface_width);
|
||||
info->bpp = surface->get_bpp();
|
||||
}
|
||||
|
||||
// Calls rtt->reset_refs() on the given render target; the unnamed void* argument is ignored.
static void prepare_rtt_for_drawing(void *, gl::render_target *rtt) { rtt->reset_refs(); }
|
||||
|
@ -3288,7 +3288,7 @@ void VKGSRender::flip(int buffer)
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch);
|
||||
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp());
|
||||
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
|
||||
{
|
||||
// Confirmed to be the newest data source in that range
|
||||
|
@ -106,8 +106,8 @@ namespace vk
|
||||
return;
|
||||
}
|
||||
|
||||
auto src_bpp = src_texture->get_native_pitch() / src_texture->get_surface_width();
|
||||
auto dst_bpp = get_native_pitch() / get_surface_width();
|
||||
const auto src_bpp = src_texture->get_bpp();
|
||||
const auto dst_bpp = get_bpp();
|
||||
rsx::typeless_xfer typeless_info{};
|
||||
|
||||
const auto region = rsx::get_transferable_region(this);
|
||||
@ -259,7 +259,7 @@ namespace rsx
|
||||
info->native_pitch = surface->native_pitch;
|
||||
info->surface_width = surface->get_surface_width();
|
||||
info->surface_height = surface->get_surface_height();
|
||||
info->bpp = static_cast<u8>(info->native_pitch / info->surface_width);
|
||||
info->bpp = surface->get_bpp();
|
||||
}
|
||||
|
||||
static void prepare_rtt_for_drawing(vk::command_buffer* pcmd, vk::render_target *surface)
|
||||
|
Loading…
x
Reference in New Issue
Block a user