1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-26 04:32:35 +01:00

rsx: Tweaks

- Optimize get_surface_subresource
- Include check_program_status time in the draw call setup statistics, since this check can slow down games significantly
This commit is contained in:
kd-11 2017-10-29 19:34:55 +03:00
parent f7063bb57b
commit 31b07f2c5c
4 changed files with 105 additions and 60 deletions

View File

@ -557,52 +557,63 @@ namespace rsx
if (surface_address > texaddr)
return false;
u32 offset = texaddr - surface_address;
if (texaddr >= surface_address)
const u32 offset = texaddr - surface_address;
if (offset == 0)
{
if (offset == 0)
{
is_subslice = true;
}
else
{
surface_format_info info;
Traits::get_surface_info(surface, &info);
*x = 0;
*y = 0;
return true;
}
else
{
surface_format_info info;
Traits::get_surface_info(surface, &info);
u32 range = info.rsx_pitch * info.surface_height;
if (double_height) range *= 2;
u32 range = info.rsx_pitch * info.surface_height;
if (double_height) range <<= 1;
if (offset < range)
if (offset < range)
{
const u32 y = (offset / info.rsx_pitch);
u32 x = (offset % info.rsx_pitch) / info.bpp;
if (scale_to_fit)
{
const u32 y = (offset / info.rsx_pitch);
u32 x = (offset % info.rsx_pitch) / info.bpp;
if (scale_to_fit)
{
const f32 x_scale = (f32)info.rsx_pitch / info.native_pitch;
x = (u32)((f32)x / x_scale);
}
x_offset = x;
y_offset = y;
if (double_height) y_offset /= 2;
is_subslice = true;
const f32 x_scale = (f32)info.rsx_pitch / info.native_pitch;
x = (u32)((f32)x / x_scale);
}
}
if (is_subslice)
{
*x = x_offset;
*y = y_offset;
x_offset = x;
y_offset = y;
return true;
if (double_height) y_offset /= 2;
is_subslice = true;
}
}
if (is_subslice)
{
*x = x_offset;
*y = y_offset;
return true;
}
return false;
}
//Fast hit test
//Returns true when texaddr falls inside the range that starts at surface_address
//and extends for (rsx pitch * surface height) of the given surface.
//Only the base address, pitch and height are consulted; no sub-offsets are computed.
inline bool surface_overlaps_address_fast(surface_type surface, u32 surface_address, u32 texaddr)
{
	//An address located before the surface base cannot be part of the surface
	if (texaddr < surface_address)
	{
		return false;
	}

	//Distance of the requested address from the surface base
	const u32 distance = texaddr - surface_address;

	//Extent covered by the surface, as reported by the surface object itself
	const u32 extent = surface->get_rsx_pitch() * surface->get_surface_height();

	return distance < extent;
}
bool address_is_bound(u32 address, bool is_depth) const
{
if (is_depth)
@ -629,7 +640,8 @@ namespace rsx
return true;
}
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false, bool double_height = false)
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch,
bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false, bool ignore_color_formats = false, bool double_height = false)
{
auto test_surface = [&](surface_type surface, u32 this_address, u16 &x_offset, u16 &y_offset, u16 &w, u16 &h, bool &clipped)
{
@ -638,12 +650,6 @@ namespace rsx
surface_format_info info;
Traits::get_surface_info(surface, &info);
if (info.rsx_pitch != requested_pitch)
return false;
if (requested_width == 0 || requested_height == 0)
return true;
u16 real_width = requested_width;
if (scale_to_fit)
@ -696,26 +702,55 @@ namespace rsx
u16 w;
u16 h;
for (auto &tex_info : m_render_targets_storage)
if (!ignore_color_formats)
{
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
for (auto &tex_info : m_render_targets_storage)
{
const u32 this_address = std::get<0>(tex_info);
if (texaddr < this_address)
continue;
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped };
surface = std::get<1>(tex_info).get();
if (surface->get_rsx_pitch() != requested_pitch)
continue;
if (requested_width == 0 || requested_height == 0)
{
if (!surface_overlaps_address_fast(surface, this_address, texaddr))
continue;
else
return{ surface, 0, 0, 0, 0, false, false, false };
}
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return{ surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped };
}
}
if (ignore_depth_formats)
return{};
//Check depth surfaces for overlap
for (auto &tex_info : m_depth_stencil_storage)
if (!ignore_depth_formats)
{
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
//Check depth surfaces for overlap
for (auto &tex_info : m_depth_stencil_storage)
{
const u32 this_address = std::get<0>(tex_info);
if (texaddr < this_address)
continue;
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped };
surface = std::get<1>(tex_info).get();
if (surface->get_rsx_pitch() != requested_pitch)
continue;
if (requested_width == 0 || requested_height == 0)
{
if (!surface_overlaps_address_fast(surface, this_address, texaddr))
continue;
else
return{ surface, 0, 0, 0, 0, false, true, false };
}
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return{ surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped };
}
}
return{};

View File

@ -1087,7 +1087,7 @@ namespace rsx
}
//Check if src/dst are parts of render targets
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false, dst.compressed_y);
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false, false, dst.compressed_y);
dst_is_render_target = dst_subres.surface != nullptr;
if (dst_is_render_target && dst_subres.surface->get_native_pitch() != dst.pitch)
@ -1099,7 +1099,7 @@ namespace rsx
}
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = m_rtts.get_surface_subresource_if_applicable(framebuffer_src_address, src_w, src_h, src.pitch, true, true, false, src.compressed_y);
auto src_subres = m_rtts.get_surface_subresource_if_applicable(framebuffer_src_address, src_w, src_h, src.pitch, true, true, false, false, src.compressed_y);
src_is_render_target = src_subres.surface != nullptr;
if (src_is_render_target && src_subres.surface->get_native_pitch() != src.pitch)

View File

@ -318,12 +318,17 @@ namespace
void GLGSRender::end()
{
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state())
{
rsx::thread::end();
return;
}
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
if (manually_flush_ring_buffers)
{
//Use approximations to reserve space. This path is mostly for debug purposes anyway
@ -964,7 +969,7 @@ bool GLGSRender::check_program_state()
if (dirty_framebuffer)
return std::make_tuple(false, 0);
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch());
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
if (!rsc.surface || rsc.is_depth_surface != is_depth)
return std::make_tuple(false, 0);

View File

@ -995,6 +995,8 @@ void VKGSRender::end()
return;
}
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
//Load program here since it is dependent on vertex state
if (!check_program_status())
{
@ -1003,14 +1005,17 @@ void VKGSRender::end()
return;
}
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
m_setup_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
//Programs data is dependent on vertex state
std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
std::chrono::time_point<steady_clock> vertex_start = state_check_end;
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
//Load program
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
std::chrono::time_point<steady_clock> program_start = vertex_end;
load_program(std::get<2>(upload_info), std::get<3>(upload_info));
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
@ -1841,7 +1846,7 @@ bool VKGSRender::check_program_status()
if (dirty_framebuffer)
return std::make_tuple(false, 0);
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch());
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
if (!rsc.surface || rsc.is_depth_surface != is_depth)
return std::make_tuple(false, 0);