mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-26 04:32:35 +01:00
rsx: Tweaks
- Optimize get_surface_subresource
- Add check_program_status time to the draw call setup statistics, since this check can slow down games significantly
This commit is contained in:
parent
f7063bb57b
commit
31b07f2c5c
@ -557,12 +557,12 @@ namespace rsx
|
||||
if (surface_address > texaddr)
|
||||
return false;
|
||||
|
||||
u32 offset = texaddr - surface_address;
|
||||
if (texaddr >= surface_address)
|
||||
{
|
||||
const u32 offset = texaddr - surface_address;
|
||||
if (offset == 0)
|
||||
{
|
||||
is_subslice = true;
|
||||
*x = 0;
|
||||
*y = 0;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -570,7 +570,7 @@ namespace rsx
|
||||
Traits::get_surface_info(surface, &info);
|
||||
|
||||
u32 range = info.rsx_pitch * info.surface_height;
|
||||
if (double_height) range *= 2;
|
||||
if (double_height) range <<= 1;
|
||||
|
||||
if (offset < range)
|
||||
{
|
||||
@ -598,11 +598,22 @@ namespace rsx
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
//Fast hit test
|
||||
inline bool surface_overlaps_address_fast(surface_type surface, u32 surface_address, u32 texaddr)
{
	// Cheap containment test: reports whether texaddr lies inside the byte span
	// [surface_address, surface_address + pitch * height) of the given surface.
	// No sub-rectangle offsets are computed here.

	// An address below the surface base can never be inside the surface
	if (texaddr < surface_address)
		return false;

	// Span covered by the surface: RSX pitch multiplied by the surface height
	const u32 surface_span = surface->get_rsx_pitch() * surface->get_surface_height();
	const u32 distance_from_base = texaddr - surface_address;

	return (distance_from_base < surface_span);
}
|
||||
|
||||
bool address_is_bound(u32 address, bool is_depth) const
|
||||
{
|
||||
if (is_depth)
|
||||
@ -629,7 +640,8 @@ namespace rsx
|
||||
return true;
|
||||
}
|
||||
|
||||
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false, bool double_height = false)
|
||||
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch,
|
||||
bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false, bool ignore_color_formats = false, bool double_height = false)
|
||||
{
|
||||
auto test_surface = [&](surface_type surface, u32 this_address, u16 &x_offset, u16 &y_offset, u16 &w, u16 &h, bool &clipped)
|
||||
{
|
||||
@ -638,12 +650,6 @@ namespace rsx
|
||||
surface_format_info info;
|
||||
Traits::get_surface_info(surface, &info);
|
||||
|
||||
if (info.rsx_pitch != requested_pitch)
|
||||
return false;
|
||||
|
||||
if (requested_width == 0 || requested_height == 0)
|
||||
return true;
|
||||
|
||||
u16 real_width = requested_width;
|
||||
|
||||
if (scale_to_fit)
|
||||
@ -696,26 +702,55 @@ namespace rsx
|
||||
u16 w;
|
||||
u16 h;
|
||||
|
||||
if (!ignore_color_formats)
|
||||
{
|
||||
for (auto &tex_info : m_render_targets_storage)
|
||||
{
|
||||
u32 this_address = std::get<0>(tex_info);
|
||||
surface = std::get<1>(tex_info).get();
|
||||
const u32 this_address = std::get<0>(tex_info);
|
||||
if (texaddr < this_address)
|
||||
continue;
|
||||
|
||||
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
|
||||
return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped };
|
||||
surface = std::get<1>(tex_info).get();
|
||||
if (surface->get_rsx_pitch() != requested_pitch)
|
||||
continue;
|
||||
|
||||
if (requested_width == 0 || requested_height == 0)
|
||||
{
|
||||
if (!surface_overlaps_address_fast(surface, this_address, texaddr))
|
||||
continue;
|
||||
else
|
||||
return{ surface, 0, 0, 0, 0, false, false, false };
|
||||
}
|
||||
|
||||
if (ignore_depth_formats)
|
||||
return{};
|
||||
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
|
||||
return{ surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped };
|
||||
}
|
||||
}
|
||||
|
||||
if (!ignore_depth_formats)
|
||||
{
|
||||
//Check depth surfaces for overlap
|
||||
for (auto &tex_info : m_depth_stencil_storage)
|
||||
{
|
||||
u32 this_address = std::get<0>(tex_info);
|
||||
const u32 this_address = std::get<0>(tex_info);
|
||||
if (texaddr < this_address)
|
||||
continue;
|
||||
|
||||
surface = std::get<1>(tex_info).get();
|
||||
if (surface->get_rsx_pitch() != requested_pitch)
|
||||
continue;
|
||||
|
||||
if (requested_width == 0 || requested_height == 0)
|
||||
{
|
||||
if (!surface_overlaps_address_fast(surface, this_address, texaddr))
|
||||
continue;
|
||||
else
|
||||
return{ surface, 0, 0, 0, 0, false, true, false };
|
||||
}
|
||||
|
||||
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
|
||||
return { surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped };
|
||||
return{ surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped };
|
||||
}
|
||||
}
|
||||
|
||||
return{};
|
||||
|
@ -1087,7 +1087,7 @@ namespace rsx
|
||||
}
|
||||
|
||||
//Check if src/dst are parts of render targets
|
||||
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false, dst.compressed_y);
|
||||
auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false, false, dst.compressed_y);
|
||||
dst_is_render_target = dst_subres.surface != nullptr;
|
||||
|
||||
if (dst_is_render_target && dst_subres.surface->get_native_pitch() != dst.pitch)
|
||||
@ -1099,7 +1099,7 @@ namespace rsx
|
||||
}
|
||||
|
||||
//TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
|
||||
auto src_subres = m_rtts.get_surface_subresource_if_applicable(framebuffer_src_address, src_w, src_h, src.pitch, true, true, false, src.compressed_y);
|
||||
auto src_subres = m_rtts.get_surface_subresource_if_applicable(framebuffer_src_address, src_w, src_h, src.pitch, true, true, false, false, src.compressed_y);
|
||||
src_is_render_target = src_subres.surface != nullptr;
|
||||
|
||||
if (src_is_render_target && src_subres.surface->get_native_pitch() != src.pitch)
|
||||
|
@ -318,12 +318,17 @@ namespace
|
||||
|
||||
void GLGSRender::end()
|
||||
{
|
||||
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
|
||||
|
||||
if (skip_frame || !framebuffer_status_valid || (conditional_render_enabled && conditional_render_test_failed) || !check_program_state())
|
||||
{
|
||||
rsx::thread::end();
|
||||
return;
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
|
||||
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
|
||||
|
||||
if (manually_flush_ring_buffers)
|
||||
{
|
||||
//Use approximations to reserve space. This path is mostly for debug purposes anyway
|
||||
@ -964,7 +969,7 @@ bool GLGSRender::check_program_state()
|
||||
if (dirty_framebuffer)
|
||||
return std::make_tuple(false, 0);
|
||||
|
||||
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch());
|
||||
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
|
||||
if (!rsc.surface || rsc.is_depth_surface != is_depth)
|
||||
return std::make_tuple(false, 0);
|
||||
|
||||
|
@ -995,6 +995,8 @@ void VKGSRender::end()
|
||||
return;
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> state_check_start = steady_clock::now();
|
||||
|
||||
//Load program here since it is dependent on vertex state
|
||||
if (!check_program_status())
|
||||
{
|
||||
@ -1003,14 +1005,17 @@ void VKGSRender::end()
|
||||
return;
|
||||
}
|
||||
|
||||
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
|
||||
m_setup_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
|
||||
|
||||
//Programs data is dependent on vertex state
|
||||
std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
|
||||
std::chrono::time_point<steady_clock> vertex_start = state_check_end;
|
||||
auto upload_info = upload_vertex_data();
|
||||
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
|
||||
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
|
||||
|
||||
//Load program
|
||||
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
|
||||
std::chrono::time_point<steady_clock> program_start = vertex_end;
|
||||
load_program(std::get<2>(upload_info), std::get<3>(upload_info));
|
||||
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
|
||||
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
|
||||
@ -1841,7 +1846,7 @@ bool VKGSRender::check_program_status()
|
||||
if (dirty_framebuffer)
|
||||
return std::make_tuple(false, 0);
|
||||
|
||||
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch());
|
||||
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch(), false, false, !is_depth, is_depth);
|
||||
if (!rsc.surface || rsc.is_depth_surface != is_depth)
|
||||
return std::make_tuple(false, 0);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user