mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 18:53:28 +01:00
rsx/gl/vk: Fixes and optimizations
- OpenGL driver optimization for NVIDIA: glTextureBufferRange performance is horrendous on NVIDIA drivers.
  -- Initialize the texture buffer to cover the whole buffer at startup and use absolute offsets to read data instead.
  -- Over 2x performance in some cases (Resogun, TNT Racers).
- gl/vk: Do not flip non-existent display buffers. Fixes a spec violation at boot in the TNT Racers demo.
- Whitespace fixes for sys_rsx.
This commit is contained in:
parent
ab17b49e15
commit
3d9e3a16f1
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
struct RsxDriverInfo {
|
||||
struct RsxDriverInfo
|
||||
{
|
||||
be_t<u32> version_driver; // 0x0
|
||||
be_t<u32> version_gpu; // 0x4
|
||||
be_t<u32> memory_size; // 0x8
|
||||
@ -15,7 +16,9 @@ struct RsxDriverInfo {
|
||||
be_t<u32> unk3[6]; // 0x38-0x54
|
||||
be_t<u32> systemModeFlags; // 0x54
|
||||
u8 unk4[0x1064]; // 0x10B8
|
||||
struct Head {
|
||||
|
||||
struct Head
|
||||
{
|
||||
be_t<u64> lastFlipTime; // 0x0 last flip time
|
||||
be_t<u32> flipFlags; // 0x8 flags to handle flip/queue
|
||||
be_t<u32> unk1; // 0xC
|
||||
@ -29,6 +32,7 @@ struct RsxDriverInfo {
|
||||
be_t<u32> unk; // 0x38 possible u32, 'flip field', top/bottom for interlaced
|
||||
be_t<u32> unk5; // 0x3C possible high bits of time stamp? used in getlastVBlankTime
|
||||
} head[8]; // size = 0x40, 0x200
|
||||
|
||||
be_t<u32> unk7; // 0x12B8
|
||||
be_t<u32> unk8; // 0x12BC
|
||||
be_t<u32> handlers; // 0x12C0 -- flags showing which handlers are set
|
||||
@ -46,10 +50,12 @@ struct RsxDriverInfo {
|
||||
be_t<u32> lastError; // 0x12F4 error param for cellGcmSetGraphicsHandler
|
||||
// todo: there's more to this
|
||||
};
|
||||
|
||||
static_assert(sizeof(RsxDriverInfo) == 0x12F8, "rsxSizeTest");
|
||||
static_assert(sizeof(RsxDriverInfo::Head) == 0x40, "rsxHeadSizeTest");
|
||||
|
||||
struct RsxDmaControl {
|
||||
struct RsxDmaControl
|
||||
{
|
||||
u8 resv[0x40];
|
||||
atomic_be_t<u32> put;
|
||||
atomic_be_t<u32> get;
|
||||
@ -58,30 +64,35 @@ struct RsxDmaControl {
|
||||
be_t<u32> unk1;
|
||||
};
|
||||
|
||||
// One semaphore record: a 32-bit value, a 32-bit padding word and a 64-bit timestamp.
// NOTE(review): be_t presumably denotes big-endian storage -- confirm against its definition.
struct RsxSemaphore {
|
||||
struct RsxSemaphore
|
||||
{
|
||||
be_t<u32> val; // semaphore value
|
||||
be_t<u32> pad; // padding word
|
||||
be_t<u64> timestamp; // 64-bit timestamp stored alongside the value
|
||||
};
|
||||
|
||||
// One notify record: a 64-bit timestamp followed by a second 64-bit word named 'zero'.
// NOTE(review): 'zero' is presumably always written as 0 -- confirm against the writers.
struct RsxNotify {
|
||||
struct RsxNotify
|
||||
{
|
||||
be_t<u64> timestamp; // 64-bit timestamp
|
||||
be_t<u64> zero; // second 64-bit word of the record
|
||||
};
|
||||
|
||||
// One report record: a 64-bit timestamp, a 32-bit value and a 32-bit padding word.
// Note the field order differs from RsxSemaphore (timestamp comes first here).
struct RsxReport {
|
||||
struct RsxReport
|
||||
{
|
||||
be_t<u64> timestamp; // 64-bit timestamp
|
||||
be_t<u32> val; // report value
|
||||
be_t<u32> pad; // padding word
|
||||
};
|
||||
|
||||
// Aggregate of the three record tables, laid out back to back:
// semaphores first, then notify records, then report records.
struct RsxReports {
|
||||
struct RsxReports
|
||||
{
|
||||
RsxSemaphore semaphore[0x100]; // 256 semaphore records
|
||||
RsxNotify notify[64]; // 64 notify records
|
||||
RsxReport report[2048]; // 2048 report records
|
||||
};
|
||||
|
||||
struct RsxDisplayInfo {
|
||||
struct RsxDisplayInfo
|
||||
{
|
||||
be_t<u32> offset;
|
||||
be_t<u32> pitch;
|
||||
be_t<u32> width;
|
||||
|
@ -218,11 +218,7 @@ void GLGSRender::end()
|
||||
}
|
||||
|
||||
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
|
||||
u32 vertex_draw_count;
|
||||
u32 actual_vertex_count;
|
||||
u32 vertex_base;
|
||||
std::optional<std::tuple<GLenum, u32> > indexed_draw_info;
|
||||
std::tie(vertex_draw_count, actual_vertex_count, vertex_base, indexed_draw_info) = set_vertex_buffer();
|
||||
auto upload_info = set_vertex_buffer();
|
||||
|
||||
//Load textures
|
||||
{
|
||||
@ -294,7 +290,7 @@ void GLGSRender::end()
|
||||
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
|
||||
//Load program here since it is dependent on vertex state
|
||||
|
||||
load_program(vertex_base, actual_vertex_count);
|
||||
load_program(upload_info);
|
||||
|
||||
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
|
||||
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
|
||||
@ -492,10 +488,10 @@ void GLGSRender::end()
|
||||
const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
|
||||
bool single_draw = !supports_multidraw || (rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive);
|
||||
|
||||
if (indexed_draw_info)
|
||||
if (upload_info.index_info)
|
||||
{
|
||||
const GLenum index_type = std::get<0>(indexed_draw_info.value());
|
||||
const u32 index_offset = std::get<1>(indexed_draw_info.value());
|
||||
const GLenum index_type = std::get<0>(upload_info.index_info.value());
|
||||
const u32 index_offset = std::get<1>(upload_info.index_info.value());
|
||||
const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;
|
||||
|
||||
if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
|
||||
@ -505,7 +501,7 @@ void GLGSRender::end()
|
||||
|
||||
if (single_draw)
|
||||
{
|
||||
glDrawElements(draw_mode, vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
|
||||
glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -535,7 +531,7 @@ void GLGSRender::end()
|
||||
{
|
||||
if (single_draw)
|
||||
{
|
||||
glDrawArrays(draw_mode, 0, vertex_draw_count);
|
||||
glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -652,16 +648,25 @@ void GLGSRender::on_init_thread()
|
||||
//Use industry standard resource alignment values as defaults
|
||||
m_uniform_buffer_offset_align = 256;
|
||||
m_min_texbuffer_alignment = 256;
|
||||
m_max_texbuffer_size = 0;
|
||||
|
||||
glEnable(GL_VERTEX_PROGRAM_POINT_SIZE);
|
||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &m_uniform_buffer_offset_align);
|
||||
glGetIntegerv(GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT, &m_min_texbuffer_alignment);
|
||||
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &m_max_texbuffer_size);
|
||||
m_vao.create();
|
||||
|
||||
//Set min alignment to 16-bytes for SSE optimizations with aligned addresses to work
|
||||
m_min_texbuffer_alignment = std::max(m_min_texbuffer_alignment, 16);
|
||||
m_uniform_buffer_offset_align = std::max(m_uniform_buffer_offset_align, 16);
|
||||
|
||||
LOG_NOTICE(RSX, "Supported texel buffer size reported: %d bytes", m_max_texbuffer_size);
|
||||
if (m_max_texbuffer_size < (16 * 0x100000))
|
||||
{
|
||||
LOG_ERROR(RSX, "Max texture buffer size supported is less than 16M which is useless. Expect undefined behaviour.");
|
||||
m_max_texbuffer_size = (16 * 0x100000);
|
||||
}
|
||||
|
||||
const u32 texture_index_offset = rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count;
|
||||
|
||||
//Array stream buffer
|
||||
@ -709,11 +714,14 @@ void GLGSRender::on_init_thread()
|
||||
m_index_ring_buffer.reset(new gl::ring_buffer());
|
||||
}
|
||||
|
||||
m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000);
|
||||
m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000);
|
||||
m_transform_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||
m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||
m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||
m_attrib_ring_buffer->create(gl::buffer::target::texture, std::min<GLsizeiptr>(m_max_texbuffer_size, 256 * 0x100000));
|
||||
m_index_ring_buffer->create(gl::buffer::target::element_array, std::min<GLsizeiptr>(m_max_texbuffer_size, 64 * 0x100000));
|
||||
m_transform_constants_buffer->create(gl::buffer::target::uniform, std::min<GLsizeiptr>(m_max_texbuffer_size, 16 * 0x100000));
|
||||
m_fragment_constants_buffer->create(gl::buffer::target::uniform, std::min<GLsizeiptr>(m_max_texbuffer_size, 16 * 0x100000));
|
||||
m_vertex_state_buffer->create(gl::buffer::target::uniform, std::min<GLsizeiptr>(m_max_texbuffer_size, 16 * 0x100000));
|
||||
|
||||
m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, 0, (u32)m_attrib_ring_buffer->size());
|
||||
m_gl_volatile_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, 0, (u32)m_attrib_ring_buffer->size());
|
||||
|
||||
m_vao.element_array_buffer = *m_index_ring_buffer;
|
||||
|
||||
@ -999,7 +1007,7 @@ bool GLGSRender::check_program_state()
|
||||
return (rsx::method_registers.shader_program_address() != 0);
|
||||
}
|
||||
|
||||
void GLGSRender::load_program(u32 vertex_base, u32 vertex_count)
|
||||
void GLGSRender::load_program(const vertex_upload_info& upload_info)
|
||||
{
|
||||
get_current_fragment_program(fs_sampler_state);
|
||||
verify(HERE), current_fragment_program.valid;
|
||||
@ -1055,11 +1063,11 @@ void GLGSRender::load_program(u32 vertex_base, u32 vertex_count)
|
||||
fill_scale_offset_data(buf, false);
|
||||
fill_user_clip_data(buf + 64);
|
||||
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
|
||||
*(reinterpret_cast<u32*>(buf + 132)) = vertex_base;
|
||||
*(reinterpret_cast<u32*>(buf + 132)) = upload_info.vertex_index_base;
|
||||
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.point_size();
|
||||
*(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_min();
|
||||
*(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max();
|
||||
fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast<s32*>(buf + 160));
|
||||
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
|
||||
|
||||
if (m_transform_constants_dirty)
|
||||
{
|
||||
@ -1223,97 +1231,101 @@ void GLGSRender::flip(int buffer)
|
||||
return;
|
||||
}
|
||||
|
||||
gl::screen.clear(gl::buffers::color);
|
||||
|
||||
u32 buffer_width = display_buffers[buffer].width;
|
||||
u32 buffer_height = display_buffers[buffer].height;
|
||||
u32 buffer_pitch = display_buffers[buffer].pitch;
|
||||
|
||||
// Calculate blit coordinates
|
||||
coordi aspect_ratio;
|
||||
sizei csize(m_frame->client_width(), m_frame->client_height());
|
||||
sizei new_size = csize;
|
||||
|
||||
if (!g_cfg.video.stretch_to_display_area)
|
||||
if (buffer < display_buffers_count && buffer_width && buffer_height && buffer_pitch)
|
||||
{
|
||||
const double aq = (double)buffer_width / buffer_height;
|
||||
const double rq = (double)new_size.width / new_size.height;
|
||||
const double q = aq / rq;
|
||||
// Calculate blit coordinates
|
||||
coordi aspect_ratio;
|
||||
sizei csize(m_frame->client_width(), m_frame->client_height());
|
||||
sizei new_size = csize;
|
||||
|
||||
if (q > 1.0)
|
||||
if (!g_cfg.video.stretch_to_display_area)
|
||||
{
|
||||
new_size.height = int(new_size.height / q);
|
||||
aspect_ratio.y = (csize.height - new_size.height) / 2;
|
||||
}
|
||||
else if (q < 1.0)
|
||||
{
|
||||
new_size.width = int(new_size.width * q);
|
||||
aspect_ratio.x = (csize.width - new_size.width) / 2;
|
||||
}
|
||||
}
|
||||
const double aq = (double)buffer_width / buffer_height;
|
||||
const double rq = (double)new_size.width / new_size.height;
|
||||
const double q = aq / rq;
|
||||
|
||||
aspect_ratio.size = new_size;
|
||||
|
||||
// Find the source image
|
||||
rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
|
||||
u32 absolute_address = buffer_region.address + buffer_region.base;
|
||||
|
||||
m_flip_fbo.recreate();
|
||||
m_flip_fbo.bind();
|
||||
|
||||
const u32 size = buffer_pitch * buffer_height;
|
||||
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
|
||||
{
|
||||
buffer_width = render_target_texture->width();
|
||||
buffer_height = render_target_texture->height();
|
||||
|
||||
m_flip_fbo.color = *render_target_texture;
|
||||
m_flip_fbo.read_buffer(m_flip_fbo.color);
|
||||
}
|
||||
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address))
|
||||
{
|
||||
//Hack - this should be the first location to check for output
|
||||
//The render might have been done offscreen or in software and a blit used to display
|
||||
m_flip_fbo.color = surface->get_raw_view();
|
||||
m_flip_fbo.read_buffer(m_flip_fbo.color);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");
|
||||
|
||||
if (!m_flip_tex_color || m_flip_tex_color.size() != sizei{ (int)buffer_width, (int)buffer_height })
|
||||
{
|
||||
m_flip_tex_color.recreate(gl::texture::target::texture2D);
|
||||
|
||||
m_flip_tex_color.config()
|
||||
.size({ (int)buffer_width, (int)buffer_height })
|
||||
.type(gl::texture::type::uint_8_8_8_8)
|
||||
.format(gl::texture::format::bgra);
|
||||
|
||||
m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch / 4);
|
||||
if (q > 1.0)
|
||||
{
|
||||
new_size.height = int(new_size.height / q);
|
||||
aspect_ratio.y = (csize.height - new_size.height) / 2;
|
||||
}
|
||||
else if (q < 1.0)
|
||||
{
|
||||
new_size.width = int(new_size.width * q);
|
||||
aspect_ratio.x = (csize.width - new_size.width) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (buffer_region.tile)
|
||||
aspect_ratio.size = new_size;
|
||||
|
||||
// Find the source image
|
||||
rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
|
||||
u32 absolute_address = buffer_region.address + buffer_region.base;
|
||||
|
||||
m_flip_fbo.recreate();
|
||||
m_flip_fbo.bind();
|
||||
|
||||
const u32 size = buffer_pitch * buffer_height;
|
||||
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
|
||||
{
|
||||
std::unique_ptr<u8[]> temp(new u8[buffer_height * buffer_pitch]);
|
||||
buffer_region.read(temp.get(), buffer_width, buffer_height, buffer_pitch);
|
||||
m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
|
||||
buffer_width = render_target_texture->width();
|
||||
buffer_height = render_target_texture->height();
|
||||
|
||||
m_flip_fbo.color = *render_target_texture;
|
||||
m_flip_fbo.read_buffer(m_flip_fbo.color);
|
||||
}
|
||||
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address))
|
||||
{
|
||||
//Hack - this should be the first location to check for output
|
||||
//The render might have been done offscreen or in software and a blit used to display
|
||||
m_flip_fbo.color = surface->get_raw_view();
|
||||
m_flip_fbo.read_buffer(m_flip_fbo.color);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
|
||||
LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");
|
||||
|
||||
if (!m_flip_tex_color || m_flip_tex_color.size() != sizei{ (int)buffer_width, (int)buffer_height })
|
||||
{
|
||||
m_flip_tex_color.recreate(gl::texture::target::texture2D);
|
||||
|
||||
m_flip_tex_color.config()
|
||||
.size({ (int)buffer_width, (int)buffer_height })
|
||||
.type(gl::texture::type::uint_8_8_8_8)
|
||||
.format(gl::texture::format::bgra);
|
||||
|
||||
m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch / 4);
|
||||
}
|
||||
|
||||
if (buffer_region.tile)
|
||||
{
|
||||
std::unique_ptr<u8[]> temp(new u8[buffer_height * buffer_pitch]);
|
||||
buffer_region.read(temp.get(), buffer_width, buffer_height, buffer_pitch);
|
||||
m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
|
||||
}
|
||||
|
||||
m_flip_fbo.color = m_flip_tex_color;
|
||||
m_flip_fbo.read_buffer(m_flip_fbo.color);
|
||||
}
|
||||
|
||||
m_flip_fbo.color = m_flip_tex_color;
|
||||
m_flip_fbo.read_buffer(m_flip_fbo.color);
|
||||
// Blit source image to the screen
|
||||
// Disable scissor test (affects blit)
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
|
||||
areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });
|
||||
m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
|
||||
}
|
||||
|
||||
// Blit source image to the screen
|
||||
// Disable scissor test (affects blit)
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
|
||||
areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });
|
||||
gl::screen.clear(gl::buffers::color);
|
||||
m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
|
||||
|
||||
if (m_custom_ui)
|
||||
{
|
||||
gl::screen.bind();
|
||||
|
@ -255,6 +255,16 @@ struct driver_state
|
||||
}
|
||||
};
|
||||
|
||||
// Result of GLGSRender::set_vertex_buffer(); replaces the tuple that function used to return.
// It is consumed by GLGSRender::end() for the draw calls and by GLGSRender::load_program()
// when filling the vertex state buffer.
struct vertex_upload_info
|
||||
{
|
||||
// Element count passed to glDrawElements / glDrawArrays on the single-draw path
u32 vertex_draw_count;
|
||||
// Vertex count forwarded to fill_vertex_layout_state() by load_program()
u32 allocated_vertex_count;
|
||||
// Written into the vertex state data (at byte offset 132) by load_program()
u32 vertex_index_base;
|
||||
// Offset of the persistent vertex data inside the attribute ring buffer heap
// (either a vertex cache hit or a fresh allocation); stays 0 when none was needed
u32 persistent_mapping_offset;
|
||||
// Offset of the volatile vertex data inside the attribute ring buffer heap; stays 0 when none was needed
u32 volatile_mapping_offset;
|
||||
// Has a value only for indexed draws: (index type, byte offset into the index buffer)
std::optional<std::tuple<GLenum, u32> > index_info;
|
||||
};
|
||||
|
||||
class GLGSRender : public GSRender
|
||||
{
|
||||
private:
|
||||
@ -289,6 +299,7 @@ private:
|
||||
|
||||
GLint m_min_texbuffer_alignment = 256;
|
||||
GLint m_uniform_buffer_offset_align = 256;
|
||||
GLint m_max_texbuffer_size = 65536;
|
||||
|
||||
bool manually_flush_ring_buffers = false;
|
||||
|
||||
@ -326,14 +337,14 @@ private:
|
||||
driver_state gl_state;
|
||||
|
||||
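// Uploads vertex data and returns the draw description: element count to draw, allocated vertex count, vertex index base, stream buffer offsets and, in case of an indexed draw, the index type and offset in the index buffer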
|
||||
std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32> > > set_vertex_buffer();
|
||||
vertex_upload_info set_vertex_buffer();
|
||||
rsx::vertex_input_layout m_vertex_layout = {};
|
||||
|
||||
void clear_surface(u32 arg);
|
||||
void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
|
||||
|
||||
bool check_program_state();
|
||||
void load_program(u32 vertex_base, u32 vertex_count);
|
||||
void load_program(const vertex_upload_info& upload_info);
|
||||
|
||||
void update_draw_state();
|
||||
|
||||
|
@ -259,6 +259,7 @@ namespace gl
|
||||
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
|
||||
return (status == GL_SIGNALED);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool wait_for_signal()
|
||||
@ -831,7 +832,6 @@ namespace gl
|
||||
protected:
|
||||
|
||||
u32 m_data_loc = 0;
|
||||
u32 m_limit = 0;
|
||||
void *m_memory_mapping = nullptr;
|
||||
|
||||
fence m_fence;
|
||||
@ -854,7 +854,7 @@ namespace gl
|
||||
|
||||
verify(HERE), m_memory_mapping != nullptr;
|
||||
m_data_loc = 0;
|
||||
m_limit = ::narrow<u32>(size);
|
||||
m_size = ::narrow<u32>(size);
|
||||
}
|
||||
|
||||
void create(target target_, GLsizeiptr size, const void* data_ = nullptr)
|
||||
@ -868,7 +868,7 @@ namespace gl
|
||||
u32 offset = m_data_loc;
|
||||
if (m_data_loc) offset = align(offset, alignment);
|
||||
|
||||
if ((offset + alloc_size) > m_limit)
|
||||
if ((offset + alloc_size) > m_size)
|
||||
{
|
||||
if (!m_fence.is_empty())
|
||||
m_fence.wait_for_signal();
|
||||
@ -894,7 +894,7 @@ namespace gl
|
||||
|
||||
m_memory_mapping = nullptr;
|
||||
m_data_loc = 0;
|
||||
m_limit = 0;
|
||||
m_size = 0;
|
||||
}
|
||||
|
||||
glDeleteBuffers(1, &m_id);
|
||||
@ -936,7 +936,7 @@ namespace gl
|
||||
|
||||
m_memory_mapping = nullptr;
|
||||
m_data_loc = 0;
|
||||
m_limit = ::narrow<u32>(size);
|
||||
m_size = ::narrow<u32>(size);
|
||||
}
|
||||
|
||||
void create(target target_, GLsizeiptr size, const void* data_ = nullptr)
|
||||
@ -954,9 +954,9 @@ namespace gl
|
||||
|
||||
const u32 block_size = align(alloc_size + 16, 256); //Overallocate just in case we need to realign base
|
||||
|
||||
if ((offset + block_size) > m_limit)
|
||||
if ((offset + block_size) > m_size)
|
||||
{
|
||||
buffer::data(m_limit, nullptr);
|
||||
buffer::data(m_size, nullptr);
|
||||
m_data_loc = 0;
|
||||
}
|
||||
|
||||
|
@ -180,7 +180,7 @@ namespace
|
||||
};
|
||||
}
|
||||
|
||||
std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::set_vertex_buffer()
|
||||
vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||
{
|
||||
std::chrono::time_point<steady_clock> then = steady_clock::now();
|
||||
|
||||
@ -196,6 +196,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
|
||||
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
|
||||
|
||||
std::pair<void*, u32> persistent_mapping = {}, volatile_mapping = {};
|
||||
vertex_upload_info upload_info = { result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, 0u, 0u, result.index_info };
|
||||
|
||||
if (required.first > 0)
|
||||
{
|
||||
@ -213,7 +214,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
|
||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
|
||||
{
|
||||
in_cache = true;
|
||||
m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, cached->offset_in_heap, required.first);
|
||||
upload_info.persistent_mapping_offset = cached->offset_in_heap;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -224,7 +225,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
|
||||
if (!in_cache)
|
||||
{
|
||||
persistent_mapping = m_attrib_ring_buffer->alloc_from_heap(required.first, m_min_texbuffer_alignment);
|
||||
m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, persistent_mapping.second, required.first);
|
||||
upload_info.persistent_mapping_offset = persistent_mapping.second;
|
||||
|
||||
if (to_store)
|
||||
{
|
||||
@ -237,7 +238,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
|
||||
if (required.second > 0)
|
||||
{
|
||||
volatile_mapping = m_attrib_ring_buffer->alloc_from_heap(required.second, m_min_texbuffer_alignment);
|
||||
m_gl_volatile_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, volatile_mapping.second, required.second);
|
||||
upload_info.volatile_mapping_offset = volatile_mapping.second;
|
||||
}
|
||||
|
||||
//Write all the data
|
||||
@ -245,7 +246,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
|
||||
|
||||
std::chrono::time_point<steady_clock> now = steady_clock::now();
|
||||
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
|
||||
return std::make_tuple(result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, result.index_info);
|
||||
return upload_info;
|
||||
}
|
||||
|
||||
namespace
|
||||
|
@ -1616,6 +1616,8 @@ namespace rsx
|
||||
local_mem_addr = localAddress;
|
||||
flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE;
|
||||
|
||||
memset(display_buffers, 0, sizeof(display_buffers));
|
||||
|
||||
m_used_gcm_commands.clear();
|
||||
|
||||
on_init_rsx();
|
||||
@ -1676,7 +1678,7 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<u32, u32> thread::calculate_memory_requirements(vertex_input_layout& layout, const u32 vertex_count)
|
||||
std::pair<u32, u32> thread::calculate_memory_requirements(const vertex_input_layout& layout, u32 vertex_count)
|
||||
{
|
||||
u32 persistent_memory_size = 0;
|
||||
u32 volatile_memory_size = 0;
|
||||
@ -1732,11 +1734,11 @@ namespace rsx
|
||||
return std::make_pair(persistent_memory_size, volatile_memory_size);
|
||||
}
|
||||
|
||||
void thread::fill_vertex_layout_state(vertex_input_layout& layout, const u32 vertex_count, s32* buffer)
|
||||
void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base)
|
||||
{
|
||||
std::array<s32, 16> offset_in_block = {};
|
||||
u32 volatile_offset = 0;
|
||||
u32 persistent_offset = 0;
|
||||
u32 volatile_offset = volatile_offset_base;
|
||||
u32 persistent_offset = persistent_offset_base;
|
||||
|
||||
//NOTE: Order is important! Transient layout is always push_buffers followed by register data
|
||||
if (rsx::method_registers.current_draw_clause.is_immediate_draw)
|
||||
@ -1757,12 +1759,13 @@ namespace rsx
|
||||
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||
{
|
||||
const auto &block = layout.interleaved_blocks[0];
|
||||
u32 inline_data_offset = volatile_offset_base;
|
||||
for (const u8 index : block.locations)
|
||||
{
|
||||
auto &info = rsx::method_registers.vertex_arrays_info[index];
|
||||
|
||||
offset_in_block[index] = persistent_offset; //just because this var is 0 when we enter here; inlined is transient memory
|
||||
persistent_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
|
||||
offset_in_block[index] = inline_data_offset;
|
||||
inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -1917,7 +1920,7 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
void thread::write_vertex_data_to_memory(vertex_input_layout &layout, const u32 first_vertex, const u32 vertex_count, void *persistent_data, void *volatile_data)
|
||||
void thread::write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data)
|
||||
{
|
||||
char *transient = (char *)volatile_data;
|
||||
char *persistent = (char *)persistent_data;
|
||||
|
@ -396,18 +396,18 @@ namespace rsx
|
||||
* result.first contains persistent memory requirements
|
||||
* result.second contains volatile memory requirements
|
||||
*/
|
||||
std::pair<u32, u32> calculate_memory_requirements(vertex_input_layout& layout, const u32 vertex_count);
|
||||
std::pair<u32, u32> calculate_memory_requirements(const vertex_input_layout& layout, u32 vertex_count);
|
||||
|
||||
/**
|
||||
* Generates vertex input descriptors as an array of 16x4 s32s
|
||||
*/
|
||||
void fill_vertex_layout_state(vertex_input_layout& layout, const u32 vertex_count, s32* buffer);
|
||||
void fill_vertex_layout_state(const vertex_input_layout& layout, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0);
|
||||
|
||||
/**
|
||||
* Uploads vertex data described in the layout descriptor
|
||||
* Copies from local memory to the write-only output buffers provided in a sequential manner
|
||||
*/
|
||||
void write_vertex_data_to_memory(vertex_input_layout &layout, const u32 first_vertex, const u32 vertex_count, void *persistent_data, void *volatile_data);
|
||||
void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data);
|
||||
|
||||
private:
|
||||
std::mutex m_mtx_task;
|
||||
|
@ -2891,6 +2891,7 @@ void VKGSRender::flip(int buffer)
|
||||
|
||||
u32 buffer_width = display_buffers[buffer].width;
|
||||
u32 buffer_height = display_buffers[buffer].height;
|
||||
u32 buffer_pitch = display_buffers[buffer].pitch;
|
||||
|
||||
coordi aspect_ratio;
|
||||
|
||||
@ -2966,18 +2967,21 @@ void VKGSRender::flip(int buffer)
|
||||
//Blit contents to screen..
|
||||
vk::image* image_to_flip = nullptr;
|
||||
|
||||
rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
|
||||
u32 absolute_address = buffer_region.address + buffer_region.base;
|
||||
if (buffer < display_buffers_count && buffer_width && buffer_height && buffer_pitch)
|
||||
{
|
||||
rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
|
||||
u32 absolute_address = buffer_region.address + buffer_region.base;
|
||||
|
||||
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
|
||||
{
|
||||
image_to_flip = render_target_texture;
|
||||
}
|
||||
else if (auto surface = m_texture_cache.find_texture_from_dimensions(absolute_address))
|
||||
{
|
||||
//Hack - this should be the first location to check for output
|
||||
//The render might have been done offscreen or in software and a blit used to display
|
||||
image_to_flip = surface->get_raw_texture();
|
||||
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
|
||||
{
|
||||
image_to_flip = render_target_texture;
|
||||
}
|
||||
else if (auto surface = m_texture_cache.find_texture_from_dimensions(absolute_address))
|
||||
{
|
||||
//Hack - this should be the first location to check for output
|
||||
//The render might have been done offscreen or in software and a blit used to display
|
||||
image_to_flip = surface->get_raw_texture();
|
||||
}
|
||||
}
|
||||
|
||||
VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_frame->present_image);
|
||||
|
Loading…
Reference in New Issue
Block a user