1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 18:53:28 +01:00

rsx/gl/vk: Fixes and optimizations

- OpenGL driver optimization for NVIDIA. On NVIDIA, glTextureBufferRange performance is horrendous
-- Initialize texture buffer to whole buffer at startup and use absolute offsets to read data instead
-- Over 2x performance in some cases (Resogun, TNT racers)
- gl/vk: Do not flip non-existent display buffers. Fixes spec violation at boot in TNT racers demo
- whitespace fixes for sys_rsx
This commit is contained in:
kd-11 2018-01-21 18:31:35 +03:00
parent ab17b49e15
commit 3d9e3a16f1
8 changed files with 177 additions and 135 deletions

View File

@ -1,6 +1,7 @@
#pragma once
struct RsxDriverInfo {
struct RsxDriverInfo
{
be_t<u32> version_driver; // 0x0
be_t<u32> version_gpu; // 0x4
be_t<u32> memory_size; // 0x8
@ -15,7 +16,9 @@ struct RsxDriverInfo {
be_t<u32> unk3[6]; // 0x38-0x54
be_t<u32> systemModeFlags; // 0x54
u8 unk4[0x1064]; // 0x10B8
struct Head {
struct Head
{
be_t<u64> lastFlipTime; // 0x0 last flip time
be_t<u32> flipFlags; // 0x8 flags to handle flip/queue
be_t<u32> unk1; // 0xC
@ -29,6 +32,7 @@ struct RsxDriverInfo {
be_t<u32> unk; // 0x38 possible u32, 'flip field', top/bottom for interlaced
be_t<u32> unk5; // 0x3C possible high bits of time stamp? used in getlastVBlankTime
} head[8]; // size = 0x40, 0x200
be_t<u32> unk7; // 0x12B8
be_t<u32> unk8; // 0x12BC
be_t<u32> handlers; // 0x12C0 -- flags showing which handlers are set
@ -46,10 +50,12 @@ struct RsxDriverInfo {
be_t<u32> lastError; // 0x12F4 error param for cellGcmSetGraphicsHandler
// todo: there's more to this
};
static_assert(sizeof(RsxDriverInfo) == 0x12F8, "rsxSizeTest");
static_assert(sizeof(RsxDriverInfo::Head) == 0x40, "rsxHeadSizeTest");
struct RsxDmaControl {
struct RsxDmaControl
{
u8 resv[0x40];
atomic_be_t<u32> put;
atomic_be_t<u32> get;
@ -58,30 +64,35 @@ struct RsxDmaControl {
be_t<u32> unk1;
};
struct RsxSemaphore {
struct RsxSemaphore
{
be_t<u32> val;
be_t<u32> pad;
be_t<u64> timestamp;
};
struct RsxNotify {
struct RsxNotify
{
be_t<u64> timestamp;
be_t<u64> zero;
};
struct RsxReport {
struct RsxReport
{
be_t<u64> timestamp;
be_t<u32> val;
be_t<u32> pad;
};
struct RsxReports {
struct RsxReports
{
RsxSemaphore semaphore[0x100];
RsxNotify notify[64];
RsxReport report[2048];
};
struct RsxDisplayInfo {
struct RsxDisplayInfo
{
be_t<u32> offset;
be_t<u32> pitch;
be_t<u32> width;

View File

@ -218,11 +218,7 @@ void GLGSRender::end()
}
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
u32 vertex_draw_count;
u32 actual_vertex_count;
u32 vertex_base;
std::optional<std::tuple<GLenum, u32> > indexed_draw_info;
std::tie(vertex_draw_count, actual_vertex_count, vertex_base, indexed_draw_info) = set_vertex_buffer();
auto upload_info = set_vertex_buffer();
//Load textures
{
@ -294,7 +290,7 @@ void GLGSRender::end()
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
//Load program here since it is dependent on vertex state
load_program(vertex_base, actual_vertex_count);
load_program(upload_info);
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
@ -492,10 +488,10 @@ void GLGSRender::end()
const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
bool single_draw = !supports_multidraw || (rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive);
if (indexed_draw_info)
if (upload_info.index_info)
{
const GLenum index_type = std::get<0>(indexed_draw_info.value());
const u32 index_offset = std::get<1>(indexed_draw_info.value());
const GLenum index_type = std::get<0>(upload_info.index_info.value());
const u32 index_offset = std::get<1>(upload_info.index_info.value());
const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;
if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
@ -505,7 +501,7 @@ void GLGSRender::end()
if (single_draw)
{
glDrawElements(draw_mode, vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
}
else
{
@ -535,7 +531,7 @@ void GLGSRender::end()
{
if (single_draw)
{
glDrawArrays(draw_mode, 0, vertex_draw_count);
glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
}
else
{
@ -652,16 +648,25 @@ void GLGSRender::on_init_thread()
//Use industry standard resource alignment values as defaults
m_uniform_buffer_offset_align = 256;
m_min_texbuffer_alignment = 256;
m_max_texbuffer_size = 0;
glEnable(GL_VERTEX_PROGRAM_POINT_SIZE);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &m_uniform_buffer_offset_align);
glGetIntegerv(GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT, &m_min_texbuffer_alignment);
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &m_max_texbuffer_size);
m_vao.create();
//Set min alignment to 16-bytes for SSE optimizations with aligned addresses to work
m_min_texbuffer_alignment = std::max(m_min_texbuffer_alignment, 16);
m_uniform_buffer_offset_align = std::max(m_uniform_buffer_offset_align, 16);
LOG_NOTICE(RSX, "Supported texel buffer size reported: %d bytes", m_max_texbuffer_size);
if (m_max_texbuffer_size < (16 * 0x100000))
{
LOG_ERROR(RSX, "Max texture buffer size supported is less than 16M which is useless. Expect undefined behaviour.");
m_max_texbuffer_size = (16 * 0x100000);
}
const u32 texture_index_offset = rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count;
//Array stream buffer
@ -709,11 +714,14 @@ void GLGSRender::on_init_thread()
m_index_ring_buffer.reset(new gl::ring_buffer());
}
m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000);
m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000);
m_transform_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_attrib_ring_buffer->create(gl::buffer::target::texture, std::min<GLsizeiptr>(m_max_texbuffer_size, 256 * 0x100000));
m_index_ring_buffer->create(gl::buffer::target::element_array, std::min<GLsizeiptr>(m_max_texbuffer_size, 64 * 0x100000));
m_transform_constants_buffer->create(gl::buffer::target::uniform, std::min<GLsizeiptr>(m_max_texbuffer_size, 16 * 0x100000));
m_fragment_constants_buffer->create(gl::buffer::target::uniform, std::min<GLsizeiptr>(m_max_texbuffer_size, 16 * 0x100000));
m_vertex_state_buffer->create(gl::buffer::target::uniform, std::min<GLsizeiptr>(m_max_texbuffer_size, 16 * 0x100000));
m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, 0, (u32)m_attrib_ring_buffer->size());
m_gl_volatile_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, 0, (u32)m_attrib_ring_buffer->size());
m_vao.element_array_buffer = *m_index_ring_buffer;
@ -999,7 +1007,7 @@ bool GLGSRender::check_program_state()
return (rsx::method_registers.shader_program_address() != 0);
}
void GLGSRender::load_program(u32 vertex_base, u32 vertex_count)
void GLGSRender::load_program(const vertex_upload_info& upload_info)
{
get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid;
@ -1055,11 +1063,11 @@ void GLGSRender::load_program(u32 vertex_base, u32 vertex_count)
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
*(reinterpret_cast<u32*>(buf + 132)) = vertex_base;
*(reinterpret_cast<u32*>(buf + 132)) = upload_info.vertex_index_base;
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.point_size();
*(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_min();
*(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max();
fill_vertex_layout_state(m_vertex_layout, vertex_count, reinterpret_cast<s32*>(buf + 160));
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
if (m_transform_constants_dirty)
{
@ -1223,97 +1231,101 @@ void GLGSRender::flip(int buffer)
return;
}
gl::screen.clear(gl::buffers::color);
u32 buffer_width = display_buffers[buffer].width;
u32 buffer_height = display_buffers[buffer].height;
u32 buffer_pitch = display_buffers[buffer].pitch;
// Calculate blit coordinates
coordi aspect_ratio;
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
if (!g_cfg.video.stretch_to_display_area)
if (buffer < display_buffers_count && buffer_width && buffer_height && buffer_pitch)
{
const double aq = (double)buffer_width / buffer_height;
const double rq = (double)new_size.width / new_size.height;
const double q = aq / rq;
// Calculate blit coordinates
coordi aspect_ratio;
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
if (q > 1.0)
if (!g_cfg.video.stretch_to_display_area)
{
new_size.height = int(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = int(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
const double aq = (double)buffer_width / buffer_height;
const double rq = (double)new_size.width / new_size.height;
const double q = aq / rq;
aspect_ratio.size = new_size;
// Find the source image
rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
u32 absolute_address = buffer_region.address + buffer_region.base;
m_flip_fbo.recreate();
m_flip_fbo.bind();
const u32 size = buffer_pitch * buffer_height;
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
{
buffer_width = render_target_texture->width();
buffer_height = render_target_texture->height();
m_flip_fbo.color = *render_target_texture;
m_flip_fbo.read_buffer(m_flip_fbo.color);
}
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
m_flip_fbo.color = surface->get_raw_view();
m_flip_fbo.read_buffer(m_flip_fbo.color);
}
else
{
LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");
if (!m_flip_tex_color || m_flip_tex_color.size() != sizei{ (int)buffer_width, (int)buffer_height })
{
m_flip_tex_color.recreate(gl::texture::target::texture2D);
m_flip_tex_color.config()
.size({ (int)buffer_width, (int)buffer_height })
.type(gl::texture::type::uint_8_8_8_8)
.format(gl::texture::format::bgra);
m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch / 4);
if (q > 1.0)
{
new_size.height = int(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = int(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
if (buffer_region.tile)
aspect_ratio.size = new_size;
// Find the source image
rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
u32 absolute_address = buffer_region.address + buffer_region.base;
m_flip_fbo.recreate();
m_flip_fbo.bind();
const u32 size = buffer_pitch * buffer_height;
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
{
std::unique_ptr<u8[]> temp(new u8[buffer_height * buffer_pitch]);
buffer_region.read(temp.get(), buffer_width, buffer_height, buffer_pitch);
m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
buffer_width = render_target_texture->width();
buffer_height = render_target_texture->height();
m_flip_fbo.color = *render_target_texture;
m_flip_fbo.read_buffer(m_flip_fbo.color);
}
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions(absolute_address))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
m_flip_fbo.color = surface->get_raw_view();
m_flip_fbo.read_buffer(m_flip_fbo.color);
}
else
{
m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");
if (!m_flip_tex_color || m_flip_tex_color.size() != sizei{ (int)buffer_width, (int)buffer_height })
{
m_flip_tex_color.recreate(gl::texture::target::texture2D);
m_flip_tex_color.config()
.size({ (int)buffer_width, (int)buffer_height })
.type(gl::texture::type::uint_8_8_8_8)
.format(gl::texture::format::bgra);
m_flip_tex_color.pixel_unpack_settings().aligment(1).row_length(buffer_pitch / 4);
}
if (buffer_region.tile)
{
std::unique_ptr<u8[]> temp(new u8[buffer_height * buffer_pitch]);
buffer_region.read(temp.get(), buffer_width, buffer_height, buffer_pitch);
m_flip_tex_color.copy_from(temp.get(), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
}
else
{
m_flip_tex_color.copy_from(buffer_region.ptr, gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8);
}
m_flip_fbo.color = m_flip_tex_color;
m_flip_fbo.read_buffer(m_flip_fbo.color);
}
m_flip_fbo.color = m_flip_tex_color;
m_flip_fbo.read_buffer(m_flip_fbo.color);
// Blit source image to the screen
// Disable scissor test (affects blit)
glDisable(GL_SCISSOR_TEST);
areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });
m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
}
// Blit source image to the screen
// Disable scissor test (affects blit)
glDisable(GL_SCISSOR_TEST);
areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });
gl::screen.clear(gl::buffers::color);
m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
if (m_custom_ui)
{
gl::screen.bind();

View File

@ -255,6 +255,16 @@ struct driver_state
}
};
// Result of a vertex upload: returned by GLGSRender::set_vertex_buffer() and
// consumed by GLGSRender::load_program() and the draw calls in GLGSRender::end().
// All scalar members default to zero so that a default-constructed instance never
// carries indeterminate values; brace (aggregate) initialization keeps working.
struct vertex_upload_info
{
	// Element count handed to glDrawElements / glDrawArrays for a single draw
	u32 vertex_draw_count = 0;
	// Vertex count forwarded to fill_vertex_layout_state()
	u32 allocated_vertex_count = 0;
	// Base index written into the vertex state buffer by load_program()
	u32 vertex_index_base = 0;
	// Offset of the persistent data inside the attribute ring buffer heap
	u32 persistent_mapping_offset = 0;
	// Offset of the volatile data inside the attribute ring buffer heap
	u32 volatile_mapping_offset = 0;
	// (index type, index offset) for indexed draws; empty when the draw is not indexed
	std::optional<std::tuple<GLenum, u32>> index_info;
};
class GLGSRender : public GSRender
{
private:
@ -289,6 +299,7 @@ private:
GLint m_min_texbuffer_alignment = 256;
GLint m_uniform_buffer_offset_align = 256;
GLint m_max_texbuffer_size = 65536;
bool manually_flush_ring_buffers = false;
@ -326,14 +337,14 @@ private:
driver_state gl_state;
// Return element to draw and in case of indexed draw index type and offset in index buffer
std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32> > > set_vertex_buffer();
vertex_upload_info set_vertex_buffer();
rsx::vertex_input_layout m_vertex_layout = {};
void clear_surface(u32 arg);
void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
bool check_program_state();
void load_program(u32 vertex_base, u32 vertex_count);
void load_program(const vertex_upload_info& upload_info);
void update_draw_state();

View File

@ -259,6 +259,7 @@ namespace gl
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
return (status == GL_SIGNALED);
}
return true;
}
bool wait_for_signal()
@ -831,7 +832,6 @@ namespace gl
protected:
u32 m_data_loc = 0;
u32 m_limit = 0;
void *m_memory_mapping = nullptr;
fence m_fence;
@ -854,7 +854,7 @@ namespace gl
verify(HERE), m_memory_mapping != nullptr;
m_data_loc = 0;
m_limit = ::narrow<u32>(size);
m_size = ::narrow<u32>(size);
}
void create(target target_, GLsizeiptr size, const void* data_ = nullptr)
@ -868,7 +868,7 @@ namespace gl
u32 offset = m_data_loc;
if (m_data_loc) offset = align(offset, alignment);
if ((offset + alloc_size) > m_limit)
if ((offset + alloc_size) > m_size)
{
if (!m_fence.is_empty())
m_fence.wait_for_signal();
@ -894,7 +894,7 @@ namespace gl
m_memory_mapping = nullptr;
m_data_loc = 0;
m_limit = 0;
m_size = 0;
}
glDeleteBuffers(1, &m_id);
@ -936,7 +936,7 @@ namespace gl
m_memory_mapping = nullptr;
m_data_loc = 0;
m_limit = ::narrow<u32>(size);
m_size = ::narrow<u32>(size);
}
void create(target target_, GLsizeiptr size, const void* data_ = nullptr)
@ -954,9 +954,9 @@ namespace gl
const u32 block_size = align(alloc_size + 16, 256); //Overallocate just in case we need to realign base
if ((offset + block_size) > m_limit)
if ((offset + block_size) > m_size)
{
buffer::data(m_limit, nullptr);
buffer::data(m_size, nullptr);
m_data_loc = 0;
}

View File

@ -180,7 +180,7 @@ namespace
};
}
std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::set_vertex_buffer()
vertex_upload_info GLGSRender::set_vertex_buffer()
{
std::chrono::time_point<steady_clock> then = steady_clock::now();
@ -196,6 +196,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
auto required = calculate_memory_requirements(m_vertex_layout, vertex_count);
std::pair<void*, u32> persistent_mapping = {}, volatile_mapping = {};
vertex_upload_info upload_info = { result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, 0u, 0u, result.index_info };
if (required.first > 0)
{
@ -213,7 +214,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
{
in_cache = true;
m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, cached->offset_in_heap, required.first);
upload_info.persistent_mapping_offset = cached->offset_in_heap;
}
else
{
@ -224,7 +225,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
if (!in_cache)
{
persistent_mapping = m_attrib_ring_buffer->alloc_from_heap(required.first, m_min_texbuffer_alignment);
m_gl_persistent_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, persistent_mapping.second, required.first);
upload_info.persistent_mapping_offset = persistent_mapping.second;
if (to_store)
{
@ -237,7 +238,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
if (required.second > 0)
{
volatile_mapping = m_attrib_ring_buffer->alloc_from_heap(required.second, m_min_texbuffer_alignment);
m_gl_volatile_stream_buffer.copy_from(*m_attrib_ring_buffer, GL_R8UI, volatile_mapping.second, required.second);
upload_info.volatile_mapping_offset = volatile_mapping.second;
}
//Write all the data
@ -245,7 +246,7 @@ std::tuple<u32, u32, u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::se
std::chrono::time_point<steady_clock> now = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
return std::make_tuple(result.vertex_draw_count, result.allocated_vertex_count, result.vertex_index_base, result.index_info);
return upload_info;
}
namespace

View File

@ -1616,6 +1616,8 @@ namespace rsx
local_mem_addr = localAddress;
flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE;
memset(display_buffers, 0, sizeof(display_buffers));
m_used_gcm_commands.clear();
on_init_rsx();
@ -1676,7 +1678,7 @@ namespace rsx
}
}
std::pair<u32, u32> thread::calculate_memory_requirements(vertex_input_layout& layout, const u32 vertex_count)
std::pair<u32, u32> thread::calculate_memory_requirements(const vertex_input_layout& layout, u32 vertex_count)
{
u32 persistent_memory_size = 0;
u32 volatile_memory_size = 0;
@ -1732,11 +1734,11 @@ namespace rsx
return std::make_pair(persistent_memory_size, volatile_memory_size);
}
void thread::fill_vertex_layout_state(vertex_input_layout& layout, const u32 vertex_count, s32* buffer)
void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base)
{
std::array<s32, 16> offset_in_block = {};
u32 volatile_offset = 0;
u32 persistent_offset = 0;
u32 volatile_offset = volatile_offset_base;
u32 persistent_offset = persistent_offset_base;
//NOTE: Order is important! Transient layout is always push_buffers followed by register data
if (rsx::method_registers.current_draw_clause.is_immediate_draw)
@ -1757,12 +1759,13 @@ namespace rsx
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
const auto &block = layout.interleaved_blocks[0];
u32 inline_data_offset = volatile_offset_base;
for (const u8 index : block.locations)
{
auto &info = rsx::method_registers.vertex_arrays_info[index];
offset_in_block[index] = persistent_offset; //just because this var is 0 when we enter here; inlined is transient memory
persistent_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
offset_in_block[index] = inline_data_offset;
inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
}
}
else
@ -1917,7 +1920,7 @@ namespace rsx
}
}
void thread::write_vertex_data_to_memory(vertex_input_layout &layout, const u32 first_vertex, const u32 vertex_count, void *persistent_data, void *volatile_data)
void thread::write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data)
{
char *transient = (char *)volatile_data;
char *persistent = (char *)persistent_data;

View File

@ -396,18 +396,18 @@ namespace rsx
* result.first contains persistent memory requirements
* result.second contains volatile memory requirements
*/
std::pair<u32, u32> calculate_memory_requirements(vertex_input_layout& layout, const u32 vertex_count);
std::pair<u32, u32> calculate_memory_requirements(const vertex_input_layout& layout, u32 vertex_count);
/**
* Generates vertex input descriptors as an array of 16x4 s32s
*/
void fill_vertex_layout_state(vertex_input_layout& layout, const u32 vertex_count, s32* buffer);
void fill_vertex_layout_state(const vertex_input_layout& layout, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0);
/**
* Uploads vertex data described in the layout descriptor
* Copies from local memory to the write-only output buffers provided in a sequential manner
*/
void write_vertex_data_to_memory(vertex_input_layout &layout, const u32 first_vertex, const u32 vertex_count, void *persistent_data, void *volatile_data);
void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data);
private:
std::mutex m_mtx_task;

View File

@ -2891,6 +2891,7 @@ void VKGSRender::flip(int buffer)
u32 buffer_width = display_buffers[buffer].width;
u32 buffer_height = display_buffers[buffer].height;
u32 buffer_pitch = display_buffers[buffer].pitch;
coordi aspect_ratio;
@ -2966,18 +2967,21 @@ void VKGSRender::flip(int buffer)
//Blit contents to screen..
vk::image* image_to_flip = nullptr;
rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
u32 absolute_address = buffer_region.address + buffer_region.base;
if (buffer < display_buffers_count && buffer_width && buffer_height && buffer_pitch)
{
rsx::tiled_region buffer_region = get_tiled_address(display_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
u32 absolute_address = buffer_region.address + buffer_region.base;
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
{
image_to_flip = render_target_texture;
}
else if (auto surface = m_texture_cache.find_texture_from_dimensions(absolute_address))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
image_to_flip = surface->get_raw_texture();
if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address))
{
image_to_flip = render_target_texture;
}
else if (auto surface = m_texture_cache.find_texture_from_dimensions(absolute_address))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
image_to_flip = surface->get_raw_texture();
}
}
VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_frame->present_image);