1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 10:42:36 +01:00

Merge pull request #2037 from vlj/rsx-refactor

Rsx: Remove vertex_draw_count (WIP)
This commit is contained in:
vlj 2016-08-11 20:20:07 +02:00 committed by GitHub
commit 65fe9b0927
10 changed files with 1603 additions and 496 deletions

23
Utilities/LICENSE_1_0.txt Normal file
View File

@ -0,0 +1,23 @@
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

10
Utilities/copyright.txt Normal file
View File

@ -0,0 +1,10 @@
Copyright (C) 2011-2012 Andrzej Krzemienski
Distributed under the Boost Software License, Version 1.0
(see accompanying file LICENSE_1_0.txt or a copy at
http://www.boost.org/LICENSE_1_0.txt)
The idea and interface is based on Boost.Optional library
authored by Fernando Luis Cacciola Carballal
Home at https://github.com/akrzemi1/Optional

1039
Utilities/optional.hpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -487,7 +487,9 @@ void GLGSRender::end()
} }
} }
u32 offset_in_index_buffer = set_vertex_buffer(); u32 vertex_draw_count;
std::optional<std::tuple<GLenum, u32> > indexed_draw_info;
std::tie(vertex_draw_count, indexed_draw_info) = set_vertex_buffer();
m_vao.bind(); m_vao.bind();
std::chrono::time_point<std::chrono::system_clock> then = std::chrono::system_clock::now(); std::chrono::time_point<std::chrono::system_clock> then = std::chrono::system_clock::now();
@ -497,26 +499,9 @@ void GLGSRender::end()
m_program->validate(); m_program->validate();
} }
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) if (indexed_draw_info)
{ {
rsx::index_array_type indexed_type = rsx::method_registers.index_type(); __glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, std::get<0>(indexed_draw_info.value()), (GLvoid *)(std::ptrdiff_t)std::get<1>(indexed_draw_info.value()));
if (indexed_type == rsx::index_array_type::u32)
{
__glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, GL_UNSIGNED_INT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer);
}
else if (indexed_type == rsx::index_array_type::u16)
{
__glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer);
}
else
{
throw std::logic_error("bad index array type");
}
}
else if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
{
__glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer);
} }
else else
{ {

View File

@ -4,6 +4,7 @@
#include "rsx_gl_texture.h" #include "rsx_gl_texture.h"
#include "gl_texture_cache.h" #include "gl_texture_cache.h"
#include "gl_render_targets.h" #include "gl_render_targets.h"
#include <Utilities/optional.hpp>
#define RSX_DEBUG 1 #define RSX_DEBUG 1
@ -56,7 +57,14 @@ public:
private: private:
static u32 enable(u32 enable, u32 cap); static u32 enable(u32 enable, u32 cap);
static u32 enable(u32 enable, u32 cap, u32 index); static u32 enable(u32 enable, u32 cap, u32 index);
u32 set_vertex_buffer();
// Return element to draw and in case of indexed draw index type and offset in index buffer
std::tuple<u32, std::optional<std::tuple<GLenum, u32> > > set_vertex_buffer();
void upload_vertex_buffers(const u32 &max_index, const u32 &max_vertex_attrib_size, const u32 &input_mask, const u32 &texture_index_offset);
// Returns vertex count
u32 upload_inline_array(const u32 &max_vertex_attrib_size, const u32 &texture_index_offset);
public: public:
bool load_program(); bool load_program();

View File

@ -172,7 +172,20 @@ namespace
} }
} }
u32 GLGSRender::set_vertex_buffer() namespace
{
GLenum get_index_type(rsx::index_array_type type)
{
switch (type)
{
case rsx::index_array_type::u16: return GL_UNSIGNED_SHORT;
case rsx::index_array_type::u32: return GL_UNSIGNED_INT;
}
throw;
}
}
std::tuple<u32, std::optional<std::tuple<GLenum, u32> > > GLGSRender::set_vertex_buffer()
{ {
//initialize vertex attributes //initialize vertex attributes
//merge all vertex arrays //merge all vertex arrays
@ -184,12 +197,7 @@ u32 GLGSRender::set_vertex_buffer()
u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
u32 min_index = 0, max_index = 0; u32 min_index = 0, max_index = 0;
u32 max_vertex_attrib_size = 0; u32 max_vertex_attrib_size = 0;
u32 offset_in_index_buffer = 0; u32 vertex_or_index_count;
vertex_draw_count = 0;
//place holder; replace with actual index buffer
gsl::span<gsl::byte> index_array;
for (u8 index = 0; index < rsx::limits::vertex_count; ++index) for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
{ {
@ -199,24 +207,182 @@ u32 GLGSRender::set_vertex_buffer()
} }
} }
std::optional<std::tuple<GLenum, u32> > index_info;
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed)
{ {
rsx::index_array_type type = rsx::method_registers.index_type(); rsx::index_array_type type = rsx::method_registers.index_type();
u32 type_size = ::narrow<u32>(get_index_type_size(type)); u32 type_size = ::narrow<u32>(get_index_type_size(type));
vertex_draw_count += rsx::method_registers.current_draw_clause.get_elements_count(); vertex_or_index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.current_draw_clause.get_elements_count());
u32 max_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count) * type_size; u32 max_size = vertex_or_index_count * type_size;
auto mapping = m_index_ring_buffer.alloc_and_map(max_size); auto mapping = m_index_ring_buffer.alloc_and_map(max_size);
void *ptr = mapping.first; void *ptr = mapping.first;
offset_in_index_buffer = mapping.second; u32 offset_in_index_buffer = mapping.second;
std::tie(min_index, max_index, vertex_draw_count) = upload_index_buffer(get_raw_index_array(rsx::method_registers.current_draw_clause.first_count_commands), ptr, type, rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.current_draw_clause.first_count_commands, vertex_draw_count); std::tie(min_index, max_index, vertex_or_index_count) = upload_index_buffer(get_raw_index_array(rsx::method_registers.current_draw_clause.first_count_commands), ptr, type, rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.current_draw_clause.first_count_commands, vertex_or_index_count);
m_index_ring_buffer.unmap(); m_index_ring_buffer.unmap();
index_info = std::make_tuple(get_index_type(type), offset_in_index_buffer);
}
else
{
u32 vertex_count;
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
// We need to go through array to determine vertex count so upload it
vertex_count = upload_inline_array(max_vertex_attrib_size, texture_index_offset);
}
else
{
assert(rsx::method_registers.current_draw_clause.command == rsx::draw_command::array);
vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
max_index = vertex_count - 1;
}
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
{
u32 offset_in_index_buffer;
std::tie(vertex_or_index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(rsx::method_registers.current_draw_clause.first_count_commands, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer);
index_info = std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer);
}
else
{
vertex_or_index_count = vertex_count;
}
} }
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
// Already uploaded when determining vertex count, we can return here
return std::make_tuple(vertex_or_index_count, index_info);
}
upload_vertex_buffers(max_index, max_vertex_attrib_size, input_mask, texture_index_offset);
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
return std::make_tuple(vertex_or_index_count, index_info);
}
void GLGSRender::upload_vertex_buffers(const u32 &max_index, const u32 &max_vertex_attrib_size, const u32 &input_mask, const u32 &texture_index_offset)
{
u32 verts_allocated = max_index + 1;
__glcheck m_attrib_ring_buffer.reserve_and_map(verts_allocated * max_vertex_attrib_size);
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
int location;
if (!m_program->uniforms.has_location(rsx::vertex_program::input_attrib_names[index] + "_buffer", &location))
continue;
bool enabled = !!(input_mask & (1 << index));
if (!enabled)
{
glActiveTexture(GL_TEXTURE0 + index + texture_index_offset);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glProgramUniform1i(m_program->id(), location, index + texture_index_offset);
continue;
}
if (rsx::method_registers.vertex_arrays_info[index].size > 0)
{
auto &vertex_info = rsx::method_registers.vertex_arrays_info[index];
// Fill vertex_array
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size);
//vertex_array.resize(vertex_draw_count * element_size);
u32 data_size = verts_allocated * element_size;
u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size);
auto &texture = m_gl_attrib_buffers[index];
u32 buffer_offset = 0;
// Get source pointer
u32 base_offset = rsx::method_registers.vertex_data_base_offset();
u32 offset = rsx::method_registers.vertex_arrays_info[index].offset();
u32 address = base_offset + rsx::get_address(offset & 0x7fffffff, offset >> 31);
const gsl::byte *src_ptr = gsl::narrow_cast<const gsl::byte*>(vm::base(address));
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
{
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
gsl::byte *dst = static_cast<gsl::byte*>(mapping.first);
buffer_offset = mapping.second;
size_t offset = 0;
gsl::span<gsl::byte> dest_span(dst, data_size);
prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, verts_allocated);
for (const auto &first_count : rsx::method_registers.current_draw_clause.first_count_commands)
{
write_vertex_array_data_to_buffer(dest_span.subspan(offset), src_ptr, first_count.first, first_count.second, vertex_info.type, vertex_info.size, vertex_info.stride, rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size));
offset += first_count.second * element_size;
}
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed)
{
data_size = (max_index + 1) * element_size;
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
gsl::byte *dst = static_cast<gsl::byte*>(mapping.first);
buffer_offset = mapping.second;
gsl::span<gsl::byte> dest_span(dst, data_size);
prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, verts_allocated);
write_vertex_array_data_to_buffer(dest_span, src_ptr, 0, max_index + 1, vertex_info.type, vertex_info.size, vertex_info.stride, rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size));
}
texture.copy_from(m_attrib_ring_buffer, gl_type, buffer_offset, data_size);
//Link texture to uniform
m_program->uniforms.texture(location, index + texture_index_offset, texture);
}
else if (rsx::method_registers.register_vertex_info[index].size > 0)
{
auto &vertex_info = rsx::method_registers.register_vertex_info[index];
switch (vertex_info.type)
{
case rsx::vertex_base_type::f:
{
const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size);
const u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size);
const size_t data_size = element_size;
auto &texture = m_gl_attrib_buffers[index];
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
u8 *dst = static_cast<u8*>(mapping.first);
memcpy(dst, vertex_info.data.data(), element_size);
texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size);
//Link texture to uniform
m_program->uniforms.texture(location, index + texture_index_offset, texture);
break;
}
default:
LOG_ERROR(RSX, "bad non array vertex data format (type=%d, size=%d)", (u32)vertex_info.type, vertex_info.size);
break;
}
}
else
{
glActiveTexture(GL_TEXTURE0 + index + texture_index_offset);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glProgramUniform1i(m_program->id(), location, index + texture_index_offset);
continue;
}
}
m_attrib_ring_buffer.unmap();
}
u32 GLGSRender::upload_inline_array(const u32 &max_vertex_attrib_size, const u32 &texture_index_offset)
{ {
u32 stride = 0; u32 stride = 0;
u32 offsets[rsx::limits::vertex_count] = { 0 }; u32 offsets[rsx::limits::vertex_count] = { 0 };
@ -230,7 +396,7 @@ u32 GLGSRender::set_vertex_buffer()
stride += rsx::get_vertex_type_size_on_host(info.type, info.size); stride += rsx::get_vertex_type_size_on_host(info.type, info.size);
} }
vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride; u32 vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride;
m_attrib_ring_buffer.reserve_and_map(vertex_draw_count * max_vertex_attrib_size); m_attrib_ring_buffer.reserve_and_map(vertex_draw_count * max_vertex_attrib_size);
for (int index = 0; index < rsx::limits::vertex_count; ++index) for (int index = 0; index < rsx::limits::vertex_count; ++index)
@ -283,141 +449,7 @@ u32 GLGSRender::set_vertex_buffer()
//Link texture to uniform //Link texture to uniform
m_program->uniforms.texture(location, index + texture_index_offset, texture); m_program->uniforms.texture(location, index + texture_index_offset, texture);
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
{
std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw({ { 0, vertex_draw_count } }, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer);
}
}
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
{
vertex_draw_count += rsx::method_registers.current_draw_clause.get_elements_count();
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array || rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed)
{
u32 verts_allocated = std::max(vertex_draw_count, max_index + 1);
__glcheck m_attrib_ring_buffer.reserve_and_map(verts_allocated * max_vertex_attrib_size);
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
int location;
if (!m_program->uniforms.has_location(rsx::vertex_program::input_attrib_names[index] + "_buffer", &location))
continue;
bool enabled = !!(input_mask & (1 << index));
if (!enabled)
{
glActiveTexture(GL_TEXTURE0 + index + texture_index_offset);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glProgramUniform1i(m_program->id(), location, index + texture_index_offset);
continue;
}
if (rsx::method_registers.vertex_arrays_info[index].size > 0)
{
auto &vertex_info = rsx::method_registers.vertex_arrays_info[index];
// Fill vertex_array
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size);
//vertex_array.resize(vertex_draw_count * element_size);
u32 data_size = vertex_draw_count * element_size;
u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size);
auto &texture = m_gl_attrib_buffers[index];
u32 buffer_offset = 0;
// Get source pointer
u32 base_offset = rsx::method_registers.vertex_data_base_offset();
u32 offset = rsx::method_registers.vertex_arrays_info[index].offset();
u32 address = base_offset + rsx::get_address(offset & 0x7fffffff, offset >> 31);
const gsl::byte *src_ptr = gsl::narrow_cast<const gsl::byte*>(vm::base(address));
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
{
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
gsl::byte *dst = static_cast<gsl::byte*>(mapping.first);
buffer_offset = mapping.second;
size_t offset = 0;
gsl::span<gsl::byte> dest_span(dst, data_size);
prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, vertex_draw_count);
for (const auto &first_count : rsx::method_registers.current_draw_clause.first_count_commands)
{
write_vertex_array_data_to_buffer(dest_span.subspan(offset), src_ptr, first_count.first, first_count.second, vertex_info.type, vertex_info.size, vertex_info.stride, rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size));
offset += first_count.second * element_size;
}
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed)
{
data_size = (max_index + 1) * element_size;
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
gsl::byte *dst = static_cast<gsl::byte*>(mapping.first);
buffer_offset = mapping.second;
gsl::span<gsl::byte> dest_span(dst, data_size);
prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, vertex_draw_count);
write_vertex_array_data_to_buffer(dest_span, src_ptr, 0, max_index + 1, vertex_info.type, vertex_info.size, vertex_info.stride, rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size));
}
texture.copy_from(m_attrib_ring_buffer, gl_type, buffer_offset, data_size);
//Link texture to uniform
m_program->uniforms.texture(location, index + texture_index_offset, texture);
}
else if (rsx::method_registers.register_vertex_info[index].size > 0)
{
auto &vertex_info = rsx::method_registers.register_vertex_info[index];
switch (vertex_info.type)
{
case rsx::vertex_base_type::f:
{
const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size);
const u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size);
const size_t data_size = element_size;
auto &texture = m_gl_attrib_buffers[index];
auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment);
u8 *dst = static_cast<u8*>(mapping.first);
memcpy(dst, vertex_info.data.data(), element_size);
texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size);
//Link texture to uniform
m_program->uniforms.texture(location, index + texture_index_offset, texture);
break;
}
default:
LOG_ERROR(RSX, "bad non array vertex data format (type=%d, size=%d)", (u32)vertex_info.type, vertex_info.size);
break;
}
}
else
{
glActiveTexture(GL_TEXTURE0 + index + texture_index_offset);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glProgramUniform1i(m_program->id(), location, index + texture_index_offset);
continue;
}
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array && !gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
{
std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(rsx::method_registers.current_draw_clause.first_count_commands, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer);
}
}
m_attrib_ring_buffer.unmap(); m_attrib_ring_buffer.unmap();
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now(); }
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count(); return vertex_draw_count;
return offset_in_index_buffer;
} }

View File

@ -161,8 +161,6 @@ namespace rsx
GcmTileInfo tiles[limits::tiles_count]; GcmTileInfo tiles[limits::tiles_count];
GcmZcullInfo zculls[limits::zculls_count]; GcmZcullInfo zculls[limits::zculls_count];
u32 vertex_draw_count = 0;
// Constant stored for whole frame // Constant stored for whole frame
std::unordered_map<u32, color4f> local_transform_constants; std::unordered_map<u32, color4f> local_transform_constants;

View File

@ -673,14 +673,17 @@ void VKGSRender::end()
vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr); vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr);
if (!std::get<1>(upload_info)) std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<2>(upload_info);
vkCmdDraw(m_command_buffer, vertex_draw_count, 1, 0, 0);
if (!index_info)
vkCmdDraw(m_command_buffer, std::get<1>(upload_info), 1, 0, 0);
else else
{ {
VkIndexType index_type; VkIndexType index_type;
u32 index_count; u32 index_count = std::get<1>(upload_info);
VkDeviceSize offset; VkDeviceSize offset;
std::tie(std::ignore, std::ignore, index_count, offset, index_type) = upload_info;
std::tie(offset, index_type) = index_info.value();
vkCmdBindIndexBuffer(m_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type); vkCmdBindIndexBuffer(m_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);
vkCmdDrawIndexed(m_command_buffer, index_count, 1, 0, 0, 0); vkCmdDrawIndexed(m_command_buffer, index_count, 1, 0, 0, 0);

View File

@ -4,6 +4,7 @@
#include "VKTextureCache.h" #include "VKTextureCache.h"
#include "VKRenderTargets.h" #include "VKRenderTargets.h"
#include "VKFormats.h" #include "VKFormats.h"
#include <Utilities\optional.hpp>
#define RSX_DEBUG 1 #define RSX_DEBUG 1
@ -85,7 +86,24 @@ private:
void sync_at_semaphore_release(); void sync_at_semaphore_release();
void prepare_rtts(); void prepare_rtts();
/// returns primitive topology, is_indexed, index_count, offset in index buffer, index type /// returns primitive topology, is_indexed, index_count, offset in index buffer, index type
std::tuple<VkPrimitiveTopology, bool, u32, VkDeviceSize, VkIndexType> upload_vertex_data(); std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > > upload_vertex_data();
void upload_vertex_buffers(u32 input_mask, u32 vertex_max_index);
/// returns number of vertex drawn
u32 upload_inlined_array();
/**
* Upload index (and expands it if primitive type is not natively supported).
* Returns min index, max index, index_count, and (offset_in_index_buffer, index_type)
*/
std::tuple<u32, u32, u32, std::tuple<VkDeviceSize, VkIndexType>> upload_index_buffer(const rsx::draw_clause &clause);
/**
* Creates and fills an index buffer emulating unsupported primitive type.
* Returns index_count and (offset_in_index_buffer, index_type)
*/
std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType> > generate_emulating_index_buffer(const rsx::draw_clause &clause);
public: public:
bool load_program(); bool load_program();

View File

@ -204,33 +204,11 @@ namespace vk
} }
throw; throw;
} }
std::tuple<u32, u32, VkIndexType> upload_index_buffer(gsl::span<const gsl::byte> raw_index_buffer, rsx::primitive_type type, rsx::index_array_type index_type, void *dst_ptr, bool indexed_draw, u32 vertex_count, u32 index_count, std::vector<std::pair<u32, u32>> first_count_commands)
{
bool emulated = false;
get_appropriate_topology(type, emulated);
if (indexed_draw)
{
u32 min_index, max_index;
size_t index_size = (index_type == rsx::index_array_type::u32) ? 4 : 2;
std::tie(min_index, max_index) = write_index_array_data_to_buffer(gsl::span<gsl::byte>(static_cast<gsl::byte*>(dst_ptr), vertex_count * index_size), raw_index_buffer,
index_type, type, rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(), first_count_commands,
[](auto prim) { return !is_primitive_native(prim); });
return std::make_tuple(min_index, max_index, get_index_type(index_type));
} }
write_index_array_for_non_indexed_non_native_primitive_to_buffer(reinterpret_cast<char*>(dst_ptr), type, 0, vertex_count); namespace
return std::make_tuple(0, vertex_count-1, VK_INDEX_TYPE_UINT16);
}
}
std::tuple<VkPrimitiveTopology, bool, u32, VkDeviceSize, VkIndexType>
VKGSRender::upload_vertex_data()
{ {
//initialize vertex attributes static constexpr std::array<const char*, 16> s_reg_table =
const std::string reg_table[] =
{ {
"in_pos_buffer", "in_weight_buffer", "in_normal_buffer", "in_pos_buffer", "in_weight_buffer", "in_normal_buffer",
"in_diff_color_buffer", "in_spec_color_buffer", "in_diff_color_buffer", "in_spec_color_buffer",
@ -239,62 +217,173 @@ VKGSRender::upload_vertex_data()
"in_tc0_buffer", "in_tc1_buffer", "in_tc2_buffer", "in_tc3_buffer", "in_tc0_buffer", "in_tc1_buffer", "in_tc2_buffer", "in_tc3_buffer",
"in_tc4_buffer", "in_tc5_buffer", "in_tc6_buffer", "in_tc7_buffer" "in_tc4_buffer", "in_tc5_buffer", "in_tc6_buffer", "in_tc7_buffer"
}; };
}
std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > >
VKGSRender::upload_vertex_data()
{
u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
size_t offset_in_index_buffer = -1;
vertex_draw_count = 0;
u32 min_index, max_index; u32 min_index, max_index;
bool is_indexed_draw = (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed); bool is_indexed_draw = (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed);
bool primitives_emulated = false;
u32 index_count = 0; u32 index_count = 0;
VkIndexType index_format = VK_INDEX_TYPE_UINT16; std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info;
if (is_indexed_draw)
{
std::tie(min_index, max_index, index_count, index_info) = upload_index_buffer(rsx::method_registers.current_draw_clause);
}
bool primitives_emulated = false;
VkPrimitiveTopology prims = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitives_emulated); VkPrimitiveTopology prims = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitives_emulated);
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array) if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
{ {
vertex_draw_count += rsx::method_registers.current_draw_clause.get_elements_count();
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed || primitives_emulated)
{
rsx::index_array_type type = rsx::method_registers.index_type();
u32 type_size = ::narrow<u32>(get_index_type_size(type));
if (is_indexed_draw) //Could be emulated or not, emulated array vertex count already computed above
{
vertex_draw_count += rsx::method_registers.current_draw_clause.get_elements_count();
}
index_count = vertex_draw_count;
u32 upload_size = vertex_draw_count * type_size;
std::vector<std::pair<u32, u32>> ranges = rsx::method_registers.current_draw_clause.first_count_commands;
if (primitives_emulated) if (primitives_emulated)
{ {
index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count); std::tie(index_count, index_info) = generate_emulating_index_buffer(rsx::method_registers.current_draw_clause);
upload_size = index_count * sizeof(u16); }
else
if (is_indexed_draw)
{ {
ranges.resize(0); index_count = rsx::method_registers.current_draw_clause.get_elements_count();
ranges.push_back(std::pair<u32, u32>(0, vertex_draw_count));
}
} }
offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size); min_index = 0;
void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size); max_index = rsx::method_registers.current_draw_clause.get_elements_count() - 1;
std::tie(min_index, max_index, index_format) = vk::upload_index_buffer(get_raw_index_array(ranges), rsx::method_registers.current_draw_clause.primitive, type, buf, is_indexed_draw, vertex_draw_count, index_count, ranges);
m_index_buffer_ring_info.unmap();
is_indexed_draw = true;
} }
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
index_count = upload_inlined_array();
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array || rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed)
{
upload_vertex_buffers(input_mask, max_index);
}
return std::make_tuple(prims, index_count, index_info);
}
void VKGSRender::upload_vertex_buffers(u32 input_mask, u32 vertex_max_index)
{
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
bool enabled = !!(input_mask & (1 << index));
if (!m_program->has_uniform(s_reg_table[index]))
continue;
if (!enabled)
{
continue;
}
if (rsx::method_registers.vertex_arrays_info[index].size > 0)
{
auto &vertex_info = rsx::method_registers.vertex_arrays_info[index];
// Fill vertex_array
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size);
u32 real_element_size = vk::get_suitable_vk_size(vertex_info.type, vertex_info.size);
u32 upload_size = real_element_size * (vertex_max_index + 1);
bool requires_expansion = vk::requires_component_expansion(vertex_info.type, vertex_info.size);
// Get source pointer
u32 base_offset = rsx::method_registers.vertex_data_base_offset();
u32 offset = rsx::method_registers.vertex_arrays_info[index].offset();
u32 address = base_offset + rsx::get_address(offset & 0x7fffffff, offset >> 31);
const gsl::byte *src_ptr = gsl::narrow_cast<const gsl::byte*>(vm::base(address));
u32 num_stored_verts = vertex_max_index + 1;
VkDeviceSize offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(upload_size);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, upload_size);
vk::prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, vertex_max_index + 1);
gsl::span<gsl::byte> dest_span(static_cast<gsl::byte*>(dst), upload_size);
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
{
VkDeviceSize offset = 0;
for (const auto &first_count : rsx::method_registers.current_draw_clause.first_count_commands)
{
write_vertex_array_data_to_buffer(dest_span.subspan(offset), src_ptr, first_count.first, first_count.second, vertex_info.type, vertex_info.size, vertex_info.stride, real_element_size);
offset += first_count.second * real_element_size;
}
}
else if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed)
{
write_vertex_array_data_to_buffer(dest_span, src_ptr, 0, vertex_max_index + 1, vertex_info.type, vertex_info.size, vertex_info.stride, real_element_size);
}
m_attrib_ring_info.unmap();
const VkFormat format = vk::get_suitable_vk_format(vertex_info.type, vertex_info.size);
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, upload_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], descriptor_sets);
}
else if (rsx::method_registers.register_vertex_info[index].size > 0)
{
//Untested!
auto &vertex_info = rsx::method_registers.register_vertex_info[index];
switch (vertex_info.type)
{
case rsx::vertex_base_type::f:
{
size_t data_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size);
const VkFormat format = vk::get_suitable_vk_format(vertex_info.type, vertex_info.size);
u32 offset_in_attrib_buffer = 0;
void *data_ptr = vertex_info.data.data();
if (vk::requires_component_expansion(vertex_info.type, vertex_info.size))
{
const u32 num_stored_verts = static_cast<u32>(data_size / (sizeof(float) * vertex_info.size));
const u32 real_element_size = vk::get_suitable_vk_size(vertex_info.type, vertex_info.size);
data_size = real_element_size * num_stored_verts;
offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, data_size);
vk::expand_array_components<float, 3, 4, 1>(reinterpret_cast<float*>(vertex_info.data.data()), dst, num_stored_verts);
m_attrib_ring_info.unmap();
}
else
{
offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, data_size);
memcpy(dst, vertex_info.data.data(), data_size);
m_attrib_ring_info.unmap();
}
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], descriptor_sets);
break;
}
default:
fmt::throw_exception("Unknown base type %d" HERE, (u32)vertex_info.type);
}
}
else
{
//This section should theoretically be unreachable (data stream without available data)
//Variable is defined in the shaders but no data is available
//Bind a buffer view to keep the driver from crashing if access is attempted.
u32 offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(32);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, 32);
memset(dst, 0, 32);
m_attrib_ring_info.unmap();
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R32_SFLOAT, offset_in_attrib_buffer, 32));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], descriptor_sets);
}
}
}
u32 VKGSRender::upload_inlined_array()
{ {
u32 stride = 0; u32 stride = 0;
u32 offsets[rsx::limits::vertex_count] = { 0 }; u32 offsets[rsx::limits::vertex_count] = { 0 };
@ -308,13 +397,13 @@ VKGSRender::upload_vertex_data()
stride += rsx::get_vertex_type_size_on_host(info.type, info.size); stride += rsx::get_vertex_type_size_on_host(info.type, info.size);
} }
vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride; u32 vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride;
for (int index = 0; index < rsx::limits::vertex_count; ++index) for (int index = 0; index < rsx::limits::vertex_count; ++index)
{ {
auto &vertex_info = rsx::method_registers.vertex_arrays_info[index]; auto &vertex_info = rsx::method_registers.vertex_arrays_info[index];
if (!m_program->has_uniform(reg_table[index])) if (!m_program->has_uniform(s_reg_table[index]))
continue; continue;
if (!vertex_info.size) // disabled if (!vertex_info.size) // disabled
@ -363,142 +452,44 @@ VKGSRender::upload_vertex_data()
m_attrib_ring_info.unmap(); m_attrib_ring_info.unmap();
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size)); m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, reg_table[index], descriptor_sets); m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], descriptor_sets);
} }
return vertex_draw_count;
} }
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array || rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) std::tuple<u32, u32, u32, std::tuple<VkDeviceSize, VkIndexType>> VKGSRender::upload_index_buffer(const rsx::draw_clause &clause)
{ {
for (int index = 0; index < rsx::limits::vertex_count; ++index) rsx::index_array_type index_type = rsx::method_registers.index_type();
u32 type_size = gsl::narrow<u32>(get_index_type_size(index_type));
u32 index_count = get_index_count(clause.primitive, clause.get_elements_count());
u32 upload_size = index_count * type_size;
VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
u32 min_index, max_index;
std::tie(min_index, max_index) = write_index_array_data_to_buffer(gsl::span<gsl::byte>(static_cast<gsl::byte*>(buf), index_count * type_size), get_raw_index_array(clause.first_count_commands),
index_type, clause.primitive, rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(), clause.first_count_commands,
[](auto prim) { return !is_primitive_native(prim); });
m_index_buffer_ring_info.unmap();
return std::make_tuple(min_index, max_index, index_count, std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type)));
}
std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType> > VKGSRender::generate_emulating_index_buffer(const rsx::draw_clause &clause)
{ {
bool enabled = !!(input_mask & (1 << index)); u32 vertex_count = clause.get_elements_count();
if (!m_program->has_uniform(reg_table[index])) u32 index_count = get_index_count(clause.primitive, vertex_count);
continue; u32 upload_size = index_count * sizeof(u16);
if (!enabled) VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
{ void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
continue;
write_index_array_for_non_indexed_non_native_primitive_to_buffer(reinterpret_cast<char*>(buf), clause.primitive, 0, vertex_count);
m_index_buffer_ring_info.unmap();
return std::make_tuple(index_count, std::make_tuple(offset_in_index_buffer, VK_INDEX_TYPE_UINT16));
} }
if (rsx::method_registers.vertex_arrays_info[index].size > 0)
{
auto &vertex_info = rsx::method_registers.vertex_arrays_info[index];
// Fill vertex_array
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size);
u32 real_element_size = vk::get_suitable_vk_size(vertex_info.type, vertex_info.size);
u32 upload_size = real_element_size * vertex_draw_count;
u32 offset_in_attrib_buffer = 0;
bool requires_expansion = vk::requires_component_expansion(vertex_info.type, vertex_info.size);
// Get source pointer
u32 base_offset = rsx::method_registers.vertex_data_base_offset();
u32 offset = rsx::method_registers.vertex_arrays_info[index].offset();
u32 address = base_offset + rsx::get_address(offset & 0x7fffffff, offset >> 31);
const gsl::byte *src_ptr = gsl::narrow_cast<const gsl::byte*>(vm::base(address));
u32 num_stored_verts = vertex_draw_count;
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
{
size_t offset = 0;
offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(upload_size);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, upload_size);
vk::prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, vertex_draw_count);
gsl::span<gsl::byte> dest_span(static_cast<gsl::byte*>(dst), upload_size);
for (const auto &first_count : rsx::method_registers.current_draw_clause.first_count_commands)
{
write_vertex_array_data_to_buffer(dest_span.subspan(offset), src_ptr, first_count.first, first_count.second, vertex_info.type, vertex_info.size, vertex_info.stride, real_element_size);
offset += first_count.second * real_element_size;
}
m_attrib_ring_info.unmap();
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed)
{
num_stored_verts = (max_index + 1);
upload_size = real_element_size * num_stored_verts;
offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(upload_size);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, upload_size);
gsl::span<gsl::byte> dest_span(static_cast<gsl::byte*>(dst), upload_size);
vk::prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, num_stored_verts);
write_vertex_array_data_to_buffer(dest_span, src_ptr, 0, max_index + 1, vertex_info.type, vertex_info.size, vertex_info.stride, real_element_size);
m_attrib_ring_info.unmap();
}
const VkFormat format = vk::get_suitable_vk_format(vertex_info.type, vertex_info.size);
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, upload_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, reg_table[index], descriptor_sets);
}
else if (rsx::method_registers.register_vertex_info[index].size > 0)
{
//Untested!
auto &vertex_info = rsx::method_registers.register_vertex_info[index];
switch (vertex_info.type)
{
case rsx::vertex_base_type::f:
{
size_t data_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size);
const VkFormat format = vk::get_suitable_vk_format(vertex_info.type, vertex_info.size);
u32 offset_in_attrib_buffer = 0;
void *data_ptr = vertex_info.data.data();
if (vk::requires_component_expansion(vertex_info.type, vertex_info.size))
{
const u32 num_stored_verts = static_cast<u32>(data_size / (sizeof(float) * vertex_info.size));
const u32 real_element_size = vk::get_suitable_vk_size(vertex_info.type, vertex_info.size);
data_size = real_element_size * num_stored_verts;
offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, data_size);
vk::expand_array_components<float, 3, 4, 1>(reinterpret_cast<float*>(vertex_info.data.data()), dst, num_stored_verts);
m_attrib_ring_info.unmap();
}
else
{
offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, data_size);
memcpy(dst, vertex_info.data.data(), data_size);
m_attrib_ring_info.unmap();
}
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, reg_table[index], descriptor_sets);
break;
}
default:
LOG_ERROR(RSX, "bad non array vertex data format (type=%d, size=%d)", (u32)vertex_info.type, vertex_info.size);
break;
}
}
else
{
//This section should theoretically be unreachable (data stream without available data)
//Variable is defined in the shaders but no data is available
//Bind a buffer view to keep the driver from crashing if access is attempted.
u32 offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(32);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, 32);
memset(dst, 0, 32);
m_attrib_ring_info.unmap();
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R32_SFLOAT, offset_in_attrib_buffer, 32));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, reg_table[index], descriptor_sets);
}
}
}
return std::make_tuple(prims, is_indexed_draw, index_count, offset_in_index_buffer, index_format);
}