1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 10:42:36 +01:00

gl: Overhaul upload and download routines for textures to go through shared image_to_buffer and buffer_to_image routines.

- This automatically adds support for depth float textures as well
This commit is contained in:
kd-11 2020-09-06 18:17:08 +03:00 committed by kd-11
parent 85dd1b4ea9
commit 85e5b077f7
6 changed files with 526 additions and 325 deletions

View File

@ -110,7 +110,7 @@ namespace gl
u32 m_data_length = 0;
u32 kernel_size = 1;
std::string uniforms, variables, work_kernel, loop_advance, suffix;
std::string uniforms, variables, work_kernel, loop_advance, suffix, method_declarations;
cs_shuffle_base()
{
@ -146,10 +146,8 @@ namespace gl
"#define bswap_u16_u32(bits) (bits & 0xFFFF) << 16 | (bits & 0xFFFF0000) >> 16\n"
"\n"
"// Depth format conversions\n"
"#define d24x8_to_x8d24(bits) (bits << 8) | (bits >> 24)\n"
"#define d24x8_to_x8d24_swapped(bits) bswap_u32(d24x8_to_x8d24(bits))\n"
"#define x8d24_to_d24x8(bits) (bits >> 8) | (bits << 24)\n"
"#define x8d24_to_d24x8_swapped(bits) x8d24_to_d24x8(bswap_u32(bits))\n"
"#define d24f_to_f32(bits) (bits << 7)\n"
"#define f32_to_d24f(bits) (bits >> 7)\n"
"\n"
"uint linear_invocation_id()\n"
"{\n"
@ -157,6 +155,7 @@ namespace gl
" return (gl_GlobalInvocationID.y * size_in_x) + gl_GlobalInvocationID.x;\n"
"}\n"
"\n"
"%md"
"void main()\n"
"{\n"
" uint invocation_id = linear_invocation_id();\n"
@ -173,6 +172,7 @@ namespace gl
{ "%vars", variables },
{ "%f", function_name },
{ "%ub", uniforms },
{ "%md", method_declarations }
};
m_src = fmt::replace_all(m_src, syntax_replace);
@ -265,35 +265,229 @@ namespace gl
}
};
template<bool _SwapBytes = false>
struct cs_shuffle_d24x8_to_x8d24 : cs_shuffle_base
// Compute job that packs texels stored as two 32-bit words (depth word followed by a
// stencil word) into one 32-bit word per texel: (f32_to_d24f(depth) << 8) | (stencil & 0xFF),
// written through bswap_u32.
// in_ptr / out_ptr are byte offsets relative to the start of the bound SSBO range; the
// kernel turns them into 32-bit word indices (in_offset / out_offset) before indexing data[].
// NOTE(review): this listing appears to be a rendered diff; the lines mentioning
// cs_shuffle_d24x8_to_x8d24 and _SwapBytes look like pre-change leftovers - confirm against the real file.
struct cs_shuffle_d32fx8_to_x8d24f : cs_shuffle_base
{
cs_shuffle_d24x8_to_x8d24()
// Length in bytes of the SSBO range bound by bind_resources(); computed by run()
u32 m_ssbo_length = 0;
cs_shuffle_d32fx8_to_x8d24f()
{
if constexpr (_SwapBytes)
{
cs_shuffle_base::build("d24x8_to_x8d24_swapped");
}
else
{
cs_shuffle_base::build("d24x8_to_x8d24");
}
uniforms = "uniform uint in_ptr, out_ptr;\n";
variables =
" uint in_offset = in_ptr >> 2;\n"
" uint out_offset = out_ptr >> 2;\n"
" uint depth, stencil;\n";
work_kernel =
" depth = data[index * 2 + in_offset];\n"
" stencil = data[index * 2 + (in_offset + 1)] & 0xFFu;\n"
" value = f32_to_d24f(depth) << 8;\n"
" value |= stencil;\n"
// data[] is indexed in 32-bit words, so the destination must use out_offset
// (out_ptr >> 2); indexing with the byte offset out_ptr lands at 4x the intended position.
" data[index + out_offset] = bswap_u32(value);\n";
cs_shuffle_base::build("");
}
// Binds only the [m_data_offset, m_data_offset + m_ssbo_length) window of the buffer
void bind_resources() override
{
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
}
// data:       buffer holding both the source and the destination region
// src_offset: byte offset of the source (8 bytes are accounted per texel)
// dst_offset: byte offset of the destination (4 bytes are accounted per texel)
// num_texels: number of texels to convert
void run(const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
{
// The bound window starts at the lower of the two offsets and extends to the end of
// the region that starts at the higher one
u32 data_offset;
if (src_offset > dst_offset)
{
data_offset = dst_offset;
m_ssbo_length = (src_offset + num_texels * 8) - data_offset;
}
else
{
data_offset = src_offset;
m_ssbo_length = (dst_offset + num_texels * 4) - data_offset;
}
// Offsets handed to the shader are relative to the start of the bound window
m_program.uniforms["in_ptr"] = src_offset - data_offset;
m_program.uniforms["out_ptr"] = dst_offset - data_offset;
cs_shuffle_base::run(data, num_texels * 4, data_offset);
}
};
template<bool _SwapBytes = false>
struct cs_shuffle_x8d24_to_d24x8 : cs_shuffle_base
// Compute job that expands one packed 32-bit word per texel into two 32-bit words:
// the word is passed through bswap_u32, its low 8 bits become the stencil word and the
// remaining bits (value >> 8) go through d24f_to_f32 to form the depth word.
// in_ptr / out_ptr are byte offsets relative to the start of the bound SSBO range.
// NOTE(review): this listing appears to be a rendered diff; the lines mentioning
// cs_shuffle_x8d24_to_d24x8 and _SwapBytes look like pre-change leftovers - confirm against the real file.
struct cs_shuffle_x8d24f_to_d32fx8 : cs_shuffle_base
{
cs_shuffle_x8d24_to_d24x8()
// Length in bytes of the SSBO range bound by bind_resources(); computed by run()
u32 m_ssbo_length = 0;
cs_shuffle_x8d24f_to_d32fx8()
{
if constexpr (_SwapBytes)
{
cs_shuffle_base::build("x8d24_to_d24x8_swapped");
}
else
{
cs_shuffle_base::build("x8d24_to_d24x8");
}
// NOTE(review): texel_count is declared here but neither read by the kernel below nor
// written by run() in this struct
uniforms = "uniform uint texel_count, in_ptr, out_ptr;\n";
variables =
" uint in_offset = in_ptr >> 2;\n"
" uint out_offset = out_ptr >> 2;\n"
" uint depth, stencil;\n";
work_kernel =
" value = data[index + in_offset];\n"
" value = bswap_u32(value);\n"
" stencil = (value & 0xFFu);\n"
" depth = (value >> 8);\n"
" data[index * 2 + out_offset] = d24f_to_f32(depth);\n"
" data[index * 2 + (out_offset + 1)] = stencil;\n";
cs_shuffle_base::build("");
}
// Binds only the [m_data_offset, m_data_offset + m_ssbo_length) window of the buffer
void bind_resources() override
{
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
}
// data:       buffer holding both the source and the destination region
// src_offset: byte offset of the source (4 bytes are accounted per texel)
// dst_offset: byte offset of the destination (8 bytes are accounted per texel)
// num_texels: number of texels to convert
void run(const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
{
// The bound window starts at the lower of the two offsets and extends to the end of
// the region that starts at the higher one
u32 data_offset;
if (src_offset > dst_offset)
{
data_offset = dst_offset;
m_ssbo_length = (src_offset + num_texels * 4) - data_offset;
}
else
{
data_offset = src_offset;
m_ssbo_length = (dst_offset + num_texels * 8) - data_offset;
}
// Offsets handed to the shader are relative to the start of the bound window
m_program.uniforms["in_ptr"] = src_offset - data_offset;
m_program.uniforms["out_ptr"] = dst_offset - data_offset;
cs_shuffle_base::run(data, num_texels * 4, data_offset);
}
};
// Compute job that converts between 32-bit values and pairs of 16-bit values packed into
// one 32-bit word, using the shader helpers declared below.
//   sizeof(From) == 4: "contraction" - two source words are packed into one destination word.
//   otherwise:         "expansion"   - one source word is unpacked into two destination words.
// _SwapSrc / _SwapDst insert a byte swap on the loaded / stored values respectively.
// in_ptr / out_ptr are byte offsets relative to the start of the bound SSBO range.
template<typename From, typename To, bool _SwapSrc = false, bool _SwapDst = false>
struct cs_fconvert_task : cs_shuffle_base
{
	// Length in bytes of the SSBO range bound by bind_resources(); computed by run()
	u32 m_ssbo_length = 0;

	// Appends the GLSL helper that splits a word into two 16-bit halves and rebases each
	// one (shift left by 11, then add 120 << 23)
	void declare_f16_expansion()
	{
		method_declarations +=
			"uvec2 unpack_e4m12_pack16(const in uint value)\n"
			"{\n"
			" uvec2 result = uvec2(bitfieldExtract(value, 0, 16), bitfieldExtract(value, 16, 16));\n"
			" result <<= 11;\n"
			" result += (120 << 23);\n"
			" return result;\n"
			"}\n\n";
	}

	// Appends the GLSL helper performing the inverse operation (subtract 120 << 23, shift
	// right by 11) and packing both results into one word
	void declare_f16_contraction()
	{
		method_declarations +=
			"uint pack_e4m12_pack16(const in uvec2 value)\n"
			"{\n"
			" uvec2 result = (value - (120 << 23)) >> 11;\n"
			" return (result.x & 0xFFFF) | (result.y << 16);\n"
			"}\n\n";
	}

	cs_fconvert_task()
	{
		uniforms =
			"uniform uint data_length_in_bytes, in_ptr, out_ptr;\n";

		// block_length is the number of 32-bit words in the SOURCE region
		variables =
			" uint block_length = data_length_in_bytes >> 2;\n"
			" uint in_offset = in_ptr >> 2;\n"
			" uint out_offset = out_ptr >> 2;\n"
			" uvec4 tmp;\n";

		if constexpr (sizeof(From) == 4)
		{
			static_assert(sizeof(To) == 2);
			declare_f16_contraction();

			// Every work item consumes TWO source words here, so only half as many work
			// items as there are source words may run (rounded up). Bounding by block_length
			// itself lets the upper half read past the source and write past the packed output.
			work_kernel =
				" if (index >= ((block_length + 1) >> 1))\n"
				" return;\n"
				" const uint src_offset = (index * 2) + in_offset;\n"
				" const uint dst_offset = index + out_offset;\n"
				" tmp.x = data[src_offset];\n"
				" tmp.y = data[src_offset + 1];\n";

			if constexpr (_SwapSrc)
			{
				work_kernel +=
					" tmp = bswap_u32(tmp);\n";
			}

			// Convert
			work_kernel += " tmp.z = pack_e4m12_pack16(tmp.xy);\n";

			if constexpr (_SwapDst)
			{
				work_kernel += " tmp.z = bswap_u16(tmp.z);\n";
			}

			work_kernel += " data[dst_offset] = tmp.z;\n";
		}
		else
		{
			static_assert(sizeof(To) == 4);
			declare_f16_expansion();

			// One source word per work item, so block_length is the exact bound
			work_kernel =
				" if (index >= block_length)\n"
				" return;\n"
				" const uint src_offset = index + in_offset;\n"
				" const uint dst_offset = (index * 2) + out_offset;\n"
				" tmp.x = data[src_offset];\n";

			if constexpr (_SwapSrc)
			{
				work_kernel +=
					" tmp.x = bswap_u16(tmp.x);\n";
			}

			// Convert
			work_kernel += " tmp.yz = unpack_e4m12_pack16(tmp.x);\n";

			if constexpr (_SwapDst)
			{
				work_kernel += " tmp.yz = bswap_u32(tmp.yz);\n";
			}

			work_kernel +=
				" data[dst_offset] = tmp.y;\n"
				" data[dst_offset + 1] = tmp.z;\n";
		}

		cs_shuffle_base::build("");
	}

	// Binds only the [m_data_offset, m_data_offset + m_ssbo_length) window of the buffer
	void bind_resources() override
	{
		m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
	}

	// data:       buffer holding both the source and the destination region
	// src_offset: byte offset of the source data
	// src_length: size of the source data in bytes
	// dst_offset: byte offset where the converted data is written
	void run(const gl::buffer* data, u32 src_offset, u32 src_length, u32 dst_offset)
	{
		// The bound window starts at the lower of the two offsets
		u32 data_offset;
		if (src_offset > dst_offset)
		{
			m_ssbo_length = (src_offset + src_length) - dst_offset;
			data_offset = dst_offset;
		}
		else
		{
			// Output size is the source element count times the destination element size
			m_ssbo_length = (dst_offset - src_offset) + (src_length / sizeof(From)) * sizeof(To);
			data_offset = src_offset;
		}

		// Offsets handed to the shader are relative to the start of the bound window
		m_program.uniforms["data_length_in_bytes"] = src_length;
		m_program.uniforms["in_ptr"] = src_offset - data_offset;
		m_program.uniforms["out_ptr"] = dst_offset - data_offset;

		cs_shuffle_base::run(data, src_length, data_offset);
	}
};

View File

@ -1479,14 +1479,6 @@ namespace gl
enum class internal_format
{
r = GL_RED,
rg = GL_RG,
rgb = GL_RGB,
rgba = GL_RGBA,
bgr = GL_BGR,
bgra = GL_BGRA,
stencil8 = GL_STENCIL_INDEX8,
depth16 = GL_DEPTH_COMPONENT16,
depth32f = GL_DEPTH_COMPONENT32F,
@ -1821,7 +1813,7 @@ namespace gl
return m_component_layout;
}
void copy_from(const void* src, texture::format format, texture::type type, const coord3u region, const pixel_unpack_settings& pixel_settings)
void copy_from(const void* src, texture::format format, texture::type type, int level, const coord3u region, const pixel_unpack_settings& pixel_settings)
{
pixel_settings.apply();
@ -1829,25 +1821,25 @@ namespace gl
{
case GL_TEXTURE_1D:
{
DSA_CALL(TextureSubImage1D, m_id, GL_TEXTURE_1D, 0, region.x, region.width, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
DSA_CALL(TextureSubImage1D, m_id, GL_TEXTURE_1D, level, region.x, region.width, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
break;
}
case GL_TEXTURE_2D:
{
DSA_CALL(TextureSubImage2D, m_id, GL_TEXTURE_2D, 0, region.x, region.y, region.width, region.height, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
DSA_CALL(TextureSubImage2D, m_id, GL_TEXTURE_2D, level, region.x, region.y, region.width, region.height, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
break;
}
case GL_TEXTURE_3D:
case GL_TEXTURE_2D_ARRAY:
{
DSA_CALL(TextureSubImage3D, m_id, target_, 0, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
DSA_CALL(TextureSubImage3D, m_id, target_, level, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
break;
}
case GL_TEXTURE_CUBE_MAP:
{
if (get_driver_caps().ARB_dsa_supported)
{
glTextureSubImage3D(m_id, 0, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
glTextureSubImage3D(m_id, level, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
}
else
{
@ -1856,7 +1848,7 @@ namespace gl
const auto end = std::min(6u, region.z + region.depth);
for (unsigned face = region.z; face < end; ++face)
{
glTextureSubImage2DEXT(m_id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, 0, region.x, region.y, region.width, region.height, static_cast<GLenum>(format), static_cast<GLenum>(type), ptr);
glTextureSubImage2DEXT(m_id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, level, region.x, region.y, region.width, region.height, static_cast<GLenum>(format), static_cast<GLenum>(type), ptr);
ptr += (region.width * region.height * 4); //TODO
}
}
@ -1868,7 +1860,7 @@ namespace gl
void copy_from(const void* src, texture::format format, texture::type type, const pixel_unpack_settings& pixel_settings)
{
const coord3u region = { {}, size3D() };
copy_from(src, format, type, region, pixel_settings);
copy_from(src, format, type, 0, region, pixel_settings);
}
void copy_from(buffer &buf, u32 gl_format_type, u32 offset, u32 length)
@ -1884,7 +1876,7 @@ namespace gl
copy_from(*view.value(), view.format(), view.offset(), view.range());
}
void copy_to(void* dst, texture::format format, texture::type type, const coord3u& region, const pixel_pack_settings& pixel_settings) const
void copy_to(void* dst, texture::format format, texture::type type, int level, const coord3u& region, const pixel_pack_settings& pixel_settings) const
{
pixel_settings.apply();
const auto& caps = get_driver_caps();
@ -1893,13 +1885,13 @@ namespace gl
region.width == m_width && region.height == m_height && region.depth == m_depth)
{
if (caps.ARB_dsa_supported)
glGetTextureImage(m_id, 0, static_cast<GLenum>(format), static_cast<GLenum>(type), INT32_MAX, dst);
glGetTextureImage(m_id, level, static_cast<GLenum>(format), static_cast<GLenum>(type), INT32_MAX, dst);
else
glGetTextureImageEXT(m_id, static_cast<GLenum>(m_target), 0, static_cast<GLenum>(format), static_cast<GLenum>(type), dst);
glGetTextureImageEXT(m_id, static_cast<GLenum>(m_target), level, static_cast<GLenum>(format), static_cast<GLenum>(type), dst);
}
else if (caps.ARB_dsa_supported)
{
glGetTextureSubImage(m_id, 0, region.x, region.y, region.z, region.width, region.height, region.depth,
glGetTextureSubImage(m_id, level, region.x, region.y, region.z, region.width, region.height, region.depth,
static_cast<GLenum>(format), static_cast<GLenum>(type), INT32_MAX, dst);
}
else
@ -1907,18 +1899,18 @@ namespace gl
// Worst case scenario. For some reason, EXT_dsa does not have glGetTextureSubImage
const auto target_ = static_cast<GLenum>(m_target);
texture tmp{ target_, region.width, region.height, region.depth, 1, static_cast<GLenum>(m_internal_format) };
glCopyImageSubData(m_id, target_, 0, region.x, region.y, region.z, tmp.id(), target_, 0, 0, 0, 0,
glCopyImageSubData(m_id, target_, level, region.x, region.y, region.z, tmp.id(), target_, 0, 0, 0, 0,
region.width, region.height, region.depth);
const coord3u region2 = { {0, 0, 0}, region.size };
tmp.copy_to(dst, format, type, region2, pixel_settings);
tmp.copy_to(dst, format, type, 0, region2, pixel_settings);
}
}
void copy_to(void* dst, texture::format format, texture::type type, const pixel_pack_settings& pixel_settings) const
{
const coord3u region = { {}, size3D() };
copy_to(dst, format, type, region, pixel_settings);
copy_to(dst, format, type, 0, region, pixel_settings);
}
};

View File

@ -73,7 +73,7 @@ depth_format rsx::internals::surface_depth_format_to_gl(rsx::surface_depth_forma
case rsx::surface_depth_format2::z16_uint:
return{ ::gl::texture::type::ushort, ::gl::texture::format::depth, ::gl::texture::internal_format::depth16 };
case rsx::surface_depth_format2::z16_float:
return{ ::gl::texture::type::f16, ::gl::texture::format::depth, ::gl::texture::internal_format::depth32f };
return{ ::gl::texture::type::f32, ::gl::texture::format::depth, ::gl::texture::internal_format::depth32f };
case rsx::surface_depth_format2::z24s8_uint:
if (g_cfg.video.force_high_precision_z_buffer && ::gl::get_driver_caps().ARB_depth_buffer_float_supported)
@ -81,8 +81,7 @@ depth_format rsx::internals::surface_depth_format_to_gl(rsx::surface_depth_forma
else
return{ ::gl::texture::type::uint_24_8, ::gl::texture::format::depth_stencil, ::gl::texture::internal_format::depth24_stencil8 };
case rsx::surface_depth_format2::z24s8_float:
// TODO, requires separate aspect transfer for reading
return{ ::gl::texture::type::uint_24_8, ::gl::texture::format::depth_stencil, ::gl::texture::internal_format::depth32f_stencil8 };
return{ ::gl::texture::type::float32_uint8, ::gl::texture::format::depth_stencil, ::gl::texture::internal_format::depth32f_stencil8 };
default:
fmt::throw_exception("Unsupported depth format 0x%x" HERE, static_cast<u32>(depth_format));
@ -468,14 +467,12 @@ void gl::render_target::load_memory(gl::command_context& cmd)
// TODO: MSAA support
if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]]
{
gl::upload_texture(id(), gcm_format, surface_width, surface_height, 1, 1,
false, rsx::texture_dimension_extended::texture_dimension_2d, { subres });
gl::upload_texture(this, gcm_format, false, { subres });
}
else
{
auto tmp = std::make_unique<gl::texture>(GL_TEXTURE_2D, subres.width_in_block, subres.height_in_block, 1, 1, static_cast<GLenum>(get_internal_format()));
gl::upload_texture(tmp->id(), gcm_format, surface_width, surface_height, 1, 1,
false, rsx::texture_dimension_extended::texture_dimension_2d, { subres });
gl::upload_texture(tmp.get(), gcm_format, false, { subres });
gl::g_hw_blitter->scale_image(cmd, tmp.get(), this,
{ 0, 0, subres.width_in_block, subres.height_in_block },

View File

@ -454,6 +454,161 @@ namespace gl
fmt::throw_exception("Unknown format 0x%x" HERE, texture_format);
}
// Selects the byte-swapping compute job matching the element size of `pack_info`.
// Returns nullptr when no swap is requested or the elements are single bytes;
// throws for element sizes other than 1, 2 or 4.
cs_shuffle_base* get_trivial_transform_job(const pixel_buffer_layout& pack_info)
{
	// Nothing to do without a byte swap, and single bytes have nothing to swap
	if (!pack_info.swap_bytes || pack_info.size == 1)
	{
		return nullptr;
	}

	if (pack_info.size == 2)
	{
		return get_compute_task<gl::cs_shuffle_16>();
	}

	if (pack_info.size == 4)
	{
		return get_compute_task<gl::cs_shuffle_32>();
	}

	fmt::throw_exception("Unsupported format");
}
// Packs `src_region` of mip level `src_level` of `src` into `dst` via the pixel-pack binding
// and then runs whatever compute conversion `pack_info` calls for.
// `dst` is (re)created when it is empty or smaller than the required size.
// For GL_FLOAT and GL_FLOAT_32_UNSIGNED_INT_24_8_REV the converted data is written after the
// raw image and mem_info->memory_required is updated accordingly.
// Returns the byte offset (encoded as a pointer) at which the final data starts inside `dst`;
// nullptr means offset 0. Throws on an unrecognised depth/stencil type.
void* copy_image_to_buffer(const pixel_buffer_layout& pack_info, const gl::texture* src, gl::buffer* dst,
	const int src_level, const coord3u& src_region, image_memory_requirements* mem_info)
{
	// Makes sure dst is large enough, binds it as the pack target and reads the image into it
	const auto pack_image_into_dst = [&]()
	{
		u64 bytes_needed = mem_info->memory_required;
		if (!bytes_needed)
		{
			bytes_needed = mem_info->image_size_in_bytes;
		}

		const bool needs_allocation = !(*dst) || bytes_needed > static_cast<u64>(dst->size());
		if (needs_allocation)
		{
			if (*dst)
			{
				dst->remove();
			}

			dst->create(buffer::target::pixel_pack, bytes_needed, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
		}

		dst->bind(buffer::target::pixel_pack);

		const auto image_format = static_cast<texture::format>(pack_info.format);
		const auto image_type = static_cast<texture::type>(pack_info.type);
		src->copy_to(nullptr, image_format, image_type, src_level, src_region, {});
	};

	void* data_start = nullptr;

	// Color images and the plain integer depth layouts need at most a byte swap
	const bool trivial_layout = src->aspect() == image_aspect::color ||
		pack_info.type == GL_UNSIGNED_SHORT ||
		pack_info.type == GL_UNSIGNED_INT_24_8;

	if (trivial_layout)
	{
		pack_image_into_dst();

		if (auto swap_job = get_trivial_transform_job(pack_info))
		{
			swap_job->run(dst, static_cast<u32>(mem_info->image_size_in_bytes));
		}
	}
	else
	{
		switch (pack_info.type)
		{
		case GL_FLOAT:
		{
			verify(HERE), mem_info->image_size_in_bytes == (mem_info->image_size_in_texels * 4);

			// Raw image (4 bytes per texel) followed by the converted copy (2 bytes per texel)
			mem_info->memory_required = (mem_info->image_size_in_texels * 6);
			pack_image_into_dst();

			const u32 raw_size = static_cast<u32>(mem_info->image_size_in_bytes);
			get_compute_task<cs_fconvert_task<f32, f16, false, true>>()->run(dst, 0, raw_size, raw_size);
			data_start = reinterpret_cast<void*>(mem_info->image_size_in_bytes);
			break;
		}
		case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
		{
			verify(HERE), mem_info->image_size_in_bytes == (mem_info->image_size_in_texels * 8);

			// Raw image (8 bytes per texel) followed by the converted copy (4 bytes per texel)
			mem_info->memory_required = (mem_info->image_size_in_texels * 12);
			pack_image_into_dst();

			const u32 raw_size = static_cast<u32>(mem_info->image_size_in_bytes);
			const u32 texel_count = static_cast<u32>(mem_info->image_size_in_texels);
			get_compute_task<cs_shuffle_d32fx8_to_x8d24f>()->run(dst, 0, raw_size, texel_count);
			data_start = reinterpret_cast<void*>(mem_info->image_size_in_bytes);
			break;
		}
		default:
			fmt::throw_exception("Invalid depth/stencil type 0x%x" HERE, pack_info.type);
		}
	}

	glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT);
	return data_start;
}
// Uploads image data that already lives in the GPU buffer `src` into `dst`, running a
// compute conversion first when `unpack_info` requires one.
//   unpack_info: format/type/size/swap description of the data
//   src:         buffer holding the source data
//   dst:         destination texture
//   src_offset:  byte offset of the data inside `src`, encoded as a pointer
//   dst_level:   destination mip level
//   dst_region:  destination region
//   mem_info:    image size in texels/bytes; memory_required is overwritten on the
//                GL_FLOAT and GL_FLOAT_32_UNSIGNED_INT_24_8_REV paths
// Throws on an unrecognised depth/stencil type.
// The buffer used for the transfer is left bound to the pixel_unpack target unless it was
// the temporary scratch buffer, which is removed before returning.
void copy_buffer_to_image(const pixel_buffer_layout& unpack_info, gl::buffer* src, gl::texture* dst,
const void* src_offset, const int dst_level, const coord3u& dst_region, image_memory_requirements* mem_info)
{
buffer scratch_mem;
buffer* transfer_buf = src;
bool skip_barrier = false;
u32 in_offset = static_cast<u32>(reinterpret_cast<u64>(src_offset));
u32 out_offset = in_offset;
// Decides where the converted output goes: directly after the input inside `src` when
// `src` is large enough, otherwise in a freshly created scratch buffer that receives a
// copy of the input at offset 0. Updates in_offset / out_offset / transfer_buf.
auto initialize_scratch_mem = [&]()
{
// NOTE(review): on this early return out_offset still equals in_offset, so the
// conversion below reads and writes the same location even though the output is
// larger than the input - confirm whether the output was meant to go in front of
// in_offset instead.
if (in_offset >= mem_info->memory_required)
{
return;
}
const u64 max_mem = mem_info->memory_required + mem_info->image_size_in_bytes;
if ((max_mem + in_offset) <= static_cast<u64>(src->size()))
{
// Enough room in `src`: place the output right behind the input
out_offset = static_cast<u32>(in_offset + mem_info->image_size_in_bytes);
return;
}
// Not enough room: move the input to the start of a scratch buffer and convert there
scratch_mem.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
src->copy_to(&scratch_mem, in_offset, 0, mem_info->image_size_in_bytes);
in_offset = 0;
out_offset = static_cast<u32>(mem_info->image_size_in_bytes);
transfer_buf = &scratch_mem;
};
// Color images and the plain integer depth layouts are converted in place (byte swap only)
if (dst->aspect() == image_aspect::color ||
unpack_info.type == GL_UNSIGNED_SHORT ||
unpack_info.type == GL_UNSIGNED_INT_24_8)
{
if (auto job = get_trivial_transform_job(unpack_info))
{
job->run(src, static_cast<u32>(mem_info->image_size_in_bytes), in_offset);
}
else
{
// No compute job ran, so no barrier is issued before the upload
skip_barrier = true;
}
}
else if (unpack_info.type == GL_FLOAT)
{
// Converted output takes 4 bytes per texel
mem_info->memory_required = (mem_info->image_size_in_texels * 4);
initialize_scratch_mem();
get_compute_task<cs_fconvert_task<f16, f32, true, false>>()->run(transfer_buf, in_offset, static_cast<u32>(mem_info->image_size_in_bytes), out_offset);
}
else if (unpack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
{
// Converted output takes 8 bytes per texel
mem_info->memory_required = (mem_info->image_size_in_texels * 8);
initialize_scratch_mem();
get_compute_task<cs_shuffle_x8d24f_to_d32fx8>()->run(transfer_buf, in_offset, out_offset, static_cast<u32>(mem_info->image_size_in_texels));
}
else
{
fmt::throw_exception("Invalid depth/stencil type 0x%x" HERE, unpack_info.type);
}
if (!skip_barrier)
{
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
}
// Drop the SSBO binding, then source the texture upload from the transfer buffer;
// out_offset is passed as the "pointer", i.e. a byte offset into the bound unpack buffer
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
transfer_buf->bind(buffer::target::pixel_unpack);
dst->copy_from(reinterpret_cast<void*>(u64(out_offset)), static_cast<texture::format>(unpack_info.format),
static_cast<texture::type>(unpack_info.type), dst_level, dst_region, {});
if (scratch_mem) scratch_mem.remove();
}
gl::viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps,
rsx::texture_dimension_extended type)
{
@ -488,8 +643,9 @@ namespace gl
return new gl::viewable_image(target, width, height, depth, mipmaps, internal_format, format_class);
}
void fill_texture(rsx::texture_dimension_extended dim, u16 mipmap_count, int format, u16 width, u16 height, u16 depth,
const std::vector<rsx::subresource_layout> &input_layouts, bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<std::byte>& staging_buffer)
void fill_texture(texture* dst, int format,
const std::vector<rsx::subresource_layout> &input_layouts,
bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<std::byte>& staging_buffer)
{
rsx::texture_uploader_capabilities caps{ true, false, false, 4 };
@ -500,9 +656,11 @@ namespace gl
{
caps.supports_vtc_decoding = gl::get_driver_caps().vendor_NVIDIA;
unpack_settings.row_length(align(width, 4));
unpack_settings.row_length(align(dst->width(), 4));
unpack_settings.apply();
glBindTexture(static_cast<GLenum>(dst->get_target()), dst->id());
const GLsizei format_block_size = (format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16;
for (const rsx::subresource_layout& layout : input_layouts)
@ -510,27 +668,27 @@ namespace gl
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps);
const sizei image_size{ align(layout.width_in_texel, 4), align(layout.height_in_texel, 4) };
switch (dim)
switch (dst->get_target())
{
case rsx::texture_dimension_extended::texture_dimension_1d:
case texture::target::texture1D:
{
const GLsizei size = layout.width_in_block * format_block_size;
glCompressedTexSubImage1D(GL_TEXTURE_1D, layout.level, 0, image_size.width, gl_format, size, staging_buffer.data());
break;
}
case rsx::texture_dimension_extended::texture_dimension_2d:
case texture::target::texture2D:
{
const GLsizei size = layout.width_in_block * layout.height_in_block * format_block_size;
glCompressedTexSubImage2D(GL_TEXTURE_2D, layout.level, 0, 0, image_size.width, image_size.height, gl_format, size, staging_buffer.data());
break;
}
case rsx::texture_dimension_extended::texture_dimension_cubemap:
case texture::target::textureCUBE:
{
const GLsizei size = layout.width_in_block * layout.height_in_block * format_block_size;
glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + layout.layer, layout.level, 0, 0, image_size.width, image_size.height, gl_format, size, staging_buffer.data());
break;
}
case rsx::texture_dimension_extended::texture_dimension_3d:
case texture::target::texture3D:
{
const GLsizei size = layout.width_in_block * layout.height_in_block * layout.depth * format_block_size;
glCompressedTexSubImage3D(GL_TEXTURE_3D, layout.level, 0, 0, 0, image_size.width, image_size.height, layout.depth, gl_format, size, staging_buffer.data());
@ -547,9 +705,11 @@ namespace gl
else
{
bool apply_settings = true;
bool use_compute_transform = false;
buffer upload_scratch_mem, compute_scratch_mem;
image_memory_requirements mem_info;
pixel_buffer_layout mem_layout;
cs_shuffle_base* pixel_transform = nullptr;
gsl::span<gsl::byte> dst_buffer = staging_buffer;
void* out_pointer = staging_buffer.data();
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
@ -569,90 +729,72 @@ namespace gl
apply_settings = (gl_format == GL_RED);
caps.supports_byteswap = apply_settings;
break;
case GL_UNSIGNED_INT_24_8:
if (gl::get_driver_caps().ARB_compute_shader_supported)
{
apply_settings = false;
pixel_transform = gl::get_compute_task<cs_shuffle_x8d24_to_d24x8<true>>();
}
break;
case GL_FLOAT:
// TODO: Expand depth16f to depth32f
gl_type = GL_HALF_FLOAT;
break;
case GL_UNSIGNED_INT_24_8:
case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
// TODO: Expand depth24 to depth32f
gl_type = GL_UNSIGNED_INT_24_8;
break;
default:
mem_layout.format = gl_format;
mem_layout.type = gl_type;
mem_layout.swap_bytes = true;
mem_layout.size = 4;
use_compute_transform = true;
apply_settings = false;
break;
}
if (!apply_settings)
{
unpack_settings.apply();
}
if (pixel_transform)
if (use_compute_transform)
{
upload_scratch_mem.create(staging_buffer.size(), nullptr, buffer::memory_type::host_visible, GL_STREAM_DRAW);
compute_scratch_mem.create(staging_buffer.size(), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
compute_scratch_mem.create(std::max<GLsizeiptr>(512, staging_buffer.size() * 3), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
out_pointer = nullptr;
}
for (const rsx::subresource_layout& layout : input_layouts)
{
if (pixel_transform)
if (use_compute_transform)
{
const u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment);
const u64 row_pitch = rsx::align2<u64, u64>(layout.width_in_block * block_size_in_bytes, caps.alignment);
image_linear_size = row_pitch * layout.height_in_block * layout.depth;
dst_buffer = { reinterpret_cast<gsl::byte*>(upload_scratch_mem.map(buffer::access::write)), image_linear_size };
}
auto op = upload_texture_subresource(dst_buffer, layout, format, is_swizzled, caps);
if (pixel_transform)
// Define upload region
coord3u region;
region.x = 0;
region.y = 0;
region.z = layout.layer;
region.width = layout.width_in_texel;
region.height = layout.height_in_texel;
region.depth = layout.depth;
if (use_compute_transform)
{
// 1. Unmap buffer
upload_scratch_mem.unmap();
// 2. Execute compute job
// 2. Upload memory to GPU
upload_scratch_mem.copy_to(&compute_scratch_mem, 0, 0, image_linear_size);
pixel_transform->run(&compute_scratch_mem, image_linear_size);
// 3. Bind compute buffer as pixel unpack buffer
glMemoryBarrier(GL_PIXEL_UNPACK_BUFFER);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
compute_scratch_mem.bind(buffer::target::pixel_unpack);
// 3. Dispatch compute routines
mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes;
mem_info.image_size_in_bytes = image_linear_size;
mem_info.memory_required = 0;
copy_buffer_to_image(mem_layout, &compute_scratch_mem, dst, nullptr, layout.level, region, & mem_info);
}
else if (apply_settings)
else
{
unpack_settings.swap_bytes(op.require_swap);
unpack_settings.apply();
apply_settings = false;
}
if (apply_settings)
{
unpack_settings.swap_bytes(op.require_swap);
apply_settings = false;
}
switch (dim)
{
case rsx::texture_dimension_extended::texture_dimension_1d:
glTexSubImage1D(GL_TEXTURE_1D, layout.level, 0, layout.width_in_texel, gl_format, gl_type, out_pointer);
break;
case rsx::texture_dimension_extended::texture_dimension_2d:
glTexSubImage2D(GL_TEXTURE_2D, layout.level, 0, 0, layout.width_in_texel, layout.height_in_texel, gl_format, gl_type, out_pointer);
break;
case rsx::texture_dimension_extended::texture_dimension_cubemap:
glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + layout.layer, layout.level, 0, 0, layout.width_in_texel, layout.height_in_texel, gl_format, gl_type, out_pointer);
break;
case rsx::texture_dimension_extended::texture_dimension_3d:
glTexSubImage3D(GL_TEXTURE_3D, layout.layer, 0, 0, 0, layout.width_in_texel, layout.height_in_texel, depth, gl_format, gl_type, out_pointer);
break;
default:
ASSUME(0);
fmt::throw_exception("Unreachable" HERE);
dst->copy_from(out_pointer, static_cast<texture::format>(gl_format), static_cast<texture::type>(gl_type), layout.level, region, unpack_settings);
}
}
if (pixel_transform)
if (use_compute_transform)
{
upload_scratch_mem.remove();
compute_scratch_mem.remove();
@ -693,41 +835,18 @@ namespace gl
return remap_values;
}
// Uploads the given subresources into a texture: sizes a CPU staging buffer, looks up the
// GL format/type pair for `gcm_format` and hands everything to fill_texture().
// NOTE(review): this listing appears to be a rendered diff in which the pre-change and
// post-change versions of this function are interleaved (two signatures, two pitch/size
// computations, two fill_texture calls) - confirm against the real file before editing.
void upload_texture(GLuint id, u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, bool is_swizzled, rsx::texture_dimension_extended type,
const std::vector<rsx::subresource_layout>& subresources_layout)
void upload_texture(texture* dst, u32 gcm_format, bool is_swizzled, const std::vector<rsx::subresource_layout>& subresources_layout)
{
// Map the RSX texture dimension to the matching GL target
GLenum target;
switch (type)
{
case rsx::texture_dimension_extended::texture_dimension_1d:
target = GL_TEXTURE_1D;
break;
case rsx::texture_dimension_extended::texture_dimension_2d:
target = GL_TEXTURE_2D;
break;
case rsx::texture_dimension_extended::texture_dimension_3d:
target = GL_TEXTURE_3D;
break;
case rsx::texture_dimension_extended::texture_dimension_cubemap:
target = GL_TEXTURE_CUBE_MAP;
break;
}
glBindTexture(target, id);
glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1);
// The rest of sampler state is now handled by sampler state objects
// Calculate staging buffer size
const u32 aligned_pitch = align<u32>(width * rsx::get_format_block_size_in_bytes(gcm_format), 4);
size_t texture_data_sz = depth * height * aligned_pitch;
// Row pitch rounded up to a multiple of 4 bytes, times rows and slices
const u32 aligned_pitch = align<u32>(dst->pitch(), 4);
size_t texture_data_sz = dst->depth() * dst->height() * aligned_pitch;
std::vector<std::byte> data_upload_buf(texture_data_sz);
// TODO: GL drivers support byteswapping and this should be used instead of doing so manually
// get_format_type() yields a tuple-like value: element 0 is the GL format, element 1 the GL type
const auto format_type = get_format_type(gcm_format);
const GLenum gl_format = std::get<0>(format_type);
const GLenum gl_type = std::get<1>(format_type);
fill_texture(type, mipmaps, gcm_format, width, height, depth, subresources_layout, is_swizzled, gl_format, gl_type, data_upload_buf);
fill_texture(dst, gcm_format, subresources_layout, is_swizzled, gl_format, gl_type, data_upload_buf);
}
u32 get_format_texel_width(GLenum format)
@ -821,111 +940,12 @@ namespace gl
return false;
}
// Picks the byte-swapping compute job for the element size described by `pack_info`.
// Returns nullptr when no swap is requested or the elements are one byte wide;
// throws for element sizes other than 1, 2 or 4.
cs_shuffle_base* get_trivial_transform_job(const pixel_buffer_layout& pack_info)
{
	const bool nothing_to_swap = !pack_info.swap_bytes || pack_info.size == 1;
	if (nothing_to_swap)
	{
		return nullptr;
	}

	if (pack_info.size == 2)
	{
		return gl::get_compute_task<gl::cs_shuffle_16>();
	}

	if (pack_info.size == 4)
	{
		return gl::get_compute_task<gl::cs_shuffle_32>();
	}

	fmt::throw_exception("Unsupported format");
}
// Chooses the compute job to run after reading an image with the given aspect into a buffer.
// Returns nullptr when no job applies (including the float layouts still marked TODO);
// throws on an aspect mask other than color, depth or depth|stencil.
cs_shuffle_base* get_image_to_buffer_job(const pixel_buffer_layout& pack_info, u32 aspect_mask)
{
	if (aspect_mask == image_aspect::color)
	{
		return get_trivial_transform_job(pack_info);
	}

	if (aspect_mask == image_aspect::depth)
	{
		// TODO: D16F
		return (pack_info.type == GL_FLOAT) ? nullptr : get_trivial_transform_job(pack_info);
	}

	if (aspect_mask == (image_aspect::depth | image_aspect::stencil))
	{
		verify(HERE), pack_info.swap_bytes;

		if (pack_info.type != GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
		{
			return gl::get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<true>>();
		}

		// TODO: D24FX8
		return nullptr;
	}

	fmt::throw_exception("Invalid aspect mask 0x%x" HERE, aspect_mask);
}
// Chooses the compute job to run on buffer data before it is uploaded to an image with the
// given aspect. Returns nullptr when no job applies (including the float layouts still
// marked TODO); throws on an aspect mask other than color, depth or depth|stencil.
cs_shuffle_base* get_buffer_to_image_job(const pixel_buffer_layout& unpack_info, u32 aspect_mask)
{
	if (aspect_mask == image_aspect::color)
	{
		return get_trivial_transform_job(unpack_info);
	}

	if (aspect_mask == image_aspect::depth)
	{
		// TODO: D16F
		return (unpack_info.type == GL_FLOAT) ? nullptr : get_trivial_transform_job(unpack_info);
	}

	if (aspect_mask == (image_aspect::depth | image_aspect::stencil))
	{
		verify(HERE), unpack_info.swap_bytes;

		if (unpack_info.type != GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
		{
			return gl::get_compute_task<gl::cs_shuffle_x8d24_to_d24x8<true>>();
		}

		// TODO: D24FX8
		return nullptr;
	}

	fmt::throw_exception("Invalid aspect mask 0x%x" HERE, aspect_mask);
}
void copy_typeless(texture * dst, const texture * src, const coord3u& dst_region, const coord3u& src_region)
{
const u32 src_mem = src->pitch() * src_region.height;
const u32 dst_mem = dst->pitch() * dst_region.height;
auto max_mem = std::max(src_mem, dst_mem);
if (!g_typeless_transfer_buffer || max_mem > g_typeless_transfer_buffer.size())
{
if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove();
g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
}
const auto src_bpp = src->pitch() / src->width();
const auto dst_bpp = dst->pitch() / dst->width();
image_memory_requirements src_mem = { src_region.width * src_region.height, src_region.width * src_bpp * src_region.height, 0ull };
image_memory_requirements dst_mem = { dst_region.width * dst_region.height, dst_region.width * dst_bpp * dst_region.height, 0ull };
const auto& caps = gl::get_driver_caps();
auto pack_info = get_format_type(src);
@ -954,54 +974,31 @@ namespace gl
}
// Start pack operation
g_typeless_transfer_buffer.bind(buffer::target::pixel_pack);
void* transfer_offset = nullptr;
if (caps.ARB_compute_shader_supported) [[likely]]
{
// Raw copy
src->copy_to(nullptr, static_cast<texture::format>(pack_info.format), static_cast<texture::type>(pack_info.type), src_region, {});
}
else
{
pixel_pack_settings pack_settings{};
pack_settings.swap_bytes(pack_info.swap_bytes);
src->copy_to(nullptr, static_cast<texture::format>(pack_info.format), static_cast<texture::type>(pack_info.type), src_region, pack_settings);
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
// Start unpack operation
pixel_unpack_settings unpack_settings{};
if (caps.ARB_compute_shader_supported) [[likely]]
{
auto src_transform = get_image_to_buffer_job(pack_info, src->aspect());
auto dst_transform = get_buffer_to_image_job(unpack_info, dst->aspect());
if (src->aspect() == gl::image_aspect::color && dst->aspect() == gl::image_aspect::color)
// Apply transformation
bool skip_transform = false;
if ((src->aspect() | dst->aspect()) == gl::image_aspect::color)
{
if (src_transform == dst_transform)
{
src_transform = dst_transform = nullptr;
}
else if (src_transform && dst_transform)
{
src_transform = gl::get_compute_task<cs_shuffle_32_16>();
dst_transform = nullptr;
}
skip_transform = (pack_info.format == unpack_info.format &&
pack_info.type == unpack_info.type &&
pack_info.swap_bytes == unpack_info.swap_bytes &&
pack_info.size == unpack_info.size);
}
const auto job_length = std::min(src_mem, dst_mem);
if (src_transform)
if (skip_transform) [[likely]]
{
src_transform->run(&g_typeless_transfer_buffer, job_length);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT);
}
const bool old_swap_bytes = pack_info.swap_bytes;
pack_info.swap_bytes = false;
if (dst_transform)
copy_image_to_buffer(pack_info, src, &g_typeless_transfer_buffer, 0, src_region, &src_mem);
pack_info.swap_bytes = old_swap_bytes;
}
else
{
dst_transform->run(&g_typeless_transfer_buffer, job_length);
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
void* data_ptr = copy_image_to_buffer(pack_info, src, &g_typeless_transfer_buffer, 0, src_region, &src_mem);
copy_buffer_to_image(unpack_info, &g_typeless_transfer_buffer, dst, data_ptr, 0, dst_region, &dst_mem);
}
// NOTE: glBindBufferRange also binds the buffer to the old-school target.
@ -1009,12 +1006,33 @@ namespace gl
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
}
else
{
const u64 max_mem = std::max(src_mem.image_size_in_bytes, dst_mem.image_size_in_bytes);
if (!g_typeless_transfer_buffer || max_mem > static_cast<u64>(g_typeless_transfer_buffer.size()))
{
if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove();
g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
}
pixel_pack_settings pack_settings{};
pack_settings.swap_bytes(pack_info.swap_bytes);
g_typeless_transfer_buffer.bind(buffer::target::pixel_pack);
src->copy_to(nullptr, static_cast<texture::format>(pack_info.format), static_cast<texture::type>(pack_info.type), 0, src_region, pack_settings);
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
// Start unpack operation
pixel_unpack_settings unpack_settings{};
if (!caps.ARB_compute_shader_supported) [[unlikely]]
{
unpack_settings.swap_bytes(unpack_info.swap_bytes);
}
g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack);
dst->copy_from(nullptr, static_cast<texture::format>(unpack_info.format), static_cast<texture::type>(unpack_info.type), dst_region, unpack_settings);
dst->copy_from(transfer_offset, static_cast<texture::format>(unpack_info.format), static_cast<texture::type>(unpack_info.type), 0, dst_region, unpack_settings);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);
}

View File

@ -21,6 +21,13 @@ namespace gl
bool swap_bytes;
};
// Size bookkeeping for an image region that is moved through a buffer.
// Callers brace-initialise this positionally, so the member order is significant.
struct image_memory_requirements
{
// Number of texels in the region (callers fill this with width * height)
u64 image_size_in_texels;
// Size of the region in bytes (callers fill this with bytes-per-row * height)
u64 image_size_in_bytes;
// NOTE(review): callers initialise this to 0 before passing the struct on;
// presumably the transfer routines write back the scratch memory they need - confirm
u64 memory_required;
};
GLenum get_target(rsx::texture_dimension_extended type);
GLenum get_sized_internal_format(u32 texture_format);
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format);
@ -35,16 +42,13 @@ namespace gl
void copy_typeless(texture* dst, const texture* src, const coord3u& dst_region, const coord3u& src_region);
void copy_typeless(texture* dst, const texture* src);
/**
* is_swizzled - determines whether input bytes are in morton order
* subresources_layout - descriptor of the mipmap levels in memory
* decoded_remap - two vectors; the first one contains the index to read, e.g. if v[0] = 1 then component 0 [A] in the texture should read as component 1 [R]
* - layout of vector is in A-R-G-B
* - second vector contains overrides to force the value to either 0 or 1 instead of reading from texture
* static_state - set up the texture without consideration for sampler state (useful for vertex textures which have no real sampler state on RSX)
*/
void upload_texture(GLuint id, u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, bool is_swizzled, rsx::texture_dimension_extended type,
const std::vector<rsx::subresource_layout>& subresources_layout);
// Transfers a region of an image into a buffer using the layout given by pack_info.
// pack_info  - format/type/size/swap_bytes describing the data written to the buffer
// src        - image to read from
// dst        - buffer that receives the data
// src_level  - presumably the mip level of src to read - TODO confirm
// src_region - region of src to transfer
// mem_info   - size information for the region, filled in by the caller
// The returned pointer is treated by callers as an offset into dst: it is forwarded
// as src_offset to copy_buffer_to_image and cast to an integer offset for buffer copies.
void* copy_image_to_buffer(const pixel_buffer_layout& pack_info, const gl::texture* src, gl::buffer* dst,
const int src_level, const coord3u& src_region, image_memory_requirements* mem_info);
// Transfers buffer contents into a region of an image using the layout given by unpack_info.
// unpack_info - format/type/size/swap_bytes describing the data held in the buffer
// src         - buffer to read from
// dst         - image that receives the data
// src_offset  - callers pass the value returned by copy_image_to_buffer;
//               presumably an offset into src rather than a real address - TODO confirm
// dst_level   - presumably the mip level of dst to write - TODO confirm
// dst_region  - region of dst to fill
// mem_info    - size information for the region, filled in by the caller
void copy_buffer_to_image(const pixel_buffer_layout& unpack_info, gl::buffer* src, gl::texture* dst,
const void* src_offset, const int dst_level, const coord3u& dst_region, image_memory_requirements* mem_info);
void upload_texture(texture* dst, u32 gcm_format, bool is_swizzled, const std::vector<rsx::subresource_layout>& subresources_layout);
class sampler_state
{

View File

@ -15,7 +15,6 @@
#include "GLRenderTargets.h"
#include "GLOverlays.h"
#include "GLTexture.h"
#include "GLCompute.h"
#include "../Common/TextureUtils.h"
#include "../Common/texture_cache.h"
@ -163,38 +162,39 @@ namespace gl
pack_unpack_swap_bytes = format_info.swap_bytes;
}
real_pitch = src->pitch();
rsx_pitch = pitch;
bool use_driver_pixel_transform = true;
if (get_driver_caps().ARB_compute_shader_supported) [[likely]]
{
if (src->aspect() & image_aspect::stencil)
if (src->aspect() & image_aspect::depth)
{
buffer scratch_mem;
scratch_mem.create(buffer::target::pixel_pack, pbo.size(), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
scratch_mem.bind();
pixel_pack_settings pack_settings;
pack_settings.alignment(1);
src->copy_to(nullptr, format, type, pack_settings);
// Invoke compute
if (auto error = glGetError(); !error) [[likely]]
{
cs_shuffle_base * job;
if (pack_unpack_swap_bytes)
{
job = get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<true>>();
}
else
{
job = get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<false>>();
}
pixel_buffer_layout pack_info{};
image_memory_requirements mem_info{};
const auto job_length = src->pitch() * src->height();
job->run(&scratch_mem, job_length);
pack_info.format = static_cast<GLenum>(format);
pack_info.type = static_cast<GLenum>(type);
pack_info.size = (src->aspect() & image_aspect::stencil) ? 4 : 2;
pack_info.swap_bytes = true;
mem_info.image_size_in_texels = src->width() * src->height();
mem_info.image_size_in_bytes = src->pitch() * src->height();
mem_info.memory_required = 0;
void* out_offset = copy_image_to_buffer(pack_info, src, &scratch_mem, 0, { {}, src->size3D() }, &mem_info);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
scratch_mem.copy_to(&pbo, 0, 0, job_length);
real_pitch = pack_info.size * src->width();
const u64 data_length = pack_info.size * mem_info.image_size_in_texels;
scratch_mem.copy_to(&pbo, reinterpret_cast<u64>(out_offset), 0, data_length);
}
else
{
@ -222,9 +222,6 @@ namespace gl
src->copy_to(nullptr, format, type, pack_settings);
}
real_pitch = src->pitch();
rsx_pitch = pitch;
if (auto error = glGetError())
{
if (error == GL_OUT_OF_MEMORY && ::gl::get_driver_caps().vendor_AMD)
@ -561,7 +558,7 @@ namespace gl
sized_internal_fmt = gl::get_sized_internal_format(gcm_format);
}
std::unique_ptr<gl::texture> dst = std::make_unique<gl::viewable_image>(dst_type, width, height, depth, mipmaps, sized_internal_fmt);
std::unique_ptr<gl::texture> dst = std::make_unique<gl::viewable_image>(dst_type, width, height, depth, mipmaps, sized_internal_fmt, rsx::classify_format(gcm_format));
if (copy)
{
@ -939,8 +936,7 @@ namespace gl
auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, input_swizzled,
rsx::texture_create_flags::default_component_order);
gl::upload_texture(section->get_raw_texture()->id(), gcm_format, width, height, depth, mipmaps,
input_swizzled, type, subresource_layout);
gl::upload_texture(section->get_raw_texture(), gcm_format, input_swizzled, subresource_layout);
section->last_write_tag = rsx::get_shared_tag();
return section;