mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 10:42:36 +01:00
gl: Overhaul upload and download routines for textures to go through shared image_to_buffer and buffer_to_image routines.
- This automatically adds support for depth float textures as well
This commit is contained in:
parent
85dd1b4ea9
commit
85e5b077f7
@ -110,7 +110,7 @@ namespace gl
|
||||
u32 m_data_length = 0;
|
||||
u32 kernel_size = 1;
|
||||
|
||||
std::string uniforms, variables, work_kernel, loop_advance, suffix;
|
||||
std::string uniforms, variables, work_kernel, loop_advance, suffix, method_declarations;
|
||||
|
||||
cs_shuffle_base()
|
||||
{
|
||||
@ -146,10 +146,8 @@ namespace gl
|
||||
"#define bswap_u16_u32(bits) (bits & 0xFFFF) << 16 | (bits & 0xFFFF0000) >> 16\n"
|
||||
"\n"
|
||||
"// Depth format conversions\n"
|
||||
"#define d24x8_to_x8d24(bits) (bits << 8) | (bits >> 24)\n"
|
||||
"#define d24x8_to_x8d24_swapped(bits) bswap_u32(d24x8_to_x8d24(bits))\n"
|
||||
"#define x8d24_to_d24x8(bits) (bits >> 8) | (bits << 24)\n"
|
||||
"#define x8d24_to_d24x8_swapped(bits) x8d24_to_d24x8(bswap_u32(bits))\n"
|
||||
"#define d24f_to_f32(bits) (bits << 7)\n"
|
||||
"#define f32_to_d24f(bits) (bits >> 7)\n"
|
||||
"\n"
|
||||
"uint linear_invocation_id()\n"
|
||||
"{\n"
|
||||
@ -157,6 +155,7 @@ namespace gl
|
||||
" return (gl_GlobalInvocationID.y * size_in_x) + gl_GlobalInvocationID.x;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"%md"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" uint invocation_id = linear_invocation_id();\n"
|
||||
@ -173,6 +172,7 @@ namespace gl
|
||||
{ "%vars", variables },
|
||||
{ "%f", function_name },
|
||||
{ "%ub", uniforms },
|
||||
{ "%md", method_declarations }
|
||||
};
|
||||
|
||||
m_src = fmt::replace_all(m_src, syntax_replace);
|
||||
@ -265,35 +265,229 @@ namespace gl
|
||||
}
|
||||
};
|
||||
|
||||
template<bool _SwapBytes = false>
|
||||
struct cs_shuffle_d24x8_to_x8d24 : cs_shuffle_base
|
||||
struct cs_shuffle_d32fx8_to_x8d24f : cs_shuffle_base
|
||||
{
|
||||
cs_shuffle_d24x8_to_x8d24()
|
||||
u32 m_ssbo_length = 0;
|
||||
|
||||
cs_shuffle_d32fx8_to_x8d24f()
|
||||
{
|
||||
if constexpr (_SwapBytes)
|
||||
{
|
||||
cs_shuffle_base::build("d24x8_to_x8d24_swapped");
|
||||
}
|
||||
else
|
||||
{
|
||||
cs_shuffle_base::build("d24x8_to_x8d24");
|
||||
}
|
||||
uniforms = "uniform uint in_ptr, out_ptr;\n";
|
||||
|
||||
variables =
|
||||
" uint in_offset = in_ptr >> 2;\n"
|
||||
" uint out_offset = out_ptr >> 2;\n"
|
||||
" uint depth, stencil;\n";
|
||||
|
||||
work_kernel =
|
||||
" depth = data[index * 2 + in_offset];\n"
|
||||
" stencil = data[index * 2 + (in_offset + 1)] & 0xFFu;\n"
|
||||
" value = f32_to_d24f(depth) << 8;\n"
|
||||
" value |= stencil;\n"
|
||||
" data[index + out_ptr] = bswap_u32(value);\n";
|
||||
|
||||
cs_shuffle_base::build("");
|
||||
}
|
||||
|
||||
void bind_resources() override
|
||||
{
|
||||
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
|
||||
}
|
||||
|
||||
void run(const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
|
||||
{
|
||||
u32 data_offset;
|
||||
if (src_offset > dst_offset)
|
||||
{
|
||||
data_offset = dst_offset;
|
||||
m_ssbo_length = (src_offset + num_texels * 8) - data_offset;
|
||||
}
|
||||
else
|
||||
{
|
||||
data_offset = src_offset;
|
||||
m_ssbo_length = (dst_offset + num_texels * 4) - data_offset;
|
||||
}
|
||||
|
||||
m_program.uniforms["in_ptr"] = src_offset - data_offset;
|
||||
m_program.uniforms["out_ptr"] = dst_offset - data_offset;
|
||||
cs_shuffle_base::run(data, num_texels * 4, data_offset);
|
||||
}
|
||||
};
|
||||
|
||||
template<bool _SwapBytes = false>
|
||||
struct cs_shuffle_x8d24_to_d24x8 : cs_shuffle_base
|
||||
struct cs_shuffle_x8d24f_to_d32fx8 : cs_shuffle_base
|
||||
{
|
||||
cs_shuffle_x8d24_to_d24x8()
|
||||
u32 m_ssbo_length = 0;
|
||||
|
||||
cs_shuffle_x8d24f_to_d32fx8()
|
||||
{
|
||||
if constexpr (_SwapBytes)
|
||||
{
|
||||
cs_shuffle_base::build("x8d24_to_d24x8_swapped");
|
||||
}
|
||||
else
|
||||
{
|
||||
cs_shuffle_base::build("x8d24_to_d24x8");
|
||||
}
|
||||
uniforms = "uniform uint texel_count, in_ptr, out_ptr;\n";
|
||||
|
||||
variables =
|
||||
" uint in_offset = in_ptr >> 2;\n"
|
||||
" uint out_offset = out_ptr >> 2;\n"
|
||||
" uint depth, stencil;\n";
|
||||
|
||||
work_kernel =
|
||||
" value = data[index + in_offset];\n"
|
||||
" value = bswap_u32(value);\n"
|
||||
" stencil = (value & 0xFFu);\n"
|
||||
" depth = (value >> 8);\n"
|
||||
" data[index * 2 + out_offset] = d24f_to_f32(depth);\n"
|
||||
" data[index * 2 + (out_offset + 1)] = stencil;\n";
|
||||
|
||||
cs_shuffle_base::build("");
|
||||
}
|
||||
|
||||
void bind_resources() override
|
||||
{
|
||||
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
|
||||
}
|
||||
|
||||
void run(const gl::buffer* data, u32 src_offset, u32 dst_offset, u32 num_texels)
|
||||
{
|
||||
u32 data_offset;
|
||||
if (src_offset > dst_offset)
|
||||
{
|
||||
data_offset = dst_offset;
|
||||
m_ssbo_length = (src_offset + num_texels * 4) - data_offset;
|
||||
}
|
||||
else
|
||||
{
|
||||
data_offset = src_offset;
|
||||
m_ssbo_length = (dst_offset + num_texels * 8) - data_offset;
|
||||
}
|
||||
|
||||
m_program.uniforms["in_ptr"] = src_offset - data_offset;
|
||||
m_program.uniforms["out_ptr"] = dst_offset - data_offset;
|
||||
cs_shuffle_base::run(data, num_texels * 4, data_offset);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<typename From, typename To, bool _SwapSrc = false, bool _SwapDst = false>
|
||||
struct cs_fconvert_task : cs_shuffle_base
|
||||
{
|
||||
u32 m_ssbo_length = 0;
|
||||
|
||||
void declare_f16_expansion()
|
||||
{
|
||||
method_declarations +=
|
||||
"uvec2 unpack_e4m12_pack16(const in uint value)\n"
|
||||
"{\n"
|
||||
" uvec2 result = uvec2(bitfieldExtract(value, 0, 16), bitfieldExtract(value, 16, 16));\n"
|
||||
" result <<= 11;\n"
|
||||
" result += (120 << 23);\n"
|
||||
" return result;\n"
|
||||
"}\n\n";
|
||||
}
|
||||
|
||||
void declare_f16_contraction()
|
||||
{
|
||||
method_declarations +=
|
||||
"uint pack_e4m12_pack16(const in uvec2 value)\n"
|
||||
"{\n"
|
||||
" uvec2 result = (value - (120 << 23)) >> 11;\n"
|
||||
" return (result.x & 0xFFFF) | (result.y << 16);\n"
|
||||
"}\n\n";
|
||||
}
|
||||
|
||||
cs_fconvert_task()
|
||||
{
|
||||
uniforms =
|
||||
"uniform uint data_length_in_bytes, in_ptr, out_ptr;\n";
|
||||
|
||||
variables =
|
||||
" uint block_length = data_length_in_bytes >> 2;\n"
|
||||
" uint in_offset = in_ptr >> 2;\n"
|
||||
" uint out_offset = out_ptr >> 2;\n"
|
||||
" uvec4 tmp;\n";
|
||||
|
||||
work_kernel =
|
||||
" if (index >= block_length)\n"
|
||||
" return;\n";
|
||||
|
||||
if constexpr (sizeof(From) == 4)
|
||||
{
|
||||
static_assert(sizeof(To) == 2);
|
||||
declare_f16_contraction();
|
||||
|
||||
work_kernel +=
|
||||
" const uint src_offset = (index * 2) + in_offset;\n"
|
||||
" const uint dst_offset = index + out_offset;\n"
|
||||
" tmp.x = data[src_offset];\n"
|
||||
" tmp.y = data[src_offset + 1];\n";
|
||||
|
||||
if constexpr (_SwapSrc)
|
||||
{
|
||||
work_kernel +=
|
||||
" tmp = bswap_u32(tmp);\n";
|
||||
}
|
||||
|
||||
// Convert
|
||||
work_kernel += " tmp.z = pack_e4m12_pack16(tmp.xy);\n";
|
||||
|
||||
if constexpr (_SwapDst)
|
||||
{
|
||||
work_kernel += " tmp.z = bswap_u16(tmp.z);\n";
|
||||
}
|
||||
|
||||
work_kernel += " data[dst_offset] = tmp.z;\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(sizeof(To) == 4);
|
||||
declare_f16_expansion();
|
||||
|
||||
work_kernel +=
|
||||
" const uint src_offset = index + in_offset;\n"
|
||||
" const uint dst_offset = (index * 2) + out_offset;\n"
|
||||
" tmp.x = data[src_offset];\n";
|
||||
|
||||
if constexpr (_SwapSrc)
|
||||
{
|
||||
work_kernel +=
|
||||
" tmp.x = bswap_u16(tmp.x);\n";
|
||||
}
|
||||
|
||||
// Convert
|
||||
work_kernel += " tmp.yz = unpack_e4m12_pack16(tmp.x);\n";
|
||||
|
||||
if constexpr (_SwapDst)
|
||||
{
|
||||
work_kernel += " tmp.yz = bswap_u32(tmp.yz);\n";
|
||||
}
|
||||
|
||||
work_kernel +=
|
||||
" data[dst_offset] = tmp.y;\n"
|
||||
" data[dst_offset + 1] = tmp.z;\n";
|
||||
}
|
||||
|
||||
cs_shuffle_base::build("");
|
||||
}
|
||||
|
||||
void bind_resources() override
|
||||
{
|
||||
m_data->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), m_data_offset, m_ssbo_length);
|
||||
}
|
||||
|
||||
void run(const gl::buffer* data, u32 src_offset, u32 src_length, u32 dst_offset)
|
||||
{
|
||||
u32 data_offset;
|
||||
if (src_offset > dst_offset)
|
||||
{
|
||||
m_ssbo_length = (src_offset + src_length) - dst_offset;
|
||||
data_offset = dst_offset;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_ssbo_length = (dst_offset - src_offset) + (src_length / sizeof(From)) * sizeof(To);
|
||||
data_offset = src_offset;
|
||||
}
|
||||
|
||||
m_program.uniforms["data_length_in_bytes"] = src_length;
|
||||
m_program.uniforms["in_ptr"] = src_offset - data_offset;
|
||||
m_program.uniforms["out_ptr"] = dst_offset - data_offset;
|
||||
|
||||
cs_shuffle_base::run(data, src_length, data_offset);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1479,14 +1479,6 @@ namespace gl
|
||||
|
||||
enum class internal_format
|
||||
{
|
||||
r = GL_RED,
|
||||
rg = GL_RG,
|
||||
rgb = GL_RGB,
|
||||
rgba = GL_RGBA,
|
||||
|
||||
bgr = GL_BGR,
|
||||
bgra = GL_BGRA,
|
||||
|
||||
stencil8 = GL_STENCIL_INDEX8,
|
||||
depth16 = GL_DEPTH_COMPONENT16,
|
||||
depth32f = GL_DEPTH_COMPONENT32F,
|
||||
@ -1821,7 +1813,7 @@ namespace gl
|
||||
return m_component_layout;
|
||||
}
|
||||
|
||||
void copy_from(const void* src, texture::format format, texture::type type, const coord3u region, const pixel_unpack_settings& pixel_settings)
|
||||
void copy_from(const void* src, texture::format format, texture::type type, int level, const coord3u region, const pixel_unpack_settings& pixel_settings)
|
||||
{
|
||||
pixel_settings.apply();
|
||||
|
||||
@ -1829,25 +1821,25 @@ namespace gl
|
||||
{
|
||||
case GL_TEXTURE_1D:
|
||||
{
|
||||
DSA_CALL(TextureSubImage1D, m_id, GL_TEXTURE_1D, 0, region.x, region.width, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
|
||||
DSA_CALL(TextureSubImage1D, m_id, GL_TEXTURE_1D, level, region.x, region.width, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
|
||||
break;
|
||||
}
|
||||
case GL_TEXTURE_2D:
|
||||
{
|
||||
DSA_CALL(TextureSubImage2D, m_id, GL_TEXTURE_2D, 0, region.x, region.y, region.width, region.height, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
|
||||
DSA_CALL(TextureSubImage2D, m_id, GL_TEXTURE_2D, level, region.x, region.y, region.width, region.height, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
|
||||
break;
|
||||
}
|
||||
case GL_TEXTURE_3D:
|
||||
case GL_TEXTURE_2D_ARRAY:
|
||||
{
|
||||
DSA_CALL(TextureSubImage3D, m_id, target_, 0, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
|
||||
DSA_CALL(TextureSubImage3D, m_id, target_, level, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
|
||||
break;
|
||||
}
|
||||
case GL_TEXTURE_CUBE_MAP:
|
||||
{
|
||||
if (get_driver_caps().ARB_dsa_supported)
|
||||
{
|
||||
glTextureSubImage3D(m_id, 0, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
|
||||
glTextureSubImage3D(m_id, level, region.x, region.y, region.z, region.width, region.height, region.depth, static_cast<GLenum>(format), static_cast<GLenum>(type), src);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1856,7 +1848,7 @@ namespace gl
|
||||
const auto end = std::min(6u, region.z + region.depth);
|
||||
for (unsigned face = region.z; face < end; ++face)
|
||||
{
|
||||
glTextureSubImage2DEXT(m_id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, 0, region.x, region.y, region.width, region.height, static_cast<GLenum>(format), static_cast<GLenum>(type), ptr);
|
||||
glTextureSubImage2DEXT(m_id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, level, region.x, region.y, region.width, region.height, static_cast<GLenum>(format), static_cast<GLenum>(type), ptr);
|
||||
ptr += (region.width * region.height * 4); //TODO
|
||||
}
|
||||
}
|
||||
@ -1868,7 +1860,7 @@ namespace gl
|
||||
void copy_from(const void* src, texture::format format, texture::type type, const pixel_unpack_settings& pixel_settings)
|
||||
{
|
||||
const coord3u region = { {}, size3D() };
|
||||
copy_from(src, format, type, region, pixel_settings);
|
||||
copy_from(src, format, type, 0, region, pixel_settings);
|
||||
}
|
||||
|
||||
void copy_from(buffer &buf, u32 gl_format_type, u32 offset, u32 length)
|
||||
@ -1884,7 +1876,7 @@ namespace gl
|
||||
copy_from(*view.value(), view.format(), view.offset(), view.range());
|
||||
}
|
||||
|
||||
void copy_to(void* dst, texture::format format, texture::type type, const coord3u& region, const pixel_pack_settings& pixel_settings) const
|
||||
void copy_to(void* dst, texture::format format, texture::type type, int level, const coord3u& region, const pixel_pack_settings& pixel_settings) const
|
||||
{
|
||||
pixel_settings.apply();
|
||||
const auto& caps = get_driver_caps();
|
||||
@ -1893,13 +1885,13 @@ namespace gl
|
||||
region.width == m_width && region.height == m_height && region.depth == m_depth)
|
||||
{
|
||||
if (caps.ARB_dsa_supported)
|
||||
glGetTextureImage(m_id, 0, static_cast<GLenum>(format), static_cast<GLenum>(type), INT32_MAX, dst);
|
||||
glGetTextureImage(m_id, level, static_cast<GLenum>(format), static_cast<GLenum>(type), INT32_MAX, dst);
|
||||
else
|
||||
glGetTextureImageEXT(m_id, static_cast<GLenum>(m_target), 0, static_cast<GLenum>(format), static_cast<GLenum>(type), dst);
|
||||
glGetTextureImageEXT(m_id, static_cast<GLenum>(m_target), level, static_cast<GLenum>(format), static_cast<GLenum>(type), dst);
|
||||
}
|
||||
else if (caps.ARB_dsa_supported)
|
||||
{
|
||||
glGetTextureSubImage(m_id, 0, region.x, region.y, region.z, region.width, region.height, region.depth,
|
||||
glGetTextureSubImage(m_id, level, region.x, region.y, region.z, region.width, region.height, region.depth,
|
||||
static_cast<GLenum>(format), static_cast<GLenum>(type), INT32_MAX, dst);
|
||||
}
|
||||
else
|
||||
@ -1907,18 +1899,18 @@ namespace gl
|
||||
// Worst case scenario. For some reason, EXT_dsa does not have glGetTextureSubImage
|
||||
const auto target_ = static_cast<GLenum>(m_target);
|
||||
texture tmp{ target_, region.width, region.height, region.depth, 1, static_cast<GLenum>(m_internal_format) };
|
||||
glCopyImageSubData(m_id, target_, 0, region.x, region.y, region.z, tmp.id(), target_, 0, 0, 0, 0,
|
||||
glCopyImageSubData(m_id, target_, level, region.x, region.y, region.z, tmp.id(), target_, 0, 0, 0, 0,
|
||||
region.width, region.height, region.depth);
|
||||
|
||||
const coord3u region2 = { {0, 0, 0}, region.size };
|
||||
tmp.copy_to(dst, format, type, region2, pixel_settings);
|
||||
tmp.copy_to(dst, format, type, 0, region2, pixel_settings);
|
||||
}
|
||||
}
|
||||
|
||||
void copy_to(void* dst, texture::format format, texture::type type, const pixel_pack_settings& pixel_settings) const
|
||||
{
|
||||
const coord3u region = { {}, size3D() };
|
||||
copy_to(dst, format, type, region, pixel_settings);
|
||||
copy_to(dst, format, type, 0, region, pixel_settings);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -73,7 +73,7 @@ depth_format rsx::internals::surface_depth_format_to_gl(rsx::surface_depth_forma
|
||||
case rsx::surface_depth_format2::z16_uint:
|
||||
return{ ::gl::texture::type::ushort, ::gl::texture::format::depth, ::gl::texture::internal_format::depth16 };
|
||||
case rsx::surface_depth_format2::z16_float:
|
||||
return{ ::gl::texture::type::f16, ::gl::texture::format::depth, ::gl::texture::internal_format::depth32f };
|
||||
return{ ::gl::texture::type::f32, ::gl::texture::format::depth, ::gl::texture::internal_format::depth32f };
|
||||
|
||||
case rsx::surface_depth_format2::z24s8_uint:
|
||||
if (g_cfg.video.force_high_precision_z_buffer && ::gl::get_driver_caps().ARB_depth_buffer_float_supported)
|
||||
@ -81,8 +81,7 @@ depth_format rsx::internals::surface_depth_format_to_gl(rsx::surface_depth_forma
|
||||
else
|
||||
return{ ::gl::texture::type::uint_24_8, ::gl::texture::format::depth_stencil, ::gl::texture::internal_format::depth24_stencil8 };
|
||||
case rsx::surface_depth_format2::z24s8_float:
|
||||
// TODO, requires separate aspect transfer for reading
|
||||
return{ ::gl::texture::type::uint_24_8, ::gl::texture::format::depth_stencil, ::gl::texture::internal_format::depth32f_stencil8 };
|
||||
return{ ::gl::texture::type::float32_uint8, ::gl::texture::format::depth_stencil, ::gl::texture::internal_format::depth32f_stencil8 };
|
||||
|
||||
default:
|
||||
fmt::throw_exception("Unsupported depth format 0x%x" HERE, static_cast<u32>(depth_format));
|
||||
@ -468,14 +467,12 @@ void gl::render_target::load_memory(gl::command_context& cmd)
|
||||
// TODO: MSAA support
|
||||
if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]]
|
||||
{
|
||||
gl::upload_texture(id(), gcm_format, surface_width, surface_height, 1, 1,
|
||||
false, rsx::texture_dimension_extended::texture_dimension_2d, { subres });
|
||||
gl::upload_texture(this, gcm_format, false, { subres });
|
||||
}
|
||||
else
|
||||
{
|
||||
auto tmp = std::make_unique<gl::texture>(GL_TEXTURE_2D, subres.width_in_block, subres.height_in_block, 1, 1, static_cast<GLenum>(get_internal_format()));
|
||||
gl::upload_texture(tmp->id(), gcm_format, surface_width, surface_height, 1, 1,
|
||||
false, rsx::texture_dimension_extended::texture_dimension_2d, { subres });
|
||||
gl::upload_texture(tmp.get(), gcm_format, false, { subres });
|
||||
|
||||
gl::g_hw_blitter->scale_image(cmd, tmp.get(), this,
|
||||
{ 0, 0, subres.width_in_block, subres.height_in_block },
|
||||
|
@ -454,6 +454,161 @@ namespace gl
|
||||
fmt::throw_exception("Unknown format 0x%x" HERE, texture_format);
|
||||
}
|
||||
|
||||
/**
 * Picks the compute job that byte-swaps elements of the given layout.
 * @param pack_info Layout description; only swap_bytes and size are read
 * @return nullptr when no swap is requested or the element size is 1,
 *         otherwise the 16-bit or 32-bit shuffle job. Any other size
 *         raises "Unsupported format".
 */
cs_shuffle_base* get_trivial_transform_job(const pixel_buffer_layout& pack_info)
{
	// Nothing to do without a swap request, and single bytes have no byte order
	if (!pack_info.swap_bytes || pack_info.size == 1)
	{
		return nullptr;
	}

	if (pack_info.size == 2)
	{
		return get_compute_task<gl::cs_shuffle_16>();
	}

	if (pack_info.size == 4)
	{
		return get_compute_task<gl::cs_shuffle_32>();
	}

	fmt::throw_exception("Unsupported format");
}
|
||||
|
||||
/**
 * Reads a texture region into a buffer and converts it with a compute job where needed.
 *
 * @param pack_info  Requested format/type/byte-swap description of the output
 * @param src        Texture to read from
 * @param dst        Destination buffer; (re)created here when missing or too small
 * @param src_level  Mip level to read
 * @param src_region Region of the level to read
 * @param mem_info   Image sizes; memory_required is overwritten for the float depth paths
 * @return Byte offset (encoded as a pointer) of the final data inside dst:
 *         nullptr (offset 0) for the trivial path, image_size_in_bytes for the
 *         converting paths, which write their output after the raw image.
 */
void* copy_image_to_buffer(const pixel_buffer_layout& pack_info, const gl::texture* src, gl::buffer* dst,
	const int src_level, const coord3u& src_region, image_memory_requirements* mem_info)
{
	// Ensures dst is large enough, then reads the image into it at offset 0
	// through the pixel pack binding.
	const auto download_image = [&]()
	{
		const u64 bytes_needed = (mem_info->memory_required) ? mem_info->memory_required : mem_info->image_size_in_bytes;
		const bool must_allocate = !(*dst) || bytes_needed > static_cast<u64>(dst->size());

		if (must_allocate)
		{
			if (*dst)
			{
				dst->remove();
			}

			dst->create(buffer::target::pixel_pack, bytes_needed, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
		}

		dst->bind(buffer::target::pixel_pack);
		src->copy_to(nullptr, static_cast<texture::format>(pack_info.format), static_cast<texture::type>(pack_info.type), src_level, src_region, {});
	};

	void* data_offset = nullptr;

	// Color images and the integer depth types only ever need an optional byte swap
	const bool is_trivial_layout =
		src->aspect() == image_aspect::color ||
		pack_info.type == GL_UNSIGNED_SHORT ||
		pack_info.type == GL_UNSIGNED_INT_24_8;

	if (is_trivial_layout)
	{
		download_image();

		if (cs_shuffle_base* swap_job = get_trivial_transform_job(pack_info))
		{
			swap_job->run(dst, static_cast<u32>(mem_info->image_size_in_bytes));
		}
	}
	else if (pack_info.type == GL_FLOAT)
	{
		// 4 bytes per texel downloaded + 2 bytes per texel of converted output
		verify(HERE), mem_info->image_size_in_bytes == (mem_info->image_size_in_texels * 4);
		mem_info->memory_required = (mem_info->image_size_in_texels * 6);
		download_image();

		const u32 raw_size = static_cast<u32>(mem_info->image_size_in_bytes);
		get_compute_task<cs_fconvert_task<f32, f16, false, true>>()->run(dst, 0, raw_size, raw_size);

		data_offset = reinterpret_cast<void*>(mem_info->image_size_in_bytes);
	}
	else if (pack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
	{
		// 8 bytes per texel downloaded + 4 bytes per texel of converted output
		verify(HERE), mem_info->image_size_in_bytes == (mem_info->image_size_in_texels * 8);
		mem_info->memory_required = (mem_info->image_size_in_texels * 12);
		download_image();

		const u32 raw_size = static_cast<u32>(mem_info->image_size_in_bytes);
		const u32 texel_count = static_cast<u32>(mem_info->image_size_in_texels);
		get_compute_task<cs_shuffle_d32fx8_to_x8d24f>()->run(dst, 0, raw_size, texel_count);

		data_offset = reinterpret_cast<void*>(mem_info->image_size_in_bytes);
	}
	else
	{
		fmt::throw_exception("Invalid depth/stencil type 0x%x" HERE, pack_info.type);
	}

	glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT);
	return data_offset;
}
|
||||
|
||||
void copy_buffer_to_image(const pixel_buffer_layout& unpack_info, gl::buffer* src, gl::texture* dst,
|
||||
const void* src_offset, const int dst_level, const coord3u& dst_region, image_memory_requirements* mem_info)
|
||||
{
|
||||
buffer scratch_mem;
|
||||
buffer* transfer_buf = src;
|
||||
bool skip_barrier = false;
|
||||
u32 in_offset = static_cast<u32>(reinterpret_cast<u64>(src_offset));
|
||||
u32 out_offset = in_offset;
|
||||
|
||||
auto initialize_scratch_mem = [&]()
|
||||
{
|
||||
if (in_offset >= mem_info->memory_required)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 max_mem = mem_info->memory_required + mem_info->image_size_in_bytes;
|
||||
if ((max_mem + in_offset) <= static_cast<u64>(src->size()))
|
||||
{
|
||||
out_offset = static_cast<u32>(in_offset + mem_info->image_size_in_bytes);
|
||||
return;
|
||||
}
|
||||
|
||||
scratch_mem.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
src->copy_to(&scratch_mem, in_offset, 0, mem_info->image_size_in_bytes);
|
||||
|
||||
in_offset = 0;
|
||||
out_offset = static_cast<u32>(mem_info->image_size_in_bytes);
|
||||
transfer_buf = &scratch_mem;
|
||||
};
|
||||
|
||||
if (dst->aspect() == image_aspect::color ||
|
||||
unpack_info.type == GL_UNSIGNED_SHORT ||
|
||||
unpack_info.type == GL_UNSIGNED_INT_24_8)
|
||||
{
|
||||
if (auto job = get_trivial_transform_job(unpack_info))
|
||||
{
|
||||
job->run(src, static_cast<u32>(mem_info->image_size_in_bytes), in_offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
skip_barrier = true;
|
||||
}
|
||||
}
|
||||
else if (unpack_info.type == GL_FLOAT)
|
||||
{
|
||||
mem_info->memory_required = (mem_info->image_size_in_texels * 4);
|
||||
initialize_scratch_mem();
|
||||
get_compute_task<cs_fconvert_task<f16, f32, true, false>>()->run(transfer_buf, in_offset, static_cast<u32>(mem_info->image_size_in_bytes), out_offset);
|
||||
}
|
||||
else if (unpack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
|
||||
{
|
||||
mem_info->memory_required = (mem_info->image_size_in_texels * 8);
|
||||
initialize_scratch_mem();
|
||||
get_compute_task<cs_shuffle_x8d24f_to_d32fx8>()->run(transfer_buf, in_offset, out_offset, static_cast<u32>(mem_info->image_size_in_texels));
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::throw_exception("Invalid depth/stencil type 0x%x" HERE, unpack_info.type);
|
||||
}
|
||||
|
||||
if (!skip_barrier)
|
||||
{
|
||||
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
|
||||
transfer_buf->bind(buffer::target::pixel_unpack);
|
||||
|
||||
dst->copy_from(reinterpret_cast<void*>(u64(out_offset)), static_cast<texture::format>(unpack_info.format),
|
||||
static_cast<texture::type>(unpack_info.type), dst_level, dst_region, {});
|
||||
|
||||
if (scratch_mem) scratch_mem.remove();
|
||||
}
|
||||
|
||||
gl::viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps,
|
||||
rsx::texture_dimension_extended type)
|
||||
{
|
||||
@ -488,8 +643,9 @@ namespace gl
|
||||
return new gl::viewable_image(target, width, height, depth, mipmaps, internal_format, format_class);
|
||||
}
|
||||
|
||||
void fill_texture(rsx::texture_dimension_extended dim, u16 mipmap_count, int format, u16 width, u16 height, u16 depth,
|
||||
const std::vector<rsx::subresource_layout> &input_layouts, bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<std::byte>& staging_buffer)
|
||||
void fill_texture(texture* dst, int format,
|
||||
const std::vector<rsx::subresource_layout> &input_layouts,
|
||||
bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<std::byte>& staging_buffer)
|
||||
{
|
||||
rsx::texture_uploader_capabilities caps{ true, false, false, 4 };
|
||||
|
||||
@ -500,9 +656,11 @@ namespace gl
|
||||
{
|
||||
caps.supports_vtc_decoding = gl::get_driver_caps().vendor_NVIDIA;
|
||||
|
||||
unpack_settings.row_length(align(width, 4));
|
||||
unpack_settings.row_length(align(dst->width(), 4));
|
||||
unpack_settings.apply();
|
||||
|
||||
glBindTexture(static_cast<GLenum>(dst->get_target()), dst->id());
|
||||
|
||||
const GLsizei format_block_size = (format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16;
|
||||
|
||||
for (const rsx::subresource_layout& layout : input_layouts)
|
||||
@ -510,27 +668,27 @@ namespace gl
|
||||
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps);
|
||||
const sizei image_size{ align(layout.width_in_texel, 4), align(layout.height_in_texel, 4) };
|
||||
|
||||
switch (dim)
|
||||
switch (dst->get_target())
|
||||
{
|
||||
case rsx::texture_dimension_extended::texture_dimension_1d:
|
||||
case texture::target::texture1D:
|
||||
{
|
||||
const GLsizei size = layout.width_in_block * format_block_size;
|
||||
glCompressedTexSubImage1D(GL_TEXTURE_1D, layout.level, 0, image_size.width, gl_format, size, staging_buffer.data());
|
||||
break;
|
||||
}
|
||||
case rsx::texture_dimension_extended::texture_dimension_2d:
|
||||
case texture::target::texture2D:
|
||||
{
|
||||
const GLsizei size = layout.width_in_block * layout.height_in_block * format_block_size;
|
||||
glCompressedTexSubImage2D(GL_TEXTURE_2D, layout.level, 0, 0, image_size.width, image_size.height, gl_format, size, staging_buffer.data());
|
||||
break;
|
||||
}
|
||||
case rsx::texture_dimension_extended::texture_dimension_cubemap:
|
||||
case texture::target::textureCUBE:
|
||||
{
|
||||
const GLsizei size = layout.width_in_block * layout.height_in_block * format_block_size;
|
||||
glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + layout.layer, layout.level, 0, 0, image_size.width, image_size.height, gl_format, size, staging_buffer.data());
|
||||
break;
|
||||
}
|
||||
case rsx::texture_dimension_extended::texture_dimension_3d:
|
||||
case texture::target::texture3D:
|
||||
{
|
||||
const GLsizei size = layout.width_in_block * layout.height_in_block * layout.depth * format_block_size;
|
||||
glCompressedTexSubImage3D(GL_TEXTURE_3D, layout.level, 0, 0, 0, image_size.width, image_size.height, layout.depth, gl_format, size, staging_buffer.data());
|
||||
@ -547,9 +705,11 @@ namespace gl
|
||||
else
|
||||
{
|
||||
bool apply_settings = true;
|
||||
bool use_compute_transform = false;
|
||||
buffer upload_scratch_mem, compute_scratch_mem;
|
||||
image_memory_requirements mem_info;
|
||||
pixel_buffer_layout mem_layout;
|
||||
|
||||
cs_shuffle_base* pixel_transform = nullptr;
|
||||
gsl::span<gsl::byte> dst_buffer = staging_buffer;
|
||||
void* out_pointer = staging_buffer.data();
|
||||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||
@ -569,90 +729,72 @@ namespace gl
|
||||
apply_settings = (gl_format == GL_RED);
|
||||
caps.supports_byteswap = apply_settings;
|
||||
break;
|
||||
case GL_UNSIGNED_INT_24_8:
|
||||
if (gl::get_driver_caps().ARB_compute_shader_supported)
|
||||
{
|
||||
apply_settings = false;
|
||||
pixel_transform = gl::get_compute_task<cs_shuffle_x8d24_to_d24x8<true>>();
|
||||
}
|
||||
break;
|
||||
case GL_FLOAT:
|
||||
// TODO: Expand depth16f to depth32f
|
||||
gl_type = GL_HALF_FLOAT;
|
||||
break;
|
||||
case GL_UNSIGNED_INT_24_8:
|
||||
case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
|
||||
// TODO: Expand depth24 to depth32f
|
||||
gl_type = GL_UNSIGNED_INT_24_8;
|
||||
break;
|
||||
default:
|
||||
mem_layout.format = gl_format;
|
||||
mem_layout.type = gl_type;
|
||||
mem_layout.swap_bytes = true;
|
||||
mem_layout.size = 4;
|
||||
use_compute_transform = true;
|
||||
apply_settings = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!apply_settings)
|
||||
{
|
||||
unpack_settings.apply();
|
||||
}
|
||||
|
||||
if (pixel_transform)
|
||||
if (use_compute_transform)
|
||||
{
|
||||
upload_scratch_mem.create(staging_buffer.size(), nullptr, buffer::memory_type::host_visible, GL_STREAM_DRAW);
|
||||
compute_scratch_mem.create(staging_buffer.size(), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
compute_scratch_mem.create(std::max<GLsizeiptr>(512, staging_buffer.size() * 3), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
out_pointer = nullptr;
|
||||
}
|
||||
|
||||
for (const rsx::subresource_layout& layout : input_layouts)
|
||||
{
|
||||
if (pixel_transform)
|
||||
if (use_compute_transform)
|
||||
{
|
||||
const u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment);
|
||||
const u64 row_pitch = rsx::align2<u64, u64>(layout.width_in_block * block_size_in_bytes, caps.alignment);
|
||||
image_linear_size = row_pitch * layout.height_in_block * layout.depth;
|
||||
dst_buffer = { reinterpret_cast<gsl::byte*>(upload_scratch_mem.map(buffer::access::write)), image_linear_size };
|
||||
}
|
||||
|
||||
auto op = upload_texture_subresource(dst_buffer, layout, format, is_swizzled, caps);
|
||||
|
||||
if (pixel_transform)
|
||||
// Define upload region
|
||||
coord3u region;
|
||||
region.x = 0;
|
||||
region.y = 0;
|
||||
region.z = layout.layer;
|
||||
region.width = layout.width_in_texel;
|
||||
region.height = layout.height_in_texel;
|
||||
region.depth = layout.depth;
|
||||
|
||||
if (use_compute_transform)
|
||||
{
|
||||
// 1. Unmap buffer
|
||||
upload_scratch_mem.unmap();
|
||||
|
||||
// 2. Execute compute job
|
||||
// 2. Upload memory to GPU
|
||||
upload_scratch_mem.copy_to(&compute_scratch_mem, 0, 0, image_linear_size);
|
||||
pixel_transform->run(&compute_scratch_mem, image_linear_size);
|
||||
|
||||
// 3. Bind compute buffer as pixel unpack buffer
|
||||
glMemoryBarrier(GL_PIXEL_UNPACK_BUFFER);
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
|
||||
compute_scratch_mem.bind(buffer::target::pixel_unpack);
|
||||
// 3. Dispatch compute routines
|
||||
mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes;
|
||||
mem_info.image_size_in_bytes = image_linear_size;
|
||||
mem_info.memory_required = 0;
|
||||
copy_buffer_to_image(mem_layout, &compute_scratch_mem, dst, nullptr, layout.level, region, & mem_info);
|
||||
}
|
||||
else if (apply_settings)
|
||||
else
|
||||
{
|
||||
unpack_settings.swap_bytes(op.require_swap);
|
||||
unpack_settings.apply();
|
||||
apply_settings = false;
|
||||
}
|
||||
if (apply_settings)
|
||||
{
|
||||
unpack_settings.swap_bytes(op.require_swap);
|
||||
apply_settings = false;
|
||||
}
|
||||
|
||||
switch (dim)
|
||||
{
|
||||
case rsx::texture_dimension_extended::texture_dimension_1d:
|
||||
glTexSubImage1D(GL_TEXTURE_1D, layout.level, 0, layout.width_in_texel, gl_format, gl_type, out_pointer);
|
||||
break;
|
||||
case rsx::texture_dimension_extended::texture_dimension_2d:
|
||||
glTexSubImage2D(GL_TEXTURE_2D, layout.level, 0, 0, layout.width_in_texel, layout.height_in_texel, gl_format, gl_type, out_pointer);
|
||||
break;
|
||||
case rsx::texture_dimension_extended::texture_dimension_cubemap:
|
||||
glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + layout.layer, layout.level, 0, 0, layout.width_in_texel, layout.height_in_texel, gl_format, gl_type, out_pointer);
|
||||
break;
|
||||
case rsx::texture_dimension_extended::texture_dimension_3d:
|
||||
glTexSubImage3D(GL_TEXTURE_3D, layout.layer, 0, 0, 0, layout.width_in_texel, layout.height_in_texel, depth, gl_format, gl_type, out_pointer);
|
||||
break;
|
||||
default:
|
||||
ASSUME(0);
|
||||
fmt::throw_exception("Unreachable" HERE);
|
||||
dst->copy_from(out_pointer, static_cast<texture::format>(gl_format), static_cast<texture::type>(gl_type), layout.level, region, unpack_settings);
|
||||
}
|
||||
}
|
||||
|
||||
if (pixel_transform)
|
||||
if (use_compute_transform)
|
||||
{
|
||||
upload_scratch_mem.remove();
|
||||
compute_scratch_mem.remove();
|
||||
@ -693,41 +835,18 @@ namespace gl
|
||||
return remap_values;
|
||||
}
|
||||
|
||||
void upload_texture(GLuint id, u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, bool is_swizzled, rsx::texture_dimension_extended type,
|
||||
const std::vector<rsx::subresource_layout>& subresources_layout)
|
||||
void upload_texture(texture* dst, u32 gcm_format, bool is_swizzled, const std::vector<rsx::subresource_layout>& subresources_layout)
|
||||
{
|
||||
GLenum target;
|
||||
switch (type)
|
||||
{
|
||||
case rsx::texture_dimension_extended::texture_dimension_1d:
|
||||
target = GL_TEXTURE_1D;
|
||||
break;
|
||||
case rsx::texture_dimension_extended::texture_dimension_2d:
|
||||
target = GL_TEXTURE_2D;
|
||||
break;
|
||||
case rsx::texture_dimension_extended::texture_dimension_3d:
|
||||
target = GL_TEXTURE_3D;
|
||||
break;
|
||||
case rsx::texture_dimension_extended::texture_dimension_cubemap:
|
||||
target = GL_TEXTURE_CUBE_MAP;
|
||||
break;
|
||||
}
|
||||
|
||||
glBindTexture(target, id);
|
||||
glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0);
|
||||
glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1);
|
||||
// The rest of sampler state is now handled by sampler state objects
|
||||
|
||||
// Calculate staging buffer size
|
||||
const u32 aligned_pitch = align<u32>(width * rsx::get_format_block_size_in_bytes(gcm_format), 4);
|
||||
size_t texture_data_sz = depth * height * aligned_pitch;
|
||||
const u32 aligned_pitch = align<u32>(dst->pitch(), 4);
|
||||
size_t texture_data_sz = dst->depth() * dst->height() * aligned_pitch;
|
||||
std::vector<std::byte> data_upload_buf(texture_data_sz);
|
||||
|
||||
// TODO: GL drivers support byteswapping and this should be used instead of doing so manually
|
||||
const auto format_type = get_format_type(gcm_format);
|
||||
const GLenum gl_format = std::get<0>(format_type);
|
||||
const GLenum gl_type = std::get<1>(format_type);
|
||||
fill_texture(type, mipmaps, gcm_format, width, height, depth, subresources_layout, is_swizzled, gl_format, gl_type, data_upload_buf);
|
||||
fill_texture(dst, gcm_format, subresources_layout, is_swizzled, gl_format, gl_type, data_upload_buf);
|
||||
}
|
||||
|
||||
u32 get_format_texel_width(GLenum format)
|
||||
@ -821,111 +940,12 @@ namespace gl
|
||||
return false;
|
||||
}
|
||||
|
||||
// Picks the compute job used when the only transformation a pixel layout needs
// is the one requested through pack_info.swap_bytes.
// Returns nullptr when swap_bytes is not set or when pack_info.size is 1;
// returns the cs_shuffle_16 task for size 2 and the cs_shuffle_32 task for size 4.
// Any other size throws "Unsupported format".
// NOTE(review): cs_shuffle_16/cs_shuffle_32 are presumably the 16-bit and 32-bit
// byte-swap shaders - confirm against their definitions.
cs_shuffle_base* get_trivial_transform_job(const pixel_buffer_layout& pack_info)
|
||||
{
|
||||
if (!pack_info.swap_bytes)
|
||||
{
|
||||
return nullptr; // No swap requested, so no job is needed
|
||||
}
|
||||
|
||||
switch (pack_info.size)
|
||||
{
|
||||
case 1:
|
||||
return nullptr; // Size 1 gets no job even when swap_bytes is set
|
||||
case 2:
|
||||
return gl::get_compute_task<gl::cs_shuffle_16>();
|
||||
break; // Unreachable after the return above
|
||||
case 4:
|
||||
return gl::get_compute_task<gl::cs_shuffle_32>();
|
||||
break; // Unreachable after the return above
|
||||
default:
|
||||
fmt::throw_exception("Unsupported format");
|
||||
}
|
||||
}
|
||||
|
||||
// Chooses the compute job to run on data that was packed from an image into a buffer,
// based on the image aspect and the pack layout.
//   color           -> whatever get_trivial_transform_job(pack_info) selects
//   depth           -> nullptr for GL_FLOAT data, otherwise the trivial job
//   depth | stencil -> nullptr for GL_FLOAT_32_UNSIGNED_INT_24_8_REV data,
//                      otherwise the cs_shuffle_d24x8_to_x8d24<true> task
// Any other aspect mask throws "Invalid aspect mask".
// A nullptr return means no compute job was selected for this layout.
cs_shuffle_base* get_image_to_buffer_job(const pixel_buffer_layout& pack_info, u32 aspect_mask)
|
||||
{
|
||||
switch (aspect_mask)
|
||||
{
|
||||
case image_aspect::color:
|
||||
{
|
||||
return get_trivial_transform_job(pack_info);
|
||||
}
|
||||
case image_aspect::depth:
|
||||
{
|
||||
if (pack_info.type == GL_FLOAT)
|
||||
{
|
||||
// TODO: D16F
|
||||
return nullptr; // Float depth has no job yet (see TODO)
|
||||
}
|
||||
|
||||
return get_trivial_transform_job(pack_info);
|
||||
}
|
||||
case image_aspect::depth | image_aspect::stencil:
|
||||
{
|
||||
// NOTE(review): presumably asserts that swap_bytes is set for depth-stencil layouts - confirm verify() semantics
|
||||
verify(HERE), pack_info.swap_bytes;
|
||||
if (pack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
|
||||
{
|
||||
// TODO: D24FX8
|
||||
return nullptr; // Float depth-stencil has no job yet (see TODO)
|
||||
}
|
||||
|
||||
return gl::get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<true>>();
|
||||
}
|
||||
default:
|
||||
{
|
||||
fmt::throw_exception("Invalid aspect mask 0x%x" HERE, aspect_mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Chooses the compute job to run on buffer data before it is unpacked into an image,
// based on the image aspect and the unpack layout.
//   color           -> whatever get_trivial_transform_job(unpack_info) selects
//   depth           -> nullptr for GL_FLOAT data, otherwise the trivial job
//   depth | stencil -> nullptr for GL_FLOAT_32_UNSIGNED_INT_24_8_REV data,
//                      otherwise the cs_shuffle_x8d24_to_d24x8<true> task
// Any other aspect mask throws "Invalid aspect mask".
// A nullptr return means no compute job was selected for this layout.
cs_shuffle_base* get_buffer_to_image_job(const pixel_buffer_layout& unpack_info, u32 aspect_mask)
|
||||
{
|
||||
switch (aspect_mask)
|
||||
{
|
||||
case image_aspect::color:
|
||||
{
|
||||
return get_trivial_transform_job(unpack_info);
|
||||
}
|
||||
case image_aspect::depth:
|
||||
{
|
||||
if (unpack_info.type == GL_FLOAT)
|
||||
{
|
||||
// TODO: D16F
|
||||
return nullptr; // Float depth has no job yet (see TODO)
|
||||
}
|
||||
|
||||
return get_trivial_transform_job(unpack_info);
|
||||
}
|
||||
case image_aspect::depth | image_aspect::stencil:
|
||||
{
|
||||
// NOTE(review): presumably asserts that swap_bytes is set for depth-stencil layouts - confirm verify() semantics
|
||||
verify(HERE), unpack_info.swap_bytes;
|
||||
if (unpack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
|
||||
{
|
||||
// TODO: D24FX8
|
||||
return nullptr; // Float depth-stencil has no job yet (see TODO)
|
||||
}
|
||||
|
||||
return gl::get_compute_task<gl::cs_shuffle_x8d24_to_d24x8<true>>();
|
||||
}
|
||||
default:
|
||||
{
|
||||
fmt::throw_exception("Invalid aspect mask 0x%x" HERE, aspect_mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void copy_typeless(texture * dst, const texture * src, const coord3u& dst_region, const coord3u& src_region)
|
||||
{
|
||||
const u32 src_mem = src->pitch() * src_region.height;
|
||||
const u32 dst_mem = dst->pitch() * dst_region.height;
|
||||
|
||||
auto max_mem = std::max(src_mem, dst_mem);
|
||||
if (!g_typeless_transfer_buffer || max_mem > g_typeless_transfer_buffer.size())
|
||||
{
|
||||
if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove();
|
||||
g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
}
|
||||
const auto src_bpp = src->pitch() / src->width();
|
||||
const auto dst_bpp = dst->pitch() / dst->width();
|
||||
image_memory_requirements src_mem = { src_region.width * src_region.height, src_region.width * src_bpp * src_region.height, 0ull };
|
||||
image_memory_requirements dst_mem = { dst_region.width * dst_region.height, dst_region.width * dst_bpp * dst_region.height, 0ull };
|
||||
|
||||
const auto& caps = gl::get_driver_caps();
|
||||
auto pack_info = get_format_type(src);
|
||||
@ -954,54 +974,31 @@ namespace gl
|
||||
}
|
||||
|
||||
// Start pack operation
|
||||
g_typeless_transfer_buffer.bind(buffer::target::pixel_pack);
|
||||
|
||||
void* transfer_offset = nullptr;
|
||||
if (caps.ARB_compute_shader_supported) [[likely]]
|
||||
{
|
||||
// Raw copy
|
||||
src->copy_to(nullptr, static_cast<texture::format>(pack_info.format), static_cast<texture::type>(pack_info.type), src_region, {});
|
||||
}
|
||||
else
|
||||
{
|
||||
pixel_pack_settings pack_settings{};
|
||||
pack_settings.swap_bytes(pack_info.swap_bytes);
|
||||
src->copy_to(nullptr, static_cast<texture::format>(pack_info.format), static_cast<texture::type>(pack_info.type), src_region, pack_settings);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||
|
||||
// Start unpack operation
|
||||
pixel_unpack_settings unpack_settings{};
|
||||
|
||||
if (caps.ARB_compute_shader_supported) [[likely]]
|
||||
{
|
||||
auto src_transform = get_image_to_buffer_job(pack_info, src->aspect());
|
||||
auto dst_transform = get_buffer_to_image_job(unpack_info, dst->aspect());
|
||||
|
||||
if (src->aspect() == gl::image_aspect::color && dst->aspect() == gl::image_aspect::color)
|
||||
// Apply transformation
|
||||
bool skip_transform = false;
|
||||
if ((src->aspect() | dst->aspect()) == gl::image_aspect::color)
|
||||
{
|
||||
if (src_transform == dst_transform)
|
||||
{
|
||||
src_transform = dst_transform = nullptr;
|
||||
}
|
||||
else if (src_transform && dst_transform)
|
||||
{
|
||||
src_transform = gl::get_compute_task<cs_shuffle_32_16>();
|
||||
dst_transform = nullptr;
|
||||
}
|
||||
skip_transform = (pack_info.format == unpack_info.format &&
|
||||
pack_info.type == unpack_info.type &&
|
||||
pack_info.swap_bytes == unpack_info.swap_bytes &&
|
||||
pack_info.size == unpack_info.size);
|
||||
}
|
||||
|
||||
const auto job_length = std::min(src_mem, dst_mem);
|
||||
if (src_transform)
|
||||
if (skip_transform) [[likely]]
|
||||
{
|
||||
src_transform->run(&g_typeless_transfer_buffer, job_length);
|
||||
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT);
|
||||
}
|
||||
const bool old_swap_bytes = pack_info.swap_bytes;
|
||||
pack_info.swap_bytes = false;
|
||||
|
||||
if (dst_transform)
|
||||
copy_image_to_buffer(pack_info, src, &g_typeless_transfer_buffer, 0, src_region, &src_mem);
|
||||
pack_info.swap_bytes = old_swap_bytes;
|
||||
}
|
||||
else
|
||||
{
|
||||
dst_transform->run(&g_typeless_transfer_buffer, job_length);
|
||||
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
|
||||
void* data_ptr = copy_image_to_buffer(pack_info, src, &g_typeless_transfer_buffer, 0, src_region, &src_mem);
|
||||
copy_buffer_to_image(unpack_info, &g_typeless_transfer_buffer, dst, data_ptr, 0, dst_region, &dst_mem);
|
||||
}
|
||||
|
||||
// NOTE: glBindBufferRange also binds the buffer to the old-school target.
|
||||
@ -1009,12 +1006,33 @@ namespace gl
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
|
||||
}
|
||||
else
|
||||
{
|
||||
const u64 max_mem = std::max(src_mem.image_size_in_bytes, dst_mem.image_size_in_bytes);
|
||||
if (!g_typeless_transfer_buffer || max_mem > static_cast<u64>(g_typeless_transfer_buffer.size()))
|
||||
{
|
||||
if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove();
|
||||
g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
}
|
||||
|
||||
pixel_pack_settings pack_settings{};
|
||||
pack_settings.swap_bytes(pack_info.swap_bytes);
|
||||
|
||||
g_typeless_transfer_buffer.bind(buffer::target::pixel_pack);
|
||||
src->copy_to(nullptr, static_cast<texture::format>(pack_info.format), static_cast<texture::type>(pack_info.type), 0, src_region, pack_settings);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||
|
||||
// Start unpack operation
|
||||
pixel_unpack_settings unpack_settings{};
|
||||
|
||||
if (!caps.ARB_compute_shader_supported) [[unlikely]]
|
||||
{
|
||||
unpack_settings.swap_bytes(unpack_info.swap_bytes);
|
||||
}
|
||||
|
||||
g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack);
|
||||
dst->copy_from(nullptr, static_cast<texture::format>(unpack_info.format), static_cast<texture::type>(unpack_info.type), dst_region, unpack_settings);
|
||||
dst->copy_from(transfer_offset, static_cast<texture::format>(unpack_info.format), static_cast<texture::type>(unpack_info.type), 0, dst_region, unpack_settings);
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);
|
||||
}
|
||||
|
||||
|
@ -21,6 +21,13 @@ namespace gl
|
||||
bool swap_bytes;
|
||||
};
|
||||
|
||||
// Size description of an image region, passed by pointer to
// copy_image_to_buffer() and copy_buffer_to_image().
struct image_memory_requirements
|
||||
{
|
||||
u64 image_size_in_texels; // Number of texels in the region
|
||||
|
||||
u64 image_size_in_bytes; // Size of the region's data in bytes
|
||||
|
||||
u64 memory_required; // NOTE(review): presumably scratch memory needed for the transfer - confirm in the copy routines
|
||||
};
|
||||
|
||||
GLenum get_target(rsx::texture_dimension_extended type);
|
||||
GLenum get_sized_internal_format(u32 texture_format);
|
||||
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format);
|
||||
@ -35,16 +42,13 @@ namespace gl
|
||||
void copy_typeless(texture* dst, const texture* src, const coord3u& dst_region, const coord3u& src_region);
|
||||
void copy_typeless(texture* dst, const texture* src);
|
||||
|
||||
/**
|
||||
* is_swizzled - determines whether input bytes are in morton order
|
||||
* subresources_layout - descriptor of the mipmap levels in memory
|
||||
* decoded_remap - two vectors, first one contains index to read, e.g if v[0] = 1 then component 0[A] in the texture should read as component 1[R]
|
||||
* - layout of vector is in A-R-G-B
|
||||
* - second vector contains overrides to force the value to either 0 or 1 instead of reading from texture
|
||||
* static_state - set up the texture without consideration for sampler state (useful for vertex textures which have no real sampler state on RSX)
|
||||
*/
|
||||
void upload_texture(GLuint id, u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, bool is_swizzled, rsx::texture_dimension_extended type,
|
||||
const std::vector<rsx::subresource_layout>& subresources_layout);
|
||||
void* copy_image_to_buffer(const pixel_buffer_layout& pack_info, const gl::texture* src, gl::buffer* dst,
|
||||
const int src_level, const coord3u& src_region, image_memory_requirements* mem_info);
|
||||
|
||||
void copy_buffer_to_image(const pixel_buffer_layout& unpack_info, gl::buffer* src, gl::texture* dst,
|
||||
const void* src_offset, const int dst_level, const coord3u& dst_region, image_memory_requirements* mem_info);
|
||||
|
||||
void upload_texture(texture* dst, u32 gcm_format, bool is_swizzled, const std::vector<rsx::subresource_layout>& subresources_layout);
|
||||
|
||||
class sampler_state
|
||||
{
|
||||
|
@ -15,7 +15,6 @@
|
||||
#include "GLRenderTargets.h"
|
||||
#include "GLOverlays.h"
|
||||
#include "GLTexture.h"
|
||||
#include "GLCompute.h"
|
||||
#include "../Common/TextureUtils.h"
|
||||
#include "../Common/texture_cache.h"
|
||||
|
||||
@ -163,38 +162,39 @@ namespace gl
|
||||
pack_unpack_swap_bytes = format_info.swap_bytes;
|
||||
}
|
||||
|
||||
real_pitch = src->pitch();
|
||||
rsx_pitch = pitch;
|
||||
|
||||
bool use_driver_pixel_transform = true;
|
||||
if (get_driver_caps().ARB_compute_shader_supported) [[likely]]
|
||||
{
|
||||
if (src->aspect() & image_aspect::stencil)
|
||||
if (src->aspect() & image_aspect::depth)
|
||||
{
|
||||
buffer scratch_mem;
|
||||
scratch_mem.create(buffer::target::pixel_pack, pbo.size(), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
scratch_mem.bind();
|
||||
|
||||
pixel_pack_settings pack_settings;
|
||||
pack_settings.alignment(1);
|
||||
src->copy_to(nullptr, format, type, pack_settings);
|
||||
|
||||
// Invoke compute
|
||||
if (auto error = glGetError(); !error) [[likely]]
|
||||
{
|
||||
cs_shuffle_base * job;
|
||||
if (pack_unpack_swap_bytes)
|
||||
{
|
||||
job = get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<true>>();
|
||||
}
|
||||
else
|
||||
{
|
||||
job = get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<false>>();
|
||||
}
|
||||
pixel_buffer_layout pack_info{};
|
||||
image_memory_requirements mem_info{};
|
||||
|
||||
const auto job_length = src->pitch() * src->height();
|
||||
job->run(&scratch_mem, job_length);
|
||||
pack_info.format = static_cast<GLenum>(format);
|
||||
pack_info.type = static_cast<GLenum>(type);
|
||||
pack_info.size = (src->aspect() & image_aspect::stencil) ? 4 : 2;
|
||||
pack_info.swap_bytes = true;
|
||||
|
||||
mem_info.image_size_in_texels = src->width() * src->height();
|
||||
mem_info.image_size_in_bytes = src->pitch() * src->height();
|
||||
mem_info.memory_required = 0;
|
||||
|
||||
void* out_offset = copy_image_to_buffer(pack_info, src, &scratch_mem, 0, { {}, src->size3D() }, &mem_info);
|
||||
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
scratch_mem.copy_to(&pbo, 0, 0, job_length);
|
||||
|
||||
real_pitch = pack_info.size * src->width();
|
||||
const u64 data_length = pack_info.size * mem_info.image_size_in_texels;
|
||||
scratch_mem.copy_to(&pbo, reinterpret_cast<u64>(out_offset), 0, data_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -222,9 +222,6 @@ namespace gl
|
||||
src->copy_to(nullptr, format, type, pack_settings);
|
||||
}
|
||||
|
||||
real_pitch = src->pitch();
|
||||
rsx_pitch = pitch;
|
||||
|
||||
if (auto error = glGetError())
|
||||
{
|
||||
if (error == GL_OUT_OF_MEMORY && ::gl::get_driver_caps().vendor_AMD)
|
||||
@ -561,7 +558,7 @@ namespace gl
|
||||
sized_internal_fmt = gl::get_sized_internal_format(gcm_format);
|
||||
}
|
||||
|
||||
std::unique_ptr<gl::texture> dst = std::make_unique<gl::viewable_image>(dst_type, width, height, depth, mipmaps, sized_internal_fmt);
|
||||
std::unique_ptr<gl::texture> dst = std::make_unique<gl::viewable_image>(dst_type, width, height, depth, mipmaps, sized_internal_fmt, rsx::classify_format(gcm_format));
|
||||
|
||||
if (copy)
|
||||
{
|
||||
@ -939,8 +936,7 @@ namespace gl
|
||||
auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, input_swizzled,
|
||||
rsx::texture_create_flags::default_component_order);
|
||||
|
||||
gl::upload_texture(section->get_raw_texture()->id(), gcm_format, width, height, depth, mipmaps,
|
||||
input_swizzled, type, subresource_layout);
|
||||
gl::upload_texture(section->get_raw_texture(), gcm_format, input_swizzled, subresource_layout);
|
||||
|
||||
section->last_write_tag = rsx::get_shared_tag();
|
||||
return section;
|
||||
|
Loading…
Reference in New Issue
Block a user