rsx: Texture cache refactor

- gl: Add an execution state wrapper to keep state changes consistent. This also removes a lot of the 'cleanup' previously required by helper methods
- texture_cache: Make the execution context a mandatory argument, as it is required for all operations. This also removes many cases where the same argument had to be passed twice, once as a fixed parameter and again through the varargs
- Add explicit read/write barriers for framebuffer resources depending on
  usage. This allows operations such as optional memory initialization
  before a read
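
For orientation, the calling convention this refactor converges on is sketched below. This is a minimal, self-contained model with illustrative stand-in types (this simplified command_context and render_target are not the real rsx classes): the execution context becomes a mandatory first argument everywhere, and read_barrier/write_barrier select whether missing memory must be initialized before use.

// Minimal sketch of the convention introduced by this commit.
// All types are illustrative stand-ins, not the real rpcs3 classes.
#include <cstdio>

struct command_context {}; // stand-in for gl::command_context / vk::command_buffer

struct render_target
{
    bool dirty = true;            // surface has no usable contents yet
    void* old_contents = nullptr; // memory to inherit from a superseded surface

    // One barrier implementation; force_init selects the policy.
    void memory_barrier(command_context&, bool force_init)
    {
        if (!old_contents)
        {
            // A reader needs defined contents, so clear; a writer does not.
            if (dirty && force_init)
                std::puts("clearing surface before read");
            return;
        }
        std::puts("inheriting old contents");
    }

    // Sampling the surface requires initialized memory...
    void read_barrier(command_context& cmd) { memory_barrier(cmd, true); }
    // ...rendering to it only needs pending transfers resolved.
    void write_barrier(command_context& cmd) { memory_barrier(cmd, false); }
};

int main()
{
    command_context cmd; // mandatory execution context
    render_target rtt;
    rtt.read_barrier(cmd);  // bound as a texture: initialize if dirty
    rtt.write_barrier(cmd); // bound as a render target: skip initialization
}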
Authored by kd-11 on 2018-12-29 16:28:12 +03:00, committed by kd-11
parent 0f64583c7a
commit 2a62fa892b
17 changed files with 494 additions and 151 deletions

View File

@ -389,7 +389,7 @@ namespace rsx
*/
private:
template <typename ...Args>
void flush_set(thrashed_set& data, Args&&... extras)
void flush_set(commandbuffer_type& cmd, thrashed_set& data, Args&&... extras)
{
AUDIT(!data.flushed);
@ -411,11 +411,11 @@ namespace rsx
const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
if (surface->is_synchronized() && ROP_timestamp > surface->get_sync_timestamp())
{
surface->copy_texture(true, std::forward<Args>(extras)...);
surface->copy_texture(cmd, true, std::forward<Args>(extras)...);
}
}
surface->flush(std::forward<Args>(extras)...);
surface->flush(cmd, std::forward<Args>(extras)...);
// Exclude this region when flushing other sections that should not trample it
// If we overlap an excluded RO, set it as dirty
@ -676,7 +676,7 @@ namespace rsx
//Invalidate range base implementation
template <typename ...Args>
thrashed_set invalidate_range_impl_base(const address_range &fault_range_in, invalidation_cause cause, Args&&... extras)
thrashed_set invalidate_range_impl_base(commandbuffer_type& cmd, const address_range &fault_range_in, invalidation_cause cause, Args&&... extras)
{
#ifdef TEXTURE_CACHE_DEBUG
// Check that the cache has the correct protections
@ -840,7 +840,7 @@ namespace rsx
// or there is nothing to flush but we have something to unprotect
if (has_flushables && !cause.skip_flush())
{
flush_set(result, std::forward<Args>(extras)...);
flush_set(cmd, result, std::forward<Args>(extras)...);
}
unprotect_set(result);
@ -1113,7 +1113,7 @@ namespace rsx
}
template <typename ...FlushArgs, typename ...Args>
void lock_memory_region(image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, std::tuple<FlushArgs...>&& flush_extras, Args&&... extras)
void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, std::tuple<FlushArgs...>&& flush_extras, Args&&... extras)
{
AUDIT(g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer); // this method is only called when either WCB or WDB are enabled
@ -1134,7 +1134,7 @@ namespace rsx
{
// Invalidate sections from surface cache occupying same address range
std::apply(&texture_cache::invalidate_range_impl_base<FlushArgs...>, std::tuple_cat(
std::make_tuple(this, rsx_range, invalidation_cause::superseded_by_fbo),
std::forward_as_tuple(this, cmd, rsx_range, invalidation_cause::superseded_by_fbo),
std::forward<std::tuple<FlushArgs...> >(flush_extras)
));
}
@ -1261,7 +1261,7 @@ namespace rsx
}
template <typename ...Args>
thrashed_set invalidate_address(u32 address, invalidation_cause cause, Args&&... extras)
thrashed_set invalidate_address(commandbuffer_type& cmd, u32 address, invalidation_cause cause, Args&&... extras)
{
//Test before trying to acquire the lock
const auto range = page_for(address);
@ -1269,22 +1269,22 @@ namespace rsx
return{};
std::lock_guard lock(m_cache_mutex);
return invalidate_range_impl_base(range, cause, std::forward<Args>(extras)...);
return invalidate_range_impl_base(cmd, range, cause, std::forward<Args>(extras)...);
}
template <typename ...Args>
thrashed_set invalidate_range(const address_range &range, invalidation_cause cause, Args&&... extras)
thrashed_set invalidate_range(commandbuffer_type& cmd, const address_range &range, invalidation_cause cause, Args&&... extras)
{
//Test before trying to acquire the lock
if (!region_intersects_cache(range, !cause.is_read()))
return {};
std::lock_guard lock(m_cache_mutex);
return invalidate_range_impl_base(range, cause, std::forward<Args>(extras)...);
return invalidate_range_impl_base(cmd, range, cause, std::forward<Args>(extras)...);
}
template <typename ...Args>
bool flush_all(thrashed_set& data, Args&&... extras)
bool flush_all(commandbuffer_type& cmd, thrashed_set& data, Args&&... extras)
{
std::lock_guard lock(m_cache_mutex);
@ -1294,7 +1294,7 @@ namespace rsx
if (m_cache_update_tag.load(std::memory_order_consume) == data.cache_tag)
{
//1. Write memory to cpu side
flush_set(data, std::forward<Args>(extras)...);
flush_set(cmd, data, std::forward<Args>(extras)...);
//2. Release all obsolete sections
unprotect_set(data);
@ -1302,14 +1302,14 @@ namespace rsx
else
{
// The cache contents have changed between the two readings. This means the data held is useless
invalidate_range_impl_base(data.fault_range, data.cause.undefer(), std::forward<Args>(extras)...);
invalidate_range_impl_base(cmd, data.fault_range, data.cause.undefer(), std::forward<Args>(extras)...);
}
return true;
}
template <typename ...Args>
bool flush_if_cache_miss_likely(const address_range &range, Args&&... extras)
bool flush_if_cache_miss_likely(commandbuffer_type& cmd, const address_range &range, Args&&... extras)
{
u32 cur_flushes_this_frame = (m_flushes_this_frame + m_speculations_this_frame);
@ -1340,7 +1340,7 @@ namespace rsx
lock.upgrade();
region.copy_texture(false, std::forward<Args>(extras)...);
region.copy_texture(cmd, false, std::forward<Args>(extras)...);
result = true;
cur_flushes_this_frame++;
@ -1466,7 +1466,7 @@ namespace rsx
{
for (auto &section : overlapping)
{
section.surface->memory_barrier(cmd);
section.surface->read_barrier(cmd);
surfaces.push_back
({
@ -1504,7 +1504,7 @@ namespace rsx
u32 internal_height = tex_height;
get_native_dimensions(internal_width, internal_height, texptr);
texptr->memory_barrier(cmd);
texptr->read_barrier(cmd);
if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d &&
extended_dimension != rsx::texture_dimension_extended::texture_dimension_1d)
@ -1597,7 +1597,7 @@ namespace rsx
for (auto &section : overlapping)
{
section.surface->memory_barrier(cmd);
section.surface->read_barrier(cmd);
result.external_subresource_desc.sections_to_copy.push_back
({
@ -1713,7 +1713,7 @@ namespace rsx
else
{
m_rtts.invalidate_surface_address(texaddr, false);
invalidate_address(texaddr, invalidation_cause::read, std::forward<Args>(extras)...);
invalidate_address(cmd, texaddr, invalidation_cause::read, std::forward<Args>(extras)...);
}
}
@ -1729,7 +1729,7 @@ namespace rsx
else
{
m_rtts.invalidate_surface_address(texaddr, true);
invalidate_address(texaddr, invalidation_cause::read, std::forward<Args>(extras)...);
invalidate_address(cmd, texaddr, invalidation_cause::read, std::forward<Args>(extras)...);
}
}
}
@ -1751,7 +1751,7 @@ namespace rsx
if (!rsc.surface->test() && !m_rtts.address_is_bound(rsc.base_address, rsc.is_depth_surface))
{
m_rtts.invalidate_surface_address(rsc.base_address, rsc.is_depth_surface);
invalidate_address(rsc.base_address, invalidation_cause::read, std::forward<Args>(extras)...);
invalidate_address(cmd, rsc.base_address, invalidation_cause::read, std::forward<Args>(extras)...);
}
else if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d &&
extended_dimension != rsx::texture_dimension_extended::texture_dimension_1d)
@ -1868,7 +1868,7 @@ namespace rsx
lock.upgrade();
//Invalidate
invalidate_range_impl_base(tex_range, invalidation_cause::read, std::forward<Args>(extras)...);
invalidate_range_impl_base(cmd, tex_range, invalidation_cause::read, std::forward<Args>(extras)...);
//NOTE: SRGB correction is to be handled in the fragment shader; upload as linear RGB
return{ upload_image_from_cpu(cmd, tex_range, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format,
@ -1932,14 +1932,14 @@ namespace rsx
if (src_is_render_target && !src_subres.surface->test() && !m_rtts.address_is_bound(src_subres.base_address, src_subres.is_depth_surface))
{
m_rtts.invalidate_surface_address(src_subres.base_address, src_subres.is_depth_surface);
invalidate_address(src_subres.base_address, invalidation_cause::read, std::forward<Args>(extras)...);
invalidate_address(cmd, src_subres.base_address, invalidation_cause::read, std::forward<Args>(extras)...);
src_is_render_target = false;
}
if (dst_is_render_target && !dst_subres.surface->test() && !m_rtts.address_is_bound(dst_subres.base_address, dst_subres.is_depth_surface))
{
m_rtts.invalidate_surface_address(dst_subres.base_address, dst_subres.is_depth_surface);
invalidate_address(dst_subres.base_address, invalidation_cause::read, std::forward<Args>(extras)...);
invalidate_address(cmd, dst_subres.base_address, invalidation_cause::read, std::forward<Args>(extras)...);
dst_is_render_target = false;
}
@ -2005,8 +2005,8 @@ namespace rsx
const u32 memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height;
lock.upgrade();
invalidate_range_impl_base(address_range::start_length(src_address, memcpy_bytes_length), invalidation_cause::read, std::forward<Args>(extras)...);
invalidate_range_impl_base(address_range::start_length(dst_address, memcpy_bytes_length), invalidation_cause::write, std::forward<Args>(extras)...);
invalidate_range_impl_base(cmd, address_range::start_length(src_address, memcpy_bytes_length), invalidation_cause::read, std::forward<Args>(extras)...);
invalidate_range_impl_base(cmd, address_range::start_length(dst_address, memcpy_bytes_length), invalidation_cause::write, std::forward<Args>(extras)...);
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
return true;
}
@ -2140,7 +2140,7 @@ namespace rsx
lock.upgrade();
const auto rsx_range = address_range::start_length(src_address, src.pitch * src.slice_h);
invalidate_range_impl_base(rsx_range, invalidation_cause::read, std::forward<Args>(extras)...);
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::read, std::forward<Args>(extras)...);
const u16 pitch_in_block = src_is_argb8 ? src.pitch >> 2 : src.pitch >> 1;
std::vector<rsx_subresource_layout> subresource_layout;
@ -2228,7 +2228,7 @@ namespace rsx
lock.upgrade();
// Invalidate as the memory is not reusable now
invalidate_range_impl_base(cached_dest->get_section_range(), invalidation_cause::write, std::forward<Args>(extras)...);
invalidate_range_impl_base(cmd, cached_dest->get_section_range(), invalidation_cause::write, std::forward<Args>(extras)...);
AUDIT(!cached_dest->is_locked());
dest_texture = 0;
@ -2282,7 +2282,7 @@ namespace rsx
lock.upgrade();
const auto rsx_range = address_range::start_length(dst.rsx_address, section_length);
invalidate_range_impl_base(rsx_range, invalidation_cause::write, std::forward<Args>(extras)...);
invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward<Args>(extras)...);
const u16 pitch_in_block = dst_is_argb8 ? dst.pitch >> 2 : dst.pitch >> 1;
std::vector<rsx_subresource_layout> subresource_layout;
@ -2372,7 +2372,7 @@ namespace rsx
}
typeless_info.analyse();
blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit, typeless_info);
blitter.scale_image(cmd, vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit, typeless_info);
notify_surface_changed(dst.rsx_address);
blit_op_result result = true;
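
The trickiest mechanical change above is in lock_memory_region, where the fixed arguments (this, cmd, rsx_range, cause) are joined with the caller-supplied flush_extras tuple and dispatched through std::apply. A self-contained toy with plain stand-in types (the function and argument names here are invented for illustration) shows the same tuple_cat + forward_as_tuple pattern:

// Toy model of the std::apply dispatch used by lock_memory_region.
// Types and names are invented for illustration only.
#include <cstdio>
#include <tuple>
#include <utility>

void invalidate(int cmd, const char* range, int cause, float extra)
{
    std::printf("cmd=%d range=%s cause=%d extra=%.1f\n", cmd, range, cause, extra);
}

int main()
{
    int cmd = 1;
    std::tuple<float> flush_extras{ 2.5f };

    // Prepend the fixed arguments, then splice in the forwarded extras.
    std::apply(&invalidate, std::tuple_cat(
        std::forward_as_tuple(cmd, "fault_range", 7),
        std::move(flush_extras)));
}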

View File

@ -0,0 +1,246 @@
#pragma once
#include "Utilities/types.h"
#include "Utilities/geometry.h"
#include "OpenGL.h"
#include <unordered_map>
namespace gl
{
struct driver_state
{
const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001;
const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002;
const u32 DEPTH_RANGE_MIN = 0xFFFF0003;
const u32 DEPTH_RANGE_MAX = 0xFFFF0004;
std::unordered_map<GLenum, u32> properties = {};
std::unordered_map<GLenum, std::array<u32, 4>> indexed_properties = {};
bool enable(u32 test, GLenum cap)
{
auto found = properties.find(cap);
if (found != properties.end() && found->second == test)
return !!test;
properties[cap] = test;
if (test)
glEnable(cap);
else
glDisable(cap);
return !!test;
}
bool enablei(u32 test, GLenum cap, u32 index)
{
auto found = indexed_properties.find(cap);
const bool exists = found != indexed_properties.end();
if (!exists)
{
indexed_properties[cap] = {};
indexed_properties[cap][index] = test;
}
else
{
if (found->second[index] == test)
return !!test;
found->second[index] = test;
}
if (test)
glEnablei(cap, index);
else
glDisablei(cap, index);
return !!test;
}
inline bool test_property(GLenum property, u32 test) const
{
auto found = properties.find(property);
if (found == properties.end())
return false;
return (found->second == test);
}
void depth_func(GLenum func)
{
if (!test_property(GL_DEPTH_FUNC, func))
{
glDepthFunc(func);
properties[GL_DEPTH_FUNC] = func;
}
}
void depth_mask(GLboolean mask)
{
if (!test_property(GL_DEPTH_WRITEMASK, mask))
{
glDepthMask(mask);
properties[GL_DEPTH_WRITEMASK] = mask;
}
}
void clear_depth(GLfloat depth)
{
u32 value = (u32&)depth;
if (!test_property(GL_DEPTH_CLEAR_VALUE, value))
{
glClearDepth(depth);
properties[GL_DEPTH_CLEAR_VALUE] = value;
}
}
void stencil_mask(GLuint mask)
{
if (!test_property(GL_STENCIL_WRITEMASK, mask))
{
glStencilMask(mask);
properties[GL_STENCIL_WRITEMASK] = mask;
}
}
void clear_stencil(GLint stencil)
{
u32 value = (u32&)stencil;
if (!test_property(GL_STENCIL_CLEAR_VALUE, value))
{
glClearStencil(stencil);
properties[GL_STENCIL_CLEAR_VALUE] = value;
}
}
void color_mask(u32 mask)
{
if (!test_property(GL_COLOR_WRITEMASK, mask))
{
glColorMask(((mask & 0x10) ? 1 : 0), ((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0));
properties[GL_COLOR_WRITEMASK] = mask;
}
}
void color_mask(bool r, bool g, bool b, bool a)
{
u32 mask = 0;
if (r) mask |= 0x10;
if (g) mask |= 0x20;
if (b) mask |= 0x40;
if (a) mask |= 0x80;
color_mask(mask);
}
void clear_color(u8 r, u8 g, u8 b, u8 a)
{
u32 value = (u32)r | (u32)g << 8 | (u32)b << 16 | (u32)a << 24;
if (!test_property(GL_COLOR_CLEAR_VALUE, value))
{
glClearColor(r / 255.f, g / 255.f, b / 255.f, a / 255.f);
properties[GL_COLOR_CLEAR_VALUE] = value;
}
}
void clear_color(const color4f& color)
{
clear_color(u8(color.r * 255), u8(color.g * 255), u8(color.b * 255), u8(color.a * 255));
}
void depth_bounds(float min, float max)
{
u32 depth_min = (u32&)min;
u32 depth_max = (u32&)max;
if (!test_property(DEPTH_BOUNDS_MIN, depth_min) || !test_property(DEPTH_BOUNDS_MAX, depth_max))
{
glDepthBoundsEXT(min, max);
properties[DEPTH_BOUNDS_MIN] = depth_min;
properties[DEPTH_BOUNDS_MAX] = depth_max;
}
}
void depth_range(float min, float max)
{
u32 depth_min = (u32&)min;
u32 depth_max = (u32&)max;
if (!test_property(DEPTH_RANGE_MIN, depth_min) || !test_property(DEPTH_RANGE_MAX, depth_max))
{
glDepthRange(min, max);
properties[DEPTH_RANGE_MIN] = depth_min;
properties[DEPTH_RANGE_MAX] = depth_max;
}
}
void logic_op(GLenum op)
{
if (!test_property(GL_COLOR_LOGIC_OP, op))
{
glLogicOp(op);
properties[GL_COLOR_LOGIC_OP] = op;
}
}
void line_width(GLfloat width)
{
u32 value = (u32&)width;
if (!test_property(GL_LINE_WIDTH, value))
{
glLineWidth(width);
properties[GL_LINE_WIDTH] = value;
}
}
void front_face(GLenum face)
{
if (!test_property(GL_FRONT_FACE, face))
{
glFrontFace(face);
properties[GL_FRONT_FACE] = face;
}
}
void cull_face(GLenum mode)
{
if (!test_property(GL_CULL_FACE_MODE, mode))
{
glCullFace(mode);
properties[GL_CULL_FACE_MODE] = mode;
}
}
void polygon_offset(float factor, float units)
{
u32 _units = (u32&)units;
u32 _factor = (u32&)factor;
if (!test_property(GL_POLYGON_OFFSET_UNITS, _units) || !test_property(GL_POLYGON_OFFSET_FACTOR, _factor))
{
glPolygonOffset(factor, units);
properties[GL_POLYGON_OFFSET_UNITS] = _units;
properties[GL_POLYGON_OFFSET_FACTOR] = _factor;
}
}
};
struct command_context
{
driver_state* drv;
command_context()
: drv(nullptr)
{}
command_context(driver_state& drv_)
: drv(&drv_)
{}
};
}
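
As a brief usage note on the header above: driver_state memoizes the last value written for each GLenum key, so repeated calls with an unchanged value skip the GL call entirely. The fragment below is a hypothetical call sequence, not part of the diff, and assumes an active GL context:

// Hypothetical usage; assumes an active GL context.
gl::driver_state gl_state;
gl::command_context cmd{ gl_state };

gl_state.depth_func(GL_LEQUAL);            // cache miss: calls glDepthFunc
gl_state.depth_func(GL_LEQUAL);            // cache hit: no GL call issued
cmd.drv->enable(GL_TRUE, GL_SCISSOR_TEST); // cache miss: calls glEnable
cmd.drv->enable(GL_TRUE, GL_SCISSOR_TEST); // cache hit: no GL call issued

This is also why helper methods no longer need explicit state cleanup: every helper routes its state changes through the same cached driver_state, so subsequent draws simply re-assert their own state and the cache filters out the redundant calls.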

View File

@ -210,6 +210,7 @@ void GLGSRender::end()
}
};
gl::command_context cmd{ gl_state };
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
// Handle special memory barrier for ARGB8->D24S8 in an active DSV
@ -227,7 +228,6 @@ void GLGSRender::end()
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
std::lock_guard lock(m_sampler_mutex);
void* unused = nullptr;
bool update_framebuffer_sourced = false;
if (surface_store_tag != m_rtts.cache_tag)
@ -248,7 +248,7 @@ void GLGSRender::end()
if (rsx::method_registers.fragment_textures[i].enabled())
{
*sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.fragment_textures[i], m_rtts);
*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.fragment_textures[i], m_rtts);
if (m_textures_dirty[i])
m_fs_sampler_states[i].apply(rsx::method_registers.fragment_textures[i], fs_sampler_state[i].get());
@ -274,7 +274,7 @@ void GLGSRender::end()
if (rsx::method_registers.vertex_textures[i].enabled())
{
*sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.vertex_textures[i], m_rtts);
*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.vertex_textures[i], m_rtts);
if (m_vertex_textures_dirty[i])
m_vs_sampler_states[i].apply(rsx::method_registers.vertex_textures[i], vs_sampler_state[i].get());
@ -313,7 +313,6 @@ void GLGSRender::end()
//Bind textures and resolve external copy operations
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
void *unused = nullptr;
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
@ -329,7 +328,7 @@ void GLGSRender::end()
if (!view && sampler_state->external_subresource_desc.external_handle)
{
view = m_gl_texture_cache.create_temporary_subresource(unused, sampler_state->external_subresource_desc);
view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
}
}
@ -373,8 +372,7 @@ void GLGSRender::end()
}
else if (sampler_state->external_subresource_desc.external_handle)
{
void *unused = nullptr;
m_gl_texture_cache.create_temporary_subresource(unused, sampler_state->external_subresource_desc)->bind();
m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind();
}
else
{
@ -393,13 +391,13 @@ void GLGSRender::end()
{
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
if (ds) ds->memory_barrier();
if (ds) ds->write_barrier(cmd);
for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
surface->memory_barrier();
surface->write_barrier(cmd);
}
}
}
@ -1140,7 +1138,7 @@ void GLGSRender::clear_surface(u32 arg)
{
u8 clear_stencil = rsx::method_registers.stencil_clear_value();
gl_state.stencil_mask(rsx::method_registers.stencil_mask());
gl_state.stencil_mask(0xFF);
gl_state.clear_stencil(clear_stencil);
mask |= GLenum(gl::buffers::stencil);
@ -1807,7 +1805,9 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
const rsx::invalidation_cause cause =
is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write)
: (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read);
auto result = m_gl_texture_cache.invalidate_address(address, cause);
gl::command_context null_cmd;
auto result = m_gl_texture_cache.invalidate_address(null_cmd, address, cause);
if (!result.violation_handled)
return false;
@ -1831,7 +1831,8 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
void GLGSRender::on_invalidate_memory_range(const utils::address_range &range)
{
//Discard all memory in that range without bothering with writeback (Force it for strict?)
auto data = std::move(m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap));
gl::command_context cmd{ gl_state };
auto data = std::move(m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::unmap));
AUDIT(data.empty());
if (data.violation_handled)
@ -1856,7 +1857,8 @@ void GLGSRender::do_local_task(rsx::FIFO_state state)
{
if (q.processed) continue;
q.result = m_gl_texture_cache.flush_all(q.section_data);
gl::command_context cmd{ gl_state };
q.result = m_gl_texture_cache.flush_all(cmd, q.section_data);
q.processed = true;
}
}
@ -1902,7 +1904,8 @@ work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrash
bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
{
if (m_gl_texture_cache.blit(src, dst, interpolate, m_rtts))
gl::command_context cmd{ gl_state };
if (m_gl_texture_cache.blit(cmd, src, dst, interpolate, m_rtts))
{
m_samplers_dirty.store(true);
return true;

View File

@ -354,7 +354,7 @@ public:
private:
driver_state gl_state;
gl::driver_state gl_state;
// Return element to draw and in case of indexed draw index type and offset in index buffer
gl::vertex_upload_info set_vertex_buffer();

View File

@ -362,7 +362,7 @@ namespace gl
return attrib_t(index);
}
void blitter::scale_image(const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation,
void blitter::scale_image(gl::command_context& cmd, const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation,
bool is_depth_copy, const rsx::typeless_xfer& xfer_info)
{
std::unique_ptr<texture> typeless_src;
@ -400,9 +400,6 @@ namespace gl
dst_rect.x2 = (u16)(dst_rect.x2 * xfer_info.dst_scaling_hint);
}
s32 old_fbo = 0;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo);
filter interp = (linear_interpolation && !is_depth_copy) ? filter::linear : filter::nearest;
GLenum attachment;
gl::buffers target;
@ -427,6 +424,10 @@ namespace gl
target = gl::buffers::color;
}
save_binding_state saved;
cmd.drv->enable(GL_FALSE, GL_STENCIL_TEST);
blit_src.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src_id, 0);
blit_src.check();
@ -435,10 +436,6 @@ namespace gl
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst_id, 0);
blit_dst.check();
GLboolean scissor_test_enabled = glIsEnabled(GL_SCISSOR_TEST);
if (scissor_test_enabled)
glDisable(GL_SCISSOR_TEST);
blit_src.blit(blit_dst, src_rect, dst_rect, target, interp);
if (xfer_info.dst_is_typeless)
@ -452,10 +449,53 @@ namespace gl
blit_dst.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, GL_NONE, 0);
}
if (scissor_test_enabled)
glEnable(GL_SCISSOR_TEST);
void blitter::fast_clear_image(gl::command_context& cmd, const texture* dst, const color4f& color)
{
save_binding_state saved;
glBindFramebuffer(GL_FRAMEBUFFER, old_fbo);
blit_dst.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst->id(), 0);
blit_dst.check();
cmd.drv->clear_color(color);
cmd.drv->color_mask(true, true, true, true);
glClear(GL_COLOR_BUFFER_BIT);
}
void blitter::fast_clear_image(gl::command_context& cmd, const texture* dst, float depth, u8 stencil)
{
GLenum attachment;
GLbitfield clear_mask;
switch (const auto fmt = dst->get_internal_format())
{
case texture::internal_format::depth:
case texture::internal_format::depth16:
clear_mask = GL_DEPTH_BUFFER_BIT;
attachment = GL_DEPTH_ATTACHMENT;
break;
case texture::internal_format::depth_stencil:
case texture::internal_format::depth24_stencil8:
case texture::internal_format::depth32f_stencil8:
clear_mask = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
attachment = GL_DEPTH_STENCIL_ATTACHMENT;
break;
default:
fmt::throw_exception("Invalid texture passed to clear depth function, format=0x%x", (u32)fmt);
}
save_binding_state saved;
blit_dst.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst->id(), 0);
blit_dst.check();
cmd.drv->depth_mask(GL_TRUE);
cmd.drv->stencil_mask(0xFF);
glClear(clear_mask);
}
}

View File

@ -8,7 +8,7 @@
#include <unordered_map>
#include <algorithm>
#include "OpenGL.h"
#include "GLExecutionState.h"
#include "../GCM.h"
#include "../Common/TextureUtils.h"
@ -2796,6 +2796,21 @@ public:
class blitter
{
struct save_binding_state
{
GLuint old_fbo;
save_binding_state()
{
glGetIntegerv(GL_FRAMEBUFFER_BINDING, (GLint*)&old_fbo);
}
~save_binding_state()
{
glBindFramebuffer(GL_FRAMEBUFFER, old_fbo);
}
};
fbo blit_src;
fbo blit_dst;
@ -2813,7 +2828,10 @@ public:
blit_src.remove();
}
void scale_image(const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation,
void scale_image(gl::command_context& cmd, const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation,
bool is_depth_copy, const rsx::typeless_xfer& xfer_info);
void fast_clear_image(gl::command_context& cmd, const texture* dst, const color4f& color);
void fast_clear_image(gl::command_context& cmd, const texture* dst, float depth, u8 stencil);
};
}

View File

@ -227,6 +227,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const auto color_offsets = get_offsets();
const auto color_locations = get_locations();
gl::command_context cmd{ gl_state };
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{
if (m_surface_info[i].pitch && g_cfg.video.write_color_buffers)
@ -239,7 +241,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const utils::address_range surface_range = m_surface_info[i].get_memory_range();
m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(surface_range);
m_gl_texture_cache.flush_if_cache_miss_likely(cmd, surface_range);
}
if (std::get<0>(m_rtts.m_bound_render_targets[i]))
@ -270,7 +272,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_gl_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_gl_texture_cache.flush_if_cache_miss_likely(surface_range);
m_gl_texture_cache.flush_if_cache_miss_likely(cmd, surface_range);
}
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
@ -383,7 +385,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
const utils::address_range surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes);
}
}
@ -394,7 +396,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true);
}
}
@ -464,8 +466,6 @@ void GLGSRender::read_buffers()
}
else
{
m_gl_texture_cache.invalidate_range(range, rsx::invalidation_cause::read);
std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
color_buffer.read(buffer.get(), width, height, pitch);
@ -554,21 +554,8 @@ void GLGSRender::read_buffers()
}
}
void gl::render_target::memory_barrier(void*)
void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init)
{
if (!old_contents)
{
// No memory to inherit
return;
}
auto src_texture = static_cast<gl::render_target*>(old_contents);
if (src_texture->get_rsx_pitch() != get_rsx_pitch())
{
LOG_TODO(RSX, "Pitch mismatch, could not transfer inherited memory");
return;
}
auto is_depth = [](gl::texture::internal_format format)
{
// TODO: Change this to image aspect semantics
@ -583,6 +570,33 @@ void gl::render_target::memory_barrier(void*)
}
};
if (!old_contents)
{
// No memory to inherit
if (dirty && force_init)
{
// Initialize memory contents if we did not find anything usable
// TODO: Properly sync with Cell
if (is_depth(get_internal_format()))
{
gl::g_hw_blitter->fast_clear_image(cmd, this, 1.f, 255);
}
else
{
gl::g_hw_blitter->fast_clear_image(cmd, this, {});
}
}
return;
}
auto src_texture = static_cast<gl::render_target*>(old_contents);
if (src_texture->get_rsx_pitch() != get_rsx_pitch())
{
LOG_TODO(RSX, "Pitch mismatch, could not transfer inherited memory");
return;
}
auto src_bpp = src_texture->get_native_pitch() / src_texture->width();
auto dst_bpp = get_native_pitch() / width();
rsx::typeless_xfer typeless_info{};
@ -609,7 +623,7 @@ void gl::render_target::memory_barrier(void*)
}
}
gl::g_hw_blitter->scale_image(old_contents, this,
gl::g_hw_blitter->scale_image(cmd, old_contents, this,
{ 0, 0, std::get<0>(region), std::get<1>(region) },
{ 0, 0, std::get<2>(region) , std::get<3>(region) },
!dst_is_depth, dst_is_depth, typeless_info);

View File

@ -131,7 +131,9 @@ namespace gl
return (rsx::apply_resolution_scale(_width, true) == internal_width) && (rsx::apply_resolution_scale(_height, true) == internal_height);
}
void memory_barrier(void* = nullptr);
void memory_barrier(gl::command_context& cmd, bool force_init = false);
void read_barrier(gl::command_context& cmd) { memory_barrier(cmd, true); }
void write_barrier(gl::command_context& cmd) { memory_barrier(cmd, false); }
};
struct framebuffer_holder : public gl::fbo, public rsx::ref_counted

View File

@ -35,7 +35,7 @@ namespace gl
struct texture_cache_traits
{
using commandbuffer_type = void*;
using commandbuffer_type = gl::command_context;
using section_storage_type = gl::cached_texture_section;
using texture_cache_type = gl::texture_cache;
using texture_cache_base_type = rsx::texture_cache<texture_cache_type, texture_cache_traits>;
@ -252,7 +252,7 @@ namespace gl
}
}
void copy_texture(bool manage_lifetime)
void copy_texture(gl::command_context& cmd, bool manage_lifetime)
{
ASSERT(exists());
@ -314,7 +314,7 @@ namespace gl
const bool is_depth = is_depth_texture();
const bool linear_interp = is_depth? false : true;
g_hw_blitter->scale_image(vram_texture, scaled_texture.get(), src_area, dst_area, linear_interp, is_depth, {});
g_hw_blitter->scale_image(cmd, vram_texture, scaled_texture.get(), src_area, dst_area, linear_interp, is_depth, {});
target_texture = scaled_texture.get();
}
}
@ -376,12 +376,12 @@ namespace gl
/**
* Flush
*/
void synchronize(bool blocking)
void synchronize(bool blocking, gl::command_context& cmd)
{
if (synchronized)
return;
copy_texture(blocking);
copy_texture(cmd, blocking);
if (blocking)
{
@ -476,8 +476,6 @@ namespace gl
}
}
/**
* Misc
*/
@ -727,20 +725,20 @@ namespace gl
protected:
gl::texture_view* create_temporary_subresource_view(void*&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
const texture_channel_remap_t& remap_vector) override
{
return create_temporary_subresource_impl(*src, GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true);
}
gl::texture_view* create_temporary_subresource_view(void*&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
const texture_channel_remap_t& remap_vector) override
{
return create_temporary_subresource_impl(src, (GLenum)src->get_internal_format(),
GL_TEXTURE_2D, gcm_format, x, y, w, h, remap_vector, true);
}
gl::texture_view* generate_cubemap_from_images(void*&, u32 gcm_format, u16 size, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& /*remap_vector*/) override
gl::texture_view* generate_cubemap_from_images(gl::command_context&, u32 gcm_format, u16 size, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& /*remap_vector*/) override
{
const GLenum ifmt = gl::get_sized_internal_format(gcm_format);
std::unique_ptr<gl::texture> dst_image = std::make_unique<gl::viewable_image>(GL_TEXTURE_CUBE_MAP, size, size, 1, 1, ifmt);
@ -769,7 +767,7 @@ namespace gl
return result;
}
gl::texture_view* generate_3d_from_2d_images(void*&, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& /*remap_vector*/) override
gl::texture_view* generate_3d_from_2d_images(gl::command_context&, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector<copy_region_descriptor>& sources, const texture_channel_remap_t& /*remap_vector*/) override
{
const GLenum ifmt = gl::get_sized_internal_format(gcm_format);
std::unique_ptr<gl::texture> dst_image = std::make_unique<gl::viewable_image>(GL_TEXTURE_3D, width, height, depth, 1, ifmt);
@ -798,7 +796,7 @@ namespace gl
return result;
}
gl::texture_view* generate_atlas_from_images(void*&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy,
gl::texture_view* generate_atlas_from_images(gl::command_context&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy,
const texture_channel_remap_t& remap_vector) override
{
auto result = create_temporary_subresource_impl(nullptr, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, remap_vector, false);
@ -812,13 +810,13 @@ namespace gl
return result;
}
void update_image_contents(void*&, gl::texture_view* dst, gl::texture* src, u16 width, u16 height) override
void update_image_contents(gl::command_context&, gl::texture_view* dst, gl::texture* src, u16 width, u16 height) override
{
glCopyImageSubData(src->id(), GL_TEXTURE_2D, 0, 0, 0, 0,
dst->image()->id(), GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
}
cached_texture_section* create_new_texture(void*&, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch,
cached_texture_section* create_new_texture(gl::command_context&, const utils::address_range &rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch,
u32 gcm_format, rsx::texture_upload_context context, rsx::texture_dimension_extended type, rsx::texture_create_flags flags) override
{
auto image = gl::create_texture(gcm_format, width, height, depth, mipmaps, type);
@ -886,11 +884,10 @@ namespace gl
return &cached;
}
cached_texture_section* upload_image_from_cpu(void*&, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format,
cached_texture_section* upload_image_from_cpu(gl::command_context &cmd, const utils::address_range& rsx_range, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, u32 gcm_format,
rsx::texture_upload_context context, const std::vector<rsx_subresource_layout>& subresource_layout, rsx::texture_dimension_extended type, bool input_swizzled) override
{
void* unused = nullptr;
auto section = create_new_texture(unused, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type,
auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type,
rsx::texture_create_flags::default_component_order);
gl::upload_texture(section->get_raw_texture()->id(), gcm_format, width, height, depth, mipmaps,
@ -913,7 +910,7 @@ namespace gl
section.set_view_flags(flags);
}
void insert_texture_barrier(void*&, gl::texture*) override
void insert_texture_barrier(gl::command_context&, gl::texture*) override
{
auto &caps = gl::get_driver_caps();
@ -1013,10 +1010,9 @@ namespace gl
baseclass::on_frame_end();
}
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts)
bool blit(gl::command_context &cmd, rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts)
{
void* unused = nullptr;
auto result = upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter);
auto result = upload_scaled_image(src, dst, linear_interpolate, cmd, m_rtts, m_hw_blitter);
if (result.succeeded)
{
@ -1034,7 +1030,7 @@ namespace gl
gl::texture::format::depth_stencil : gl::texture::format::depth;
}
flush_if_cache_miss_likely(result.to_address_range());
flush_if_cache_miss_likely(cmd, result.to_address_range());
}
return true;

View File

@ -855,7 +855,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
std::lock_guard lock(m_secondary_cb_guard);
const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
result = std::move(m_texture_cache.invalidate_address(address, cause, m_secondary_command_buffer, m_swapchain->get_graphics_queue()));
result = std::move(m_texture_cache.invalidate_address(m_secondary_command_buffer, address, cause, m_swapchain->get_graphics_queue()));
}
if (!result.violation_handled)
@ -928,7 +928,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
m_flush_requests.producer_wait();
}
m_texture_cache.flush_all(result, m_secondary_command_buffer, m_swapchain->get_graphics_queue());
m_texture_cache.flush_all(m_secondary_command_buffer, result, m_swapchain->get_graphics_queue());
if (has_queue_ref)
{
@ -944,7 +944,7 @@ void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
{
std::lock_guard lock(m_secondary_cb_guard);
auto data = std::move(m_texture_cache.invalidate_range(range, rsx::invalidation_cause::unmap, m_secondary_command_buffer, m_swapchain->get_graphics_queue()));
auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap, m_swapchain->get_graphics_queue()));
AUDIT(data.empty());
if (data.violation_handled)
@ -1650,13 +1650,13 @@ void VKGSRender::end()
// Apply write memory barriers
if (g_cfg.video.strict_rendering_mode)
{
if (ds) ds->memory_barrier(*m_current_command_buffer);
if (ds) ds->write_barrier(*m_current_command_buffer);
for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
surface->memory_barrier(*m_current_command_buffer);
surface->write_barrier(*m_current_command_buffer);
}
}
}
@ -1694,7 +1694,7 @@ void VKGSRender::end()
if (UNLIKELY(!buffers_to_clear.empty()))
{
VkClearRect rect = { {{0, 0}, {m_draw_fbo->width(), m_draw_fbo->height()}}, 0, 1 };
vkCmdClearAttachments(*m_current_command_buffer, (u32)buffers_to_clear.size(),
vkCmdClearAttachments(*m_current_command_buffer, buffers_to_clear.size(),
buffers_to_clear.data(), 1, &rect);
}
@ -2860,7 +2860,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
const utils::address_range rsx_range = m_surface_info[i].get_memory_range();
m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(rsx_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, rsx_range, m_swapchain->get_graphics_queue());
}
m_surface_info[i].address = m_surface_info[i].pitch = 0;
@ -2876,7 +2876,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format);
const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
m_texture_cache.flush_if_cache_miss_likely(surface_range, *m_current_command_buffer, m_swapchain->get_graphics_queue());
m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
}
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
@ -2929,8 +2929,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple<vk::command_buffer&, VkQueue>{ *m_current_command_buffer, m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second);
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second);
}
}
@ -2940,8 +2940,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
{
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple<vk::command_buffer&, VkQueue>{ *m_current_command_buffer, m_swapchain->get_graphics_queue() }, gcm_format, false);
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, gcm_format, false);
}
}
@ -3301,7 +3301,7 @@ void VKGSRender::flip(int buffer)
{
if (section->get_protection() == utils::protection::no)
{
section->copy_texture(false, *m_current_command_buffer, m_swapchain->get_graphics_queue());
section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue());
flush_queue = true;
}
}
@ -3312,7 +3312,7 @@ void VKGSRender::flip(int buffer)
flush_command_queue();
}
m_texture_cache.invalidate_range(range, rsx::invalidation_cause::read, *m_current_command_buffer, m_swapchain->get_graphics_queue());
m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read, m_swapchain->get_graphics_queue());
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);
}
}

View File

@ -409,7 +409,7 @@ namespace vk
vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 1, &barrier, 0, nullptr);
}
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range)
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range)
{
//Prepare an image to match the new layout..
VkImageMemoryBarrier barrier = {};
@ -479,7 +479,7 @@ namespace vk
vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &barrier);
}
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, VkImageSubresourceRange range)
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, const VkImageSubresourceRange& range)
{
if (image->current_layout == new_layout) return;

View File

@ -139,8 +139,8 @@ namespace vk
VkImageAspectFlags flags, vk::data_heap &upload_heap);
//Other texture management helpers
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range);
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, VkImageSubresourceRange range);
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range);
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, const VkImageSubresourceRange& range);
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout);
void copy_image_typeless(const command_buffer &cmd, const image *src, const image *dst, const areai& src_rect, const areai& dst_rect,
@ -2981,13 +2981,8 @@ public:
}
};
class blitter
struct blitter
{
vk::command_buffer* commands;
public:
blitter(vk::command_buffer *c) : commands(c) {}
void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info);
void scale_image(vk::command_buffer& cmd, vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info);
};
}

View File

@ -57,10 +57,34 @@ namespace vk
return (rsx::apply_resolution_scale(_width, true) == width()) && (rsx::apply_resolution_scale(_height, true) == height());
}
void memory_barrier(vk::command_buffer& cmd)
void memory_barrier(vk::command_buffer& cmd, bool force_init = false)
{
if (!old_contents)
{
if (dirty && force_init)
{
// Initialize memory contents if we did not find anything usable
// TODO: Properly sync with Cell
VkImageSubresourceRange range{ attachment_aspect_flag, 0, 1, 0, 1 };
const auto old_layout = current_layout;
change_image_layout(cmd, this, VK_IMAGE_LAYOUT_GENERAL, range);
if (attachment_aspect_flag & VK_IMAGE_ASPECT_COLOR_BIT)
{
VkClearColorValue color{};
vkCmdClearColorImage(cmd, value, VK_IMAGE_LAYOUT_GENERAL, &color, 1, &range);
}
else
{
VkClearDepthStencilValue clear{ 1.f, 255 };
vkCmdClearDepthStencilImage(cmd, value, VK_IMAGE_LAYOUT_GENERAL, &clear, 1, &range);
}
change_image_layout(cmd, this, old_layout, range);
on_write();
}
return;
}
@ -96,14 +120,17 @@ namespace vk
}
}
vk::blitter hw_blitter(&cmd);
hw_blitter.scale_image(old_contents, this,
vk::blitter hw_blitter;
hw_blitter.scale_image(cmd, old_contents, this,
{ 0, 0, std::get<0>(region), std::get<1>(region) },
{ 0, 0, std::get<2>(region) , std::get<3>(region) },
/*linear?*/false, /*depth?(unused)*/false, typeless_info);
on_write();
}
void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, true); }
void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, false); }
};
struct framebuffer_holder: public vk::framebuffer, public rsx::ref_counted

View File

@ -532,7 +532,7 @@ namespace vk
return{ final_mapping[1], final_mapping[2], final_mapping[3], final_mapping[0] };
}
void blitter::scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info)
void blitter::scale_image(vk::command_buffer& cmd, vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info)
{
const auto src_aspect = vk::get_aspect_flags(src->info.format);
const auto dst_aspect = vk::get_aspect_flags(dst->info.format);
@ -552,7 +552,7 @@ namespace vk
src_area.x1 = (u16)(src_area.x1 * xfer_info.src_scaling_hint);
src_area.x2 = (u16)(src_area.x2 * xfer_info.src_scaling_hint);
vk::copy_image_typeless(*commands, src, real_src, { 0, 0, (s32)src->width(), (s32)src->height() }, { 0, 0, (s32)internal_width, (s32)src->height() }, 1,
vk::copy_image_typeless(cmd, src, real_src, { 0, 0, (s32)src->width(), (s32)src->height() }, { 0, 0, (s32)internal_width, (s32)src->height() }, 1,
vk::get_aspect_flags(src->info.format), vk::get_aspect_flags(format));
}
@ -568,7 +568,7 @@ namespace vk
dst_area.x1 = (u16)(dst_area.x1 * xfer_info.dst_scaling_hint);
dst_area.x2 = (u16)(dst_area.x2 * xfer_info.dst_scaling_hint);
vk::copy_image_typeless(*commands, dst, real_dst, { 0, 0, (s32)dst->width(), (s32)dst->height() }, { 0, 0, (s32)internal_width, (s32)dst->height() }, 1,
vk::copy_image_typeless(cmd, dst, real_dst, { 0, 0, (s32)dst->width(), (s32)dst->height() }, { 0, 0, (s32)internal_width, (s32)dst->height() }, 1,
vk::get_aspect_flags(dst->info.format), vk::get_aspect_flags(format));
}
else if (xfer_info.dst_context == rsx::texture_upload_context::framebuffer_storage)
@ -591,24 +591,24 @@ namespace vk
const auto data_length = src->info.extent.width * src->info.extent.height * 4;
const auto current_layout = src->current_layout;
vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vkCmdCopyImageToBuffer(*commands, src->value, src->current_layout, scratch_buf->value, 1, &copy);
vk::change_image_layout(*commands, real_src, current_layout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vk::change_image_layout(cmd, real_src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, scratch_buf->value, 1, &copy);
vk::change_image_layout(cmd, real_src, current_layout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length,
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, data_length,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
vk::get_compute_task<vk::cs_shuffle_32>()->run(*commands, scratch_buf, data_length);
vk::get_compute_task<vk::cs_shuffle_32>()->run(cmd, scratch_buf, data_length);
vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length,
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, data_length,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
real_src = vk::get_typeless_helper(src->info.format, src->width(), src->height());
vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vk::change_image_layout(cmd, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vkCmdCopyBufferToImage(*commands, scratch_buf->value, real_src->value, real_src->current_layout, 1, &copy);
vkCmdCopyBufferToImage(cmd, scratch_buf->value, real_src->value, real_src->current_layout, 1, &copy);
}
}
}
@ -637,17 +637,17 @@ namespace vk
const auto dst_width = dst_area.x2 - dst_area.x1;
const auto dst_height = dst_area.y2 - dst_area.y1;
copy_scaled_image(*commands, real_src->value, real_dst->value, real_src->current_layout, real_dst->current_layout, src_area.x1, src_area.y1, src_width, src_height,
copy_scaled_image(cmd, real_src->value, real_dst->value, real_src->current_layout, real_dst->current_layout, src_area.x1, src_area.y1, src_width, src_height,
dst_area.x1, dst_area.y1, dst_width, dst_height, 1, dst_aspect, real_src->info.format == real_dst->info.format,
interpolate ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, real_src->info.format, real_dst->info.format);
if (real_dst != dst)
{
auto internal_width = dst->width() * xfer_info.dst_scaling_hint;
vk::copy_image_typeless(*commands, real_dst, dst, { 0, 0, (s32)internal_width, (s32)dst->height() }, { 0, 0, (s32)dst->width(), (s32)dst->height() }, 1,
vk::copy_image_typeless(cmd, real_dst, dst, { 0, 0, (s32)internal_width, (s32)dst->height() }, { 0, 0, (s32)dst->width(), (s32)dst->height() }, 1,
vk::get_aspect_flags(real_dst->info.format), vk::get_aspect_flags(dst->info.format));
}
change_image_layout(*commands, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, { (VkImageAspectFlags)dst_aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers });
change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, { (VkImageAspectFlags)dst_aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers });
}
}

View File

@ -148,7 +148,7 @@ namespace vk
return flushed;
}
void copy_texture(bool manage_cb_lifetime, vk::command_buffer& cmd, VkQueue submit_queue)
void copy_texture(vk::command_buffer& cmd, bool manage_cb_lifetime, VkQueue submit_queue)
{
ASSERT(exists());
@ -320,7 +320,7 @@ namespace vk
m_device = &cmd.get_command_pool().get_owner();
}
copy_texture(blocking, cmd, submit_queue);
copy_texture(cmd, blocking, submit_queue);
}
void* map_synchronized(u32 offset, u32 size)
@ -1036,7 +1036,7 @@ namespace vk
template<typename RsxTextureType>
sampled_image_descriptor _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts)
{
return upload_texture(cmd, tex, m_rtts, cmd, const_cast<const VkQueue>(m_submit_queue));
return upload_texture(cmd, tex, m_rtts, const_cast<const VkQueue>(m_submit_queue));
}
vk::image *upload_image_simple(vk::command_buffer& cmd, u32 address, u32 width, u32 height)
@ -1094,14 +1094,14 @@ namespace vk
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
{
blitter helper(&cmd);
auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, const_cast<const VkQueue>(m_submit_queue));
blitter helper;
auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, const_cast<const VkQueue>(m_submit_queue));
if (reply.succeeded)
{
if (reply.real_dst_size)
{
flush_if_cache_miss_likely(reply.to_address_range(), cmd, m_submit_queue);
flush_if_cache_miss_likely(cmd, reply.to_address_range(), m_submit_queue);
}
return true;

View File

@ -71,6 +71,7 @@
</ProjectReference>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\GL\GLExecutionState.h" />
<ClInclude Include="Emu\RSX\GL\GLOverlays.h" />
<ClInclude Include="Emu\RSX\GL\GLTextOut.h" />
<ClInclude Include="Emu\RSX\GL\GLCommonDecompiler.h" />

View File

@ -25,5 +25,6 @@
<ClInclude Include="Emu\RSX\GL\GLRenderTargets.h" />
<ClInclude Include="Emu\RSX\GL\GLTextOut.h" />
<ClInclude Include="Emu\RSX\GL\GLOverlays.h" />
<ClInclude Include="Emu\RSX\GL\GLExecutionState.h" />
</ItemGroup>
</Project>