Merge pull request #1502 from kd-11/gl_staging

gl: Enable texture and render target caching for OpenGL
2024-11-22 10:42:36 +01:00 · 2016-02-18 17:14:16 +01:00 · 2016-02-18 17:14:16 +01:00 · b6323861a5
commit b6323861a5
parent fe551148d8 a451bb574e
6 changed files with 724 additions and 78 deletions
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@ -400,7 +400,7 @@ void GLGSRender::end()

 			m_gl_textures[i].set_target(target);

-			__glcheck m_gl_textures[i].init(i, textures[i]);
+			__glcheck m_gl_texture_cache.upload_texture(i, textures[i], m_gl_textures[i]);
 			glProgramUniform1i(m_program->id(), location, i);
 		}
 	}
@ -771,6 +771,8 @@ void GLGSRender::on_init_thread()
 		buf = new gl::buffer();
 		buf->create();
 	}
+
+	m_gl_texture_cache.initialize_rtt_cache();
 }

 void GLGSRender::on_exit()
@ -1208,7 +1210,11 @@ void GLGSRender::read_buffers()
 			u32 width = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL] >> 16;
 			u32 height = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL] >> 16;

-
+			/**
+			 * Reading color buffers is useless if write color buffers is enabled. I haven't encountered a case where it is necessary,
+			 * since the output is usually fed back into the pipeline as a fragment shader input texture.
+			 * It is included here for completeness.
+			 */
 			for (int i = index; i < index + count; ++i)
 			{
 				u32 offset = rsx::method_registers[mr_color_offset[i]];
@ -1218,20 +1224,28 @@ void GLGSRender::read_buffers()
 				if (pitch <= 64)
 					continue;

-				m_draw_tex_color[i].pixel_unpack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count));
-
 				rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
+				u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));

-				if (!color_buffer.tile)
-				{
-					__glcheck m_draw_tex_color[i].copy_from(color_buffer.ptr, color_format.format, color_format.type);
-				}
-				else
-				{
-					std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
-					color_buffer.read(buffer.get(), width, height, pitch);
+				bool success = m_gl_texture_cache.explicit_writeback(m_draw_tex_color[i], texaddr, pitch);
 				
-					__glcheck m_draw_tex_color[i].copy_from(buffer.get(), color_format.format, color_format.type);
+				//Fall back to slower methods if the image could not be fetched.
+				if (!success)
+				{
+					if (!color_buffer.tile)
+					{
+						m_draw_tex_color[i].copy_from(color_buffer.ptr, color_format.format, color_format.type);
+					}
+					else
+					{
+						u32 range = pitch * height;
+						m_gl_texture_cache.remove_in_range(texaddr, range);
+
+						std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
+						color_buffer.read(buffer.get(), width, height, pitch);
+
+						__glcheck m_draw_tex_color[i].copy_from(buffer.get(), color_format.format, color_format.type);
+					}
 				}
 			}
 		};
@ -1271,8 +1285,15 @@ void GLGSRender::read_buffers()
 		if (pitch <= 64)
 			return;

-		auto depth_format = surface_depth_format_to_gl(m_surface.depth_format);
+		u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]);
+		bool in_cache = m_gl_texture_cache.explicit_writeback(m_draw_tex_depth_stencil, depth_address, pitch);

+		if (in_cache)
+			return;
+
+		//Read failed. Fall back to slow s/w path...
+
+		auto depth_format = surface_depth_format_to_gl(m_surface.depth_format);
 		int pixel_size = get_pixel_size(m_surface.depth_format);
 		gl::buffer pbo_depth;

@ -1312,9 +1333,6 @@ void GLGSRender::write_buffers()

 	if (rpcs3::state.config.rsx.opengl.write_color_buffers)
 	{
-		//gl::buffer pbo_color;
-		//__glcheck pbo_color.create(m_draw_tex_color[0].width() * m_draw_tex_color[0].height() * 4);
-
 		auto color_format = surface_color_format_to_gl(m_surface.color_format);

 		auto write_color_buffers = [&](int index, int count)
@ -1324,23 +1342,6 @@ void GLGSRender::write_buffers()

 			for (int i = index; i < index + count; ++i)
 			{
-				//TODO: swizzle
-				//__glcheck m_draw_tex_color[i].copy_to(pbo_color, color_format.format, color_format.type);
-
-				//pbo_color.map([&](GLubyte* pixels)
-				//{
-				//	u32 color_address = rsx::get_address(rsx::method_registers[mr_color_offset[i]], rsx::method_registers[mr_color_dma[i]]);
-				//	//u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]);
-
-				//	const u32 *src = (const u32*)pixels;
-				//	be_t<u32>* dst = vm::ps3::_ptr<u32>(color_address);
-				//	for (int i = 0, end = m_draw_tex_color[i].width() * m_draw_tex_color[i].height(); i < end; ++i)
-				//	{
-				//		dst[i] = src[i];
-				//	}
-
-				//}, gl::buffer::access::read);
-
 				u32 offset = rsx::method_registers[mr_color_offset[i]];
 				u32 location = rsx::method_registers[mr_color_dma[i]];
 				u32 pitch = rsx::method_registers[mr_color_pitch[i]];
@ -1348,22 +1349,15 @@ void GLGSRender::write_buffers()
 				if (pitch <= 64)
 					continue;

-				m_draw_tex_color[i].pixel_pack_settings().row_length(pitch / (color_format.channel_size * color_format.channel_count));
-
 				rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
+				u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
+				u32 range = pitch * height;

-				if (!color_buffer.tile)
-				{
-					__glcheck m_draw_tex_color[i].copy_to(color_buffer.ptr, color_format.format, color_format.type);
-				}
-				else
-				{
-					std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
-
-					__glcheck m_draw_tex_color[i].copy_to(buffer.get(), color_format.format, color_format.type);
-
-					color_buffer.write(buffer.get(), width, height, pitch);
-				}
+				/**
+				 * Testing shows that even tiled surfaces are loaded as whole textures during read_buffers.
+				 * Further evaluation is needed to determine the correct behavior. Separate paths for both show no difference,
+				 * but using the GPU to perform the caching is many times faster.
+				 */
+				__glcheck m_gl_texture_cache.save_render_target(texaddr, range, m_draw_tex_color[i]);
 			}
 		};

@ -1403,38 +1397,12 @@ void GLGSRender::write_buffers()
 			return;

 		auto depth_format = surface_depth_format_to_gl(m_surface.depth_format);
+		u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]);
+		u32 range = m_draw_tex_depth_stencil.width() * m_draw_tex_depth_stencil.height() * 2;

-		gl::buffer pbo_depth;
+		if (m_surface.depth_format != rsx::surface_depth_format::z16) range *= 2;

-		int pixel_size = get_pixel_size(m_surface.depth_format);
-
-		__glcheck pbo_depth.create(m_surface.width * m_surface.height * pixel_size);
-		__glcheck m_draw_tex_depth_stencil.copy_to(pbo_depth, depth_format.second, depth_format.first);
-
-		__glcheck pbo_depth.map([&](GLubyte* pixels)
-		{
-			u32 depth_address = rsx::get_address(rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET], rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA]);
-
-			if (m_surface.depth_format == rsx::surface_depth_format::z16)
-			{
-				const u16 *src = (const u16*)pixels;
-				be_t<u16>* dst = vm::ps3::_ptr<u16>(depth_address);
-				for (int i = 0, end = m_draw_tex_depth_stencil.width() * m_draw_tex_depth_stencil.height(); i < end; ++i)
-				{
-					dst[i] = src[i];
-				}
-			}
-			else
-			{
-				const u32 *src = (const u32*)pixels;
-				be_t<u32>* dst = vm::ps3::_ptr<u32>(depth_address);
-				for (int i = 0, end = m_draw_tex_depth_stencil.width() * m_draw_tex_depth_stencil.height(); i < end; ++i)
-				{
-					dst[i] = src[i];
-				}
-			}
-
-		}, gl::buffer::access::read);
+		m_gl_texture_cache.save_render_target(depth_address, range, m_draw_tex_depth_stencil);
 	}
 }

@ -1449,7 +1417,10 @@ void GLGSRender::flip(int buffer)

 	bool skip_read = false;

-	if (draw_fbo && !rpcs3::state.config.rsx.opengl.write_color_buffers)
+	/**
+	 * Calling read_buffers will overwrite cached content
+	 */
+	if (draw_fbo)
 	{
 		skip_read = true;
 		/*
@ -1558,3 +1529,9 @@ u64 GLGSRender::timestamp() const
 	glGetInteger64v(GL_TIMESTAMP, &result);
 	return result;
 }
+
+/**
+ * Access violation hook.
+ * Write faults are forwarded to the texture cache, whose answer is returned as-is.
+ * Read faults are never handled here.
+ */
+bool GLGSRender::on_access_violation(u32 address, bool is_writing)
+{
+	if (!is_writing)
+	{
+		return false;
+	}
+
+	return m_gl_texture_cache.mark_as_dirty(address);
+}
--- a/rpcs3/Emu/RSX/GL/GLGSRender.h
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.h
@ -2,6 +2,7 @@
 #include "Emu/RSX/GSRender.h"
 #include "gl_helpers.h"
 #include "rsx_gl_texture.h"
+#include "gl_texture_cache.h"

 #define RSX_DEBUG 1

@ -29,6 +30,8 @@ private:
 	}
 	m_gl_attrib_buffers[rsx::limits::vertex_count];

+	gl::gl_texture_cache m_gl_texture_cache;
+
 public:
 	gl::fbo draw_fbo;

@ -73,4 +76,6 @@ protected:
 	bool do_method(u32 id, u32 arg) override;
 	void flip(int buffer) override;
 	u64 timestamp() const override;
+
+	bool on_access_violation(u32 address, bool is_writing) override;
 };
--- a/rpcs3/Emu/RSX/GL/GLProcTable.h
+++ b/rpcs3/Emu/RSX/GL/GLProcTable.h
@ -170,6 +170,9 @@ OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase);
 OPENGL_PROC(PFNGLTEXBUFFERPROC, TexBuffer);
 OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT);

+//ARB_Copy_Image
+OPENGL_PROC(PFNGLCOPYIMAGESUBDATAPROC, CopyImageSubData);
+
 //KHR_debug
 OPENGL_PROC(PFNGLDEBUGMESSAGECONTROLARBPROC, DebugMessageControlARB);
 OPENGL_PROC(PFNGLDEBUGMESSAGEINSERTARBPROC, DebugMessageInsertARB);
--- a/rpcs3/Emu/RSX/GL/gl_texture_cache.h
+++ b/rpcs3/Emu/RSX/GL/gl_texture_cache.h
@ -0,0 +1,659 @@
+#pragma once
+
+#include "stdafx.h"
+
+#include <exception>
+#include <string>
+#include <functional>
+#include <vector>
+#include <memory>
+#include <unordered_map>
+
+#include "GLGSRender.h"
+#include "../Common/TextureUtils.h"
+#include <chrono>
+
+namespace gl
+{
+	class gl_texture_cache
+	{
+	public:
+
+		//Book-keeping record for one uploaded texture held in texture_cache
+		struct gl_cached_texture
+		{
+			u32 gl_id;                 //GL texture name; 0 marks the slot as free for reuse
+			u32 w;                     //Width the entry was created with, compared on lookup
+			u32 h;                     //Height the entry was created with, compared on lookup
+			u64 data_addr;             //Address of the texture data, used as the lookup key
+			u32 block_sz;              //Size of the texture data as reported by get_texture_size
+			u32 frame_ctr;             //Value of the cache frame counter when the entry was last matched by a lookup
+			u32 protected_block_start; //Page-aligned start of the protected range (set by lock_gl_object)
+			u32 protected_block_sz;    //Page-aligned size of the protected range (set by lock_gl_object)
+			u16 mipmap;                //Mipmap value the entry was created with, compared on lookup
+			bool deleted;              //Set once the entry has been invalidated; its GL name may be reused or reclaimed
+			bool locked;               //True while the protected range is write-protected
+		};
+
+		//Describes a memory area that find_and_invalidate_in_range unlocked and that can be re-locked later
+		struct invalid_cache_area
+		{
+			u32 block_base; //Start address of the unlocked area
+			u32 block_sz;   //Size of the unlocked area
+		};
+
+		//Book-keeping record for one cached render target copy held in rtt_cache
+		struct cached_rtt
+		{
+			u32 copy_glid;      //GL texture name holding the cached copy
+			u32 data_addr;      //Base address of the render target; 0 marks an unused slot
+			u32 block_sz;       //Size of the memory block covered by the render target
+
+			bool is_dirty;      //True when the cached copy can no longer be trusted
+			bool is_depth;      //True when the copy was allocated with a depth format
+			bool valid;         //True once a GL texture name has been generated for the slot
+
+			u32 current_width;  //Width the copy texture was last allocated with
+			u32 current_height; //Height the copy texture was last allocated with
+
+			bool locked;        //True while the covered memory is write-protected
+
+			//Every member is given a defined value so that a fresh record can be copied and compared safely
+			cached_rtt()
+				: copy_glid(0)
+				, data_addr(0)
+				, block_sz(0)
+				, is_dirty(false)
+				, is_depth(false)
+				, valid(false)
+				, current_width(0)
+				, current_height(0)
+				, locked(false)
+			{
+			}
+		};
+
+	private:
+		std::vector<gl_cached_texture> texture_cache;
+		std::vector<cached_rtt> rtt_cache;
+		u32 frame_ctr;
+
+		/**
+		 * Applies page protection to a memory range.
+		 * The start is rounded down to a 4096 byte boundary and the size is rounded up to a multiple of 4096.
+		 * vm::page_writable is passed as the last argument of vm::page_protect (presumably the flags to clear - confirm against vm::page_protect).
+		 * Returns the result of vm::page_protect.
+		 * NOTE(review): when start is not page aligned, the rounded range may end before start + size.
+		 */
+		bool lock_memory_region(u32 start, u32 size)
+		{
+			static const u32 memory_page_size = 4096;
+
+			const u32 page_base = start & ~(memory_page_size - 1);
+			const u32 page_span = (u32)align(size, memory_page_size);
+
+			return vm::page_protect(page_base, page_span, 0, 0, vm::page_writable);
+		}
+
+		/**
+		 * Counterpart of lock_memory_region.
+		 * The range is rounded exactly the same way (start down, size up, 4096 byte granularity).
+		 * vm::page_writable is passed as the fourth argument of vm::page_protect (presumably the flags to set - confirm against vm::page_protect).
+		 * Returns the result of vm::page_protect.
+		 */
+		bool unlock_memory_region(u32 start, u32 size)
+		{
+			static const u32 memory_page_size = 4096;
+
+			const u32 page_base = start & ~(memory_page_size - 1);
+			const u32 page_span = (u32)align(size, memory_page_size);
+
+			return vm::page_protect(page_base, page_span, 0, vm::page_writable, 0);
+		}
+
+		/**
+		 * Computes the page-aligned protected range of a cached texture, stores it in the entry
+		 * and tries to protect it. The entry is flagged as locked only when protection succeeded;
+		 * a failure is logged and leaves the locked flag untouched.
+		 */
+		void lock_gl_object(gl_cached_texture &obj)
+		{
+			static const u32 memory_page_size = 4096;
+			obj.protected_block_start = obj.data_addr & ~(memory_page_size - 1);
+			obj.protected_block_sz = (u32)align(obj.block_sz, memory_page_size);
+
+			const bool is_protected = lock_memory_region(obj.protected_block_start, obj.protected_block_sz);
+
+			if (is_protected)
+			{
+				obj.locked = true;
+				return;
+			}
+
+			LOG_ERROR(RSX, "lock_gl_object failed!");
+		}
+
+		/**
+		 * Lifts the protection from the range recorded in the entry.
+		 * The locked flag is cleared only when the unlock succeeded; a failure is logged.
+		 */
+		void unlock_gl_object(gl_cached_texture &obj)
+		{
+			const bool is_released = unlock_memory_region(obj.protected_block_start, obj.protected_block_sz);
+
+			if (is_released)
+			{
+				obj.locked = false;
+				return;
+			}
+
+			LOG_ERROR(RSX, "unlock_gl_object failed! Will probably crash soon...");
+		}
+
+		/**
+		 * Looks up a cached texture by address.
+		 * Only entries that own a GL name are considered. When w, h and mipmap are all non-zero
+		 * they must match the entry as well; mismatching entries are reported and skipped.
+		 * A match has its frame counter refreshed. Entries flagged as deleted are returned too.
+		 * Returns nullptr when nothing matches.
+		 */
+		gl_cached_texture *find_obj_for_params(u64 texaddr, u32 w, u32 h, u16 mipmap)
+		{
+			//Parameter validation is only possible when every parameter was supplied
+			const bool validate_params = (w && h && mipmap);
+
+			for (gl_cached_texture &entry : texture_cache)
+			{
+				if (!entry.gl_id || entry.data_addr != texaddr)
+					continue;
+
+				if (validate_params && (entry.h != h || entry.w != w || entry.mipmap != mipmap))
+				{
+					LOG_ERROR(RSX, "Texture params are invalid for block starting 0x%X!", entry.data_addr);
+					LOG_ERROR(RSX, "Params passed w=%d, h=%d, mip=%d, found w=%d, h=%d, mip=%d", w, h, mipmap, entry.w, entry.h, entry.mipmap);
+
+					continue;
+				}
+
+				entry.frame_ctr = frame_ctr;
+				return &entry;
+			}
+
+			return nullptr;
+		}
+
+		/**
+		 * Creates the cache record for a freshly uploaded texture and returns a reference to it.
+		 * A free slot (gl_id == 0) or a deleted slot that has not been matched for more than 32768
+		 * frame counter ticks is recycled; otherwise the record is appended.
+		 * When a stale slot is recycled its GL texture is destroyed first.
+		 * block_sz and the protected range are left zeroed; the caller fills them in.
+		 * The returned reference is only valid until texture_cache is modified again.
+		 */
+		gl_cached_texture& create_obj_for_params(u32 gl_id, u64 texaddr, u32 w, u32 h, u16 mipmap)
+		{
+			gl_cached_texture obj = { 0 };
+
+			obj.gl_id = gl_id;
+			obj.data_addr = texaddr;
+			obj.w = w;
+			obj.h = h;
+			obj.mipmap = mipmap;
+			obj.deleted = false;
+			obj.locked = false;
+
+			//Stamp the creation time, otherwise the age of a never looked-up entry is measured from frame 0
+			obj.frame_ctr = frame_ctr;
+
+			for (gl_cached_texture &tex : texture_cache)
+			{
+				const bool is_free = (tex.gl_id == 0);
+				const bool is_stale = tex.deleted && (frame_ctr - tex.frame_ctr) > 32768;
+
+				if (!is_free && !is_stale)
+					continue;
+
+				if (tex.gl_id)
+				{
+					LOG_NOTICE(RSX, "Reclaiming GL texture %d, cache_size=%d, master_ctr=%d, ctr=%d", tex.gl_id, (u32)texture_cache.size(), frame_ctr, tex.frame_ctr);
+					__glcheck glDeleteTextures(1, &tex.gl_id);
+
+					//Deleted entries are normally unlocked already; unlocking again would strip
+					//the protection from pages that may meanwhile belong to another object
+					if (tex.locked)
+						unlock_gl_object(tex);
+
+					tex.gl_id = 0;
+				}
+
+				tex = obj;
+				return tex;
+			}
+
+			texture_cache.push_back(obj);
+			return texture_cache.back();
+		}
+
+		/**
+		 * Flags a cached texture as deleted, lifting its memory protection first when it is locked.
+		 * The GL texture itself is not destroyed here.
+		 */
+		void remove_obj(gl_cached_texture &tex)
+		{
+			if (tex.locked)
+			{
+				unlock_gl_object(tex);
+			}
+
+			tex.deleted = true;
+		}
+
+		//Removes (see remove_obj) every cached texture that owns the given GL name
+		void remove_obj_for_glid(u32 gl_id)
+		{
+			for (gl_cached_texture &entry : texture_cache)
+			{
+				if (entry.gl_id != gl_id)
+					continue;
+
+				remove_obj(entry);
+			}
+		}
+
+		/**
+		 * Tears the whole cache down: every texture entry is unlocked (when locked) and its
+		 * GL texture destroyed, the texture list is emptied and finally the RTT cache is destroyed.
+		 */
+		void clear_obj_cache()
+		{
+			for (gl_cached_texture &entry : texture_cache)
+			{
+				if (entry.locked)
+				{
+					unlock_gl_object(entry);
+				}
+
+				if (entry.gl_id)
+				{
+					LOG_NOTICE(RSX, "Deleting texture %d", entry.gl_id);
+					glDeleteTextures(1, &entry.gl_id);
+				}
+
+				entry.deleted = true;
+				entry.gl_id = 0;
+			}
+
+			texture_cache.clear();
+			destroy_rtt_cache();
+		}
+
+		/**
+		 * Axis separation test for two memory ranges given as (base, limit) pairs.
+		 * The ranges overlap when the span that encloses both of them is shorter than
+		 * the sum of their individual lengths.
+		 */
+		bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
+		{
+			//Trivial rejection: one range ends before the other one begins
+			if (limit2 < base1 || base2 > limit1)
+				return false;
+
+			const u32 combined_length = (limit2 - base2) + (limit1 - base1);
+			const u32 span_end = (limit1 > limit2) ? limit1 : limit2;
+			const u32 span_start = (base1 < base2) ? base1 : base2;
+
+			return (span_end - span_start) < combined_length;
+		}
+
+		/**
+		 * Returns the first RTT record whose memory block overlaps [base, base + size),
+		 * or nullptr when there is none. The dirty state of the record is not considered.
+		 */
+		cached_rtt* find_cached_rtt(u32 base, u32 size)
+		{
+			const u32 limit = base + size;
+
+			for (cached_rtt &candidate : rtt_cache)
+			{
+				const u32 candidate_limit = candidate.data_addr + candidate.block_sz;
+
+				if (!region_overlaps(base, limit, candidate.data_addr, candidate_limit))
+					continue;
+
+				return &candidate;
+			}
+
+			return nullptr;
+		}
+
+		/**
+		 * Flags every in-use, clean RTT whose page-aligned block overlaps [base, base + size)
+		 * as dirty and lifts its memory protection when it is locked.
+		 */
+		void invalidate_rtts_in_range(u32 base, u32 size)
+		{
+			for (cached_rtt &rtt : rtt_cache)
+			{
+				//Unused slots and already dirty targets need no work
+				if (!rtt.data_addr || rtt.is_dirty) continue;
+
+				const u32 page_base = ((u32)(rtt.data_addr)) & ~(4096 - 1);
+				const u32 page_span = align(rtt.block_sz, 4096);
+
+				if (!region_overlaps(page_base, (page_base + page_span), base, base+size))
+					continue;
+
+				LOG_NOTICE(RSX, "Dirty RTT FOUND addr=0x%X", base);
+				rtt.is_dirty = true;
+
+				if (!rtt.locked)
+					continue;
+
+				rtt.locked = false;
+				unlock_memory_region((u32)rtt.data_addr, rtt.block_sz);
+			}
+		}
+
+		/**
+		 * (Re)allocates the storage of an RTT copy texture.
+		 * rtt: record whose copy_glid texture is reallocated; its dimensions and is_depth flag are updated.
+		 * width, height: new dimensions of the copy.
+		 * gl_pixel_format_internal: sized internal format of the copy.
+		 * Throws when a depth format other than GL_DEPTH24_STENCIL8 or GL_DEPTH_COMPONENT16 is requested.
+		 * The GL_TEXTURE_2D binding that was active on entry is restored before returning.
+		 */
+		void prep_rtt(cached_rtt &rtt, u32 width, u32 height, u32 gl_pixel_format_internal)
+		{
+			const bool is_depth =
+				gl_pixel_format_internal == GL_DEPTH24_STENCIL8 ||
+				gl_pixel_format_internal == GL_DEPTH_COMPONENT24 ||
+				gl_pixel_format_internal == GL_DEPTH_COMPONENT16 ||
+				gl_pixel_format_internal == GL_DEPTH_COMPONENT32;
+
+			//Client side format/type pair handed to glTexImage2D. No pixel data is uploaded,
+			//but the pair still has to be legal for the internal format.
+			u32 pixel_format = GL_RGBA;
+			u32 pixel_type = GL_UNSIGNED_BYTE;
+
+			//Resolve the formats before touching any GL state so that an unsupported format
+			//neither leaks a changed texture binding nor a half updated record
+			if (is_depth)
+			{
+				switch (gl_pixel_format_internal)
+				{
+				case GL_DEPTH24_STENCIL8:
+					pixel_format = GL_DEPTH_STENCIL;
+					pixel_type = GL_UNSIGNED_INT_24_8;
+					break;
+				case GL_DEPTH_COMPONENT16:
+					//The format argument must be the unsized enum; GL_DEPTH_COMPONENT16 is only valid as internal format
+					pixel_format = GL_DEPTH_COMPONENT;
+					pixel_type = GL_UNSIGNED_SHORT;
+					break;
+				default:
+					throw EXCEPTION("Unsupported depth format!");
+				}
+			}
+
+			int binding = 0;
+			glGetIntegerv(GL_TEXTURE_2D_BINDING_EXT, &binding);
+			glBindTexture(GL_TEXTURE_2D, rtt.copy_glid);
+
+			rtt.current_width = width;
+			rtt.current_height = height;
+
+			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+			__glcheck glTexImage2D(GL_TEXTURE_2D, 0, gl_pixel_format_internal, width, height, 0, pixel_format, pixel_type, nullptr);
+
+			glBindTexture(GL_TEXTURE_2D, binding);
+			rtt.is_depth = is_depth;
+		}
+
+		/**
+		 * Stores a GPU side copy of a render target in the RTT cache.
+		 * base, size: memory block occupied by the render target.
+		 * width, height, gl_pixel_format_internal: layout of the copy texture.
+		 * source: texture whose level 0 is copied.
+		 * An overlapping record is reused, otherwise the first unused slot is claimed.
+		 * Throws when no slot is available. On return the record is clean and locked.
+		 */
+		void save_rtt(u32 base, u32 size, u32 width, u32 height, u32 gl_pixel_format_internal, gl::texture &source)
+		{
+			cached_rtt *region = find_cached_rtt(base, size);
+
+			if (!region)
+			{
+				for (cached_rtt &rtt : rtt_cache)
+				{
+					//Only initialized slots that are not bound to an address can be claimed
+					if (!rtt.valid || rtt.data_addr != 0)
+						continue;
+
+					prep_rtt(rtt, width, height, gl_pixel_format_internal);
+
+					rtt.block_sz = size;
+					rtt.data_addr = base;
+					rtt.is_dirty = true;
+
+					LOG_NOTICE(RSX, "New RTT created for block 0x%X + 0x%X", (u32)rtt.data_addr, rtt.block_sz);
+
+					lock_memory_region((u32)rtt.data_addr, rtt.block_sz);
+					rtt.locked = true;
+
+					region = &rtt;
+					break;
+				}
+
+				if (!region) throw EXCEPTION("No region created!!");
+			}
+
+			if (width != region->current_width ||
+				height != region->current_height)
+			{
+				prep_rtt(*region, width, height, gl_pixel_format_internal);
+
+				if (region->block_sz != size)
+				{
+					//The recorded size has to follow the resize even when the region is currently
+					//unlocked, otherwise the lock taken below protects a stale range
+					const bool was_locked = region->locked;
+
+					if (was_locked)
+					{
+						LOG_NOTICE(RSX, "Unlocking RTT since size has changed!");
+						unlock_memory_region((u32)region->data_addr, region->block_sz);
+						region->locked = false;
+					}
+
+					region->block_sz = size;
+
+					if (was_locked)
+					{
+						LOG_NOTICE(RSX, "Locking down RTT after size change!");
+						lock_memory_region((u32)region->data_addr, region->block_sz);
+						region->locked = true;
+					}
+				}
+			}
+
+			__glcheck glCopyImageSubData(source.id(), GL_TEXTURE_2D, 0, 0, 0, 0,
+										region->copy_glid, GL_TEXTURE_2D, 0, 0, 0, 0,
+										width, height, 1);
+
+			region->is_dirty = false;
+
+			if (!region->locked)
+			{
+				LOG_WARNING(RSX, "Locking down RTT, was unlocked!");
+				lock_memory_region((u32)region->data_addr, region->block_sz);
+				region->locked = true;
+			}
+		}
+
+		/**
+		 * Placeholder for downloading a cached render target back to memory.
+		 * Not implemented yet: always throws.
+		 */
+		void write_rtt(u32 base, u32 size, u32 texaddr)
+		{
+			//Actually download the data, since it seems that cell is writing to it manually
+			//A bare 'throw;' without an active exception would terminate the process, so raise a real exception
+			throw EXCEPTION("write_rtt is not implemented!");
+		}
+
+		/**
+		 * Releases every RTT record: memory protection is lifted where it is still active,
+		 * the copy texture is destroyed and the record is reset. The list is emptied afterwards.
+		 */
+		void destroy_rtt_cache()
+		{
+			for (cached_rtt &rtt : rtt_cache)
+			{
+				//Do not leave pages write-protected once nothing tracks them anymore
+				if (rtt.locked)
+				{
+					unlock_memory_region((u32)rtt.data_addr, rtt.block_sz);
+					rtt.locked = false;
+				}
+
+				rtt.valid = false;
+				rtt.is_dirty = false;
+				rtt.block_sz = 0;
+				rtt.data_addr = 0;
+
+				glDeleteTextures(1, &rtt.copy_glid);
+				rtt.copy_glid = 0;
+			}
+
+			rtt_cache.clear();
+		}
+
+	public:
+
+		//Starts with empty caches and the frame counter at zero
+		gl_texture_cache() : frame_ctr(0) {}
+		
+		//Releases every cached texture and render target copy
+		~gl_texture_cache() { clear_obj_cache(); }
+
+		//Advances the counter that is used to age cached textures
+		void update_frame_ctr()
+		{
+			++frame_ctr;
+		}
+
+		/**
+		 * Creates the fixed pool of 64 RTT records, each with its own GL texture name.
+		 * Throws when the pool already exists.
+		 */
+		void initialize_rtt_cache()
+		{
+			if (rtt_cache.size()) throw EXCEPTION("Initialize RTT cache while cache already exists! Leaking objects??");
+
+			rtt_cache.reserve(64);
+
+			for (int i = 0; i < 64; ++i)
+			{
+				cached_rtt rtt;
+
+				glGenTextures(1, &rtt.copy_glid);
+				rtt.is_dirty = true;
+				rtt.is_depth = false;
+				rtt.valid = true;
+				rtt.block_sz = 0;
+				rtt.data_addr = 0;
+				rtt.locked = false;
+
+				//No storage has been allocated yet; give the dimensions a defined value before the record is copied
+				rtt.current_width = 0;
+				rtt.current_height = 0;
+
+				rtt_cache.push_back(rtt);
+			}
+		}
+
+		/**
+		 * Makes the texture described by tex available on texture unit index.
+		 * Order of preference:
+		 *  1. a clean cached render target copy overlapping the texture memory is bound directly,
+		 *  2. a live cached texture for the same address is bound directly,
+		 *  3. the texture is uploaded through gl_texture.init, recorded in the cache and its memory locked.
+		 * When a cached object is bound, the id stored in gl_texture is restored afterwards.
+		 */
+		void upload_texture(int index, rsx::texture &tex, rsx::gl::texture &gl_texture)
+		{
+			const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
+			const u32 range = (u32)get_texture_size(tex);
+
+			//Binds a cached GL object through the wrapper, then hands the wrapper its own id back
+			auto bind_cached_id = [&](u32 cached_id)
+			{
+				const u32 real_id = gl_texture.id();
+
+				glActiveTexture(GL_TEXTURE0 + index);
+				gl_texture.set_id(cached_id);
+				gl_texture.bind();
+
+				gl_texture.set_id(real_id);
+			};
+
+			cached_rtt *rtt = find_cached_rtt(texaddr, range);
+
+			if (rtt)
+			{
+				if (!rtt->is_dirty)
+				{
+					if (rtt->is_depth)
+					{
+						//The texture should have already been loaded through the writeback interface call
+						LOG_NOTICE(RSX, "Depth RTT found from 0x%X, Trying to upload width dims: %d x %d, Saved as %d x %d", rtt->data_addr, tex.width(), tex.height(), rtt->current_width, rtt->current_height);
+					}
+
+					bind_cached_id(rtt->copy_glid);
+					return;
+				}
+
+				LOG_NOTICE(RSX, "RTT texture for address 0x%X is dirty!", texaddr);
+			}
+
+			//The regular texture cache is only consulted when no render target covers the range
+			gl_cached_texture *obj = nullptr;
+
+			if (!rtt)
+				obj = find_obj_for_params(texaddr, tex.width(), tex.height(), tex.mipmap());
+
+			if (obj && !obj->deleted)
+			{
+				bind_cached_id(obj->gl_id);
+				return;
+			}
+
+			if (obj)
+			{
+				//Reuse this GLid
+				gl_texture.set_id(obj->gl_id);
+
+				//Empty this slot for another one. A new holder will be created below anyway...
+				if (obj->locked) unlock_gl_object(*obj);
+				obj->gl_id = 0;
+			}
+			else
+			{
+				gl_texture.set_id(0);
+			}
+
+			__glcheck gl_texture.init(index, tex);
+			gl_cached_texture &_obj = create_obj_for_params(gl_texture.id(), texaddr, tex.width(), tex.height(), tex.mipmap());
+
+			_obj.block_sz = (u32)get_texture_size(tex);
+			lock_gl_object(_obj);
+		}
+
+		/**
+		 * Reacts to a write hitting protected memory.
+		 * Every locked texture whose protected range contains the address is unlocked and flagged
+		 * as deleted, and render targets overlapping its data are invalidated. Only when no texture
+		 * was hit are the locked, clean render targets checked against the address.
+		 * Returns true when at least one cached object covered the address.
+		 */
+		bool mark_as_dirty(u32 address)
+		{
+			bool handled = false;
+
+			for (gl_cached_texture &entry : texture_cache)
+			{
+				if (!entry.locked) continue;
+
+				const bool inside_block =
+					entry.protected_block_start <= address &&
+					entry.protected_block_sz > (address - entry.protected_block_start);
+
+				if (!inside_block) continue;
+
+				LOG_NOTICE(RSX, "Texture object is dirty! %d", entry.gl_id);
+				unlock_gl_object(entry);
+
+				invalidate_rtts_in_range((u32)entry.data_addr, entry.block_sz);
+
+				entry.deleted = true;
+				handled = true;
+			}
+
+			if (handled) return true;
+
+			for (cached_rtt &rtt : rtt_cache)
+			{
+				if (!rtt.data_addr || rtt.is_dirty) continue;
+				if (!rtt.locked) continue;
+
+				const u32 page_base = ((u32)(rtt.data_addr)) & ~(4096 - 1);
+				const u32 page_span = align(rtt.block_sz, 4096);
+
+				if ((u64)address < page_base) continue;
+				if ((address - page_base) >= page_span) continue;
+
+				LOG_NOTICE(RSX, "Dirty non-texture RTT FOUND! addr=0x%X", rtt.data_addr);
+				rtt.is_dirty = true;
+
+				unlock_memory_region(page_base, page_span);
+				rtt.locked = false;
+
+				handled = true;
+			}
+
+			return handled;
+		}
+
+		/**
+		 * Public entry point for caching a render target: the dimensions and internal format
+		 * are taken from gl_texture and the work is done by save_rtt.
+		 */
+		void save_render_target(u32 texaddr, u32 range, gl::texture &gl_texture)
+		{
+			save_rtt(
+				texaddr,
+				range,
+				gl_texture.width(),
+				gl_texture.height(),
+				(GLenum)gl_texture.get_internal_format(),
+				gl_texture);
+		}
+
+		/**
+		 * Prepares the range [base, limit) for direct access.
+		 * Cached textures whose data overlaps the range are unlocked and flagged as deleted.
+		 * Cached textures that merely share a protected page with the range get that single
+		 * page unlocked; those pages are returned so that they can be locked again later.
+		 * NOTE(review): entries are not filtered by their locked/deleted state before they are unlocked.
+		 */
+		std::vector<invalid_cache_area> find_and_invalidate_in_range(u32 base, u32 limit)
+		{
+			/**
+			* Sometimes buffers can share physical pages.
+			* Return objects if we really encroach on texture
+			*/
+
+			std::vector<invalid_cache_area> result;
+
+			for (gl_cached_texture &obj : texture_cache)
+			{
+				//Check for memory area overlap. unlock page(s) if needed and add this index to array.
+				//Axis separation test
+				const u32 &block_start = obj.protected_block_start;
+				const u32 block_end = block_start + obj.protected_block_sz;
+
+				if (limit < block_start) continue;
+				if (base > block_end) continue;
+
+				//First pass: test against the page-aligned protected range
+				u32 min_separation = (limit - base) + obj.protected_block_sz;
+				u32 range_limit = (block_end > limit) ? block_end : limit;
+				u32 range_base = (block_start < base) ? block_start : base;
+
+				u32 actual_separation = (range_limit - range_base);
+
+				if (actual_separation < min_separation)
+				{
+					//Second pass: repeat the test against the actual texture data range
+					const u32 texture_start = (u32)obj.data_addr;
+					const u32 texture_end = texture_start + obj.block_sz;
+
+					min_separation = (limit - base) + obj.block_sz;
+					range_limit = (texture_end > limit) ? texture_end : limit;
+					range_base = (texture_start < base) ? texture_start : base;
+
+					actual_separation = (range_limit - range_base);
+
+					if (actual_separation < min_separation)
+					{
+						//Texture area is invalidated!
+						unlock_gl_object(obj);
+						obj.deleted = true;
+
+						continue;
+					}
+
+					//Overlap in this case will be at most 1 page...
+					//Pick the first or the last protected page depending on which side the range lies
+					invalid_cache_area invalid = { 0 };
+					if (base < obj.data_addr)
+						invalid.block_base = obj.protected_block_start;
+					else
+						invalid.block_base = obj.protected_block_start + obj.protected_block_sz - 4096;
+
+					invalid.block_sz = 4096;
+					unlock_memory_region(invalid.block_base, invalid.block_sz);
+					result.push_back(invalid);
+				}
+			}
+
+			return result;
+		}
+
+		//Re-applies the memory protection to every area in the given list
+		void lock_invalidated_ranges(std::vector<invalid_cache_area> invalid)
+		{
+			for (const invalid_cache_area &area : invalid)
+			{
+				lock_memory_region(area.block_base, area.block_sz);
+			}
+		}
+
+		/**
+		 * Removes (see remove_obj) the cached textures that match the given block exactly.
+		 * Seems that the rsx only 'reads' full texture objects, so only the address and the
+		 * size are compared; partial overlaps are not looked for.
+		 */
+		void remove_in_range(u32 texaddr, u32 range)
+		{
+			for (gl_cached_texture &entry : texture_cache)
+			{
+				const bool exact_match = (entry.data_addr == texaddr && entry.block_sz == range);
+
+				if (exact_match)
+					remove_obj(entry);
+			}
+		}
+
+		/**
+		 * Fills tex from the cached render target copy covering the given address.
+		 * The covered range is derived from the texture height and the pitch. The copied
+		 * area is limited to the smaller of the cached and the destination dimensions.
+		 * Returns false when no clean cached copy exists; the caller then has to fetch the data itself.
+		 */
+		bool explicit_writeback(gl::texture &tex, const u32 address, const u32 pitch)
+		{
+			const u32 range = tex.height() * pitch;
+			cached_rtt *rtt = find_cached_rtt(address, range);
+
+			//No valid object found in cache
+			if (!rtt || rtt->is_dirty)
+				return false;
+
+			u32 copy_w = rtt->current_width;
+			u32 copy_h = rtt->current_height;
+
+			if ((u32)tex.width() < copy_w) copy_w = (u32)tex.width();
+			if ((u32)tex.height() < copy_h) copy_h = (u32)tex.height();
+
+			//TODO: Image reinterpretation e.g read back rgba data as depth texture and vice-versa
+
+			__glcheck glCopyImageSubData(rtt->copy_glid, GL_TEXTURE_2D, 0, 0, 0, 0,
+				tex.id(), GL_TEXTURE_2D, 0, 0, 0, 0,
+				copy_w, copy_h, 1);
+
+			return true;
+		}
+	};
+}
--- a/rpcs3/Emu/RSX/GL/rsx_gl_texture.h
+++ b/rpcs3/Emu/RSX/GL/rsx_gl_texture.h
@ -56,6 +56,7 @@ namespace rsx
 			void remove();

 			void set_target(u32 target) { m_target = target; }
+			//Replaces the stored GL object id without creating or destroying any GL object
+			void set_id(u32 id) { m_id = id;  }
 			u32 id() const;
 		};
 	}
--- a/rpcs3/GLGSRender.vcxproj
+++ b/rpcs3/GLGSRender.vcxproj
@ -79,6 +79,7 @@
    <ClInclude Include="Emu\RSX\GL\GLProgramBuffer.h" />
    <ClInclude Include="Emu\RSX\GL\GLVertexProgram.h" />
    <ClInclude Include="Emu\RSX\GL\gl_helpers.h" />
+    <ClInclude Include="Emu\RSX\GL\gl_texture_cache.h" />
    <ClInclude Include="Emu\RSX\GL\OpenGL.h" />
    <ClInclude Include="Emu\RSX\GL\rsx_gl_texture.h" />
  </ItemGroup>