mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-25 04:02:42 +01:00
Move align helpers to util/asm.hpp
Also add some files: GLTextureCache.cpp VKTextureCache.cpp
This commit is contained in:
parent
d254a5736b
commit
eec11bfba9
@ -10,6 +10,8 @@
|
|||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
using namespace std::literals::string_literals;
|
using namespace std::literals::string_literals;
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
@ -1725,7 +1727,7 @@ u64 fs::get_dir_size(const std::string& path, u64 rounding_alignment)
|
|||||||
|
|
||||||
if (!entry.is_directory)
|
if (!entry.is_directory)
|
||||||
{
|
{
|
||||||
result += ::align(entry.size, rounding_alignment);
|
result += utils::align(entry.size, rounding_alignment);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include "util/logs.hpp"
|
#include "util/logs.hpp"
|
||||||
#include "mutex.h"
|
#include "mutex.h"
|
||||||
#include "util/vm.hpp"
|
#include "util/vm.hpp"
|
||||||
|
#include "util/asm.hpp"
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
|
|
||||||
@ -52,8 +53,8 @@ static u8* add_jit_memory(usz size, uint align)
|
|||||||
// Simple allocation by incrementing pointer to the next free data
|
// Simple allocation by incrementing pointer to the next free data
|
||||||
const u64 pos = Ctr.atomic_op([&](u64& ctr) -> u64
|
const u64 pos = Ctr.atomic_op([&](u64& ctr) -> u64
|
||||||
{
|
{
|
||||||
const u64 _pos = ::align(ctr & 0xffff'ffff, align);
|
const u64 _pos = utils::align(ctr & 0xffff'ffff, align);
|
||||||
const u64 _new = ::align(_pos + size, align);
|
const u64 _new = utils::align(_pos + size, align);
|
||||||
|
|
||||||
if (_new > 0x40000000) [[unlikely]]
|
if (_new > 0x40000000) [[unlikely]]
|
||||||
{
|
{
|
||||||
@ -69,7 +70,7 @@ static u8* add_jit_memory(usz size, uint align)
|
|||||||
// Check the necessity to commit more memory
|
// Check the necessity to commit more memory
|
||||||
if (_new > olda) [[unlikely]]
|
if (_new > olda) [[unlikely]]
|
||||||
{
|
{
|
||||||
newa = ::align(_new, 0x200000);
|
newa = utils::align(_new, 0x200000);
|
||||||
}
|
}
|
||||||
|
|
||||||
ctr += _new - (ctr & 0xffff'ffff);
|
ctr += _new - (ctr & 0xffff'ffff);
|
||||||
@ -223,7 +224,7 @@ asmjit::Runtime& asmjit::get_global_runtime()
|
|||||||
return asmjit::kErrorNoCodeGenerated;
|
return asmjit::kErrorNoCodeGenerated;
|
||||||
}
|
}
|
||||||
|
|
||||||
void* p = m_pos.fetch_add(::align(codeSize, 4096));
|
void* p = m_pos.fetch_add(utils::align(codeSize, 4096));
|
||||||
if (!p || m_pos > m_max) [[unlikely]]
|
if (!p || m_pos > m_max) [[unlikely]]
|
||||||
{
|
{
|
||||||
*dst = nullptr;
|
*dst = nullptr;
|
||||||
@ -237,7 +238,7 @@ asmjit::Runtime& asmjit::get_global_runtime()
|
|||||||
return asmjit::kErrorInvalidState;
|
return asmjit::kErrorInvalidState;
|
||||||
}
|
}
|
||||||
|
|
||||||
utils::memory_protect(p, ::align(codeSize, 4096), utils::protection::rx);
|
utils::memory_protect(p, utils::align(codeSize, 4096), utils::protection::rx);
|
||||||
flush(p, relocSize);
|
flush(p, relocSize);
|
||||||
*dst = p;
|
*dst = p;
|
||||||
|
|
||||||
@ -351,8 +352,8 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u64 olda = ::align(oldp, align);
|
const u64 olda = utils::align(oldp, align);
|
||||||
const u64 newp = ::align(olda + size, align);
|
const u64 newp = utils::align(olda + size, align);
|
||||||
|
|
||||||
if ((newp - 1) / c_max_size != oldp / c_max_size)
|
if ((newp - 1) / c_max_size != oldp / c_max_size)
|
||||||
{
|
{
|
||||||
@ -363,8 +364,8 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||||||
if ((oldp - 1) / c_page_size != (newp - 1) / c_page_size)
|
if ((oldp - 1) / c_page_size != (newp - 1) / c_page_size)
|
||||||
{
|
{
|
||||||
// Allocate pages on demand
|
// Allocate pages on demand
|
||||||
const u64 pagea = ::align(oldp, c_page_size);
|
const u64 pagea = utils::align(oldp, c_page_size);
|
||||||
const u64 psize = ::align(newp - pagea, c_page_size);
|
const u64 psize = utils::align(newp - pagea, c_page_size);
|
||||||
utils::memory_commit(this->ptr + pagea, psize, prot);
|
utils::memory_commit(this->ptr + pagea, psize, prot);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
#include "util/v128.hpp"
|
#include "util/v128.hpp"
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
LOG_CHANNEL(edat_log, "EDAT");
|
LOG_CHANNEL(edat_log, "EDAT");
|
||||||
|
|
||||||
@ -949,7 +950,7 @@ bool EDATADecrypter::ReadHeader()
|
|||||||
}*/
|
}*/
|
||||||
|
|
||||||
file_size = edatHeader.file_size;
|
file_size = edatHeader.file_size;
|
||||||
total_blocks = ::aligned_div(edatHeader.file_size, edatHeader.block_size);
|
total_blocks = utils::aligned_div(edatHeader.file_size, edatHeader.block_size);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -962,7 +963,7 @@ u64 EDATADecrypter::ReadData(u64 pos, u8* data, u64 size)
|
|||||||
// now we need to offset things to account for the actual 'range' requested
|
// now we need to offset things to account for the actual 'range' requested
|
||||||
const u64 startOffset = pos % edatHeader.block_size;
|
const u64 startOffset = pos % edatHeader.block_size;
|
||||||
|
|
||||||
const u32 num_blocks = static_cast<u32>(::aligned_div(startOffset + size, edatHeader.block_size));
|
const u32 num_blocks = static_cast<u32>(utils::aligned_div(startOffset + size, edatHeader.block_size));
|
||||||
const u64 bufSize = num_blocks*edatHeader.block_size;
|
const u64 bufSize = num_blocks*edatHeader.block_size;
|
||||||
if (data_buf_size < (bufSize))
|
if (data_buf_size < (bufSize))
|
||||||
{
|
{
|
||||||
|
@ -428,6 +428,7 @@ target_sources(rpcs3_emu PRIVATE
|
|||||||
RSX/GL/GLTexture.cpp
|
RSX/GL/GLTexture.cpp
|
||||||
RSX/GL/GLVertexBuffers.cpp
|
RSX/GL/GLVertexBuffers.cpp
|
||||||
RSX/GL/GLVertexProgram.cpp
|
RSX/GL/GLVertexProgram.cpp
|
||||||
|
RSX/GL/GLTextureCache.cpp
|
||||||
RSX/GL/OpenGL.cpp
|
RSX/GL/OpenGL.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -454,6 +455,7 @@ if(TARGET 3rdparty_vulkan)
|
|||||||
RSX/VK/VKTexture.cpp
|
RSX/VK/VKTexture.cpp
|
||||||
RSX/VK/VKVertexBuffers.cpp
|
RSX/VK/VKVertexBuffers.cpp
|
||||||
RSX/VK/VKVertexProgram.cpp
|
RSX/VK/VKVertexProgram.cpp
|
||||||
|
RSX/VK/VKTextureCache.cpp
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
#include "cellPamf.h"
|
#include "cellPamf.h"
|
||||||
#include "cellDmux.h"
|
#include "cellDmux.h"
|
||||||
|
|
||||||
#include <thread>
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
LOG_CHANNEL(cellDmux);
|
LOG_CHANNEL(cellDmux);
|
||||||
|
|
||||||
@ -753,9 +753,9 @@ PesHeader::PesHeader(DemuxerStream& stream)
|
|||||||
}
|
}
|
||||||
|
|
||||||
ElementaryStream::ElementaryStream(Demuxer* dmux, u32 addr, u32 size, u32 fidMajor, u32 fidMinor, u32 sup1, u32 sup2, vm::ptr<CellDmuxCbEsMsg> cbFunc, u32 cbArg, u32 spec)
|
ElementaryStream::ElementaryStream(Demuxer* dmux, u32 addr, u32 size, u32 fidMajor, u32 fidMinor, u32 sup1, u32 sup2, vm::ptr<CellDmuxCbEsMsg> cbFunc, u32 cbArg, u32 spec)
|
||||||
: put(align(addr, 128))
|
: put(utils::align(addr, 128))
|
||||||
, dmux(dmux)
|
, dmux(dmux)
|
||||||
, memAddr(align(addr, 128))
|
, memAddr(utils::align(addr, 128))
|
||||||
, memSize(size - (addr - memAddr))
|
, memSize(size - (addr - memAddr))
|
||||||
, fidMajor(fidMajor)
|
, fidMajor(fidMajor)
|
||||||
, fidMinor(fidMinor)
|
, fidMinor(fidMinor)
|
||||||
@ -847,7 +847,7 @@ void ElementaryStream::push_au(u32 size, u64 dts, u64 pts, u64 userdata, bool ra
|
|||||||
|
|
||||||
addr = put;
|
addr = put;
|
||||||
|
|
||||||
put = align(put + 128 + size, 128);
|
put = utils::align(put + 128 + size, 128);
|
||||||
|
|
||||||
put_count++;
|
put_count++;
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,8 @@
|
|||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
LOG_CHANNEL(cellSaveData);
|
LOG_CHANNEL(cellSaveData);
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
@ -953,7 +955,7 @@ static NEVER_INLINE error_code savedata_op(ppu_thread& ppu, u32 operation, u32 v
|
|||||||
{
|
{
|
||||||
if (!file.is_directory)
|
if (!file.is_directory)
|
||||||
{
|
{
|
||||||
size_bytes += ::align(file.size, 1024);
|
size_bytes += utils::align(file.size, 1024);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1334,7 +1336,7 @@ static NEVER_INLINE error_code savedata_op(ppu_thread& ppu, u32 operation, u32 v
|
|||||||
{
|
{
|
||||||
statGet->fileNum++;
|
statGet->fileNum++;
|
||||||
|
|
||||||
size_bytes += ::align(entry.size, 1024); // firmware rounds this value up
|
size_bytes += utils::align(entry.size, 1024); // firmware rounds this value up
|
||||||
|
|
||||||
if (statGet->fileListNum >= setBuf->fileListMax)
|
if (statGet->fileListNum >= setBuf->fileListMax)
|
||||||
continue;
|
continue;
|
||||||
@ -1892,7 +1894,7 @@ static NEVER_INLINE error_code savedata_op(ppu_thread& ppu, u32 operation, u32 v
|
|||||||
// add file list per FS order to PARAM.SFO
|
// add file list per FS order to PARAM.SFO
|
||||||
std::string final_blist;
|
std::string final_blist;
|
||||||
final_blist = fmt::merge(blist, "/");
|
final_blist = fmt::merge(blist, "/");
|
||||||
psf::assign(psf, "RPCS3_BLIST", psf::string(::align(::size32(final_blist) + 1, 4), final_blist));
|
psf::assign(psf, "RPCS3_BLIST", psf::string(utils::align(::size32(final_blist) + 1, 4), final_blist));
|
||||||
|
|
||||||
// Write all files in temporary directory
|
// Write all files in temporary directory
|
||||||
auto& fsfo = all_files["PARAM.SFO"];
|
auto& fsfo = all_files["PARAM.SFO"];
|
||||||
|
@ -34,6 +34,7 @@ extern "C"
|
|||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include "Utilities/lockless.h"
|
#include "Utilities/lockless.h"
|
||||||
#include <variant>
|
#include <variant>
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
std::mutex g_mutex_avcodec_open2;
|
std::mutex g_mutex_avcodec_open2;
|
||||||
|
|
||||||
@ -879,7 +880,7 @@ error_code cellVdecGetPicture(u32 handle, vm::cptr<CellVdecPicFormat> format, vm
|
|||||||
|
|
||||||
sws_scale(vdec->sws, in_data, in_line, 0, h, out_data, out_line);
|
sws_scale(vdec->sws, in_data, in_line, 0, h, out_data, out_line);
|
||||||
|
|
||||||
//const u32 buf_size = align(av_image_get_buffer_size(vdec->ctx->pix_fmt, vdec->ctx->width, vdec->ctx->height, 1), 128);
|
//const u32 buf_size = utils::align(av_image_get_buffer_size(vdec->ctx->pix_fmt, vdec->ctx->width, vdec->ctx->height, 1), 128);
|
||||||
|
|
||||||
//// TODO: zero padding bytes
|
//// TODO: zero padding bytes
|
||||||
|
|
||||||
@ -974,7 +975,7 @@ error_code cellVdecGetPicItem(u32 handle, vm::pptr<CellVdecPicItem> picItem)
|
|||||||
info->startAddr = 0x00000123; // invalid value (no address for picture)
|
info->startAddr = 0x00000123; // invalid value (no address for picture)
|
||||||
const int buffer_size = av_image_get_buffer_size(vdec->ctx->pix_fmt, vdec->ctx->width, vdec->ctx->height, 1);
|
const int buffer_size = av_image_get_buffer_size(vdec->ctx->pix_fmt, vdec->ctx->width, vdec->ctx->height, 1);
|
||||||
ensure(buffer_size >= 0);
|
ensure(buffer_size >= 0);
|
||||||
info->size = align<u32>(buffer_size, 128);
|
info->size = utils::align<u32>(buffer_size, 128);
|
||||||
info->auNum = 1;
|
info->auNum = 1;
|
||||||
info->auPts[0].lower = static_cast<u32>(pts);
|
info->auPts[0].lower = static_cast<u32>(pts);
|
||||||
info->auPts[0].upper = static_cast<u32>(pts >> 32);
|
info->auPts[0].upper = static_cast<u32>(pts >> 32);
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
#include "Emu/Cell/lv2/sys_process.h"
|
#include "Emu/Cell/lv2/sys_process.h"
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
LOG_CHANNEL(sceNpTrophy);
|
LOG_CHANNEL(sceNpTrophy);
|
||||||
|
|
||||||
@ -1109,7 +1110,7 @@ error_code sceNpTrophyGetGameProgress(u32 context, u32 handle, vm::ptr<s32> perc
|
|||||||
const u32 trp_count = ctxt->tropusr->GetTrophiesCount();
|
const u32 trp_count = ctxt->tropusr->GetTrophiesCount();
|
||||||
|
|
||||||
// Round result to nearest (TODO: Check 0 trophies)
|
// Round result to nearest (TODO: Check 0 trophies)
|
||||||
*percentage = trp_count ? ::rounded_div(unlocked * 100, trp_count) : 0;
|
*percentage = trp_count ? utils::rounded_div(unlocked * 100, trp_count) : 0;
|
||||||
|
|
||||||
if (trp_count == 0 || trp_count > 128)
|
if (trp_count == 0 || trp_count > 128)
|
||||||
{
|
{
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
LOG_CHANNEL(ppu_loader);
|
LOG_CHANNEL(ppu_loader);
|
||||||
|
|
||||||
@ -263,7 +264,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Set memory protection to read-only
|
// Set memory protection to read-only
|
||||||
vm::page_protect(ppu_function_manager::addr, ::align(::size32(hle_funcs) * 8, 0x1000), 0, 0, vm::page_writable);
|
vm::page_protect(ppu_function_manager::addr, utils::align(::size32(hle_funcs) * 8, 0x1000), 0, 0, vm::page_writable);
|
||||||
|
|
||||||
// Initialize function names
|
// Initialize function names
|
||||||
const bool is_first = g_ppu_function_names.empty();
|
const bool is_first = g_ppu_function_names.empty();
|
||||||
@ -319,7 +320,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const u32 next = ::align(alloc_addr, variable.second.align);
|
const u32 next = utils::align(alloc_addr, variable.second.align);
|
||||||
const u32 end = next + variable.second.size;
|
const u32 end = next + variable.second.size;
|
||||||
|
|
||||||
if (!next || (end >> 12 != alloc_addr >> 12))
|
if (!next || (end >> 12 != alloc_addr >> 12))
|
||||||
@ -1500,7 +1501,7 @@ void ppu_load_exec(const ppu_exec_object& elf)
|
|||||||
|
|
||||||
for (const auto& arg : Emu.argv)
|
for (const auto& arg : Emu.argv)
|
||||||
{
|
{
|
||||||
const u32 arg_size = ::align(::size32(arg) + 1, 0x10);
|
const u32 arg_size = utils::align(::size32(arg) + 1, 0x10);
|
||||||
const u32 arg_addr = vm::alloc(arg_size, vm::main);
|
const u32 arg_addr = vm::alloc(arg_size, vm::main);
|
||||||
|
|
||||||
std::memcpy(vm::base(arg_addr), arg.data(), arg_size);
|
std::memcpy(vm::base(arg_addr), arg.data(), arg_size);
|
||||||
@ -1513,7 +1514,7 @@ void ppu_load_exec(const ppu_exec_object& elf)
|
|||||||
|
|
||||||
for (const auto& arg : Emu.envp)
|
for (const auto& arg : Emu.envp)
|
||||||
{
|
{
|
||||||
const u32 arg_size = ::align(::size32(arg) + 1, 0x10);
|
const u32 arg_size = utils::align(::size32(arg) + 1, 0x10);
|
||||||
const u32 arg_addr = vm::alloc(arg_size, vm::main);
|
const u32 arg_addr = vm::alloc(arg_size, vm::main);
|
||||||
|
|
||||||
std::memcpy(vm::base(arg_addr), arg.data(), arg_size);
|
std::memcpy(vm::base(arg_addr), arg.data(), arg_size);
|
||||||
@ -1533,7 +1534,7 @@ void ppu_load_exec(const ppu_exec_object& elf)
|
|||||||
case 0x70: primary_stacksize = 1024 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_1M
|
case 0x70: primary_stacksize = 1024 * 1024; break; // SYS_PROCESS_PRIMARY_STACK_SIZE_1M
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
primary_stacksize = ::align<u32>(std::clamp<u32>(sz, 0x10000, 0x100000), 4096);
|
primary_stacksize = utils::align<u32>(std::clamp<u32>(sz, 0x10000, 0x100000), 4096);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1636,7 +1637,7 @@ void ppu_load_exec(const ppu_exec_object& elf)
|
|||||||
if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz && (prog.p_flags & 0x2) == 0u /* W */)
|
if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz && (prog.p_flags & 0x2) == 0u /* W */)
|
||||||
{
|
{
|
||||||
// Set memory protection to read-only when necessary
|
// Set memory protection to read-only when necessary
|
||||||
ensure(vm::page_protect(addr, ::align(size, 0x1000), 0, 0, vm::page_writable));
|
ensure(vm::page_protect(addr, utils::align(size, 0x1000), 0, 0, vm::page_writable));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -242,7 +242,7 @@ extern void ppu_register_range(u32 addr, u32 size)
|
|||||||
|
|
||||||
// Register executable range at
|
// Register executable range at
|
||||||
utils::memory_commit(&ppu_ref(addr), size * 2, utils::protection::rw);
|
utils::memory_commit(&ppu_ref(addr), size * 2, utils::protection::rw);
|
||||||
vm::page_protect(addr, align(size, 0x10000), 0, vm::page_executable);
|
vm::page_protect(addr, utils::align(size, 0x10000), 0, vm::page_executable);
|
||||||
|
|
||||||
const u64 fallback = g_cfg.core.ppu_decoder == ppu_decoder_type::llvm ? reinterpret_cast<uptr>(ppu_recompiler_fallback) : reinterpret_cast<uptr>(ppu_fallback);
|
const u64 fallback = g_cfg.core.ppu_decoder == ppu_decoder_type::llvm ? reinterpret_cast<uptr>(ppu_recompiler_fallback) : reinterpret_cast<uptr>(ppu_fallback);
|
||||||
|
|
||||||
@ -1098,7 +1098,7 @@ u32 ppu_thread::stack_push(u32 size, u32 align_v)
|
|||||||
ppu_thread& context = static_cast<ppu_thread&>(*cpu);
|
ppu_thread& context = static_cast<ppu_thread&>(*cpu);
|
||||||
|
|
||||||
const u32 old_pos = vm::cast(context.gpr[1]);
|
const u32 old_pos = vm::cast(context.gpr[1]);
|
||||||
context.gpr[1] -= align(size + 4, 8); // room minimal possible size
|
context.gpr[1] -= utils::align(size + 4, 8); // room minimal possible size
|
||||||
context.gpr[1] &= ~(u64{align_v} - 1); // fix stack alignment
|
context.gpr[1] &= ~(u64{align_v} - 1); // fix stack alignment
|
||||||
|
|
||||||
if (old_pos >= context.stack_addr && old_pos < context.stack_addr + context.stack_size && context.gpr[1] < context.stack_addr)
|
if (old_pos >= context.stack_addr && old_pos < context.stack_addr + context.stack_size && context.gpr[1] < context.stack_addr)
|
||||||
|
@ -288,7 +288,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
|||||||
words_align = 64;
|
words_align = 64;
|
||||||
|
|
||||||
const u32 starta = start & -64;
|
const u32 starta = start & -64;
|
||||||
const u32 enda = ::align(end, 64);
|
const u32 enda = utils::align(end, 64);
|
||||||
const u32 sizea = (enda - starta) / 64;
|
const u32 sizea = (enda - starta) / 64;
|
||||||
ensure(sizea);
|
ensure(sizea);
|
||||||
|
|
||||||
@ -369,7 +369,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
|||||||
words_align = 32;
|
words_align = 32;
|
||||||
|
|
||||||
const u32 starta = start & -32;
|
const u32 starta = start & -32;
|
||||||
const u32 enda = ::align(end, 32);
|
const u32 enda = utils::align(end, 32);
|
||||||
const u32 sizea = (enda - starta) / 32;
|
const u32 sizea = (enda - starta) / 32;
|
||||||
ensure(sizea);
|
ensure(sizea);
|
||||||
|
|
||||||
@ -491,7 +491,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
|||||||
words_align = 32;
|
words_align = 32;
|
||||||
|
|
||||||
const u32 starta = start & -32;
|
const u32 starta = start & -32;
|
||||||
const u32 enda = ::align(end, 32);
|
const u32 enda = utils::align(end, 32);
|
||||||
const u32 sizea = (enda - starta) / 32;
|
const u32 sizea = (enda - starta) / 32;
|
||||||
ensure(sizea);
|
ensure(sizea);
|
||||||
|
|
||||||
|
@ -2338,7 +2338,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
|
|||||||
}
|
}
|
||||||
|
|
||||||
u32 range_addr = eal & -128;
|
u32 range_addr = eal & -128;
|
||||||
u32 range_end = ::align(eal + size, 128);
|
u32 range_end = utils::align(eal + size, 128);
|
||||||
|
|
||||||
// Handle the case of crossing 64K page borders (TODO: maybe split in 4K fragments?)
|
// Handle the case of crossing 64K page borders (TODO: maybe split in 4K fragments?)
|
||||||
if (range_addr >> 16 != (range_end - 1) >> 16)
|
if (range_addr >> 16 != (range_end - 1) >> 16)
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include "Emu/IdManager.h"
|
#include "Emu/IdManager.h"
|
||||||
|
|
||||||
#include "util/vm.hpp"
|
#include "util/vm.hpp"
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
LOG_CHANNEL(sys_memory);
|
LOG_CHANNEL(sys_memory);
|
||||||
|
|
||||||
@ -57,7 +58,7 @@ error_code sys_memory_allocate(cpu_thread& cpu, u32 size, u64 flags, vm::ptr<u32
|
|||||||
return CELL_ENOMEM;
|
return CELL_ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (const auto area = vm::reserve_map(align == 0x10000 ? vm::user64k : vm::user1m, 0, ::align(size, 0x10000000), 0x401))
|
if (const auto area = vm::reserve_map(align == 0x10000 ? vm::user64k : vm::user1m, 0, utils::align(size, 0x10000000), 0x401))
|
||||||
{
|
{
|
||||||
if (u32 addr = area->alloc(size, nullptr, align))
|
if (u32 addr = area->alloc(size, nullptr, align))
|
||||||
{
|
{
|
||||||
@ -128,7 +129,7 @@ error_code sys_memory_allocate_from_container(cpu_thread& cpu, u32 size, u32 cid
|
|||||||
return ct.ret;
|
return ct.ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (const auto area = vm::reserve_map(align == 0x10000 ? vm::user64k : vm::user1m, 0, ::align(size, 0x10000000), 0x401))
|
if (const auto area = vm::reserve_map(align == 0x10000 ? vm::user64k : vm::user1m, 0, utils::align(size, 0x10000000), 0x401))
|
||||||
{
|
{
|
||||||
if (u32 addr = area->alloc(size))
|
if (u32 addr = area->alloc(size))
|
||||||
{
|
{
|
||||||
|
@ -12,6 +12,8 @@
|
|||||||
#include "sys_mmapper.h"
|
#include "sys_mmapper.h"
|
||||||
#include "sys_memory.h"
|
#include "sys_memory.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
LOG_CHANNEL(sys_ppu_thread);
|
LOG_CHANNEL(sys_ppu_thread);
|
||||||
|
|
||||||
// Simple structure to cleanup previous thread, because can't remove its own thread
|
// Simple structure to cleanup previous thread, because can't remove its own thread
|
||||||
@ -388,7 +390,7 @@ error_code _sys_ppu_thread_create(ppu_thread& ppu, vm::ptr<u64> thread_id, vm::p
|
|||||||
g_fxo->get<ppu_thread_cleaner>()->clean(0);
|
g_fxo->get<ppu_thread_cleaner>()->clean(0);
|
||||||
|
|
||||||
// Compute actual stack size and allocate
|
// Compute actual stack size and allocate
|
||||||
const u32 stack_size = ::align<u32>(std::max<u32>(_stacksz, 4096), 4096);
|
const u32 stack_size = utils::align<u32>(std::max<u32>(_stacksz, 4096), 4096);
|
||||||
|
|
||||||
const auto dct = g_fxo->get<lv2_memory_container>();
|
const auto dct = g_fxo->get<lv2_memory_container>();
|
||||||
|
|
||||||
|
@ -99,7 +99,7 @@ void sys_spu_image::load(const fs::file& stream)
|
|||||||
this->nsegs = 0;
|
this->nsegs = 0;
|
||||||
this->segs = vm::null;
|
this->segs = vm::null;
|
||||||
|
|
||||||
vm::page_protect(segs.addr(), ::align(mem_size, 4096), 0, 0, vm::page_writable);
|
vm::page_protect(segs.addr(), utils::align(mem_size, 4096), 0, 0, vm::page_writable);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sys_spu_image::free()
|
void sys_spu_image::free()
|
||||||
|
@ -974,13 +974,13 @@ namespace vm
|
|||||||
|
|
||||||
if (state & page_1m_size)
|
if (state & page_1m_size)
|
||||||
{
|
{
|
||||||
i = ::align(i + 1, 0x100000 / 4096);
|
i = utils::align(i + 1, 0x100000 / 4096);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state & page_64k_size)
|
if (state & page_64k_size)
|
||||||
{
|
{
|
||||||
i = ::align(i + 1, 0x10000 / 4096);
|
i = utils::align(i + 1, 0x10000 / 4096);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1177,7 +1177,7 @@ namespace vm
|
|||||||
const u32 min_page_size = flags & 0x100 ? 0x1000 : 0x10000;
|
const u32 min_page_size = flags & 0x100 ? 0x1000 : 0x10000;
|
||||||
|
|
||||||
// Align to minimal page size
|
// Align to minimal page size
|
||||||
const u32 size = ::align(orig_size, min_page_size) + (flags & 0x10 ? 0x2000 : 0);
|
const u32 size = utils::align(orig_size, min_page_size) + (flags & 0x10 ? 0x2000 : 0);
|
||||||
|
|
||||||
// Check alignment (it's page allocation, so passing small values there is just silly)
|
// Check alignment (it's page allocation, so passing small values there is just silly)
|
||||||
if (align < min_page_size || align != (0x80000000u >> std::countl_zero(align)))
|
if (align < min_page_size || align != (0x80000000u >> std::countl_zero(align)))
|
||||||
@ -1217,7 +1217,7 @@ namespace vm
|
|||||||
vm::writer_lock lock(0);
|
vm::writer_lock lock(0);
|
||||||
|
|
||||||
// Search for an appropriate place (unoptimized)
|
// Search for an appropriate place (unoptimized)
|
||||||
for (u32 addr = ::align(this->addr, align); u64{addr} + size <= u64{this->addr} + this->size; addr += align)
|
for (u32 addr = utils::align(this->addr, align); u64{addr} + size <= u64{this->addr} + this->size; addr += align)
|
||||||
{
|
{
|
||||||
if (try_alloc(addr, pflags, size, std::move(shm)))
|
if (try_alloc(addr, pflags, size, std::move(shm)))
|
||||||
{
|
{
|
||||||
@ -1240,7 +1240,7 @@ namespace vm
|
|||||||
const u32 min_page_size = flags & 0x100 ? 0x1000 : 0x10000;
|
const u32 min_page_size = flags & 0x100 ? 0x1000 : 0x10000;
|
||||||
|
|
||||||
// Align to minimal page size
|
// Align to minimal page size
|
||||||
const u32 size = ::align(orig_size, min_page_size);
|
const u32 size = utils::align(orig_size, min_page_size);
|
||||||
|
|
||||||
// return if addr or size is invalid
|
// return if addr or size is invalid
|
||||||
if (!size || addr < this->addr || orig_size > size || addr + u64{size} > this->addr + u64{this->size} || flags & 0x10)
|
if (!size || addr < this->addr || orig_size > size || addr + u64{size} > this->addr + u64{this->size} || flags & 0x10)
|
||||||
@ -1410,7 +1410,7 @@ namespace vm
|
|||||||
|
|
||||||
static std::shared_ptr<block_t> _find_map(u32 size, u32 align, u64 flags)
|
static std::shared_ptr<block_t> _find_map(u32 size, u32 align, u64 flags)
|
||||||
{
|
{
|
||||||
for (u32 addr = ::align<u32>(0x20000000, align); addr - 1 < 0xC0000000 - 1; addr += align)
|
for (u32 addr = utils::align<u32>(0x20000000, align); addr - 1 < 0xC0000000 - 1; addr += align)
|
||||||
{
|
{
|
||||||
if (_test_map(addr, size))
|
if (_test_map(addr, size))
|
||||||
{
|
{
|
||||||
@ -1485,7 +1485,7 @@ namespace vm
|
|||||||
vm::writer_lock lock(0);
|
vm::writer_lock lock(0);
|
||||||
|
|
||||||
// Align to minimal page size
|
// Align to minimal page size
|
||||||
const u32 size = ::align(orig_size, 0x10000);
|
const u32 size = utils::align(orig_size, 0x10000);
|
||||||
|
|
||||||
// Check alignment
|
// Check alignment
|
||||||
if (align < 0x10000 || align != (0x80000000u >> std::countl_zero(align)))
|
if (align < 0x10000 || align != (0x80000000u >> std::countl_zero(align)))
|
||||||
|
@ -32,6 +32,8 @@
|
|||||||
#include <net/if_dl.h>
|
#include <net/if_dl.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
LOG_CHANNEL(sys_net);
|
LOG_CHANNEL(sys_net);
|
||||||
LOG_CHANNEL(sceNp2);
|
LOG_CHANNEL(sceNp2);
|
||||||
LOG_CHANNEL(sceNp);
|
LOG_CHANNEL(sceNp);
|
||||||
@ -384,7 +386,7 @@ vm::addr_t np_handler::allocate(u32 size)
|
|||||||
return vm::cast(static_cast<u64>(0));
|
return vm::cast(static_cast<u64>(0));
|
||||||
|
|
||||||
// Align allocs
|
// Align allocs
|
||||||
const u32 alloc_size = ::align(size, 4);
|
const u32 alloc_size = utils::align(size, 4);
|
||||||
if (alloc_size > mpool_avail)
|
if (alloc_size > mpool_avail)
|
||||||
{
|
{
|
||||||
sceNp.error("Not enough memory available in NP pool!");
|
sceNp.error("Not enough memory available in NP pool!");
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#include "Emu/RSX/RSXThread.h"
|
#include "Emu/RSX/RSXThread.h"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
namespace rsx
|
namespace rsx
|
||||||
{
|
{
|
||||||
@ -23,7 +24,7 @@ namespace rsx
|
|||||||
}
|
}
|
||||||
|
|
||||||
// User memory + fifo size
|
// User memory + fifo size
|
||||||
buffer_size = ::align<u32>(buffer_size, 0x100000) + 0x10000000;
|
buffer_size = utils::align<u32>(buffer_size, 0x100000) + 0x10000000;
|
||||||
// We are not allowed to drain all memory so add a little
|
// We are not allowed to drain all memory so add a little
|
||||||
g_fxo->init<lv2_memory_container>(buffer_size + 0x1000000);
|
g_fxo->init<lv2_memory_container>(buffer_size + 0x1000000);
|
||||||
|
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
#include "../RSXThread.h"
|
#include "../RSXThread.h"
|
||||||
#include "../rsx_utils.h"
|
#include "../rsx_utils.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
// FIXME: GSL as_span break build if template parameter is non const with current revision.
|
// FIXME: GSL as_span break build if template parameter is non const with current revision.
|
||||||
@ -346,8 +348,8 @@ namespace
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
current_subresource_layout.width_in_block = aligned_div(miplevel_width_in_texel, block_edge_in_texel);
|
current_subresource_layout.width_in_block = utils::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
|
||||||
current_subresource_layout.height_in_block = aligned_div(miplevel_height_in_texel, block_edge_in_texel);
|
current_subresource_layout.height_in_block = utils::aligned_div(miplevel_height_in_texel, block_edge_in_texel);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (padded_row)
|
if (padded_row)
|
||||||
@ -375,7 +377,7 @@ namespace
|
|||||||
miplevel_height_in_texel = std::max(miplevel_height_in_texel / 2, 1);
|
miplevel_height_in_texel = std::max(miplevel_height_in_texel / 2, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
offset_in_src = align(offset_in_src, 128);
|
offset_in_src = utils::align(offset_in_src, 128);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
@ -922,8 +924,8 @@ namespace rsx
|
|||||||
usz result = 0;
|
usz result = 0;
|
||||||
for (u16 i = 0; i < mipmap; ++i)
|
for (u16 i = 0; i < mipmap; ++i)
|
||||||
{
|
{
|
||||||
usz rowPitch = align(block_size_in_byte * width_in_blocks, row_pitch_alignment);
|
usz rowPitch = utils::align(block_size_in_byte * width_in_blocks, row_pitch_alignment);
|
||||||
result += align(rowPitch * height_in_blocks * depth, mipmap_alignment);
|
result += utils::align(rowPitch * height_in_blocks * depth, mipmap_alignment);
|
||||||
height_in_blocks = std::max<usz>(height_in_blocks / 2, 1);
|
height_in_blocks = std::max<usz>(height_in_blocks / 2, 1);
|
||||||
width_in_blocks = std::max<usz>(width_in_blocks / 2, 1);
|
width_in_blocks = std::max<usz>(width_in_blocks / 2, 1);
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "util/logs.hpp"
|
#include "util/logs.hpp"
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ring buffer memory helper :
|
* Ring buffer memory helper :
|
||||||
@ -19,8 +20,8 @@ protected:
|
|||||||
template<int Alignment>
|
template<int Alignment>
|
||||||
bool can_alloc(usz size) const
|
bool can_alloc(usz size) const
|
||||||
{
|
{
|
||||||
usz alloc_size = align(size, Alignment);
|
usz alloc_size = utils::align(size, Alignment);
|
||||||
usz aligned_put_pos = align(m_put_pos, Alignment);
|
usz aligned_put_pos = utils::align(m_put_pos, Alignment);
|
||||||
if (aligned_put_pos + alloc_size < m_size)
|
if (aligned_put_pos + alloc_size < m_size)
|
||||||
{
|
{
|
||||||
// range before get
|
// range before get
|
||||||
@ -83,8 +84,8 @@ public:
|
|||||||
template<int Alignment>
|
template<int Alignment>
|
||||||
usz alloc(usz size)
|
usz alloc(usz size)
|
||||||
{
|
{
|
||||||
const usz alloc_size = align(size, Alignment);
|
const usz alloc_size = utils::align(size, Alignment);
|
||||||
const usz aligned_put_pos = align(m_put_pos, Alignment);
|
const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
|
||||||
|
|
||||||
if (!can_alloc<Alignment>(size) && !grow(aligned_put_pos + alloc_size))
|
if (!can_alloc<Alignment>(size) && !grow(aligned_put_pos + alloc_size))
|
||||||
{
|
{
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#include "surface_store.h"
|
#include "surface_store.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
namespace rsx
|
namespace rsx
|
||||||
{
|
{
|
||||||
namespace utility
|
namespace utility
|
||||||
@ -23,20 +25,20 @@ namespace rsx
|
|||||||
{
|
{
|
||||||
switch (format)
|
switch (format)
|
||||||
{
|
{
|
||||||
case surface_color_format::b8: return align(width, 256);
|
case surface_color_format::b8: return utils::align(width, 256);
|
||||||
case surface_color_format::g8b8:
|
case surface_color_format::g8b8:
|
||||||
case surface_color_format::x1r5g5b5_o1r5g5b5:
|
case surface_color_format::x1r5g5b5_o1r5g5b5:
|
||||||
case surface_color_format::x1r5g5b5_z1r5g5b5:
|
case surface_color_format::x1r5g5b5_z1r5g5b5:
|
||||||
case surface_color_format::r5g6b5: return align(width * 2, 256);
|
case surface_color_format::r5g6b5: return utils::align(width * 2, 256);
|
||||||
case surface_color_format::a8b8g8r8:
|
case surface_color_format::a8b8g8r8:
|
||||||
case surface_color_format::x8b8g8r8_o8b8g8r8:
|
case surface_color_format::x8b8g8r8_o8b8g8r8:
|
||||||
case surface_color_format::x8b8g8r8_z8b8g8r8:
|
case surface_color_format::x8b8g8r8_z8b8g8r8:
|
||||||
case surface_color_format::x8r8g8b8_o8r8g8b8:
|
case surface_color_format::x8r8g8b8_o8r8g8b8:
|
||||||
case surface_color_format::x8r8g8b8_z8r8g8b8:
|
case surface_color_format::x8r8g8b8_z8r8g8b8:
|
||||||
case surface_color_format::x32:
|
case surface_color_format::x32:
|
||||||
case surface_color_format::a8r8g8b8: return align(width * 4, 256);
|
case surface_color_format::a8r8g8b8: return utils::align(width * 4, 256);
|
||||||
case surface_color_format::w16z16y16x16: return align(width * 8, 256);
|
case surface_color_format::w16z16y16x16: return utils::align(width * 8, 256);
|
||||||
case surface_color_format::w32z32y32x32: return align(width * 16, 256);
|
case surface_color_format::w32z32y32x32: return utils::align(width * 16, 256);
|
||||||
}
|
}
|
||||||
fmt::throw_exception("Unknown color surface format");
|
fmt::throw_exception("Unknown color surface format");
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
#include "../rsx_utils.h"
|
#include "../rsx_utils.h"
|
||||||
#include <list>
|
#include <list>
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
namespace rsx
|
namespace rsx
|
||||||
{
|
{
|
||||||
namespace utility
|
namespace utility
|
||||||
@ -918,7 +920,7 @@ namespace rsx
|
|||||||
{
|
{
|
||||||
// Width is calculated in the coordinate-space of the requester; normalize
|
// Width is calculated in the coordinate-space of the requester; normalize
|
||||||
info.src_area.x = (info.src_area.x * required_bpp) / surface_bpp;
|
info.src_area.x = (info.src_area.x * required_bpp) / surface_bpp;
|
||||||
info.src_area.width = align(width * required_bpp, surface_bpp) / surface_bpp;
|
info.src_area.width = utils::align(width * required_bpp, surface_bpp) / surface_bpp;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
#include "Emu/IdManager.h"
|
#include "Emu/IdManager.h"
|
||||||
#include "GLHelpers.h"
|
#include "GLHelpers.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
namespace gl
|
namespace gl
|
||||||
{
|
{
|
||||||
struct compute_task
|
struct compute_task
|
||||||
@ -224,7 +226,7 @@ namespace gl
|
|||||||
m_data_length = data_length;
|
m_data_length = data_length;
|
||||||
|
|
||||||
const auto num_bytes_per_invocation = optimal_group_size * kernel_size * 4;
|
const auto num_bytes_per_invocation = optimal_group_size * kernel_size * 4;
|
||||||
const auto num_bytes_to_process = align(data_length, num_bytes_per_invocation);
|
const auto num_bytes_to_process = utils::align(data_length, num_bytes_per_invocation);
|
||||||
const auto num_invocations = num_bytes_to_process / num_bytes_per_invocation;
|
const auto num_invocations = num_bytes_to_process / num_bytes_per_invocation;
|
||||||
|
|
||||||
if ((num_bytes_to_process + data_offset) > data->size())
|
if ((num_bytes_to_process + data_offset) > data->size())
|
||||||
|
@ -740,7 +740,7 @@ void GLGSRender::load_program_env()
|
|||||||
if (update_fragment_env) m_fragment_env_buffer->reserve_storage_on_heap(128);
|
if (update_fragment_env) m_fragment_env_buffer->reserve_storage_on_heap(128);
|
||||||
if (update_vertex_env) m_vertex_env_buffer->reserve_storage_on_heap(256);
|
if (update_vertex_env) m_vertex_env_buffer->reserve_storage_on_heap(256);
|
||||||
if (update_fragment_texture_env) m_texture_parameters_buffer->reserve_storage_on_heap(256);
|
if (update_fragment_texture_env) m_texture_parameters_buffer->reserve_storage_on_heap(256);
|
||||||
if (update_fragment_constants) m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_constants_size, 256));
|
if (update_fragment_constants) m_fragment_constants_buffer->reserve_storage_on_heap(utils::align(fragment_constants_size, 256));
|
||||||
if (update_transform_constants) m_transform_constants_buffer->reserve_storage_on_heap(8192);
|
if (update_transform_constants) m_transform_constants_buffer->reserve_storage_on_heap(8192);
|
||||||
if (update_raster_env) m_raster_env_ring_buffer->reserve_storage_on_heap(128);
|
if (update_raster_env) m_raster_env_ring_buffer->reserve_storage_on_heap(128);
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include "Utilities/mutex.h"
|
#include "Utilities/mutex.h"
|
||||||
#include "Utilities/geometry.h"
|
#include "Utilities/geometry.h"
|
||||||
#include "util/logs.hpp"
|
#include "util/logs.hpp"
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
#define GL_FRAGMENT_TEXTURES_START 0
|
#define GL_FRAGMENT_TEXTURES_START 0
|
||||||
#define GL_VERTEX_TEXTURES_START (GL_FRAGMENT_TEXTURES_START + 16)
|
#define GL_VERTEX_TEXTURES_START (GL_FRAGMENT_TEXTURES_START + 16)
|
||||||
@ -808,7 +809,7 @@ namespace gl
|
|||||||
virtual std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment)
|
virtual std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment)
|
||||||
{
|
{
|
||||||
u32 offset = m_data_loc;
|
u32 offset = m_data_loc;
|
||||||
if (m_data_loc) offset = align(offset, alignment);
|
if (m_data_loc) offset = utils::align(offset, alignment);
|
||||||
|
|
||||||
if ((offset + alloc_size) > m_size)
|
if ((offset + alloc_size) > m_size)
|
||||||
{
|
{
|
||||||
@ -827,7 +828,7 @@ namespace gl
|
|||||||
}
|
}
|
||||||
|
|
||||||
//Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently
|
//Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently
|
||||||
m_data_loc = align(offset + alloc_size, 256);
|
m_data_loc = utils::align(offset + alloc_size, 256);
|
||||||
return std::make_pair(static_cast<char*>(m_memory_mapping) + offset, offset);
|
return std::make_pair(static_cast<char*>(m_memory_mapping) + offset, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -897,9 +898,9 @@ namespace gl
|
|||||||
ensure(m_memory_mapping == nullptr);
|
ensure(m_memory_mapping == nullptr);
|
||||||
|
|
||||||
u32 offset = m_data_loc;
|
u32 offset = m_data_loc;
|
||||||
if (m_data_loc) offset = align(offset, 256);
|
if (m_data_loc) offset = utils::align(offset, 256);
|
||||||
|
|
||||||
const u32 block_size = align(alloc_size + 16, 256); //Overallocate just in case we need to realign base
|
const u32 block_size = utils::align(alloc_size + 16, 256); //Overallocate just in case we need to realign base
|
||||||
|
|
||||||
if ((offset + block_size) > m_size)
|
if ((offset + block_size) > m_size)
|
||||||
{
|
{
|
||||||
@ -933,10 +934,10 @@ namespace gl
|
|||||||
std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override
|
std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override
|
||||||
{
|
{
|
||||||
u32 offset = m_data_loc;
|
u32 offset = m_data_loc;
|
||||||
if (m_data_loc) offset = align(offset, alignment);
|
if (m_data_loc) offset = utils::align(offset, alignment);
|
||||||
|
|
||||||
u32 padding = (offset - m_data_loc);
|
u32 padding = (offset - m_data_loc);
|
||||||
u32 real_size = align(padding + alloc_size, alignment); //Ensures we leave the loc pointer aligned after we exit
|
u32 real_size = utils::align(padding + alloc_size, alignment); //Ensures we leave the loc pointer aligned after we exit
|
||||||
|
|
||||||
if (real_size > m_mapped_bytes)
|
if (real_size > m_mapped_bytes)
|
||||||
{
|
{
|
||||||
@ -946,10 +947,10 @@ namespace gl
|
|||||||
reserve_storage_on_heap(std::max(real_size, 4096U));
|
reserve_storage_on_heap(std::max(real_size, 4096U));
|
||||||
|
|
||||||
offset = m_data_loc;
|
offset = m_data_loc;
|
||||||
if (m_data_loc) offset = align(offset, alignment);
|
if (m_data_loc) offset = utils::align(offset, alignment);
|
||||||
|
|
||||||
padding = (offset - m_data_loc);
|
padding = (offset - m_data_loc);
|
||||||
real_size = align(padding + alloc_size, alignment);
|
real_size = utils::align(padding + alloc_size, alignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_data_loc = offset + real_size;
|
m_data_loc = offset + real_size;
|
||||||
|
@ -6,6 +6,8 @@
|
|||||||
#include "../RSXThread.h"
|
#include "../RSXThread.h"
|
||||||
#include "../RSXTexture.h"
|
#include "../RSXTexture.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
namespace gl
|
namespace gl
|
||||||
{
|
{
|
||||||
buffer g_typeless_transfer_buffer;
|
buffer g_typeless_transfer_buffer;
|
||||||
@ -614,8 +616,8 @@ namespace gl
|
|||||||
{
|
{
|
||||||
//Compressed formats have a 4-byte alignment
|
//Compressed formats have a 4-byte alignment
|
||||||
//TODO: Verify that samplers are not affected by the padding
|
//TODO: Verify that samplers are not affected by the padding
|
||||||
width = align(width, 4);
|
width = utils::align(width, 4);
|
||||||
height = align(height, 4);
|
height = utils::align(height, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLenum target;
|
GLenum target;
|
||||||
@ -654,7 +656,7 @@ namespace gl
|
|||||||
{
|
{
|
||||||
caps.supports_vtc_decoding = gl::get_driver_caps().vendor_NVIDIA;
|
caps.supports_vtc_decoding = gl::get_driver_caps().vendor_NVIDIA;
|
||||||
|
|
||||||
unpack_settings.row_length(align(dst->width(), 4));
|
unpack_settings.row_length(utils::align(dst->width(), 4));
|
||||||
unpack_settings.apply();
|
unpack_settings.apply();
|
||||||
|
|
||||||
glBindTexture(static_cast<GLenum>(dst->get_target()), dst->id());
|
glBindTexture(static_cast<GLenum>(dst->get_target()), dst->id());
|
||||||
@ -664,7 +666,7 @@ namespace gl
|
|||||||
for (const rsx::subresource_layout& layout : input_layouts)
|
for (const rsx::subresource_layout& layout : input_layouts)
|
||||||
{
|
{
|
||||||
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps);
|
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps);
|
||||||
const sizei image_size{ align(layout.width_in_texel, 4), align(layout.height_in_texel, 4) };
|
const sizei image_size{utils::align(layout.width_in_texel, 4), utils::align(layout.height_in_texel, 4)};
|
||||||
|
|
||||||
switch (dst->get_target())
|
switch (dst->get_target())
|
||||||
{
|
{
|
||||||
@ -835,7 +837,7 @@ namespace gl
|
|||||||
void upload_texture(texture* dst, u32 gcm_format, bool is_swizzled, const std::vector<rsx::subresource_layout>& subresources_layout)
|
void upload_texture(texture* dst, u32 gcm_format, bool is_swizzled, const std::vector<rsx::subresource_layout>& subresources_layout)
|
||||||
{
|
{
|
||||||
// Calculate staging buffer size
|
// Calculate staging buffer size
|
||||||
const u32 aligned_pitch = align<u32>(dst->pitch(), 4);
|
const u32 aligned_pitch = utils::align<u32>(dst->pitch(), 4);
|
||||||
usz texture_data_sz = dst->depth() * dst->height() * aligned_pitch;
|
usz texture_data_sz = dst->depth() * dst->height() * aligned_pitch;
|
||||||
std::vector<std::byte> data_upload_buf(texture_data_sz);
|
std::vector<std::byte> data_upload_buf(texture_data_sz);
|
||||||
|
|
||||||
|
191
rpcs3/Emu/RSX/GL/GLTextureCache.cpp
Normal file
191
rpcs3/Emu/RSX/GL/GLTextureCache.cpp
Normal file
@ -0,0 +1,191 @@
|
|||||||
|
#include "stdafx.h"
|
||||||
|
#include "Emu/RSX/RSXThread.h"
|
||||||
|
#include "GLTexture.h"
|
||||||
|
#include "GLTextureCache.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
|
namespace gl
|
||||||
|
{
|
||||||
|
void cached_texture_section::finish_flush()
|
||||||
|
{
|
||||||
|
// Free resources
|
||||||
|
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||||
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||||
|
|
||||||
|
const auto valid_range = get_confirmed_range_delta();
|
||||||
|
const u32 valid_offset = valid_range.first;
|
||||||
|
const u32 valid_length = valid_range.second;
|
||||||
|
void *dst = get_ptr(get_section_base() + valid_offset);
|
||||||
|
|
||||||
|
if (!gl::get_driver_caps().ARB_compute_shader_supported)
|
||||||
|
{
|
||||||
|
switch (type)
|
||||||
|
{
|
||||||
|
case gl::texture::type::sbyte:
|
||||||
|
case gl::texture::type::ubyte:
|
||||||
|
{
|
||||||
|
// byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty
|
||||||
|
ensure(!pack_unpack_swap_bytes);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case gl::texture::type::uint_24_8:
|
||||||
|
{
|
||||||
|
// Swap bytes on D24S8 does not swap the whole dword, just shuffles the 3 bytes for D24
|
||||||
|
// In this regard, D24S8 is the same structure on both PC and PS3, but the endianness of the whole block is reversed on PS3
|
||||||
|
ensure(pack_unpack_swap_bytes == false);
|
||||||
|
ensure(real_pitch == (width * 4));
|
||||||
|
if (rsx_pitch == real_pitch) [[likely]]
|
||||||
|
{
|
||||||
|
stream_data_to_memory_swapped_u32<true>(dst, dst, valid_length / 4, 4);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const u32 num_rows = utils::align(valid_length, rsx_pitch) / rsx_pitch;
|
||||||
|
u8* data = static_cast<u8*>(dst);
|
||||||
|
for (u32 row = 0; row < num_rows; ++row)
|
||||||
|
{
|
||||||
|
stream_data_to_memory_swapped_u32<true>(data, data, width, 4);
|
||||||
|
data += rsx_pitch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_swizzled())
|
||||||
|
{
|
||||||
|
// This format is completely worthless to CPU processing algorithms where cache lines on die are linear.
|
||||||
|
// If this is happening, usually it means it was not a planned readback (e.g shared pages situation)
|
||||||
|
rsx_log.warning("[Performance warning] CPU readback of swizzled data");
|
||||||
|
|
||||||
|
// Read-modify-write to avoid corrupting already resident memory outside texture region
|
||||||
|
std::vector<u8> tmp_data(rsx_pitch * height);
|
||||||
|
std::memcpy(tmp_data.data(), dst, tmp_data.size());
|
||||||
|
|
||||||
|
switch (type)
|
||||||
|
{
|
||||||
|
case gl::texture::type::uint_8_8_8_8:
|
||||||
|
case gl::texture::type::uint_24_8:
|
||||||
|
rsx::convert_linear_swizzle<u32, false>(tmp_data.data(), dst, width, height, rsx_pitch);
|
||||||
|
break;
|
||||||
|
case gl::texture::type::ushort_5_6_5:
|
||||||
|
case gl::texture::type::ushort:
|
||||||
|
rsx::convert_linear_swizzle<u16, false>(tmp_data.data(), dst, width, height, rsx_pitch);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
rsx_log.error("Unexpected swizzled texture format 0x%x", static_cast<u32>(format));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (context == rsx::texture_upload_context::framebuffer_storage)
|
||||||
|
{
|
||||||
|
// Update memory tag
|
||||||
|
static_cast<gl::render_target*>(vram_texture)->sync_tag();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void texture_cache::copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector<copy_region_descriptor>& sources) const
|
||||||
|
{
|
||||||
|
const auto dst_bpp = dst_image->pitch() / dst_image->width();
|
||||||
|
const auto dst_aspect = dst_image->aspect();
|
||||||
|
|
||||||
|
for (const auto &slice : sources)
|
||||||
|
{
|
||||||
|
if (!slice.src)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const bool typeless = dst_aspect != slice.src->aspect() ||
|
||||||
|
!formats_are_bitcast_compatible(static_cast<GLenum>(slice.src->get_internal_format()), static_cast<GLenum>(dst_image->get_internal_format()));
|
||||||
|
|
||||||
|
std::unique_ptr<gl::texture> tmp;
|
||||||
|
auto src_image = slice.src;
|
||||||
|
auto src_x = slice.src_x;
|
||||||
|
auto src_y = slice.src_y;
|
||||||
|
auto src_w = slice.src_w;
|
||||||
|
auto src_h = slice.src_h;
|
||||||
|
|
||||||
|
if (slice.xform == rsx::surface_transform::coordinate_transform)
|
||||||
|
{
|
||||||
|
// Dimensions were given in 'dst' space. Work out the real source coordinates
|
||||||
|
const auto src_bpp = slice.src->pitch() / slice.src->width();
|
||||||
|
src_x = (src_x * dst_bpp) / src_bpp;
|
||||||
|
src_w = utils::aligned_div<u16>(src_w * dst_bpp, src_bpp);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (auto surface = dynamic_cast<gl::render_target*>(slice.src))
|
||||||
|
{
|
||||||
|
surface->transform_samples_to_pixels(src_x, src_w, src_y, src_h);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (typeless) [[unlikely]]
|
||||||
|
{
|
||||||
|
const auto src_bpp = slice.src->pitch() / slice.src->width();
|
||||||
|
const u16 convert_w = u16(slice.src->width() * src_bpp) / dst_bpp;
|
||||||
|
tmp = std::make_unique<texture>(GL_TEXTURE_2D, convert_w, slice.src->height(), 1, 1, static_cast<GLenum>(dst_image->get_internal_format()));
|
||||||
|
|
||||||
|
src_image = tmp.get();
|
||||||
|
|
||||||
|
// Compute src region in dst format layout
|
||||||
|
const u16 src_w2 = u16(src_w * src_bpp) / dst_bpp;
|
||||||
|
const u16 src_x2 = u16(src_x * src_bpp) / dst_bpp;
|
||||||
|
|
||||||
|
if (src_w2 == slice.dst_w && src_h == slice.dst_h && slice.level == 0)
|
||||||
|
{
|
||||||
|
// Optimization, avoid typeless copy to tmp followed by data copy to dst
|
||||||
|
// Combine the two transfers into one
|
||||||
|
const coord3u src_region = { { src_x, src_y, 0 }, { src_w, src_h, 1 } };
|
||||||
|
const coord3u dst_region = { { slice.dst_x, slice.dst_y, slice.dst_z }, { slice.dst_w, slice.dst_h, 1 } };
|
||||||
|
gl::copy_typeless(dst_image, slice.src, dst_region, src_region);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const coord3u src_region = { { src_x, src_y, 0 }, { src_w, src_h, 1 } };
|
||||||
|
const coord3u dst_region = { { src_x2, src_y, 0 }, { src_w2, src_h, 1 } };
|
||||||
|
gl::copy_typeless(src_image, slice.src, dst_region, src_region);
|
||||||
|
|
||||||
|
src_x = src_x2;
|
||||||
|
src_w = src_w2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (src_w == slice.dst_w && src_h == slice.dst_h)
|
||||||
|
{
|
||||||
|
glCopyImageSubData(src_image->id(), GL_TEXTURE_2D, 0, src_x, src_y, 0,
|
||||||
|
dst_image->id(), static_cast<GLenum>(dst_image->get_target()), slice.level, slice.dst_x, slice.dst_y, slice.dst_z, src_w, src_h, 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ensure(dst_image->get_target() == gl::texture::target::texture2D);
|
||||||
|
|
||||||
|
auto _blitter = gl::g_hw_blitter;
|
||||||
|
const areai src_rect = { src_x, src_y, src_x + src_w, src_y + src_h };
|
||||||
|
const areai dst_rect = { slice.dst_x, slice.dst_y, slice.dst_x + slice.dst_w, slice.dst_y + slice.dst_h };
|
||||||
|
|
||||||
|
gl::texture* _dst;
|
||||||
|
if (src_image->get_internal_format() == dst_image->get_internal_format() && slice.level == 0)
|
||||||
|
{
|
||||||
|
_dst = dst_image;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
tmp = std::make_unique<texture>(GL_TEXTURE_2D, dst_rect.x2, dst_rect.y2, 1, 1, static_cast<GLenum>(slice.src->get_internal_format()));
|
||||||
|
_dst = tmp.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
_blitter->scale_image(cmd, src_image, _dst,
|
||||||
|
src_rect, dst_rect, false, {});
|
||||||
|
|
||||||
|
if (_dst != dst_image)
|
||||||
|
{
|
||||||
|
// Data cast comes after scaling
|
||||||
|
glCopyImageSubData(tmp->id(), GL_TEXTURE_2D, 0, slice.dst_x, slice.dst_y, 0,
|
||||||
|
dst_image->id(), static_cast<GLenum>(dst_image->get_target()), slice.level, slice.dst_x, slice.dst_y, slice.dst_z, slice.dst_w, slice.dst_h, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -62,7 +62,7 @@ namespace gl
|
|||||||
void init_buffer(const gl::texture* src)
|
void init_buffer(const gl::texture* src)
|
||||||
{
|
{
|
||||||
const u32 vram_size = src->pitch() * src->height();
|
const u32 vram_size = src->pitch() * src->height();
|
||||||
const u32 buffer_size = align(vram_size, 4096);
|
const u32 buffer_size = utils::align(vram_size, 4096);
|
||||||
|
|
||||||
if (pbo)
|
if (pbo)
|
||||||
{
|
{
|
||||||
@ -333,86 +333,7 @@ namespace gl
|
|||||||
return glMapBufferRange(GL_PIXEL_PACK_BUFFER, offset, size, GL_MAP_READ_BIT);
|
return glMapBufferRange(GL_PIXEL_PACK_BUFFER, offset, size, GL_MAP_READ_BIT);
|
||||||
}
|
}
|
||||||
|
|
||||||
void finish_flush()
|
void finish_flush();
|
||||||
{
|
|
||||||
// Free resources
|
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
|
||||||
|
|
||||||
const auto valid_range = get_confirmed_range_delta();
|
|
||||||
const u32 valid_offset = valid_range.first;
|
|
||||||
const u32 valid_length = valid_range.second;
|
|
||||||
void *dst = get_ptr(get_section_base() + valid_offset);
|
|
||||||
|
|
||||||
if (!gl::get_driver_caps().ARB_compute_shader_supported)
|
|
||||||
{
|
|
||||||
switch (type)
|
|
||||||
{
|
|
||||||
case gl::texture::type::sbyte:
|
|
||||||
case gl::texture::type::ubyte:
|
|
||||||
{
|
|
||||||
// byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty
|
|
||||||
ensure(!pack_unpack_swap_bytes);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case gl::texture::type::uint_24_8:
|
|
||||||
{
|
|
||||||
// Swap bytes on D24S8 does not swap the whole dword, just shuffles the 3 bytes for D24
|
|
||||||
// In this regard, D24S8 is the same structure on both PC and PS3, but the endianness of the whole block is reversed on PS3
|
|
||||||
ensure(pack_unpack_swap_bytes == false);
|
|
||||||
ensure(real_pitch == (width * 4));
|
|
||||||
if (rsx_pitch == real_pitch) [[likely]]
|
|
||||||
{
|
|
||||||
stream_data_to_memory_swapped_u32<true>(dst, dst, valid_length / 4, 4);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const u32 num_rows = align(valid_length, rsx_pitch) / rsx_pitch;
|
|
||||||
u8* data = static_cast<u8*>(dst);
|
|
||||||
for (u32 row = 0; row < num_rows; ++row)
|
|
||||||
{
|
|
||||||
stream_data_to_memory_swapped_u32<true>(data, data, width, 4);
|
|
||||||
data += rsx_pitch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is_swizzled())
|
|
||||||
{
|
|
||||||
// This format is completely worthless to CPU processing algorithms where cache lines on die are linear.
|
|
||||||
// If this is happening, usually it means it was not a planned readback (e.g shared pages situation)
|
|
||||||
rsx_log.warning("[Performance warning] CPU readback of swizzled data");
|
|
||||||
|
|
||||||
// Read-modify-write to avoid corrupting already resident memory outside texture region
|
|
||||||
std::vector<u8> tmp_data(rsx_pitch * height);
|
|
||||||
std::memcpy(tmp_data.data(), dst, tmp_data.size());
|
|
||||||
|
|
||||||
switch (type)
|
|
||||||
{
|
|
||||||
case gl::texture::type::uint_8_8_8_8:
|
|
||||||
case gl::texture::type::uint_24_8:
|
|
||||||
rsx::convert_linear_swizzle<u32, false>(tmp_data.data(), dst, width, height, rsx_pitch);
|
|
||||||
break;
|
|
||||||
case gl::texture::type::ushort_5_6_5:
|
|
||||||
case gl::texture::type::ushort:
|
|
||||||
rsx::convert_linear_swizzle<u16, false>(tmp_data.data(), dst, width, height, rsx_pitch);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
rsx_log.error("Unexpected swizzled texture format 0x%x", static_cast<u32>(format));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (context == rsx::texture_upload_context::framebuffer_storage)
|
|
||||||
{
|
|
||||||
// Update memory tag
|
|
||||||
static_cast<gl::render_target*>(vram_texture)->sync_tag();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Misc
|
* Misc
|
||||||
@ -637,106 +558,7 @@ namespace gl
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector<copy_region_descriptor>& sources) const
|
void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector<copy_region_descriptor>& sources) const;
|
||||||
{
|
|
||||||
const auto dst_bpp = dst_image->pitch() / dst_image->width();
|
|
||||||
const auto dst_aspect = dst_image->aspect();
|
|
||||||
|
|
||||||
for (const auto &slice : sources)
|
|
||||||
{
|
|
||||||
if (!slice.src)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const bool typeless = dst_aspect != slice.src->aspect() ||
|
|
||||||
!formats_are_bitcast_compatible(static_cast<GLenum>(slice.src->get_internal_format()), static_cast<GLenum>(dst_image->get_internal_format()));
|
|
||||||
|
|
||||||
std::unique_ptr<gl::texture> tmp;
|
|
||||||
auto src_image = slice.src;
|
|
||||||
auto src_x = slice.src_x;
|
|
||||||
auto src_y = slice.src_y;
|
|
||||||
auto src_w = slice.src_w;
|
|
||||||
auto src_h = slice.src_h;
|
|
||||||
|
|
||||||
if (slice.xform == rsx::surface_transform::coordinate_transform)
|
|
||||||
{
|
|
||||||
// Dimensions were given in 'dst' space. Work out the real source coordinates
|
|
||||||
const auto src_bpp = slice.src->pitch() / slice.src->width();
|
|
||||||
src_x = (src_x * dst_bpp) / src_bpp;
|
|
||||||
src_w = ::aligned_div<u16>(src_w * dst_bpp, src_bpp);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (auto surface = dynamic_cast<gl::render_target*>(slice.src))
|
|
||||||
{
|
|
||||||
surface->transform_samples_to_pixels(src_x, src_w, src_y, src_h);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (typeless) [[unlikely]]
|
|
||||||
{
|
|
||||||
const auto src_bpp = slice.src->pitch() / slice.src->width();
|
|
||||||
const u16 convert_w = u16(slice.src->width() * src_bpp) / dst_bpp;
|
|
||||||
tmp = std::make_unique<texture>(GL_TEXTURE_2D, convert_w, slice.src->height(), 1, 1, static_cast<GLenum>(dst_image->get_internal_format()));
|
|
||||||
|
|
||||||
src_image = tmp.get();
|
|
||||||
|
|
||||||
// Compute src region in dst format layout
|
|
||||||
const u16 src_w2 = u16(src_w * src_bpp) / dst_bpp;
|
|
||||||
const u16 src_x2 = u16(src_x * src_bpp) / dst_bpp;
|
|
||||||
|
|
||||||
if (src_w2 == slice.dst_w && src_h == slice.dst_h && slice.level == 0)
|
|
||||||
{
|
|
||||||
// Optimization, avoid typeless copy to tmp followed by data copy to dst
|
|
||||||
// Combine the two transfers into one
|
|
||||||
const coord3u src_region = { { src_x, src_y, 0 }, { src_w, src_h, 1 } };
|
|
||||||
const coord3u dst_region = { { slice.dst_x, slice.dst_y, slice.dst_z }, { slice.dst_w, slice.dst_h, 1 } };
|
|
||||||
gl::copy_typeless(dst_image, slice.src, dst_region, src_region);
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const coord3u src_region = { { src_x, src_y, 0 }, { src_w, src_h, 1 } };
|
|
||||||
const coord3u dst_region = { { src_x2, src_y, 0 }, { src_w2, src_h, 1 } };
|
|
||||||
gl::copy_typeless(src_image, slice.src, dst_region, src_region);
|
|
||||||
|
|
||||||
src_x = src_x2;
|
|
||||||
src_w = src_w2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (src_w == slice.dst_w && src_h == slice.dst_h)
|
|
||||||
{
|
|
||||||
glCopyImageSubData(src_image->id(), GL_TEXTURE_2D, 0, src_x, src_y, 0,
|
|
||||||
dst_image->id(), static_cast<GLenum>(dst_image->get_target()), slice.level, slice.dst_x, slice.dst_y, slice.dst_z, src_w, src_h, 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ensure(dst_image->get_target() == gl::texture::target::texture2D);
|
|
||||||
|
|
||||||
auto _blitter = gl::g_hw_blitter;
|
|
||||||
const areai src_rect = { src_x, src_y, src_x + src_w, src_y + src_h };
|
|
||||||
const areai dst_rect = { slice.dst_x, slice.dst_y, slice.dst_x + slice.dst_w, slice.dst_y + slice.dst_h };
|
|
||||||
|
|
||||||
gl::texture* _dst;
|
|
||||||
if (src_image->get_internal_format() == dst_image->get_internal_format() && slice.level == 0)
|
|
||||||
{
|
|
||||||
_dst = dst_image;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
tmp = std::make_unique<texture>(GL_TEXTURE_2D, dst_rect.x2, dst_rect.y2, 1, 1, static_cast<GLenum>(slice.src->get_internal_format()));
|
|
||||||
_dst = tmp.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
_blitter->scale_image(cmd, src_image, _dst,
|
|
||||||
src_rect, dst_rect, false, {});
|
|
||||||
|
|
||||||
if (_dst != dst_image)
|
|
||||||
{
|
|
||||||
// Data cast comes after scaling
|
|
||||||
glCopyImageSubData(tmp->id(), GL_TEXTURE_2D, 0, slice.dst_x, slice.dst_y, 0,
|
|
||||||
dst_image->id(), static_cast<GLenum>(dst_image->get_target()), slice.level, slice.dst_x, slice.dst_y, slice.dst_z, slice.dst_w, slice.dst_h, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
gl::texture* get_template_from_collection_impl(const std::vector<copy_region_descriptor>& sections_to_transfer) const
|
gl::texture* get_template_from_collection_impl(const std::vector<copy_region_descriptor>& sections_to_transfer) const
|
||||||
{
|
{
|
||||||
|
@ -139,6 +139,52 @@ namespace rsx
|
|||||||
fmt::throw_exception("rsx::get_address(offset=0x%x, location=0x%x): %s%s", offset, location, msg, src_loc{line, col, file, func});
|
fmt::throw_exception("rsx::get_address(offset=0x%x, location=0x%x): %s%s", offset, location, msg, src_loc{line, col, file, func});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<u32, u32> interleaved_range_info::calculate_required_range(u32 first, u32 count) const
|
||||||
|
{
|
||||||
|
if (single_vertex)
|
||||||
|
{
|
||||||
|
return { 0, 1 };
|
||||||
|
}
|
||||||
|
|
||||||
|
const u32 max_index = (first + count) - 1;
|
||||||
|
u32 _max_index = 0;
|
||||||
|
u32 _min_index = first;
|
||||||
|
|
||||||
|
for (const auto &attrib : locations)
|
||||||
|
{
|
||||||
|
if (attrib.frequency <= 1) [[likely]]
|
||||||
|
{
|
||||||
|
_max_index = max_index;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (attrib.modulo)
|
||||||
|
{
|
||||||
|
if (max_index >= attrib.frequency)
|
||||||
|
{
|
||||||
|
// Actually uses the modulo operator
|
||||||
|
_min_index = 0;
|
||||||
|
_max_index = attrib.frequency - 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Same as having no modulo
|
||||||
|
_max_index = max_index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Division operator
|
||||||
|
_min_index = std::min(_min_index, first / attrib.frequency);
|
||||||
|
_max_index = std::max<u32>(_max_index, utils::aligned_div(max_index, attrib.frequency));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ensure(_max_index >= _min_index);
|
||||||
|
return { _min_index, (_max_index - _min_index) + 1 };
|
||||||
|
}
|
||||||
|
|
||||||
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size)
|
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size)
|
||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
@ -2521,7 +2567,7 @@ namespace rsx
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Some cases do not need full delay
|
// Some cases do not need full delay
|
||||||
remaining = ::aligned_div(remaining, div);
|
remaining = utils::aligned_div(remaining, div);
|
||||||
const u64 until = get_system_time() + remaining;
|
const u64 until = get_system_time() + remaining;
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
|
@ -246,51 +246,7 @@ namespace rsx
|
|||||||
rsx::simple_array<interleaved_attribute_t> locations;
|
rsx::simple_array<interleaved_attribute_t> locations;
|
||||||
|
|
||||||
// Check if we need to upload a full unoptimized range, i.e [0-max_index]
|
// Check if we need to upload a full unoptimized range, i.e [0-max_index]
|
||||||
std::pair<u32, u32> calculate_required_range(u32 first, u32 count) const
|
std::pair<u32, u32> calculate_required_range(u32 first, u32 count) const;
|
||||||
{
|
|
||||||
if (single_vertex)
|
|
||||||
{
|
|
||||||
return { 0, 1 };
|
|
||||||
}
|
|
||||||
|
|
||||||
const u32 max_index = (first + count) - 1;
|
|
||||||
u32 _max_index = 0;
|
|
||||||
u32 _min_index = first;
|
|
||||||
|
|
||||||
for (const auto &attrib : locations)
|
|
||||||
{
|
|
||||||
if (attrib.frequency <= 1) [[likely]]
|
|
||||||
{
|
|
||||||
_max_index = max_index;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (attrib.modulo)
|
|
||||||
{
|
|
||||||
if (max_index >= attrib.frequency)
|
|
||||||
{
|
|
||||||
// Actually uses the modulo operator
|
|
||||||
_min_index = 0;
|
|
||||||
_max_index = attrib.frequency - 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Same as having no modulo
|
|
||||||
_max_index = max_index;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Division operator
|
|
||||||
_min_index = std::min(_min_index, first / attrib.frequency);
|
|
||||||
_max_index = std::max<u32>(_max_index, aligned_div(max_index, attrib.frequency));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ensure(_max_index >= _min_index);
|
|
||||||
return { _min_index, (_max_index - _min_index) + 1 };
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
enum attribute_buffer_placement : u8
|
enum attribute_buffer_placement : u8
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
#include "Utilities/StrUtil.h"
|
#include "Utilities/StrUtil.h"
|
||||||
#include "Emu/IdManager.h"
|
#include "Emu/IdManager.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
#define VK_MAX_COMPUTE_TASKS 4096 // Max number of jobs per frame
|
#define VK_MAX_COMPUTE_TASKS 4096 // Max number of jobs per frame
|
||||||
|
|
||||||
namespace vk
|
namespace vk
|
||||||
@ -296,7 +298,7 @@ namespace vk
|
|||||||
"%vars"
|
"%vars"
|
||||||
"\n";
|
"\n";
|
||||||
|
|
||||||
const auto parameters_size = align(push_constants_size, 16) / 16;
|
const auto parameters_size = utils::align(push_constants_size, 16) / 16;
|
||||||
const std::pair<std::string, std::string> syntax_replace[] =
|
const std::pair<std::string, std::string> syntax_replace[] =
|
||||||
{
|
{
|
||||||
{ "%ws", std::to_string(optimal_group_size) },
|
{ "%ws", std::to_string(optimal_group_size) },
|
||||||
@ -943,7 +945,7 @@ namespace vk
|
|||||||
set_parameters(cmd);
|
set_parameters(cmd);
|
||||||
|
|
||||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
||||||
const u32 linear_invocations = aligned_div(data_length, num_bytes_per_invocation);
|
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||||
compute_task::run(cmd, linear_invocations);
|
compute_task::run(cmd, linear_invocations);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -997,7 +999,7 @@ namespace vk
|
|||||||
word_count = num_words;
|
word_count = num_words;
|
||||||
block_length = num_words * 4;
|
block_length = num_words * 4;
|
||||||
|
|
||||||
const u32 linear_invocations = aligned_div(word_count, optimal_group_size);
|
const u32 linear_invocations = utils::aligned_div(word_count, optimal_group_size);
|
||||||
compute_task::run(cmd, linear_invocations);
|
compute_task::run(cmd, linear_invocations);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
#include "VKResourceManager.h"
|
#include "VKResourceManager.h"
|
||||||
#include "VKDMA.h"
|
#include "VKDMA.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
namespace vk
|
namespace vk
|
||||||
{
|
{
|
||||||
static constexpr usz s_dma_block_length = 0x01000000;
|
static constexpr usz s_dma_block_length = 0x01000000;
|
||||||
@ -85,7 +87,7 @@ namespace vk
|
|||||||
{
|
{
|
||||||
if (!inheritance_info.parent)
|
if (!inheritance_info.parent)
|
||||||
{
|
{
|
||||||
const u32 start = align(range.start, s_page_size);
|
const u32 start = utils::align(range.start, s_page_size);
|
||||||
const u32 end = ((range.end + 1) & s_page_align);
|
const u32 end = ((range.end + 1) & s_page_align);
|
||||||
|
|
||||||
for (u32 page = start; page < end; page += s_page_size)
|
for (u32 page = start; page < end; page += s_page_size)
|
||||||
@ -259,7 +261,7 @@ namespace vk
|
|||||||
}
|
}
|
||||||
|
|
||||||
dma_block* block_head = nullptr;
|
dma_block* block_head = nullptr;
|
||||||
auto block_end = align(limit, s_dma_block_length);
|
auto block_end = utils::align(limit, s_dma_block_length);
|
||||||
|
|
||||||
// Reverse scan to try and find the minimum required length in case of other chaining
|
// Reverse scan to try and find the minimum required length in case of other chaining
|
||||||
for (auto block = last_block; block != first_block; block -= s_dma_block_length)
|
for (auto block = last_block; block != first_block; block -= s_dma_block_length)
|
||||||
|
@ -132,7 +132,7 @@ namespace vk
|
|||||||
{
|
{
|
||||||
// Create new heap. All sizes are aligned up by 64M, upto 1GiB
|
// Create new heap. All sizes are aligned up by 64M, upto 1GiB
|
||||||
const usz size_limit = 1024 * 0x100000;
|
const usz size_limit = 1024 * 0x100000;
|
||||||
const usz aligned_new_size = align(m_size + size, 64 * 0x100000);
|
const usz aligned_new_size = utils::align(m_size + size, 64 * 0x100000);
|
||||||
|
|
||||||
if (aligned_new_size >= size_limit)
|
if (aligned_new_size >= size_limit)
|
||||||
{
|
{
|
||||||
@ -351,8 +351,8 @@ namespace vk
|
|||||||
{
|
{
|
||||||
auto create_texture = [&]()
|
auto create_texture = [&]()
|
||||||
{
|
{
|
||||||
u32 new_width = align(requested_width, 1024u);
|
u32 new_width = utils::align(requested_width, 1024u);
|
||||||
u32 new_height = align(requested_height, 1024u);
|
u32 new_height = utils::align(requested_height, 1024u);
|
||||||
|
|
||||||
return new vk::image(*g_current_renderer, g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
return new vk::image(*g_current_renderer, g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||||
VK_IMAGE_TYPE_2D, format, new_width, new_height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
|
VK_IMAGE_TYPE_2D, format, new_width, new_height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
|
||||||
@ -388,7 +388,7 @@ namespace vk
|
|||||||
if (!g_scratch_buffer)
|
if (!g_scratch_buffer)
|
||||||
{
|
{
|
||||||
// Choose optimal size
|
// Choose optimal size
|
||||||
const u64 alloc_size = std::max<u64>(64 * 0x100000, align(min_required_size, 0x100000));
|
const u64 alloc_size = std::max<u64>(64 * 0x100000, utils::align(min_required_size, 0x100000));
|
||||||
|
|
||||||
g_scratch_buffer = std::make_unique<vk::buffer>(*g_current_renderer, alloc_size,
|
g_scratch_buffer = std::make_unique<vk::buffer>(*g_current_renderer, alloc_size,
|
||||||
g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||||
|
@ -2,6 +2,8 @@
|
|||||||
#include "VKGSRender.h"
|
#include "VKGSRender.h"
|
||||||
#include "Emu/Cell/Modules/cellVideoOut.h"
|
#include "Emu/Cell/Modules/cellVideoOut.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
void VKGSRender::reinitialize_swapchain()
|
void VKGSRender::reinitialize_swapchain()
|
||||||
{
|
{
|
||||||
m_swapchain_dims.width = m_frame->client_width();
|
m_swapchain_dims.width = m_frame->client_width();
|
||||||
@ -651,7 +653,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
|
|||||||
|
|
||||||
const usz sshot_size = buffer_height * buffer_width * 4;
|
const usz sshot_size = buffer_height * buffer_width * 4;
|
||||||
|
|
||||||
vk::buffer sshot_vkbuf(*m_device, align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
vk::buffer sshot_vkbuf(*m_device, utils::align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
|
||||||
|
|
||||||
VkBufferImageCopy copy_info;
|
VkBufferImageCopy copy_info;
|
||||||
|
@ -131,8 +131,8 @@ namespace vk
|
|||||||
multisampled = msaa_image;
|
multisampled = msaa_image;
|
||||||
resolve = resolve_image;
|
resolve = resolve_image;
|
||||||
|
|
||||||
const u32 invocations_x = align(resolve_image->width(), cs_wave_x) / cs_wave_x;
|
const u32 invocations_x = utils::align(resolve_image->width(), cs_wave_x) / cs_wave_x;
|
||||||
const u32 invocations_y = align(resolve_image->height(), cs_wave_y) / cs_wave_y;
|
const u32 invocations_y = utils::align(resolve_image->height(), cs_wave_y) / cs_wave_y;
|
||||||
|
|
||||||
compute_task::run(cmd, invocations_x, invocations_y, 1);
|
compute_task::run(cmd, invocations_x, invocations_y, 1);
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,8 @@
|
|||||||
#include "VKRenderPass.h"
|
#include "VKRenderPass.h"
|
||||||
#include "VKRenderTargets.h"
|
#include "VKRenderTargets.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
namespace vk
|
namespace vk
|
||||||
{
|
{
|
||||||
VkComponentMapping default_component_map()
|
VkComponentMapping default_component_map()
|
||||||
@ -89,7 +91,7 @@ namespace vk
|
|||||||
ensure(dst->size() >= allocation_end);
|
ensure(dst->size() >= allocation_end);
|
||||||
|
|
||||||
const auto data_offset = u32(region.bufferOffset);
|
const auto data_offset = u32(region.bufferOffset);
|
||||||
const auto z32_offset = align<u32>(data_offset + packed16_length, 256);
|
const auto z32_offset = utils::align<u32>(data_offset + packed16_length, 256);
|
||||||
|
|
||||||
// 1. Copy the depth to buffer
|
// 1. Copy the depth to buffer
|
||||||
VkBufferImageCopy region2;
|
VkBufferImageCopy region2;
|
||||||
@ -135,8 +137,8 @@ namespace vk
|
|||||||
ensure(dst->size() >= allocation_end);
|
ensure(dst->size() >= allocation_end);
|
||||||
|
|
||||||
const auto data_offset = u32(region.bufferOffset);
|
const auto data_offset = u32(region.bufferOffset);
|
||||||
const auto z_offset = align<u32>(data_offset + packed_length, 256);
|
const auto z_offset = utils::align<u32>(data_offset + packed_length, 256);
|
||||||
const auto s_offset = align<u32>(z_offset + in_depth_size, 256);
|
const auto s_offset = utils::align<u32>(z_offset + in_depth_size, 256);
|
||||||
|
|
||||||
// 1. Copy the depth and stencil blocks to separate banks
|
// 1. Copy the depth and stencil blocks to separate banks
|
||||||
VkBufferImageCopy sub_regions[2];
|
VkBufferImageCopy sub_regions[2];
|
||||||
@ -225,7 +227,7 @@ namespace vk
|
|||||||
ensure(src->size() >= allocation_end);
|
ensure(src->size() >= allocation_end);
|
||||||
|
|
||||||
const auto data_offset = u32(region.bufferOffset);
|
const auto data_offset = u32(region.bufferOffset);
|
||||||
const auto z32_offset = align<u32>(data_offset + packed16_length, 256);
|
const auto z32_offset = utils::align<u32>(data_offset + packed16_length, 256);
|
||||||
|
|
||||||
// 1. Pre-compute barrier
|
// 1. Pre-compute barrier
|
||||||
vk::insert_buffer_memory_barrier(cmd, src->value, z32_offset, packed32_length,
|
vk::insert_buffer_memory_barrier(cmd, src->value, z32_offset, packed32_length,
|
||||||
@ -260,8 +262,8 @@ namespace vk
|
|||||||
ensure(src->size() >= allocation_end); // "Out of memory (compute heap). Lower your resolution scale setting."
|
ensure(src->size() >= allocation_end); // "Out of memory (compute heap). Lower your resolution scale setting."
|
||||||
|
|
||||||
const auto data_offset = u32(region.bufferOffset);
|
const auto data_offset = u32(region.bufferOffset);
|
||||||
const auto z_offset = align<u32>(data_offset + packed_length, 256);
|
const auto z_offset = utils::align<u32>(data_offset + packed_length, 256);
|
||||||
const auto s_offset = align<u32>(z_offset + in_depth_size, 256);
|
const auto s_offset = utils::align<u32>(z_offset + in_depth_size, 256);
|
||||||
|
|
||||||
// Zero out the stencil block
|
// Zero out the stencil block
|
||||||
vkCmdFillBuffer(cmd, src->value, s_offset, in_stencil_size, 0);
|
vkCmdFillBuffer(cmd, src->value, s_offset, in_stencil_size, 0);
|
||||||
@ -821,7 +823,7 @@ namespace vk
|
|||||||
const auto src_offset = section.bufferOffset;
|
const auto src_offset = section.bufferOffset;
|
||||||
|
|
||||||
// Align output to 128-byte boundary to keep some drivers happy
|
// Align output to 128-byte boundary to keep some drivers happy
|
||||||
dst_offset = align(dst_offset, 128);
|
dst_offset = utils::align(dst_offset, 128);
|
||||||
|
|
||||||
u32 data_length = 0;
|
u32 data_length = 0;
|
||||||
for (unsigned i = 0, j = packet.first; i < packet.second; ++i, ++j)
|
for (unsigned i = 0, j = packet.first; i < packet.second; ++i, ++j)
|
||||||
@ -930,7 +932,7 @@ namespace vk
|
|||||||
if (layout.level == 0)
|
if (layout.level == 0)
|
||||||
{
|
{
|
||||||
// Align mip0 on a 128-byte boundary
|
// Align mip0 on a 128-byte boundary
|
||||||
scratch_offset = align(scratch_offset, 128);
|
scratch_offset = utils::align(scratch_offset, 128);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy from upload heap to scratch mem
|
// Copy from upload heap to scratch mem
|
||||||
|
360
rpcs3/Emu/RSX/VK/VKTextureCache.cpp
Normal file
360
rpcs3/Emu/RSX/VK/VKTextureCache.cpp
Normal file
@ -0,0 +1,360 @@
|
|||||||
|
#include "stdafx.h"
|
||||||
|
#include "VKGSRender.h"
|
||||||
|
#include "VKTextureCache.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
|
namespace vk
|
||||||
|
{
|
||||||
|
void cached_texture_section::dma_transfer(vk::command_buffer& cmd, vk::image* src, const areai& src_area, const utils::address_range& valid_range, u32 pitch)
|
||||||
|
{
|
||||||
|
ensure(src->samples() == 1);
|
||||||
|
|
||||||
|
if (!m_device)
|
||||||
|
{
|
||||||
|
m_device = &cmd.get_command_pool().get_owner();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dma_fence)
|
||||||
|
{
|
||||||
|
// NOTE: This can be reached if previously synchronized, or a special path happens.
|
||||||
|
// If a hard flush occurred while this surface was flush_always the cache would have reset its protection afterwards.
|
||||||
|
// DMA resource would still be present but already used to flush previously.
|
||||||
|
vk::get_resource_manager()->dispose(dma_fence);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vk::is_renderpass_open(cmd))
|
||||||
|
{
|
||||||
|
vk::end_renderpass(cmd);
|
||||||
|
}
|
||||||
|
|
||||||
|
src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||||
|
|
||||||
|
const auto internal_bpp = vk::get_format_texel_width(src->format());
|
||||||
|
const auto transfer_width = static_cast<u32>(src_area.width());
|
||||||
|
const auto transfer_height = static_cast<u32>(src_area.height());
|
||||||
|
real_pitch = internal_bpp * transfer_width;
|
||||||
|
rsx_pitch = pitch;
|
||||||
|
|
||||||
|
const bool require_format_conversion = !!(src->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || src->format() == VK_FORMAT_D32_SFLOAT;
|
||||||
|
if (require_format_conversion || pack_unpack_swap_bytes)
|
||||||
|
{
|
||||||
|
const auto section_length = valid_range.length();
|
||||||
|
const auto transfer_pitch = real_pitch;
|
||||||
|
const auto task_length = transfer_pitch * src_area.height();
|
||||||
|
|
||||||
|
auto working_buffer = vk::get_scratch_buffer(task_length);
|
||||||
|
auto final_mapping = vk::map_dma(cmd, valid_range.start, section_length);
|
||||||
|
|
||||||
|
VkBufferImageCopy region = {};
|
||||||
|
region.imageSubresource = { src->aspect(), 0, 0, 1 };
|
||||||
|
region.imageOffset = { src_area.x1, src_area.y1, 0 };
|
||||||
|
region.imageExtent = { transfer_width, transfer_height, 1 };
|
||||||
|
vk::copy_image_to_buffer(cmd, src, working_buffer, region, (require_format_conversion && pack_unpack_swap_bytes));
|
||||||
|
|
||||||
|
// NOTE: For depth/stencil formats, copying to buffer and byteswap are combined into one step above
|
||||||
|
if (pack_unpack_swap_bytes && !require_format_conversion)
|
||||||
|
{
|
||||||
|
const auto texel_layout = vk::get_format_element_size(src->format());
|
||||||
|
const auto elem_size = texel_layout.first;
|
||||||
|
vk::cs_shuffle_base *shuffle_kernel;
|
||||||
|
|
||||||
|
if (elem_size == 2)
|
||||||
|
{
|
||||||
|
shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_16>();
|
||||||
|
}
|
||||||
|
else if (elem_size == 4)
|
||||||
|
{
|
||||||
|
shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_32>();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ensure(get_context() == rsx::texture_upload_context::dma);
|
||||||
|
shuffle_kernel = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (shuffle_kernel)
|
||||||
|
{
|
||||||
|
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
|
||||||
|
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
|
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
||||||
|
|
||||||
|
shuffle_kernel->run(cmd, working_buffer, task_length);
|
||||||
|
|
||||||
|
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
|
||||||
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
|
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rsx_pitch == real_pitch) [[likely]]
|
||||||
|
{
|
||||||
|
VkBufferCopy copy = {};
|
||||||
|
copy.dstOffset = final_mapping.first;
|
||||||
|
copy.size = section_length;
|
||||||
|
vkCmdCopyBuffer(cmd, working_buffer->value, final_mapping.second->value, 1, ©);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (context != rsx::texture_upload_context::dma)
|
||||||
|
{
|
||||||
|
// Partial load for the bits outside the existing image
|
||||||
|
// NOTE: A true DMA section would have been prepped beforehand
|
||||||
|
// TODO: Parial range load/flush
|
||||||
|
vk::load_dma(valid_range.start, section_length);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<VkBufferCopy> copy;
|
||||||
|
copy.reserve(transfer_height);
|
||||||
|
|
||||||
|
u32 dst_offset = final_mapping.first;
|
||||||
|
u32 src_offset = 0;
|
||||||
|
|
||||||
|
for (unsigned row = 0; row < transfer_height; ++row)
|
||||||
|
{
|
||||||
|
copy.push_back({ src_offset, dst_offset, transfer_pitch });
|
||||||
|
src_offset += real_pitch;
|
||||||
|
dst_offset += rsx_pitch;
|
||||||
|
}
|
||||||
|
|
||||||
|
vkCmdCopyBuffer(cmd, working_buffer->value, final_mapping.second->value, transfer_height, copy.data());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
VkBufferImageCopy region = {};
|
||||||
|
region.bufferRowLength = (rsx_pitch / internal_bpp);
|
||||||
|
region.imageSubresource = { src->aspect(), 0, 0, 1 };
|
||||||
|
region.imageOffset = { src_area.x1, src_area.y1, 0 };
|
||||||
|
region.imageExtent = { transfer_width, transfer_height, 1 };
|
||||||
|
|
||||||
|
auto mapping = vk::map_dma(cmd, valid_range.start, valid_range.length());
|
||||||
|
region.bufferOffset = mapping.first;
|
||||||
|
vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, mapping.second->value, 1, ®ion);
|
||||||
|
}
|
||||||
|
|
||||||
|
src->pop_layout(cmd);
|
||||||
|
|
||||||
|
// Create event object for this transfer and queue signal op
|
||||||
|
dma_fence = std::make_unique<vk::event>(*m_device);
|
||||||
|
dma_fence->signal(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT);
|
||||||
|
|
||||||
|
// Set cb flag for queued dma operations
|
||||||
|
cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);
|
||||||
|
|
||||||
|
if (get_context() == rsx::texture_upload_context::dma)
|
||||||
|
{
|
||||||
|
// Save readback hint in case transformation is required later
|
||||||
|
switch (internal_bpp)
|
||||||
|
{
|
||||||
|
case 2:
|
||||||
|
gcm_format = CELL_GCM_TEXTURE_R5G6B5;
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
default:
|
||||||
|
gcm_format = CELL_GCM_TEXTURE_A8R8G8B8;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized = true;
|
||||||
|
sync_timestamp = get_system_time();
|
||||||
|
}
|
||||||
|
|
||||||
|
void texture_cache::copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector<copy_region_descriptor>& sections_to_transfer) const
|
||||||
|
{
|
||||||
|
const auto dst_aspect = dst->aspect();
|
||||||
|
const auto dst_bpp = vk::get_format_texel_width(dst->format());
|
||||||
|
|
||||||
|
for (const auto §ion : sections_to_transfer)
|
||||||
|
{
|
||||||
|
if (!section.src)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const bool typeless = section.src->aspect() != dst_aspect ||
|
||||||
|
!formats_are_bitcast_compatible(dst, section.src);
|
||||||
|
|
||||||
|
// Avoid inserting unnecessary barrier GENERAL->TRANSFER_SRC->GENERAL in active render targets
|
||||||
|
const auto preferred_layout = (section.src->current_layout != VK_IMAGE_LAYOUT_GENERAL) ?
|
||||||
|
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
|
||||||
|
|
||||||
|
section.src->push_layout(cmd, preferred_layout);
|
||||||
|
|
||||||
|
auto src_image = section.src;
|
||||||
|
auto src_x = section.src_x;
|
||||||
|
auto src_y = section.src_y;
|
||||||
|
auto src_w = section.src_w;
|
||||||
|
auto src_h = section.src_h;
|
||||||
|
|
||||||
|
rsx::flags32_t transform = section.xform;
|
||||||
|
if (section.xform == rsx::surface_transform::coordinate_transform)
|
||||||
|
{
|
||||||
|
// Dimensions were given in 'dst' space. Work out the real source coordinates
|
||||||
|
const auto src_bpp = vk::get_format_texel_width(section.src->format());
|
||||||
|
src_x = (src_x * dst_bpp) / src_bpp;
|
||||||
|
src_w = utils::aligned_div<u16>(src_w * dst_bpp, src_bpp);
|
||||||
|
|
||||||
|
transform &= ~(rsx::surface_transform::coordinate_transform);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (auto surface = dynamic_cast<vk::render_target*>(section.src))
|
||||||
|
{
|
||||||
|
surface->transform_samples_to_pixels(src_x, src_w, src_y, src_h);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (typeless) [[unlikely]]
|
||||||
|
{
|
||||||
|
const auto src_bpp = vk::get_format_texel_width(section.src->format());
|
||||||
|
const u16 convert_w = u16(src_w * src_bpp) / dst_bpp;
|
||||||
|
const u16 convert_x = u16(src_x * src_bpp) / dst_bpp;
|
||||||
|
|
||||||
|
if (convert_w == section.dst_w && src_h == section.dst_h &&
|
||||||
|
transform == rsx::surface_transform::identity &&
|
||||||
|
section.level == 0 && section.dst_z == 0)
|
||||||
|
{
|
||||||
|
// Optimization to avoid double transfer
|
||||||
|
// TODO: Handle level and layer offsets
|
||||||
|
const areai src_rect = coordi{{ src_x, src_y }, { src_w, src_h }};
|
||||||
|
const areai dst_rect = coordi{{ section.dst_x, section.dst_y }, { section.dst_w, section.dst_h }};
|
||||||
|
vk::copy_image_typeless(cmd, section.src, dst, src_rect, dst_rect, 1);
|
||||||
|
|
||||||
|
section.src->pop_layout(cmd);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
src_image = vk::get_typeless_helper(dst->format(), dst->format_class(), convert_x + convert_w, src_y + src_h);
|
||||||
|
src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||||
|
|
||||||
|
const areai src_rect = coordi{{ src_x, src_y }, { src_w, src_h }};
|
||||||
|
const areai dst_rect = coordi{{ convert_x, src_y }, { convert_w, src_h }};
|
||||||
|
vk::copy_image_typeless(cmd, section.src, src_image, src_rect, dst_rect, 1);
|
||||||
|
src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||||
|
|
||||||
|
src_x = convert_x;
|
||||||
|
src_w = convert_w;
|
||||||
|
}
|
||||||
|
|
||||||
|
ensure(src_image->current_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL || src_image->current_layout == VK_IMAGE_LAYOUT_GENERAL);
|
||||||
|
|
||||||
|
// Final aspect mask of the 'final' transfer source
|
||||||
|
const auto new_src_aspect = src_image->aspect();
|
||||||
|
|
||||||
|
if (src_w == section.dst_w && src_h == section.dst_h && transform == rsx::surface_transform::identity) [[likely]]
|
||||||
|
{
|
||||||
|
VkImageCopy copy_rgn;
|
||||||
|
copy_rgn.srcOffset = { src_x, src_y, 0 };
|
||||||
|
copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 };
|
||||||
|
copy_rgn.dstSubresource = { dst_aspect, 0, 0, 1 };
|
||||||
|
copy_rgn.srcSubresource = { new_src_aspect, 0, 0, 1 };
|
||||||
|
copy_rgn.extent = { src_w, src_h, 1 };
|
||||||
|
|
||||||
|
if (dst->info.imageType == VK_IMAGE_TYPE_3D)
|
||||||
|
{
|
||||||
|
copy_rgn.dstOffset.z = section.dst_z;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
copy_rgn.dstSubresource.baseArrayLayer = section.dst_z;
|
||||||
|
copy_rgn.dstSubresource.mipLevel = section.level;
|
||||||
|
}
|
||||||
|
|
||||||
|
vkCmdCopyImage(cmd, src_image->value, src_image->current_layout, dst->value, dst->current_layout, 1, ©_rgn);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ensure(section.dst_z == 0);
|
||||||
|
|
||||||
|
u16 dst_x = section.dst_x, dst_y = section.dst_y;
|
||||||
|
vk::image* _dst;
|
||||||
|
|
||||||
|
if (src_image->info.format == dst->info.format && section.level == 0) [[likely]]
|
||||||
|
{
|
||||||
|
_dst = dst;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Either a bitcast is required or a scale+copy to mipmap level
|
||||||
|
_dst = vk::get_typeless_helper(src_image->format(), src_image->format_class(), dst->width(), dst->height() * 2);
|
||||||
|
_dst->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (transform == rsx::surface_transform::identity)
|
||||||
|
{
|
||||||
|
vk::copy_scaled_image(cmd, src_image, _dst,
|
||||||
|
coordi{ { src_x, src_y }, { src_w, src_h } },
|
||||||
|
coordi{ { section.dst_x, section.dst_y }, { section.dst_w, section.dst_h } },
|
||||||
|
1, src_image->format() == _dst->format(),
|
||||||
|
VK_FILTER_NEAREST);
|
||||||
|
}
|
||||||
|
else if (transform == rsx::surface_transform::argb_to_bgra)
|
||||||
|
{
|
||||||
|
VkBufferImageCopy copy{};
|
||||||
|
copy.imageExtent = { src_w, src_h, 1 };
|
||||||
|
copy.imageOffset = { src_x, src_y, 0 };
|
||||||
|
copy.imageSubresource = { src_image->aspect(), 0, 0, 1 };
|
||||||
|
|
||||||
|
const auto mem_length = src_w * src_h * dst_bpp;
|
||||||
|
auto scratch_buf = vk::get_scratch_buffer(mem_length);
|
||||||
|
vkCmdCopyImageToBuffer(cmd, src_image->value, src_image->current_layout, scratch_buf->value, 1, ©);
|
||||||
|
|
||||||
|
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
|
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
||||||
|
|
||||||
|
auto shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_32>();
|
||||||
|
shuffle_kernel->run(cmd, scratch_buf, mem_length);
|
||||||
|
|
||||||
|
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
|
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
|
||||||
|
|
||||||
|
auto tmp = vk::get_typeless_helper(src_image->format(), src_image->format_class(), section.dst_x + section.dst_w, section.dst_y + section.dst_h);
|
||||||
|
tmp->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||||
|
|
||||||
|
copy.imageOffset = { 0, 0, 0 };
|
||||||
|
vkCmdCopyBufferToImage(cmd, scratch_buf->value, tmp->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©);
|
||||||
|
|
||||||
|
dst_x = 0;
|
||||||
|
dst_y = 0;
|
||||||
|
|
||||||
|
if (src_w != section.dst_w || src_h != section.dst_h)
|
||||||
|
{
|
||||||
|
// Optionally scale if needed
|
||||||
|
if (tmp == _dst) [[unlikely]]
|
||||||
|
{
|
||||||
|
dst_y = src_h;
|
||||||
|
}
|
||||||
|
|
||||||
|
vk::copy_scaled_image(cmd, tmp, _dst,
|
||||||
|
areai{ 0, 0, src_w, static_cast<s32>(src_h) },
|
||||||
|
coordi{ { dst_x, dst_y }, { section.dst_w, section.dst_h } },
|
||||||
|
1, tmp->info.format == _dst->info.format,
|
||||||
|
VK_FILTER_NEAREST);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_dst = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fmt::throw_exception("Unreachable");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_dst != dst) [[unlikely]]
|
||||||
|
{
|
||||||
|
// Casting comes after the scaling!
|
||||||
|
VkImageCopy copy_rgn;
|
||||||
|
copy_rgn.srcOffset = { s32(dst_x), s32(dst_y), 0 };
|
||||||
|
copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 };
|
||||||
|
copy_rgn.dstSubresource = { dst_aspect, section.level, 0, 1 };
|
||||||
|
copy_rgn.srcSubresource = { _dst->aspect(), 0, 0, 1 };
|
||||||
|
copy_rgn.extent = { section.dst_w, section.dst_h, 1 };
|
||||||
|
|
||||||
|
_dst->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||||
|
vkCmdCopyImage(cmd, _dst->value, _dst->current_layout, dst->value, dst->current_layout, 1, ©_rgn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
section.src->pop_layout(cmd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -167,160 +167,7 @@ namespace vk
|
|||||||
return flushed;
|
return flushed;
|
||||||
}
|
}
|
||||||
|
|
||||||
void dma_transfer(vk::command_buffer& cmd, vk::image* src, const areai& src_area, const utils::address_range& valid_range, u32 pitch)
|
void dma_transfer(vk::command_buffer& cmd, vk::image* src, const areai& src_area, const utils::address_range& valid_range, u32 pitch);
|
||||||
{
|
|
||||||
ensure(src->samples() == 1);
|
|
||||||
|
|
||||||
if (!m_device)
|
|
||||||
{
|
|
||||||
m_device = &cmd.get_command_pool().get_owner();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dma_fence)
|
|
||||||
{
|
|
||||||
// NOTE: This can be reached if previously synchronized, or a special path happens.
|
|
||||||
// If a hard flush occurred while this surface was flush_always the cache would have reset its protection afterwards.
|
|
||||||
// DMA resource would still be present but already used to flush previously.
|
|
||||||
vk::get_resource_manager()->dispose(dma_fence);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (vk::is_renderpass_open(cmd))
|
|
||||||
{
|
|
||||||
vk::end_renderpass(cmd);
|
|
||||||
}
|
|
||||||
|
|
||||||
src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
|
||||||
|
|
||||||
const auto internal_bpp = vk::get_format_texel_width(src->format());
|
|
||||||
const auto transfer_width = static_cast<u32>(src_area.width());
|
|
||||||
const auto transfer_height = static_cast<u32>(src_area.height());
|
|
||||||
real_pitch = internal_bpp * transfer_width;
|
|
||||||
rsx_pitch = pitch;
|
|
||||||
|
|
||||||
const bool require_format_conversion = !!(src->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || src->format() == VK_FORMAT_D32_SFLOAT;
|
|
||||||
if (require_format_conversion || pack_unpack_swap_bytes)
|
|
||||||
{
|
|
||||||
const auto section_length = valid_range.length();
|
|
||||||
const auto transfer_pitch = real_pitch;
|
|
||||||
const auto task_length = transfer_pitch * src_area.height();
|
|
||||||
|
|
||||||
auto working_buffer = vk::get_scratch_buffer(task_length);
|
|
||||||
auto final_mapping = vk::map_dma(cmd, valid_range.start, section_length);
|
|
||||||
|
|
||||||
VkBufferImageCopy region = {};
|
|
||||||
region.imageSubresource = { src->aspect(), 0, 0, 1 };
|
|
||||||
region.imageOffset = { src_area.x1, src_area.y1, 0 };
|
|
||||||
region.imageExtent = { transfer_width, transfer_height, 1 };
|
|
||||||
vk::copy_image_to_buffer(cmd, src, working_buffer, region, (require_format_conversion && pack_unpack_swap_bytes));
|
|
||||||
|
|
||||||
// NOTE: For depth/stencil formats, copying to buffer and byteswap are combined into one step above
|
|
||||||
if (pack_unpack_swap_bytes && !require_format_conversion)
|
|
||||||
{
|
|
||||||
const auto texel_layout = vk::get_format_element_size(src->format());
|
|
||||||
const auto elem_size = texel_layout.first;
|
|
||||||
vk::cs_shuffle_base *shuffle_kernel;
|
|
||||||
|
|
||||||
if (elem_size == 2)
|
|
||||||
{
|
|
||||||
shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_16>();
|
|
||||||
}
|
|
||||||
else if (elem_size == 4)
|
|
||||||
{
|
|
||||||
shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_32>();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ensure(get_context() == rsx::texture_upload_context::dma);
|
|
||||||
shuffle_kernel = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (shuffle_kernel)
|
|
||||||
{
|
|
||||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
|
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
|
||||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
|
||||||
|
|
||||||
shuffle_kernel->run(cmd, working_buffer, task_length);
|
|
||||||
|
|
||||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
|
|
||||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
|
||||||
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rsx_pitch == real_pitch) [[likely]]
|
|
||||||
{
|
|
||||||
VkBufferCopy copy = {};
|
|
||||||
copy.dstOffset = final_mapping.first;
|
|
||||||
copy.size = section_length;
|
|
||||||
vkCmdCopyBuffer(cmd, working_buffer->value, final_mapping.second->value, 1, ©);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (context != rsx::texture_upload_context::dma)
|
|
||||||
{
|
|
||||||
// Partial load for the bits outside the existing image
|
|
||||||
// NOTE: A true DMA section would have been prepped beforehand
|
|
||||||
// TODO: Parial range load/flush
|
|
||||||
vk::load_dma(valid_range.start, section_length);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<VkBufferCopy> copy;
|
|
||||||
copy.reserve(transfer_height);
|
|
||||||
|
|
||||||
u32 dst_offset = final_mapping.first;
|
|
||||||
u32 src_offset = 0;
|
|
||||||
|
|
||||||
for (unsigned row = 0; row < transfer_height; ++row)
|
|
||||||
{
|
|
||||||
copy.push_back({ src_offset, dst_offset, transfer_pitch });
|
|
||||||
src_offset += real_pitch;
|
|
||||||
dst_offset += rsx_pitch;
|
|
||||||
}
|
|
||||||
|
|
||||||
vkCmdCopyBuffer(cmd, working_buffer->value, final_mapping.second->value, transfer_height, copy.data());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
VkBufferImageCopy region = {};
|
|
||||||
region.bufferRowLength = (rsx_pitch / internal_bpp);
|
|
||||||
region.imageSubresource = { src->aspect(), 0, 0, 1 };
|
|
||||||
region.imageOffset = { src_area.x1, src_area.y1, 0 };
|
|
||||||
region.imageExtent = { transfer_width, transfer_height, 1 };
|
|
||||||
|
|
||||||
auto mapping = vk::map_dma(cmd, valid_range.start, valid_range.length());
|
|
||||||
region.bufferOffset = mapping.first;
|
|
||||||
vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, mapping.second->value, 1, ®ion);
|
|
||||||
}
|
|
||||||
|
|
||||||
src->pop_layout(cmd);
|
|
||||||
|
|
||||||
// Create event object for this transfer and queue signal op
|
|
||||||
dma_fence = std::make_unique<vk::event>(*m_device);
|
|
||||||
dma_fence->signal(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT);
|
|
||||||
|
|
||||||
// Set cb flag for queued dma operations
|
|
||||||
cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);
|
|
||||||
|
|
||||||
if (get_context() == rsx::texture_upload_context::dma)
|
|
||||||
{
|
|
||||||
// Save readback hint in case transformation is required later
|
|
||||||
switch (internal_bpp)
|
|
||||||
{
|
|
||||||
case 2:
|
|
||||||
gcm_format = CELL_GCM_TEXTURE_R5G6B5;
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
default:
|
|
||||||
gcm_format = CELL_GCM_TEXTURE_A8R8G8B8;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
synchronized = true;
|
|
||||||
sync_timestamp = get_system_time();
|
|
||||||
}
|
|
||||||
|
|
||||||
void copy_texture(vk::command_buffer& cmd, bool miss)
|
void copy_texture(vk::command_buffer& cmd, bool miss)
|
||||||
{
|
{
|
||||||
@ -610,202 +457,7 @@ namespace vk
|
|||||||
return mapping;
|
return mapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector<copy_region_descriptor>& sections_to_transfer) const
|
void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector<copy_region_descriptor>& sections_to_transfer) const;
|
||||||
{
|
|
||||||
const auto dst_aspect = dst->aspect();
|
|
||||||
const auto dst_bpp = vk::get_format_texel_width(dst->format());
|
|
||||||
|
|
||||||
for (const auto §ion : sections_to_transfer)
|
|
||||||
{
|
|
||||||
if (!section.src)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const bool typeless = section.src->aspect() != dst_aspect ||
|
|
||||||
!formats_are_bitcast_compatible(dst, section.src);
|
|
||||||
|
|
||||||
// Avoid inserting unnecessary barrier GENERAL->TRANSFER_SRC->GENERAL in active render targets
|
|
||||||
const auto preferred_layout = (section.src->current_layout != VK_IMAGE_LAYOUT_GENERAL) ?
|
|
||||||
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
|
|
||||||
|
|
||||||
section.src->push_layout(cmd, preferred_layout);
|
|
||||||
|
|
||||||
auto src_image = section.src;
|
|
||||||
auto src_x = section.src_x;
|
|
||||||
auto src_y = section.src_y;
|
|
||||||
auto src_w = section.src_w;
|
|
||||||
auto src_h = section.src_h;
|
|
||||||
|
|
||||||
rsx::flags32_t transform = section.xform;
|
|
||||||
if (section.xform == rsx::surface_transform::coordinate_transform)
|
|
||||||
{
|
|
||||||
// Dimensions were given in 'dst' space. Work out the real source coordinates
|
|
||||||
const auto src_bpp = vk::get_format_texel_width(section.src->format());
|
|
||||||
src_x = (src_x * dst_bpp) / src_bpp;
|
|
||||||
src_w = ::aligned_div<u16>(src_w * dst_bpp, src_bpp);
|
|
||||||
|
|
||||||
transform &= ~(rsx::surface_transform::coordinate_transform);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (auto surface = dynamic_cast<vk::render_target*>(section.src))
|
|
||||||
{
|
|
||||||
surface->transform_samples_to_pixels(src_x, src_w, src_y, src_h);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (typeless) [[unlikely]]
|
|
||||||
{
|
|
||||||
const auto src_bpp = vk::get_format_texel_width(section.src->format());
|
|
||||||
const u16 convert_w = u16(src_w * src_bpp) / dst_bpp;
|
|
||||||
const u16 convert_x = u16(src_x * src_bpp) / dst_bpp;
|
|
||||||
|
|
||||||
if (convert_w == section.dst_w && src_h == section.dst_h &&
|
|
||||||
transform == rsx::surface_transform::identity &&
|
|
||||||
section.level == 0 && section.dst_z == 0)
|
|
||||||
{
|
|
||||||
// Optimization to avoid double transfer
|
|
||||||
// TODO: Handle level and layer offsets
|
|
||||||
const areai src_rect = coordi{{ src_x, src_y }, { src_w, src_h }};
|
|
||||||
const areai dst_rect = coordi{{ section.dst_x, section.dst_y }, { section.dst_w, section.dst_h }};
|
|
||||||
vk::copy_image_typeless(cmd, section.src, dst, src_rect, dst_rect, 1);
|
|
||||||
|
|
||||||
section.src->pop_layout(cmd);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
src_image = vk::get_typeless_helper(dst->format(), dst->format_class(), convert_x + convert_w, src_y + src_h);
|
|
||||||
src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
|
||||||
|
|
||||||
const areai src_rect = coordi{{ src_x, src_y }, { src_w, src_h }};
|
|
||||||
const areai dst_rect = coordi{{ convert_x, src_y }, { convert_w, src_h }};
|
|
||||||
vk::copy_image_typeless(cmd, section.src, src_image, src_rect, dst_rect, 1);
|
|
||||||
src_image->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
|
||||||
|
|
||||||
src_x = convert_x;
|
|
||||||
src_w = convert_w;
|
|
||||||
}
|
|
||||||
|
|
||||||
ensure(src_image->current_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL || src_image->current_layout == VK_IMAGE_LAYOUT_GENERAL);
|
|
||||||
|
|
||||||
// Final aspect mask of the 'final' transfer source
|
|
||||||
const auto new_src_aspect = src_image->aspect();
|
|
||||||
|
|
||||||
if (src_w == section.dst_w && src_h == section.dst_h && transform == rsx::surface_transform::identity) [[likely]]
|
|
||||||
{
|
|
||||||
VkImageCopy copy_rgn;
|
|
||||||
copy_rgn.srcOffset = { src_x, src_y, 0 };
|
|
||||||
copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 };
|
|
||||||
copy_rgn.dstSubresource = { dst_aspect, 0, 0, 1 };
|
|
||||||
copy_rgn.srcSubresource = { new_src_aspect, 0, 0, 1 };
|
|
||||||
copy_rgn.extent = { src_w, src_h, 1 };
|
|
||||||
|
|
||||||
if (dst->info.imageType == VK_IMAGE_TYPE_3D)
|
|
||||||
{
|
|
||||||
copy_rgn.dstOffset.z = section.dst_z;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
copy_rgn.dstSubresource.baseArrayLayer = section.dst_z;
|
|
||||||
copy_rgn.dstSubresource.mipLevel = section.level;
|
|
||||||
}
|
|
||||||
|
|
||||||
vkCmdCopyImage(cmd, src_image->value, src_image->current_layout, dst->value, dst->current_layout, 1, ©_rgn);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ensure(section.dst_z == 0);
|
|
||||||
|
|
||||||
u16 dst_x = section.dst_x, dst_y = section.dst_y;
|
|
||||||
vk::image* _dst;
|
|
||||||
|
|
||||||
if (src_image->info.format == dst->info.format && section.level == 0) [[likely]]
|
|
||||||
{
|
|
||||||
_dst = dst;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Either a bitcast is required or a scale+copy to mipmap level
|
|
||||||
_dst = vk::get_typeless_helper(src_image->format(), src_image->format_class(), dst->width(), dst->height() * 2);
|
|
||||||
_dst->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (transform == rsx::surface_transform::identity)
|
|
||||||
{
|
|
||||||
vk::copy_scaled_image(cmd, src_image, _dst,
|
|
||||||
coordi{ { src_x, src_y }, { src_w, src_h } },
|
|
||||||
coordi{ { section.dst_x, section.dst_y }, { section.dst_w, section.dst_h } },
|
|
||||||
1, src_image->format() == _dst->format(),
|
|
||||||
VK_FILTER_NEAREST);
|
|
||||||
}
|
|
||||||
else if (transform == rsx::surface_transform::argb_to_bgra)
|
|
||||||
{
|
|
||||||
VkBufferImageCopy copy{};
|
|
||||||
copy.imageExtent = { src_w, src_h, 1 };
|
|
||||||
copy.imageOffset = { src_x, src_y, 0 };
|
|
||||||
copy.imageSubresource = { src_image->aspect(), 0, 0, 1 };
|
|
||||||
|
|
||||||
const auto mem_length = src_w * src_h * dst_bpp;
|
|
||||||
auto scratch_buf = vk::get_scratch_buffer(mem_length);
|
|
||||||
vkCmdCopyImageToBuffer(cmd, src_image->value, src_image->current_layout, scratch_buf->value, 1, ©);
|
|
||||||
|
|
||||||
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
|
||||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
|
||||||
|
|
||||||
auto shuffle_kernel = vk::get_compute_task<vk::cs_shuffle_32>();
|
|
||||||
shuffle_kernel->run(cmd, scratch_buf, mem_length);
|
|
||||||
|
|
||||||
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
|
||||||
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
|
|
||||||
|
|
||||||
auto tmp = vk::get_typeless_helper(src_image->format(), src_image->format_class(), section.dst_x + section.dst_w, section.dst_y + section.dst_h);
|
|
||||||
tmp->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
|
||||||
|
|
||||||
copy.imageOffset = { 0, 0, 0 };
|
|
||||||
vkCmdCopyBufferToImage(cmd, scratch_buf->value, tmp->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©);
|
|
||||||
|
|
||||||
dst_x = 0;
|
|
||||||
dst_y = 0;
|
|
||||||
|
|
||||||
if (src_w != section.dst_w || src_h != section.dst_h)
|
|
||||||
{
|
|
||||||
// Optionally scale if needed
|
|
||||||
if (tmp == _dst) [[unlikely]]
|
|
||||||
{
|
|
||||||
dst_y = src_h;
|
|
||||||
}
|
|
||||||
|
|
||||||
vk::copy_scaled_image(cmd, tmp, _dst,
|
|
||||||
areai{ 0, 0, src_w, static_cast<s32>(src_h) },
|
|
||||||
coordi{ { dst_x, dst_y }, { section.dst_w, section.dst_h } },
|
|
||||||
1, tmp->info.format == _dst->info.format,
|
|
||||||
VK_FILTER_NEAREST);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
_dst = tmp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
fmt::throw_exception("Unreachable");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (_dst != dst) [[unlikely]]
|
|
||||||
{
|
|
||||||
// Casting comes after the scaling!
|
|
||||||
VkImageCopy copy_rgn;
|
|
||||||
copy_rgn.srcOffset = { s32(dst_x), s32(dst_y), 0 };
|
|
||||||
copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 };
|
|
||||||
copy_rgn.dstSubresource = { dst_aspect, section.level, 0, 1 };
|
|
||||||
copy_rgn.srcSubresource = { _dst->aspect(), 0, 0, 1 };
|
|
||||||
copy_rgn.extent = { section.dst_w, section.dst_h, 1 };
|
|
||||||
|
|
||||||
_dst->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
|
||||||
vkCmdCopyImage(cmd, _dst->value, _dst->current_layout, dst->value, dst->current_layout, 1, ©_rgn);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
section.src->pop_layout(cmd);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
vk::image* get_template_from_collection_impl(const std::vector<copy_region_descriptor>& sections_to_transfer) const
|
vk::image* get_template_from_collection_impl(const std::vector<copy_region_descriptor>& sections_to_transfer) const
|
||||||
{
|
{
|
||||||
|
@ -107,6 +107,7 @@
|
|||||||
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
|
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\GL\GLTexture.cpp" />
|
<ClCompile Include="Emu\RSX\GL\GLTexture.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
|
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
|
||||||
|
<ClCompile Include="Emu\RSX\GL\GLTextureCache.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
<ClCompile Include="Emu\RSX\GL\GLShaderInterpreter.cpp" />
|
<ClCompile Include="Emu\RSX\GL\GLShaderInterpreter.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
|
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\GL\GLPipelineCompiler.cpp" />
|
<ClCompile Include="Emu\RSX\GL\GLPipelineCompiler.cpp" />
|
||||||
|
<ClCompile Include="Emu\RSX\GL\GLTextureCache.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="Emu\RSX\GL\GLTexture.h" />
|
<ClInclude Include="Emu\RSX\GL\GLTexture.h" />
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#include "PSF.h"
|
#include "PSF.h"
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
LOG_CHANNEL(psf_log, "PSF");
|
LOG_CHANNEL(psf_log, "PSF");
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
@ -208,7 +210,7 @@ namespace psf
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Align next section (data) offset
|
// Align next section (data) offset
|
||||||
key_offset = ::align(key_offset, 4);
|
key_offset = utils::align(key_offset, 4);
|
||||||
|
|
||||||
// Generate header
|
// Generate header
|
||||||
header_t header;
|
header_t header;
|
||||||
|
@ -67,6 +67,7 @@
|
|||||||
<ClCompile Include="Emu\RSX\VK\VKTexture.cpp" />
|
<ClCompile Include="Emu\RSX\VK\VKTexture.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\VK\VKVertexBuffers.cpp" />
|
<ClCompile Include="Emu\RSX\VK\VKVertexBuffers.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\VK\VKVertexProgram.cpp" />
|
<ClCompile Include="Emu\RSX\VK\VKVertexProgram.cpp" />
|
||||||
|
<ClCompile Include="Emu\RSX\VK\VKTextureCache.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\VK\VKMemAlloc.cpp" />
|
<ClCompile Include="Emu\RSX\VK\VKMemAlloc.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
<ClCompile Include="Emu\RSX\VK\VKTexture.cpp" />
|
<ClCompile Include="Emu\RSX\VK\VKTexture.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\VK\VKVertexBuffers.cpp" />
|
<ClCompile Include="Emu\RSX\VK\VKVertexBuffers.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\VK\VKVertexProgram.cpp" />
|
<ClCompile Include="Emu\RSX\VK\VKVertexProgram.cpp" />
|
||||||
|
<ClCompile Include="Emu\RSX\VK\VKTextureCache.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\VK\VKMemAlloc.cpp" />
|
<ClCompile Include="Emu\RSX\VK\VKMemAlloc.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\VK\VKCommandStream.cpp" />
|
<ClCompile Include="Emu\RSX\VK\VKCommandStream.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\VK\VKQueryPool.cpp" />
|
<ClCompile Include="Emu\RSX\VK\VKQueryPool.cpp" />
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include "Emu/Cell/PPUFunction.h"
|
#include "Emu/Cell/PPUFunction.h"
|
||||||
|
|
||||||
#include "util/yaml.hpp"
|
#include "util/yaml.hpp"
|
||||||
|
#include "util/asm.hpp"
|
||||||
#include "util/to_endian.hpp"
|
#include "util/to_endian.hpp"
|
||||||
#include "Utilities/StrUtil.h"
|
#include "Utilities/StrUtil.h"
|
||||||
#include "Utilities/bin_patch.h" // get_patches_path()
|
#include "Utilities/bin_patch.h" // get_patches_path()
|
||||||
@ -418,17 +419,17 @@ bool cheat_engine::set_value(const u32 offset, const T value)
|
|||||||
|
|
||||||
if (exec_code_at_end && exec_code_at_start)
|
if (exec_code_at_end && exec_code_at_start)
|
||||||
{
|
{
|
||||||
size = align<u32>(addr + size, 4) - (addr & -4);
|
size = utils::align<u32>(addr + size, 4) - (addr & -4);
|
||||||
addr &= -4;
|
addr &= -4;
|
||||||
}
|
}
|
||||||
else if (exec_code_at_end)
|
else if (exec_code_at_end)
|
||||||
{
|
{
|
||||||
size -= align<u32>(size - 4096 + (addr & 4095), 4);
|
size -= utils::align<u32>(size - 4096 + (addr & 4095), 4);
|
||||||
addr = align<u32>(addr, 4096);
|
addr = utils::align<u32>(addr, 4096);
|
||||||
}
|
}
|
||||||
else if (exec_code_at_start)
|
else if (exec_code_at_start)
|
||||||
{
|
{
|
||||||
size = align<u32>(4096 - (addr & 4095), 4);
|
size = utils::align<u32>(4096 - (addr & 4095), 4);
|
||||||
addr &= -4;
|
addr &= -4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,6 +27,8 @@
|
|||||||
#include <QVBoxLayout>
|
#include <QVBoxLayout>
|
||||||
#include <QTimer>
|
#include <QTimer>
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
constexpr auto qstr = QString::fromStdString;
|
constexpr auto qstr = QString::fromStdString;
|
||||||
|
|
||||||
debugger_frame::debugger_frame(std::shared_ptr<gui_settings> settings, QWidget *parent)
|
debugger_frame::debugger_frame(std::shared_ptr<gui_settings> settings, QWidget *parent)
|
||||||
@ -573,7 +575,7 @@ void debugger_frame::ShowGotoAddressDialog()
|
|||||||
if (cpu)
|
if (cpu)
|
||||||
{
|
{
|
||||||
// -1 turns into 0
|
// -1 turns into 0
|
||||||
u32 pc = ::align<u32>(cpu->get_pc(), 4);
|
u32 pc = utils::align<u32>(cpu->get_pc(), 4);
|
||||||
address_preview_label->setText(QString("Address: 0x%1").arg(pc, 8, 16, QChar('0')));
|
address_preview_label->setText(QString("Address: 0x%1").arg(pc, 8, 16, QChar('0')));
|
||||||
expression_input->setPlaceholderText(QString("0x%1").arg(pc, 8, 16, QChar('0')));
|
expression_input->setPlaceholderText(QString("0x%1").arg(pc, 8, 16, QChar('0')));
|
||||||
}
|
}
|
||||||
@ -605,7 +607,7 @@ void debugger_frame::ShowGotoAddressDialog()
|
|||||||
if (diag->exec() == QDialog::Accepted)
|
if (diag->exec() == QDialog::Accepted)
|
||||||
{
|
{
|
||||||
// -1 turns into 0
|
// -1 turns into 0
|
||||||
u32 address = ::align<u32>(cpu ? cpu->get_pc() : 0, 4);
|
u32 address = utils::align<u32>(cpu ? cpu->get_pc() : 0, 4);
|
||||||
|
|
||||||
if (expression_input->text().isEmpty())
|
if (expression_input->text().isEmpty())
|
||||||
{
|
{
|
||||||
|
@ -15,6 +15,8 @@
|
|||||||
#include <QWheelEvent>
|
#include <QWheelEvent>
|
||||||
#include <shared_mutex>
|
#include <shared_mutex>
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
constexpr auto qstr = QString::fromStdString;
|
constexpr auto qstr = QString::fromStdString;
|
||||||
|
|
||||||
memory_viewer_panel::memory_viewer_panel(QWidget* parent, u32 addr)
|
memory_viewer_panel::memory_viewer_panel(QWidget* parent, u32 addr)
|
||||||
@ -209,7 +211,7 @@ memory_viewer_panel::memory_viewer_panel(QWidget* parent, u32 addr)
|
|||||||
{
|
{
|
||||||
bool ok;
|
bool ok;
|
||||||
const QString text = m_addr_line->text();
|
const QString text = m_addr_line->text();
|
||||||
m_addr = (text.startsWith("0x", Qt::CaseInsensitive) ? text.right(text.size() - 2) : text).toULong(&ok, 16);
|
m_addr = (text.startsWith("0x", Qt::CaseInsensitive) ? text.right(text.size() - 2) : text).toULong(&ok, 16);
|
||||||
m_addr -= m_addr % (m_colcount * 4); // Align by amount of bytes in a row
|
m_addr -= m_addr % (m_colcount * 4); // Align by amount of bytes in a row
|
||||||
m_addr_line->setText(QString("%1").arg(m_addr, 8, 16, QChar('0'))); // get 8 digits in input line
|
m_addr_line->setText(QString("%1").arg(m_addr, 8, 16, QChar('0'))); // get 8 digits in input line
|
||||||
ShowMemory();
|
ShowMemory();
|
||||||
@ -293,7 +295,7 @@ void memory_viewer_panel::resizeEvent(QResizeEvent *event)
|
|||||||
std::string memory_viewer_panel::getHeaderAtAddr(u32 addr)
|
std::string memory_viewer_panel::getHeaderAtAddr(u32 addr)
|
||||||
{
|
{
|
||||||
// Check if its an SPU Local Storage beginning
|
// Check if its an SPU Local Storage beginning
|
||||||
const u32 spu_boundary = ::align<u32>(addr, SPU_LS_SIZE);
|
const u32 spu_boundary = utils::align<u32>(addr, SPU_LS_SIZE);
|
||||||
|
|
||||||
if (spu_boundary <= addr + m_colcount * 4 - 1)
|
if (spu_boundary <= addr + m_colcount * 4 - 1)
|
||||||
{
|
{
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include <charconv>
|
#include <charconv>
|
||||||
|
|
||||||
#include "util/v128.hpp"
|
#include "util/v128.hpp"
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
constexpr auto qstr = QString::fromStdString;
|
constexpr auto qstr = QString::fromStdString;
|
||||||
inline std::string sstr(const QString& _in) { return _in.toStdString(); }
|
inline std::string sstr(const QString& _in) { return _in.toStdString(); }
|
||||||
@ -30,7 +31,7 @@ enum registers : int
|
|||||||
ppu_ff31 = ppu_ff0 + 31,
|
ppu_ff31 = ppu_ff0 + 31,
|
||||||
ppu_v0,
|
ppu_v0,
|
||||||
ppu_v31 = ppu_v0 + 31,
|
ppu_v31 = ppu_v0 + 31,
|
||||||
spu_r0 = ::align(ppu_v31 + 1u, 128),
|
spu_r0 = utils::align(ppu_v31 + 1u, 128),
|
||||||
spu_r127 = spu_r0 + 127,
|
spu_r127 = spu_r0 + 127,
|
||||||
PPU_CR,
|
PPU_CR,
|
||||||
PPU_LR,
|
PPU_LR,
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
#include "util/sysinfo.hpp"
|
#include "util/sysinfo.hpp"
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
#ifdef WITH_DISCORD_RPC
|
#ifdef WITH_DISCORD_RPC
|
||||||
#include "_discord_utils.h"
|
#include "_discord_utils.h"
|
||||||
@ -1809,7 +1810,7 @@ void settings_dialog::SnapSlider(QSlider *slider, int interval)
|
|||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
slider->setValue(::rounded_div(value, interval) * interval);
|
slider->setValue(utils::rounded_div(value, interval) * interval);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -292,6 +292,32 @@ namespace utils
|
|||||||
do _mm_pause();
|
do _mm_pause();
|
||||||
while (__rdtsc() - start < cycles);
|
while (__rdtsc() - start < cycles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Align to power of 2
|
||||||
|
template <typename T, typename = std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value>>
|
||||||
|
constexpr T align(T value, ullong align)
|
||||||
|
{
|
||||||
|
return static_cast<T>((value + (align - 1)) & (0 - align));
|
||||||
|
}
|
||||||
|
|
||||||
|
// General purpose aligned division, the result is rounded up not truncated
|
||||||
|
template <typename T, typename = std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value>>
|
||||||
|
constexpr T aligned_div(T value, ullong align)
|
||||||
|
{
|
||||||
|
return static_cast<T>((value + align - 1) / align);
|
||||||
|
}
|
||||||
|
|
||||||
|
// General purpose aligned division, the result is rounded to nearest
|
||||||
|
template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>>
|
||||||
|
constexpr T rounded_div(T value, std::conditional_t<std::is_signed<T>::value, llong, ullong> align)
|
||||||
|
{
|
||||||
|
if constexpr (std::is_unsigned<T>::value)
|
||||||
|
{
|
||||||
|
return static_cast<T>((value + (align / 2)) / align);
|
||||||
|
}
|
||||||
|
|
||||||
|
return static_cast<T>((value + (value < 0 ? 0 - align : align) / 2) / align);
|
||||||
|
}
|
||||||
} // namespace utils
|
} // namespace utils
|
||||||
|
|
||||||
using utils::busy_wait;
|
using utils::busy_wait;
|
||||||
|
@ -15,6 +15,8 @@
|
|||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
inline std::array<u32, 4> utils::get_cpuid(u32 func, u32 subfunc)
|
inline std::array<u32, 4> utils::get_cpuid(u32 func, u32 subfunc)
|
||||||
{
|
{
|
||||||
int regs[4];
|
int regs[4];
|
||||||
@ -298,7 +300,7 @@ std::string utils::get_OS_version()
|
|||||||
|
|
||||||
static constexpr ullong round_tsc(ullong val)
|
static constexpr ullong round_tsc(ullong val)
|
||||||
{
|
{
|
||||||
return ::rounded_div(val, 1'000'000) * 1'000'000;
|
return utils::rounded_div(val, 1'000'000) * 1'000'000;
|
||||||
}
|
}
|
||||||
|
|
||||||
ullong utils::get_tsc_freq()
|
ullong utils::get_tsc_freq()
|
||||||
|
@ -595,31 +595,6 @@ struct f16
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T, typename = std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value>>
|
|
||||||
constexpr T align(T value, ullong align)
|
|
||||||
{
|
|
||||||
return static_cast<T>((value + (align - 1)) & (0 - align));
|
|
||||||
}
|
|
||||||
|
|
||||||
// General purpose aligned division, the result is rounded up not truncated
|
|
||||||
template <typename T, typename = std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value>>
|
|
||||||
constexpr T aligned_div(T value, ullong align)
|
|
||||||
{
|
|
||||||
return static_cast<T>((value + align - 1) / align);
|
|
||||||
}
|
|
||||||
|
|
||||||
// General purpose aligned division, the result is rounded to nearest
|
|
||||||
template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>>
|
|
||||||
constexpr T rounded_div(T value, std::conditional_t<std::is_signed<T>::value, llong, ullong> align)
|
|
||||||
{
|
|
||||||
if constexpr (std::is_unsigned<T>::value)
|
|
||||||
{
|
|
||||||
return static_cast<T>((value + (align / 2)) / align);
|
|
||||||
}
|
|
||||||
|
|
||||||
return static_cast<T>((value + (value < 0 ? 0 - align : align) / 2) / align);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, typename T2>
|
template <typename T, typename T2>
|
||||||
inline u32 offset32(T T2::*const mptr)
|
inline u32 offset32(T T2::*const mptr)
|
||||||
{
|
{
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#include "util/logs.hpp"
|
#include "util/logs.hpp"
|
||||||
#include "util/vm.hpp"
|
#include "util/vm.hpp"
|
||||||
|
#include "util/asm.hpp"
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#include "util/dyn_lib.hpp"
|
#include "util/dyn_lib.hpp"
|
||||||
#include <Windows.h>
|
#include <Windows.h>
|
||||||
@ -209,7 +210,7 @@ namespace utils
|
|||||||
}
|
}
|
||||||
|
|
||||||
shm::shm(u32 size, u32 flags)
|
shm::shm(u32 size, u32 flags)
|
||||||
: m_size(::align(size, 0x10000))
|
: m_size(utils::align(size, 0x10000))
|
||||||
, m_flags(flags)
|
, m_flags(flags)
|
||||||
, m_ptr(0)
|
, m_ptr(0)
|
||||||
{
|
{
|
||||||
@ -306,7 +307,7 @@ namespace utils
|
|||||||
{
|
{
|
||||||
const u64 res64 = reinterpret_cast<u64>(::mmap(reinterpret_cast<void*>(ptr64), m_size + 0xf000, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0));
|
const u64 res64 = reinterpret_cast<u64>(::mmap(reinterpret_cast<void*>(ptr64), m_size + 0xf000, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0));
|
||||||
|
|
||||||
const u64 aligned = ::align(res64, 0x10000);
|
const u64 aligned = utils::align(res64, 0x10000);
|
||||||
const auto result = ::mmap(reinterpret_cast<void*>(aligned), m_size, +prot, MAP_SHARED | MAP_FIXED, m_file, 0);
|
const auto result = ::mmap(reinterpret_cast<void*>(aligned), m_size, +prot, MAP_SHARED | MAP_FIXED, m_file, 0);
|
||||||
|
|
||||||
// Now cleanup remnants
|
// Now cleanup remnants
|
||||||
|
Loading…
Reference in New Issue
Block a user