1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2025-01-31 20:41:45 +01:00

Implemented async nv3089::image_in & nv0039::buffer_notify

Added range<> utility
Handle access violation from any thread
minor improvements
This commit is contained in:
DHrpcs3 2016-03-15 20:05:35 +03:00
parent 0f29b5ddb2
commit 53e3833aa1
15 changed files with 500 additions and 267 deletions

View File

@ -7,6 +7,7 @@
#include "Emu/Cell/RawSPUThread.h"
#include "Emu/SysCalls/SysCalls.h"
#include "Thread.h"
#include "range.h"
#ifdef _WIN32
#include <windows.h>
@ -902,7 +903,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
{
LOG_ERROR(MEMORY, "Invalid or unsupported instruction (op=%d, reg=%d, d_size=%lld, a_size=0x%llx, i_size=%lld)", op, reg, d_size, a_size, i_size);
report_opcode();
return false;
return true;
}
switch (op)
@ -914,7 +915,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
if (reg - X64R_XMM0 >= 16)
{
LOG_ERROR(MEMORY, "X64OP_STORE: d_size=16, reg=%d", reg);
return false;
return true;
}
std::memcpy(vm::base_priv(addr), XMMREG(context, reg - X64R_XMM0), 16);
@ -924,7 +925,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
u64 reg_value;
if (!get_x64_reg_value(context, reg, d_size, i_size, reg_value))
{
return false;
return true;
}
std::memcpy(vm::base_priv(addr), &reg_value, d_size);
@ -935,13 +936,13 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
if (d_size > 8)
{
LOG_ERROR(MEMORY, "X64OP_MOVS: d_size=%lld", d_size);
return false;
return true;
}
if (vm::base(addr) != (void*)RDI(context))
{
LOG_ERROR(MEMORY, "X64OP_MOVS: rdi=0x%llx, rsi=0x%llx, addr=0x%x", (u64)RDI(context), (u64)RSI(context), addr);
return false;
return true;
}
u32 a_addr = addr;
@ -958,7 +959,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
if (EFLAGS(context) & 0x400 /* direction flag */)
{
LOG_ERROR(MEMORY, "X64OP_MOVS TODO: reversed direction");
return false;
return true;
//RSI(context) -= d_size;
//RDI(context) -= d_size;
//a_addr -= (u32)d_size;
@ -990,19 +991,19 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
if (d_size > 8)
{
LOG_ERROR(MEMORY, "X64OP_STOS: d_size=%lld", d_size);
return false;
return true;
}
if (vm::base(addr) != (void*)RDI(context))
{
LOG_ERROR(MEMORY, "X64OP_STOS: rdi=0x%llx, addr=0x%x", (u64)RDI(context), addr);
return false;
return true;
}
u64 value;
if (!get_x64_reg_value(context, X64R_RAX, d_size, i_size, value))
{
return false;
return true;
}
u32 a_addr = addr;
@ -1016,7 +1017,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
if (EFLAGS(context) & 0x400 /* direction flag */)
{
LOG_ERROR(MEMORY, "X64OP_STOS TODO: reversed direction");
return false;
return true;
//RDI(context) -= d_size;
//a_addr -= (u32)d_size;
}
@ -1046,7 +1047,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
u64 reg_value;
if (!get_x64_reg_value(context, reg, d_size, i_size, reg_value))
{
return false;
return true;
}
switch (d_size)
@ -1055,12 +1056,12 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
case 2: reg_value = sync_lock_test_and_set((u16*)vm::base_priv(addr), (u16)reg_value); break;
case 4: reg_value = sync_lock_test_and_set((u32*)vm::base_priv(addr), (u32)reg_value); break;
case 8: reg_value = sync_lock_test_and_set((u64*)vm::base_priv(addr), (u64)reg_value); break;
default: return false;
default: return true;
}
if (!put_x64_reg_value(context, reg, d_size, reg_value))
{
return false;
return true;
}
break;
}
@ -1069,7 +1070,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
u64 reg_value, old_value, cmp_value;
if (!get_x64_reg_value(context, reg, d_size, i_size, reg_value) || !get_x64_reg_value(context, X64R_RAX, d_size, i_size, cmp_value))
{
return false;
return true;
}
switch (d_size)
@ -1078,7 +1079,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
case 2: old_value = sync_val_compare_and_swap((u16*)vm::base_priv(addr), (u16)cmp_value, (u16)reg_value); break;
case 4: old_value = sync_val_compare_and_swap((u32*)vm::base_priv(addr), (u32)cmp_value, (u32)reg_value); break;
case 8: old_value = sync_val_compare_and_swap((u64*)vm::base_priv(addr), (u64)cmp_value, (u64)reg_value); break;
default: return false;
default: return true;
}
if (!put_x64_reg_value(context, X64R_RAX, d_size, old_value) || !set_x64_cmp_flags(context, d_size, cmp_value, old_value))
@ -1092,7 +1093,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
u64 value;
if (!get_x64_reg_value(context, reg, d_size, i_size, value))
{
return false;
return true;
}
switch (d_size)
@ -1101,12 +1102,12 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
case 2: value &= sync_fetch_and_and((u16*)vm::base_priv(addr), (u16)value); break;
case 4: value &= sync_fetch_and_and((u32*)vm::base_priv(addr), (u32)value); break;
case 8: value &= sync_fetch_and_and((u64*)vm::base_priv(addr), (u64)value); break;
default: return false;
default: return true;
}
if (!set_x64_cmp_flags(context, d_size, value, 0))
{
return false;
return true;
}
break;
}
@ -1114,7 +1115,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context)
{
LOG_ERROR(MEMORY, "Invalid or unsupported operation (op=%d, reg=%d, d_size=%lld, a_size=0x%llx, i_size=%lld)", op, reg, d_size, a_size, i_size);
report_opcode();
return false;
return true;
}
}
@ -1148,17 +1149,30 @@ void prepare_throw_access_violation(x64_context* context, const char* cause, u32
static LONG exception_handler(PEXCEPTION_POINTERS pExp)
{
const u64 addr64 = pExp->ExceptionRecord->ExceptionInformation[1] - (u64)vm::base(0);
const bool is_writing = pExp->ExceptionRecord->ExceptionInformation[0] != 0;
if (pExp->ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION)
{
const range<u64> vm_range = range<u64>((u64)vm::base(0)).size(1ull << 32);
const u64 address = pExp->ExceptionRecord->ExceptionInformation[1];
if (pExp->ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && addr64 < 0x100000000ull && thread_ctrl::get_current() && handle_access_violation((u32)addr64, is_writing, pExp->ContextRecord))
if (vm_range.contains(address))
{
u32 vaddress = u32(address - vm_range.begin());
const bool is_writing = pExp->ExceptionRecord->ExceptionInformation[0] != 0;
if (handle_access_violation(vaddress, is_writing, pExp->ContextRecord))
{
return EXCEPTION_CONTINUE_EXECUTION;
}
}
}
if (pExp->ExceptionRecord->ExceptionCode == EXCEPTION_NONCONTINUABLE_EXCEPTION)
{
return EXCEPTION_CONTINUE_EXECUTION;
}
else
{
return EXCEPTION_CONTINUE_SEARCH;
}
return EXCEPTION_CONTINUE_SEARCH;
}
static LONG exception_filter(PEXCEPTION_POINTERS pExp)
@ -1245,18 +1259,20 @@ static void signal_handler(int sig, siginfo_t* info, void* uct)
const bool is_writing = context->uc_mcontext.gregs[REG_ERR] & 0x2;
#endif
const u64 addr64 = (u64)info->si_addr - (u64)vm::base(0);
const range<u64> vm_range = range<u64>((u64)vm::base(0)).size(1ull << 32);
const u64 address = (u64)info->si_addr;
const auto cause = is_writing ? "writing" : "reading";
// TODO: Exception specific informative messages
if (addr64 < 0x100000000ull && thread_ctrl::get_current())
if (vm_range.contains(address))
{
u32 vaddress = u32(address - vm_range.begin());
// Try to process access violation
if (!handle_access_violation((u32)addr64, is_writing, context))
if (!handle_access_violation(vaddress, is_writing, context))
{
// Setup throw_access_violation() call on the context
prepare_throw_access_violation(context, cause, (u32)addr64);
prepare_throw_access_violation(context, cause, vaddress);
}
}
else

158
Utilities/range.h Normal file
View File

@ -0,0 +1,158 @@
#pragma once

#include <algorithm> // std::min / std::max (was missing; extend() and operator& need them)
#include <cstddef>

// Half-open interval [m_begin, m_end) over an arithmetic-like type.
// Used e.g. for address ranges: begin() is inclusive, end() is exclusive.
template<typename Type>
class range
{
	Type m_begin;
	Type m_end;

public:
	using type = Type;

	// Construct [begin, end).
	constexpr range(Type begin, Type end)
		: m_begin(begin), m_end(end)
	{
	}

	// Construct a single-element range [point, point + 1).
	constexpr range(Type point)
		: m_begin(point), m_end(point + 1)
	{
	}

	// Construct an empty, value-initialized range.
	constexpr range()
		: m_begin{}
		, m_end{}
	{
	}

	// Set both bounds; returns *this for chaining.
	range& set(Type begin, Type end)
	{
		m_begin = begin;
		m_end = end;
		return *this;
	}

	// Copy bounds from another range (now const-correct; previously took range&).
	range& set(const range& other)
	{
		return set(other.begin(), other.end());
	}

	// Setters return *this so calls can be chained, e.g. r.begin(b).size(n).
	range& begin(Type value)
	{
		m_begin = value;
		return *this;
	}

	range& end(Type value)
	{
		m_end = value;
		return *this;
	}

	// Set the end relative to the current begin.
	range& size(Type value)
	{
		m_end = m_begin + value;
		return *this;
	}

	// Grow this range to the smallest interval covering both *this and other.
	// BUGFIX: the upper bound must take the maximum of the two ends;
	// the original used std::min, which shrank the range instead of extending it.
	void extend(const range& other)
	{
		m_begin = std::min(m_begin, other.m_begin);
		m_end = std::max(m_end, other.m_end);
	}

	// True when the bounds are ordered (an empty range with begin == end is valid).
	constexpr bool valid() const
	{
		return m_begin <= m_end;
	}

	constexpr Type begin() const
	{
		return m_begin;
	}

	constexpr Type end() const
	{
		return m_end;
	}

	// Number of elements covered (end - begin).
	constexpr Type size() const
	{
		return m_end - m_begin;
	}

	// Half-open membership test: begin() is included, end() is not.
	constexpr bool contains(Type point) const
	{
		return point >= m_begin && point < m_end;
	}

	// True when the two half-open intervals share at least one element.
	constexpr bool overlaps(const range& rhs) const
	{
		return m_begin < rhs.m_end && m_end > rhs.m_begin;
	}

	constexpr bool operator == (const range& rhs) const
	{
		return m_begin == rhs.m_begin && m_end == rhs.m_end;
	}

	constexpr bool operator != (const range& rhs) const
	{
		return m_begin != rhs.m_begin || m_end != rhs.m_end;
	}

	// Element-wise arithmetic against a scalar; each yields a new range.
	constexpr range operator / (Type rhs) const
	{
		return{ m_begin / rhs, m_end / rhs };
	}

	constexpr range operator * (Type rhs) const
	{
		return{ m_begin * rhs, m_end * rhs };
	}

	constexpr range operator + (Type rhs) const
	{
		return{ m_begin + rhs, m_end + rhs };
	}

	constexpr range operator - (Type rhs) const
	{
		return{ m_begin - rhs, m_end - rhs };
	}

	range& operator /= (Type rhs)
	{
		m_begin /= rhs;
		m_end /= rhs;
		return *this;
	}

	range& operator *= (Type rhs)
	{
		m_begin *= rhs;
		m_end *= rhs;
		return *this;
	}

	range& operator += (Type rhs)
	{
		m_begin += rhs;
		m_end += rhs;
		return *this;
	}

	range& operator -= (Type rhs)
	{
		m_begin -= rhs;
		m_end -= rhs;
		return *this;
	}

	// Intersection of two ranges; result may be invalid() when they are disjoint.
	constexpr range operator &(const range& rhs) const
	{
		return{ std::max(m_begin, rhs.m_begin), std::min(m_end, rhs.m_end) };
	}
};

View File

@ -24,11 +24,11 @@ using s32 = std::int32_t;
using s64 = std::int64_t;
#define DECLARE_ENUM_CLASS_BITWISE_OPERATORS(type) \
inline type operator |(type lhs, type rhs) \
inline constexpr type operator |(type lhs, type rhs) \
{ \
return type(std::underlying_type_t<type>(lhs) | std::underlying_type_t<type>(rhs)); \
} \
inline type operator &(type lhs, type rhs) \
inline constexpr type operator &(type lhs, type rhs) \
{ \
return type(std::underlying_type_t<type>(lhs) & std::underlying_type_t<type>(rhs)); \
} \
@ -40,7 +40,7 @@ using s64 = std::int64_t;
{ \
return lhs = lhs & rhs; \
} \
inline type operator ~(type lhs) \
inline constexpr type operator ~(type lhs) \
{ \
return type(~std::underlying_type_t<type>(lhs)); \
} \

View File

@ -561,8 +561,7 @@ void GLGSRender::end()
m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture);
}
}
if (draw_command == rsx::draw_command::array)
else if (draw_command == rsx::draw_command::array)
{
for (const auto &first_count : first_count_commands)
{
@ -1232,7 +1231,7 @@ bool nv3089_image_in(u32 arg, GLGSRender* renderer)
tmp.create(src_info.target);
__glcheck scale_texture(tmp, src_info.format.internal_format, { (int)convert_w, (int)convert_h },
src_texture.view(), { (int)src_x + int(in_x), (int)src_y + int(in_y) }, { int(src_x + in_w), int(src_y + in_h) });
src_texture.view(), { (int)src_x + int(in_x) * 0, (int)src_y + int(in_y)*0 }, { int(src_x + in_w), int(src_y + in_h) });
src_id = tmp.id();

View File

@ -2,7 +2,11 @@
#include "gl_helpers.h"
#ifdef _WIN32
#pragma warning(push)
#pragma warning(disable: 4091)
#include <DbgHelp.h>
#pragma warning(pop)
#pragma comment(lib, "Dbghelp.lib")
#endif

View File

@ -25,9 +25,8 @@ namespace gl
};
std::vector<capability_texture> found_textures;
u32 texture_size = info.size();
m_parent_region->for_each(info.start_address, texture_size, [&](cached_texture& texture)
m_parent_region->for_each(info.range(), [&](cached_texture& texture)
{
if ((texture.m_state & cache_entry_state::local_synchronized) == cache_entry_state::invalid)
{
@ -124,7 +123,7 @@ namespace gl
{
//read from host
//flush all local textures at region
m_parent_region->for_each(info.start_address, texture_size, [](cached_texture& texture)
m_parent_region->for_each(info.range(), [](cached_texture& texture)
{
texture.sync(gl::cache_buffers::host);
//texture.invalidate(gl::cache_buffers::local);
@ -387,7 +386,7 @@ namespace gl
if ((buffers & cache_buffers::host) != cache_buffers::none)
{
m_state &= ~cache_entry_state::host_synchronized;
m_parent_region->for_each(info.start_address, info.size(), [this](cached_texture& texture)
m_parent_region->for_each(info.range(), [this](cached_texture& texture)
{
if (std::addressof(texture) != this)
{
@ -522,21 +521,14 @@ namespace gl
}
}
void protected_region::for_each(u32 start_address, u32 size, std::function<void(cached_texture& texture)> callback)
void protected_region::for_each(range<u32> range, std::function<void(cached_texture& texture)> callback)
{
for (auto &entry : m_textures)
{
if (entry.first.start_address >= start_address + size)
if (range.overlaps({ entry.first.start_address, entry.first.start_address + entry.first.size() }))
{
continue;
callback(entry.second);
}
if (entry.first.start_address + entry.first.size() <= start_address)
{
continue;
}
callback(entry.second);
}
}
@ -558,7 +550,7 @@ namespace gl
if (m_current_protection != flags)
{
//LOG_WARNING(RSX, "protected region [0x%x, 0x%x)", start_address, start_address + size());
vm::page_protect(start_address, size(), 0, m_current_protection & ~flags, flags);
vm::page_protect(begin(), size(), 0, m_current_protection & ~flags, flags);
m_current_protection = flags;
}
}
@ -584,7 +576,7 @@ namespace gl
}
//LOG_WARNING(RSX, "unprotected region [0x%x, 0x%x)", start_address, start_address + size());
vm::page_protect(start_address, size(), 0, flags, 0);
vm::page_protect(begin(), size(), 0, flags, 0);
m_current_protection &= ~flags;
}
@ -612,17 +604,7 @@ namespace gl
}
}
if (region.start_address < start_address)
{
pages_count += (start_address - region.start_address) / vm::page_size;
start_address = region.start_address;
}
else
{
//[start_address, region.start_address + region.pages_count * vm::page_size)
pages_count = (region.start_address + region.pages_count * vm::page_size - start_address) / vm::page_size;
}
extend(region);
}
cached_texture& protected_region::add(const texture_info& info)
@ -669,40 +651,39 @@ namespace gl
cached_texture &texture_cache::entry(const texture_info &info, cache_buffers sync)
{
u32 aligned_address;
u32 aligned_size;
range<u32> aligned_range;
const bool accurate_cache = false;
if (accurate_cache)
{
aligned_address = info.start_address & ~(vm::page_size - 1);
aligned_size = align(info.start_address - aligned_address + info.size(), vm::page_size);
aligned_range.begin(info.start_address & ~(vm::page_size - 1));
aligned_range.size(align(info.start_address - aligned_range.begin() + info.size(), vm::page_size));
}
else
{
aligned_size = info.size() & ~(vm::page_size - 1);
u32 aligned_size = info.size() & ~(vm::page_size - 1);
if (!aligned_size)
{
aligned_address = info.start_address & ~(vm::page_size - 1);
aligned_size = align(info.size() + info.start_address - aligned_address, vm::page_size);
aligned_range.begin(info.start_address & ~(vm::page_size - 1));
aligned_range.size(align(info.size() + info.start_address - aligned_range.begin(), vm::page_size));
}
else
{
aligned_address = align(info.start_address, vm::page_size);
aligned_range.begin(align(info.start_address, vm::page_size));
aligned_range.size(aligned_size);
}
}
std::vector<std::list<protected_region>::iterator> regions = find_regions(aligned_address, aligned_size);
std::vector<std::list<protected_region>::iterator> regions = find_regions(aligned_range);
protected_region *region;
if (regions.empty())
{
m_protected_regions.emplace_back();
region = &m_protected_regions.back();
region->pages_count = aligned_size / vm::page_size;
region->start_address = aligned_address;
region->set(aligned_range);
}
else
{
@ -714,14 +695,14 @@ namespace gl
m_protected_regions.erase(regions[index]);
}
if (region->start_address > aligned_address)
if (region->begin() > aligned_range.begin())
{
region->pages_count += (region->start_address - aligned_address) / vm::page_size;
region->start_address = aligned_address;
region->end(region->end() + (region->begin() - aligned_range.begin()));
region->begin(aligned_range.begin());
}
u32 new_pages_count = (aligned_address + aligned_size - region->start_address) / vm::page_size;
region->pages_count = std::max(region->pages_count, new_pages_count);
u32 new_size = aligned_range.end() - region->begin();
region->size(std::max(region->size(), new_size));
}
cached_texture *result = region->find(info);
@ -740,12 +721,7 @@ namespace gl
{
for (auto& entry : m_protected_regions)
{
if (entry.start_address > address)
{
continue;
}
if (address >= entry.start_address && address < entry.start_address + entry.size())
if (entry.contains(address))
{
return &entry;
}
@ -754,23 +730,16 @@ namespace gl
return nullptr;
}
std::vector<std::list<protected_region>::iterator> texture_cache::find_regions(u32 address, u32 size)
std::vector<std::list<protected_region>::iterator> texture_cache::find_regions(range<u32> range)
{
std::vector<std::list<protected_region>::iterator> result;
for (auto it = m_protected_regions.begin(); it != m_protected_regions.end(); ++it)
{
if (it->start_address >= address + size)
if (it->overlaps(range))
{
continue;
result.push_back(it);
}
if (it->start_address + it->size() <= address)
{
continue;
}
result.push_back(it);
}
return result;

View File

@ -1,7 +1,8 @@
#pragma once
#include <vector>
#include "Utilities/types.h"
#include "gl_helpers.h"
#include <Utilities/types.h>
#include <Utilities/range.h>
namespace gl
{
@ -74,6 +75,11 @@ namespace gl
{
return compressed_size ? compressed_size : height * pitch * depth;
}
range<u32> range() const
{
return{ start_address, start_address + size() };
}
};
struct protected_region;
@ -116,24 +122,16 @@ namespace gl
friend protected_region;
};
struct protected_region
struct protected_region : range<u32>
{
u32 start_address;
u32 pages_count;
private:
std::unordered_map<texture_info, cached_texture, fnv_1a_hasher, bitwise_equals> m_textures;
u32 m_current_protection = 0;
public:
u32 size() const
{
return pages_count * vm::page_size;
}
cache_access requires_protection() const;
void for_each(std::function<void(cached_texture& texture)> callback);
void for_each(u32 start_address, u32 size, std::function<void(cached_texture& texture)> callback);
void for_each(range<u32> range, std::function<void(cached_texture& texture)> callback);
void protect();
void unprotect(cache_access access = cache_access::read_write);
bool empty() const;
@ -154,7 +152,7 @@ namespace gl
public:
cached_texture &entry(const texture_info &info, cache_buffers sync = cache_buffers::none);
protected_region *find_region(u32 address);
std::vector<std::list<protected_region>::iterator> find_regions(u32 address, u32 size);
std::vector<std::list<protected_region>::iterator> find_regions(range<u32> range);
void update_protection();
void clear();
};

View File

@ -353,23 +353,15 @@ namespace rsx
}
});
// TODO: exit condition
while (true)
loop([this]
{
CHECK_EMU_STATUS;
be_t<u32> get = ctrl->get;
be_t<u32> put = ctrl->put;
if (put == get || !Emu.IsRunning())
{
do_internal_task();
continue;
}
if (m_internal_task_waiters.load(std::memory_order_relaxed))
{
do_internal_task();
return;
}
const u32 cmd = ReadIO32(get);
@ -380,7 +372,7 @@ namespace rsx
u32 offs = cmd & 0x1fffffff;
//LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put);
ctrl->get = offs;
continue;
return;
}
if (cmd & CELL_GCM_METHOD_FLAG_CALL)
{
@ -388,7 +380,7 @@ namespace rsx
u32 offs = cmd & ~3;
//LOG_WARNING(RSX, "rsx call(0x%x) #0x%x - 0x%x", offs, cmd, get);
ctrl->get = offs;
continue;
return;
}
if (cmd == CELL_GCM_METHOD_FLAG_RETURN)
{
@ -396,13 +388,13 @@ namespace rsx
m_call_stack.pop();
//LOG_WARNING(RSX, "rsx return(0x%x)", get);
ctrl->get = get;
continue;
return;
}
if (cmd == 0) //nop
{
ctrl->get = get + 4;
continue;
return;
}
auto args = vm::ptr<u32>::make((u32)RSXIOMem.RealAddr(get + 4));
@ -433,7 +425,7 @@ namespace rsx
}
ctrl->get = get + (count + 1) * 4;
}
});
}
std::string thread::get_name() const
@ -548,7 +540,7 @@ namespace rsx
void thread::invoke(std::function<void()> callback)
{
if (get_thread_ctrl() == thread_ctrl::get_current())
if (is_current())
{
callback();
}

View File

@ -10,6 +10,7 @@
#include "Utilities/Thread.h"
#include "Utilities/Timer.h"
#include "Utilities/convert.h"
#include "Emu/System.h"
extern u64 get_system_time();
@ -412,5 +413,39 @@ namespace rsx
u32 ReadIO32(u32 addr);
void WriteIO32(u32 addr, u32 value);
template<typename Type>
force_inline auto loop(Type function) -> std::enable_if_t<!std::is_same<decltype(function()), void>::value, void>
{
while (function())
{
CHECK_EMU_STATUS;
if (m_internal_task_waiters.load(std::memory_order_relaxed))
{
do_internal_task();
}
else
{
std::this_thread::sleep_for(1ms);
}
}
}
template<typename Type>
force_inline auto loop(Type function) -> std::enable_if_t<std::is_same<decltype(function()), void>::value, void>
{
while (true)
{
CHECK_EMU_STATUS;
if (m_internal_task_waiters.load(std::memory_order_relaxed))
{
do_internal_task();
}
function();
}
}
};
}

View File

@ -19,6 +19,36 @@ namespace rsx
template<> struct vertex_data_type_from_element_type<u8> { static const vertex_base_type type = vertex_base_type::ub; };
template<> struct vertex_data_type_from_element_type<u16> { static const vertex_base_type type = vertex_base_type::s1; };
std::atomic<uint> operations_in_progress{ 0 };
struct scoped_operation
{
~scoped_operation()
{
--operations_in_progress;
}
};
force_inline void async_operation(std::function<void()> function)
{
++operations_in_progress;
std::thread([function = std::move(function)]()
{
scoped_operation operation;
function();
}).detach();
}
std::vector<std::shared_ptr<thread_ctrl>> threads_storage;
void wait_for_operations_end(thread* rsx)
{
rsx->loop([] { return operations_in_progress > 0; });
threads_storage.clear();
}
namespace nv406e
{
force_inline void set_reference(thread* rsx, u32 arg)
@ -29,13 +59,7 @@ namespace rsx
force_inline void semaphore_acquire(thread* rsx, u32 arg)
{
//TODO: dma
while (vm::ps3::read32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET]) != arg)
{
if (Emu.IsStopped())
break;
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
rsx->loop([=] { return vm::ps3::read32(rsx->label_addr + method_registers[NV406E_SEMAPHORE_OFFSET]) != arg; });
}
force_inline void semaphore_release(thread* rsx, u32 arg)
@ -55,6 +79,8 @@ namespace rsx
force_inline void back_end_write_semaphore_release(thread* rsx, u32 arg)
{
wait_for_operations_end(rsx);
//TODO: dma
vm::ps3::write32(rsx->label_addr + method_registers[NV4097_SET_SEMAPHORE_OFFSET],
(arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff));
@ -217,6 +243,8 @@ namespace rsx
force_inline void set_begin_end(thread* rsx, u32 arg)
{
wait_for_operations_end(rsx);
if (arg)
{
rsx->draw_inline_vertex_array = false;
@ -323,7 +351,7 @@ namespace rsx
namespace nv3089
{
never_inline void image_in(thread *rsx, u32 arg)
force_inline void image_in(thread *rsx, u32 arg)
{
u32 operation = method_registers[NV3089_SET_OPERATION];
@ -344,6 +372,14 @@ namespace rsx
u8 in_inter = method_registers[NV3089_IMAGE_IN_FORMAT] >> 24;
u32 src_color_format = method_registers[NV3089_SET_COLOR_FORMAT];
u32 context_surface = method_registers[NV3089_SET_CONTEXT_SURFACE];
const u32 src_offset = method_registers[NV3089_IMAGE_IN_OFFSET];
const u32 src_dma = method_registers[NV3089_SET_CONTEXT_DMA_IMAGE];
f32 scale_x = 1048576.f / method_registers[NV3089_DS_DX];
f32 scale_y = 1048576.f / method_registers[NV3089_DT_DY];
f32 in_x = (arg & 0xffff) / 16.f;
f32 in_y = (arg >> 16) / 16.f;
@ -362,16 +398,13 @@ namespace rsx
LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: unknown operation (%d)", operation);
}
const u32 src_offset = method_registers[NV3089_IMAGE_IN_OFFSET];
const u32 src_dma = method_registers[NV3089_SET_CONTEXT_DMA_IMAGE];
u32 dst_offset;
u32 dst_dma = 0;
u16 dst_color_format;
u32 out_pitch = 0;
u32 out_aligment = 64;
switch (method_registers[NV3089_SET_CONTEXT_SURFACE])
switch (context_surface)
{
case CELL_GCM_CONTEXT_SURFACE2D:
dst_dma = method_registers[NV3062_SET_CONTEXT_DMA_IMAGE_DESTIN];
@ -395,12 +428,6 @@ namespace rsx
u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4;
u32 in_offset = u32(in_x) * u32(in_bpp + in_pitch * in_y);
u32 out_offset = out_x * out_bpp + out_pitch * out_y;
tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf);//get_address(src_offset, src_dma);
u32 dst_address = get_address(dst_offset + out_offset, dst_dma);
if (out_pitch == 0)
{
out_pitch = out_bpp * out_w;
@ -421,10 +448,13 @@ namespace rsx
clip_h = out_h;
}
//LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address);
u32 in_offset = u32(in_x * in_bpp) + u32(in_pitch * in_y);
u32 out_offset = out_x * out_bpp + out_pitch * out_y;
u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr;
u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address + out_offset);
tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf);//get_address(src_offset, src_dma);
u32 dst_address = get_address(dst_offset + out_offset, dst_dma);
//LOG_ERROR(RSX, "NV3089_IMAGE_IN_SIZE: src = 0x%x, dst = 0x%x", src_address, dst_address);
if (dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 &&
dst_color_format != CELL_GCM_TRANSFER_SURFACE_FORMAT_A8R8G8B8)
@ -442,14 +472,9 @@ namespace rsx
// method_registers[NV3089_IMAGE_IN_SIZE], in_pitch, src_offset, double(1 << 20) / (method_registers[NV3089_DS_DX]), double(1 << 20) / (method_registers[NV3089_DT_DY]),
// method_registers[NV3089_CLIP_SIZE], method_registers[NV3089_IMAGE_OUT_SIZE]);
std::unique_ptr<u8[]> temp1, temp2;
AVPixelFormat in_format = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
AVPixelFormat out_format = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
f32 scale_x = 1048576.f / method_registers[NV3089_DS_DX];
f32 scale_y = 1048576.f / method_registers[NV3089_DT_DY];
u32 convert_w = (u32)(scale_x * in_w);
u32 convert_h = (u32)(scale_y * in_h);
@ -459,146 +484,158 @@ namespace rsx
bool need_convert = out_format != in_format || scale_x != 1.0 || scale_y != 1.0;
u32 slice_h = clip_h;
u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16;
u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24;
if (src_region.tile)
// 0 indicates height of 1 pixel
if (!sw_height_log2)
{
if (src_region.tile->comp == CELL_GCM_COMPMODE_C32_2X2)
{
slice_h *= 2;
}
u32 size = slice_h * in_pitch;
if (size > src_region.tile->size - src_region.base)
{
u32 diff = size - (src_region.tile->size - src_region.base);
slice_h -= (diff + in_pitch - 1) / in_pitch;
}
sw_height_log2 = 1;
}
if (method_registers[NV3089_SET_CONTEXT_SURFACE] != CELL_GCM_CONTEXT_SWIZZLE2D)
async_operation([=]
{
if (need_convert || need_clip)
{
if (need_clip)
{
if (need_convert)
{
convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr;
u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address + out_offset);
clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
std::unique_ptr<u8[]> temp1, temp2;
u32 slice_h = clip_h;
if (src_region.tile)
{
if (src_region.tile->comp == CELL_GCM_COMPMODE_C32_2X2)
{
slice_h *= 2;
}
u32 size = slice_h * in_pitch;
if (size > src_region.tile->size - src_region.base)
{
u32 diff = size - (src_region.tile->size - src_region.base);
slice_h -= (diff + in_pitch - 1) / in_pitch;
}
}
if (context_surface != CELL_GCM_CONTEXT_SWIZZLE2D)
{
if (need_convert || need_clip)
{
if (need_clip)
{
if (need_convert)
{
convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
clip_image(pixels_dst + out_offset, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
}
else
{
clip_image(pixels_dst + out_offset, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
}
}
else
{
clip_image(pixels_dst + out_offset, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
convert_scale_image(pixels_dst + out_offset, out_format, out_w, out_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
}
}
else
{
convert_scale_image(pixels_dst + out_offset, out_format, out_w, out_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
if (out_pitch != in_pitch || out_pitch != out_bpp * out_w || in_pitch != in_bpp * in_w)
{
for (u32 y = 0; y < out_h; ++y)
{
u8 *dst = pixels_dst + out_pitch * y;
u8 *src = pixels_src + in_pitch * y;
std::memmove(dst, src, out_w * out_bpp);
}
}
else
{
std::memmove(pixels_dst + out_offset, pixels_src, out_pitch * out_h);
}
}
}
else
{
if (out_pitch != in_pitch || out_pitch != out_bpp * out_w || in_pitch != in_bpp * in_w)
if (need_convert || need_clip)
{
for (u32 y = 0; y < out_h; ++y)
if (need_clip)
{
u8 *dst = pixels_dst + out_pitch * y;
u8 *src = pixels_src + in_pitch * y;
if (need_convert)
{
convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
std::memmove(dst, src, out_w * out_bpp);
}
}
else
{
std::memmove(pixels_dst + out_offset, pixels_src, out_pitch * out_h);
}
}
}
else
{
if (need_convert || need_clip)
{
if (need_clip)
{
if (need_convert)
{
convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter ? true : false);
clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
}
else
{
clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
}
}
else
{
clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
convert_scale_image(temp2, out_format, out_w, out_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter ? true : false);
}
pixels_src = temp2.get();
}
else
// swizzle based on destination size
u16 sw_width = 1 << sw_width_log2;
u16 sw_height = 1 << sw_height_log2;
temp2.reset(new u8[out_bpp * sw_width * sw_height]);
u8* linear_pixels = pixels_src;
u8* swizzled_pixels = temp2.get();
std::unique_ptr<u8[]> sw_temp;
// Check and pad texture out if we are given non square texture for swizzle to be correct
if (sw_width != out_w || sw_height != out_h)
{
convert_scale_image(temp2, out_format, out_w, out_h, out_pitch,
pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter ? true : false);
sw_temp.reset(new u8[out_bpp * sw_width * sw_height]);
switch (out_bpp)
{
case 1:
pad_texture<u8>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
break;
case 2:
pad_texture<u16>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
break;
case 4:
pad_texture<u32>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
break;
}
linear_pixels = sw_temp.get();
}
pixels_src = temp2.get();
}
u8 sw_width_log2 = method_registers[NV309E_SET_FORMAT] >> 16;
u8 sw_height_log2 = method_registers[NV309E_SET_FORMAT] >> 24;
// 0 indicates height of 1 pixel
sw_height_log2 = sw_height_log2 == 0 ? 1 : sw_height_log2;
// swizzle based on destination size
u16 sw_width = 1 << sw_width_log2;
u16 sw_height = 1 << sw_height_log2;
temp2.reset(new u8[out_bpp * sw_width * sw_height]);
u8* linear_pixels = pixels_src;
u8* swizzled_pixels = temp2.get();
std::unique_ptr<u8[]> sw_temp;
// Check and pad texture out if we are given non square texture for swizzle to be correct
if (sw_width != out_w || sw_height != out_h)
{
sw_temp.reset(new u8[out_bpp * sw_width * sw_height]);
switch (out_bpp)
{
case 1:
pad_texture<u8>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
case 2:
pad_texture<u16>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
case 4:
pad_texture<u32>(linear_pixels, sw_temp.get(), out_w, out_h, sw_width, sw_height);
convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
}
linear_pixels = sw_temp.get();
std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
}
switch (out_bpp)
{
case 1:
convert_linear_swizzle<u8>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
case 2:
convert_linear_swizzle<u16>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
case 4:
convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
}
std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
}
});
}
}
@ -630,22 +667,32 @@ namespace rsx
out_pitch = line_length;
}
u8 *dst = (u8*)vm::base(get_address(method_registers[NV0039_OFFSET_OUT], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT]));
const u8 *src = (u8*)vm::base(get_address(method_registers[NV0039_OFFSET_IN], method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_IN]));
if (in_pitch == out_pitch && out_pitch == line_length)
u32 src_offset = method_registers[NV0039_OFFSET_IN];
u32 src_dma = method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_IN];
u32 dst_offset = method_registers[NV0039_OFFSET_OUT];
u32 dst_dma = method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT];
async_operation([=]
{
std::memcpy(dst, src, line_length * line_count);
}
else
{
for (u32 i = 0; i < line_count; ++i)
u8 *dst = (u8*)vm::base(get_address(dst_offset, dst_dma));
const u8 *src = (u8*)vm::base(get_address(src_offset, src_dma));
if (in_pitch == out_pitch && out_pitch == line_length)
{
std::memcpy(dst, src, line_length);
dst += out_pitch;
src += in_pitch;
std::memcpy(dst, src, line_length * line_count);
}
}
else
{
for (u32 i = 0; i < line_count; ++i)
{
std::memcpy(dst, src, line_length);
dst += out_pitch;
src += in_pitch;
}
}
});
}
}

View File

@ -640,26 +640,31 @@ s32 cellGameThemeInstallFromBuffer()
s32 cellDiscGameGetBootDiscInfo()
{
return 0;
throw EXCEPTION("");
}
s32 cellDiscGameRegisterDiscChangeCallback()
{
return 0;
throw EXCEPTION("");
}
s32 cellDiscGameUnregisterDiscChangeCallback()
{
return 0;
throw EXCEPTION("");
}
s32 cellGameRegisterDiscChangeCallback()
{
return 0;
throw EXCEPTION("");
}
s32 cellGameUnregisterDiscChangeCallback()
{
return 0;
throw EXCEPTION("");
}

View File

@ -1591,6 +1591,8 @@ s32 _cellSyncLFQueueDetachLv2EventQueue(vm::ptr<u32> spus, u32 num, vm::ptr<Cell
throw EXCEPTION("");
}
static const u32 cellSyncMutexTryLock_id = get_function_id("cellSyncMutexTryLock");
Module<> cellSync("cellSync", []()
{
// setup error handler
@ -1624,13 +1626,17 @@ Module<> cellSync("cellSync", []()
// analyse error code
if (u32 code = (value & 0xffffff00) == 0x80410100 ? static_cast<u32>(value) : 0)
{
cellSync.error("%s() -> %s (0x%x)", func->name, get_error(code), code);
//CELL_SYNC_ERROR_BUSY is ok for cellSyncMutexTryLock
if (code != CELL_SYNC_ERROR_BUSY || func->id != cellSyncMutexTryLock_id)
{
cellSync.error("%s() -> %s (0x%x)", func->name, get_error(code), code);
}
}
};
REG_FUNC(cellSync, cellSyncMutexInitialize);
REG_FUNC(cellSync, cellSyncMutexLock);
REG_FUNC(cellSync, cellSyncMutexTryLock);
REG_FNID(cellSync, cellSyncMutexTryLock_id, cellSyncMutexTryLock);
REG_FUNC(cellSync, cellSyncMutexUnlock);
REG_FUNC(cellSync, cellSyncBarrierInitialize);

View File

@ -377,6 +377,7 @@
<ClInclude Include="..\Utilities\GNU.h" />
<ClInclude Include="..\Utilities\Log.h" />
<ClInclude Include="..\Utilities\File.h" />
<ClInclude Include="..\Utilities\range.h" />
<ClInclude Include="..\Utilities\rPlatform.h" />
<ClInclude Include="..\Utilities\rTime.h" />
<ClInclude Include="..\Utilities\rXml.h" />

View File

@ -1794,5 +1794,8 @@
<ClInclude Include="Emu\RSX\Common\surface_store.h">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
<ClInclude Include="..\Utilities\range.h">
<Filter>Utilities</Filter>
</ClInclude>
</ItemGroup>
</Project>

View File

@ -85,7 +85,7 @@
<OutDir>$(SolutionDir)bin\</OutDir>
<LinkIncremental>false</LinkIncremental>
<RunCodeAnalysis>false</RunCodeAnalysis>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration) Library\;$(LibraryPath)</LibraryPath>
<LibraryPath>$(SolutionDir)$(Platform)\Release\;$(SolutionDir)$(Platform)\Release Library\;$(LibraryPath)</LibraryPath>
</PropertyGroup>
<ItemDefinitionGroup>
<PreBuildEvent>