mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-23 03:02:53 +01:00
rsx: ZCULL synchronization fixes
- Track asynchronous operations in RSX core.
- Add read barriers to force pending writes to finish. This fixes zcull delay flicker in all UE3 titles without forcing a hard stall.
- Increase zcull latency, as all writes should be synchronized now.
This commit is contained in:
parent
315798b1f4
commit
2dce55d036
@ -1574,6 +1574,11 @@ void GLGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
|
||||
}
|
||||
}
|
||||
|
||||
//Discards an occlusion query by ending the GL_ANY_SAMPLES_PASSED query target.
//The 'query' argument is not referenced in this body.
//NOTE(review): glEndQuery acts on whichever query is active on the target, not on a specific query object - confirm callers only discard the active query.
void GLGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query)
|
||||
{
|
||||
glEndQuery(GL_ANY_SAMPLES_PASSED);
|
||||
}
|
||||
|
||||
void GLGSRender::shell_do_cleanup()
|
||||
{
|
||||
//TODO: Key cleanup requests with UID to identify resources to remove
|
||||
|
@ -367,6 +367,7 @@ public:
|
||||
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
||||
bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
|
||||
void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override;
|
||||
void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
||||
|
||||
protected:
|
||||
void begin() override;
|
||||
|
@ -2092,7 +2092,7 @@ namespace rsx
|
||||
|
||||
//Reset zcull ctrl
|
||||
zcull_ctrl->set_active(this, false);
|
||||
zcull_ctrl->clear();
|
||||
zcull_ctrl->clear(this);
|
||||
|
||||
if (zcull_ctrl->has_pending())
|
||||
{
|
||||
@ -2142,7 +2142,7 @@ namespace rsx
|
||||
if (g_cfg.video.disable_zcull_queries)
|
||||
return;
|
||||
|
||||
zcull_ctrl->clear();
|
||||
zcull_ctrl->clear(this);
|
||||
}
|
||||
|
||||
void thread::get_zcull_stats(u32 type, vm::addr_t sink)
|
||||
@ -2153,18 +2153,13 @@ namespace rsx
|
||||
switch (type)
|
||||
{
|
||||
case CELL_GCM_ZPASS_PIXEL_CNT:
|
||||
{
|
||||
zcull_ctrl->read_report(this, sink, type);
|
||||
return;
|
||||
}
|
||||
case CELL_GCM_ZCULL_STATS:
|
||||
case CELL_GCM_ZCULL_STATS1:
|
||||
case CELL_GCM_ZCULL_STATS2:
|
||||
case CELL_GCM_ZCULL_STATS3:
|
||||
{
|
||||
//TODO
|
||||
value = (type != CELL_GCM_ZCULL_STATS3)? UINT16_MAX : 0;
|
||||
break;
|
||||
zcull_ctrl->read_report(this, sink, type);
|
||||
return;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(RSX, "Unknown zcull stat type %d", type);
|
||||
@ -2181,6 +2176,14 @@ namespace rsx
|
||||
//Synchronizes the ZCULL controller with this thread, then checks that no asynchronous tasks are still pending.
void thread::sync()
|
||||
{
|
||||
//Delegate to the ZCULL controller, passing this thread
zcull_ctrl->sync(this);
|
||||
|
||||
//Memory fence issued before the pending-task counter is inspected
_mm_mfence();
|
||||
//After the sync above the counter is expected to be exactly zero; anything else fails verification
verify (HERE), async_tasks_pending.load() == 0;
|
||||
}
|
||||
|
||||
//Forwards a read barrier request for the range starting at memory_address with length memory_range
//to the ZCULL controller, passing this thread along.
void thread::read_barrier(u32 memory_address, u32 memory_range)
|
||||
{
|
||||
zcull_ctrl->read_barrier(this, memory_address, memory_range);
|
||||
}
|
||||
|
||||
void thread::notify_zcull_info_changed()
|
||||
@ -2328,6 +2331,7 @@ namespace rsx
|
||||
|
||||
m_pending_writes.push_back({});
|
||||
m_pending_writes.back().query = m_current_task;
|
||||
ptimer->async_tasks_pending++;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -2342,7 +2346,7 @@ namespace rsx
|
||||
|
||||
void ZCULL_control::read_report(::rsx::thread* ptimer, vm::addr_t sink, u32 type)
|
||||
{
|
||||
if (m_current_task)
|
||||
if (m_current_task && type == CELL_GCM_ZPASS_PIXEL_CNT)
|
||||
{
|
||||
m_current_task->owned = true;
|
||||
end_occlusion_query(m_current_task);
|
||||
@ -2384,6 +2388,8 @@ namespace rsx
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
ptimer->async_tasks_pending++;
|
||||
}
|
||||
|
||||
void ZCULL_control::allocate_new_query(::rsx::thread* ptimer)
|
||||
@ -2436,7 +2442,7 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
void ZCULL_control::clear()
|
||||
void ZCULL_control::clear(class ::rsx::thread* ptimer)
|
||||
{
|
||||
if (!m_pending_writes.empty())
|
||||
{
|
||||
@ -2449,6 +2455,7 @@ namespace rsx
|
||||
discard_occlusion_query(It->query);
|
||||
It->query->pending = false;
|
||||
valid_size--;
|
||||
ptimer->async_tasks_pending--;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -2470,9 +2477,27 @@ namespace rsx
|
||||
m_cycles_delay = max_zcull_cycles_delay;
|
||||
}
|
||||
|
||||
void ZCULL_control::write(vm::addr_t sink, u32 timestamp, u32 value)
|
||||
void ZCULL_control::write(vm::addr_t sink, u32 timestamp, u32 type, u32 value)
|
||||
{
|
||||
verify(HERE), sink;
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case CELL_GCM_ZPASS_PIXEL_CNT:
|
||||
value = value ? UINT16_MAX : 0;
|
||||
break;
|
||||
case CELL_GCM_ZCULL_STATS3:
|
||||
value = value ? 0 : UINT16_MAX;
|
||||
break;
|
||||
case CELL_GCM_ZCULL_STATS2:
|
||||
case CELL_GCM_ZCULL_STATS1:
|
||||
case CELL_GCM_ZCULL_STATS:
|
||||
default:
|
||||
//Not implemented
|
||||
value = UINT32_MAX;
|
||||
break;
|
||||
}
|
||||
|
||||
vm::ptr<CellGcmReportData> out = sink;
|
||||
out->value = value;
|
||||
out->timer = timestamp;
|
||||
@ -2520,7 +2545,7 @@ namespace rsx
|
||||
|
||||
if (!writer.forwarder)
|
||||
//No other queries in the chain, write result
|
||||
write(writer.sink, ptimer->timestamp(), result ? UINT16_MAX : 0);
|
||||
write(writer.sink, ptimer->timestamp(), writer.type, result);
|
||||
|
||||
processed++;
|
||||
}
|
||||
@ -2555,10 +2580,13 @@ namespace rsx
|
||||
else
|
||||
It = m_statistics_map.erase(It);
|
||||
}
|
||||
|
||||
//Decrement jobs counter
|
||||
ptimer->async_tasks_pending -= processed;
|
||||
}
|
||||
|
||||
//Critical, since it's likely a WAIT_FOR_IDLE type has been processed, all results are considered available
|
||||
m_cycles_delay = 2;
|
||||
m_cycles_delay = min_zcull_cycles_delay;
|
||||
}
|
||||
|
||||
void ZCULL_control::update(::rsx::thread* ptimer)
|
||||
@ -2644,7 +2672,7 @@ namespace rsx
|
||||
//only zpass supported right now
|
||||
if (!writer.forwarder)
|
||||
//No other queries in the chain, write result
|
||||
write(writer.sink, ptimer->timestamp(), result ? UINT16_MAX : 0);
|
||||
write(writer.sink, ptimer->timestamp(), writer.type, result);
|
||||
|
||||
processed++;
|
||||
}
|
||||
@ -2669,6 +2697,24 @@ namespace rsx
|
||||
{
|
||||
m_pending_writes.resize(0);
|
||||
}
|
||||
|
||||
ptimer->async_tasks_pending -= processed;
|
||||
}
|
||||
}
|
||||
|
||||
//Conditionally syncs pending report writes: if any queued write targets an address inside
//[memory_address, memory_address + memory_range), sync(ptimer) is called once and the function returns.
//Does nothing when there are no pending writes or none fall inside the range.
void ZCULL_control::read_barrier(::rsx::thread* ptimer, u32 memory_address, u32 memory_range)
|
||||
{
|
||||
//Nothing queued, nothing to wait for
if (m_pending_writes.empty())
|
||||
return;
|
||||
|
||||
//Exclusive end of the range being read
//NOTE(review): both operands are u32, so this wraps if the range crosses the 4GB boundary - confirm callers never pass such a range
const auto memory_end = memory_address + memory_range;
|
||||
for (const auto &writer : m_pending_writes)
|
||||
{
|
||||
//Half-open range test on the write's sink address
if (writer.sink >= memory_address && writer.sink < memory_end)
|
||||
{
|
||||
//One overlapping write is enough; sync once and stop scanning
sync(ptimer);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -185,9 +185,8 @@ namespace rsx
|
||||
struct ZCULL_control
|
||||
{
|
||||
//Delay in 'cycles' before a report update operation is forced to retire
|
||||
//Larger values might give more performance but some engines (UE3) don't seem to wait for results and will flicker
|
||||
//TODO: Determine the real max delay in real hardware
|
||||
const u32 max_zcull_cycles_delay = 10;
|
||||
const u32 max_zcull_cycles_delay = 128;
|
||||
const u32 min_zcull_cycles_delay = 16;
|
||||
|
||||
//Number of occlusion query slots available. Real hardware actually has far fewer units before choking
|
||||
const u32 occlusion_query_count = 128;
|
||||
@ -200,7 +199,7 @@ namespace rsx
|
||||
occlusion_query_info* m_current_task = nullptr;
|
||||
u32 m_statistics_tag_id = 0;
|
||||
u32 m_tsc = 0;
|
||||
u32 m_cycles_delay = 10;
|
||||
u32 m_cycles_delay = max_zcull_cycles_delay;
|
||||
|
||||
std::vector<queued_report_write> m_pending_writes;
|
||||
std::unordered_map<u32, u32> m_statistics_map;
|
||||
@ -211,7 +210,7 @@ namespace rsx
|
||||
void set_enabled(class ::rsx::thread* ptimer, bool enabled);
|
||||
void set_active(class ::rsx::thread* ptimer, bool active);
|
||||
|
||||
void write(vm::addr_t sink, u32 timestamp, u32 value);
|
||||
void write(vm::addr_t sink, u32 timestamp, u32 type, u32 value);
|
||||
|
||||
//Read current zcull statistics into the address provided
|
||||
void read_report(class ::rsx::thread* ptimer, vm::addr_t sink, u32 type);
|
||||
@ -220,11 +219,14 @@ namespace rsx
|
||||
void allocate_new_query(class ::rsx::thread* ptimer);
|
||||
|
||||
//clears current stat block and increments stat_tag_id
|
||||
void clear();
|
||||
void clear(class ::rsx::thread* ptimer);
|
||||
|
||||
//forcefully flushes all
|
||||
void sync(class ::rsx::thread* ptimer);
|
||||
|
||||
//conditionally sync any pending writes if range overlaps
|
||||
void read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range);
|
||||
|
||||
//call once every 'tick' to update
|
||||
void update(class ::rsx::thread* ptimer);
|
||||
|
||||
@ -367,6 +369,8 @@ namespace rsx
|
||||
bool sync_point_request = false;
|
||||
bool in_begin_end = false;
|
||||
|
||||
atomic_t<s32> async_tasks_pending{ 0 };
|
||||
|
||||
bool conditional_render_test_failed = false;
|
||||
bool conditional_render_enabled = false;
|
||||
bool zcull_stats_enabled = false;
|
||||
@ -412,6 +416,7 @@ namespace rsx
|
||||
|
||||
//sync
|
||||
void sync();
|
||||
void read_barrier(u32 memory_address, u32 memory_range);
|
||||
|
||||
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const;
|
||||
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
|
||||
|
@ -724,7 +724,7 @@ std::string rsx::get_method_name(const u32 id)
|
||||
return std::string("CELL_GCM_") + found->second;
|
||||
}
|
||||
|
||||
return fmt::format("Unknown/illegal method [0x%08x]", id);
|
||||
return fmt::format("Unknown/illegal method [0x%08x]", id << 2);
|
||||
}
|
||||
|
||||
// Various parameter pretty printing function
|
||||
|
@ -689,6 +689,9 @@ namespace rsx
|
||||
in_pitch = in_bpp * in_w;
|
||||
}
|
||||
|
||||
const auto read_address = get_address(src_offset, src_dma);
|
||||
rsx->read_barrier(read_address, in_pitch * in_h);
|
||||
|
||||
if (dst_color_format != rsx::blit_engine::transfer_destination_format::r5g6b5 &&
|
||||
dst_color_format != rsx::blit_engine::transfer_destination_format::a8r8g8b8)
|
||||
{
|
||||
@ -933,7 +936,7 @@ namespace rsx
|
||||
|
||||
namespace nv0039
|
||||
{
|
||||
void buffer_notify(thread*, u32, u32 arg)
|
||||
void buffer_notify(thread *rsx, u32, u32 arg)
|
||||
{
|
||||
s32 in_pitch = method_registers.nv0039_input_pitch();
|
||||
s32 out_pitch = method_registers.nv0039_output_pitch();
|
||||
@ -968,8 +971,11 @@ namespace rsx
|
||||
u32 dst_offset = method_registers.nv0039_output_offset();
|
||||
u32 dst_dma = method_registers.nv0039_output_location();
|
||||
|
||||
const auto read_address = get_address(src_offset, src_dma);
|
||||
rsx->read_barrier(read_address, in_pitch * line_count);
|
||||
|
||||
u8 *dst = (u8*)vm::base(get_address(dst_offset, dst_dma));
|
||||
const u8 *src = (u8*)vm::base(get_address(src_offset, src_dma));
|
||||
const u8 *src = (u8*)vm::base(read_address);
|
||||
|
||||
if (in_pitch == out_pitch && out_pitch == line_length)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user