1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-22 18:53:28 +01:00

Merge pull request #1271 from vlj/d3d12

D3d12: Perf and compat improvements
This commit is contained in:
B1ackDaemon 2015-10-27 04:08:56 +02:00
commit eec64ef1c0
14 changed files with 340 additions and 197 deletions

View File

@ -26,6 +26,21 @@ union alignas(2) f16
{
u16 _u16;
u8 _u8[2];
// Wraps an already encoded 16-bit pattern; the bits are stored untouched,
// no numeric conversion happens here.
explicit f16(u16 raw)
    : _u16(raw)
{
}
// Converts the stored half-precision bit pattern (1 sign bit, 5 exponent bits
// with bias 15, 10 mantissa bits) to a single-precision float.
// The normal-number path is the re-bias trick from
// http://stackoverflow.com/a/26779139; zero, subnormal, Inf and NaN inputs are
// handled explicitly because plain re-biasing turns a half 0 into 2^-15.
explicit operator float() const
{
    // Work in u32 so that moving the sign bit to bit 31 never shifts a signed int.
    const u32 bits = static_cast<u32>(_u16);
    const u32 sign = (bits & 0x8000u) << 16; // Sign (just moved)
    const u32 exponent = (bits >> 10) & 0x1Fu;
    u32 mantissa = bits & 0x03FFu;

    u32 raw;
    if (exponent == 0x1Fu)
    {
        // Inf (mantissa == 0) or NaN (payload kept in the top mantissa bits)
        raw = sign | 0x7F800000u | (mantissa << 13);
    }
    else if (exponent != 0)
    {
        // Normal number: exponent is re-biased ( exp - 15 + 127 )
        raw = sign | ((exponent + 112u) << 23) | (mantissa << 13);
    }
    else if (mantissa == 0)
    {
        // Signed zero
        raw = sign;
    }
    else
    {
        // Subnormal half (mantissa * 2^-24): renormalise, since every such
        // value is a normal number in single precision.
        u32 biased_exponent = 113u; // 127 - 14
        while ((mantissa & 0x0400u) == 0)
        {
            mantissa <<= 1;
            --biased_exponent;
        }
        raw = sign | (biased_exponent << 23) | ((mantissa & 0x03FFu) << 13);
    }
    // Reinterpret the assembled bits as a float (same punning as before).
    return (float&)raw;
}
};
using f32 = float;

View File

@ -109,34 +109,88 @@ void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_
}
}
template<typename IndexType, typename DstType, typename SrcType>
void expandIndexedTriangleFan(DstType *dst, const SrcType *src, size_t indexCount)
template<typename IndexType>
void uploadAsIt(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index)
{
IndexType *typedDst = reinterpret_cast<IndexType *>(dst);
const IndexType *typedSrc = reinterpret_cast<const IndexType *>(src);
for (unsigned i = 0; i < indexCount - 2; i++)
for (u32 i = 0; i < indexCount; ++i)
{
typedDst[3 * i] = typedSrc[0];
typedDst[3 * i + 1] = typedSrc[i + 2 - 1];
typedDst[3 * i + 2] = typedSrc[i + 2];
IndexType index = vm::ps3::_ref<IndexType>(address + i * sizeof(IndexType));
(IndexType&)dst[i * sizeof(IndexType)] = index;
if (is_primitive_restart_enabled && index == (IndexType)-1) // Cut
continue;
max_index = MAX2(max_index, index);
min_index = MIN2(min_index, index);
}
}
template<typename IndexType, typename DstType, typename SrcType>
void expandIndexedQuads(DstType *dst, const SrcType *src, size_t indexCount)
/**
 * Expands an indexed triangle fan into an indexed triangle list.
 * Source indexes are read with vm::ps3::_ref<IndexType> starting at address;
 * for every triangle i the indexes (0, i + 1, i + 2) of the fan are written
 * consecutively to dst, i.e. 3 * (indexCount - 2) indexes in total.
 * min_index / max_index are widened to cover every index read; when
 * is_primitive_restart_enabled is set, the cut value (IndexType)-1 is ignored
 * for that purpose.
 * Nothing is written and min/max are left untouched if indexCount < 3.
 */
template<typename IndexType>
void expandIndexedTriangleFan(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index)
{
    // A fan needs at least 3 indexes. This guard also prevents the unsigned
    // "indexCount - 2" below from wrapping around for indexCount < 2.
    if (indexCount < 3)
        return;
    const size_t triangleCount = indexCount - 2;

    // The fan centre is shared by every triangle: read and account for it once.
    const IndexType index0 = vm::ps3::_ref<IndexType>(address);
    if (!is_primitive_restart_enabled || index0 != (IndexType)-1) // Cut
    {
        min_index = MIN2(min_index, index0);
        max_index = MAX2(max_index, index0);
    }

    for (size_t i = 0; i < triangleCount; i++)
    {
        IndexType index1 = vm::ps3::_ref<IndexType>(address + (i + 2 - 1) * sizeof(IndexType));
        IndexType index2 = vm::ps3::_ref<IndexType>(address + (i + 2) * sizeof(IndexType));

        (IndexType&)dst[(3 * i) * sizeof(IndexType)] = index0;
        (IndexType&)dst[(3 * i + 1) * sizeof(IndexType)] = index1;
        (IndexType&)dst[(3 * i + 2) * sizeof(IndexType)] = index2;

        if (!is_primitive_restart_enabled || index1 != (IndexType)-1) // Cut
        {
            min_index = MIN2(min_index, index1);
            max_index = MAX2(max_index, index1);
        }
        if (!is_primitive_restart_enabled || index2 != (IndexType)-1) // Cut
        {
            min_index = MIN2(min_index, index2);
            max_index = MAX2(max_index, index2);
        }
    }
}
template<typename IndexType>
void expandIndexedQuads(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index)
{
IndexType *typedDst = reinterpret_cast<IndexType *>(dst);
const IndexType *typedSrc = reinterpret_cast<const IndexType *>(src);
for (unsigned i = 0; i < indexCount / 4; i++)
{
// First triangle
typedDst[6 * i] = typedSrc[4 * i];
typedDst[6 * i + 1] = typedSrc[4 * i + 1];
typedDst[6 * i + 2] = typedSrc[4 * i + 2];
IndexType index0 = vm::ps3::_ref<IndexType>(address + 4 * i * sizeof(IndexType));
(IndexType&)dst[(6 * i) * sizeof(IndexType)] = index0;
IndexType index1 = vm::ps3::_ref<IndexType>(address + (4 * i + 1) * sizeof(IndexType));
(IndexType&)dst[(6 * i + 1) * sizeof(IndexType)] = index1;
IndexType index2 = vm::ps3::_ref<IndexType>(address + (4 * i + 2) * sizeof(IndexType));
(IndexType&)dst[(6 * i + 2) * sizeof(IndexType)] = index2;
// Second triangle
typedDst[6 * i + 3] = typedSrc[4 * i + 2];
typedDst[6 * i + 4] = typedSrc[4 * i + 3];
typedDst[6 * i + 5] = typedSrc[4 * i];
(IndexType&)dst[(6 * i + 3) * sizeof(IndexType)] = index2;
IndexType index3 = vm::ps3::_ref<IndexType>(address + (4 * i + 3) * sizeof(IndexType));
(IndexType&)dst[(6 * i + 4) * sizeof(IndexType)] = index3;
(IndexType&)dst[(6 * i + 5) * sizeof(IndexType)] = index0;
if (!is_primitive_restart_enabled || index0 != (IndexType)-1) // Cut
{
min_index = MIN2(min_index, index0);
max_index = MAX2(max_index, index0);
}
if (!is_primitive_restart_enabled || index1 != (IndexType)-1) // Cut
{
min_index = MIN2(min_index, index1);
max_index = MAX2(max_index, index1);
}
if (!is_primitive_restart_enabled || index2 != (IndexType)-1) // Cut
{
min_index = MIN2(min_index, index2);
max_index = MAX2(max_index, index2);
}
if (!is_primitive_restart_enabled || index3 != (IndexType)-1) // Cut
{
min_index = MIN2(min_index, index3);
max_index = MAX2(max_index, index3);
}
}
}
@ -178,79 +232,84 @@ size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count)
}
}
void uploadIndexData(unsigned m_draw_mode, unsigned index_type, void* indexBuffer, void* bufferMap, unsigned element_count)
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned draw_mode, unsigned first, unsigned count)
{
if (indexBuffer != nullptr)
unsigned short *typedDst = (unsigned short *)(dst);
switch (draw_mode)
{
switch (m_draw_mode)
case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
for (unsigned i = 0; i < (count - 2); i++)
{
case CELL_GCM_PRIMITIVE_POINTS:
case CELL_GCM_PRIMITIVE_LINES:
case CELL_GCM_PRIMITIVE_LINE_LOOP:
case CELL_GCM_PRIMITIVE_LINE_STRIP:
case CELL_GCM_PRIMITIVE_TRIANGLES:
case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP:
case CELL_GCM_PRIMITIVE_QUAD_STRIP:
case CELL_GCM_PRIMITIVE_POLYGON:
typedDst[3 * i] = first;
typedDst[3 * i + 1] = i + 2 - 1;
typedDst[3 * i + 2] = i + 2;
}
return;
case CELL_GCM_PRIMITIVE_QUADS:
for (unsigned i = 0; i < count / 4; i++)
{
size_t indexSize = (index_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32) ? 4 : 2;
memcpy(bufferMap, indexBuffer, indexSize * element_count);
return;
}
case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
switch (index_type)
{
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
expandIndexedTriangleFan<unsigned int>(bufferMap, indexBuffer, element_count);
return;
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
expandIndexedTriangleFan<unsigned short>(bufferMap, indexBuffer, element_count);
return;
default:
abort();
return;
}
case CELL_GCM_PRIMITIVE_QUADS:
switch (index_type)
{
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
expandIndexedQuads<unsigned int>(bufferMap, indexBuffer, element_count);
return;
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
expandIndexedQuads<unsigned short>(bufferMap, indexBuffer, element_count);
return;
default:
abort();
return;
}
// First triangle
typedDst[6 * i] = 4 * i + first;
typedDst[6 * i + 1] = 4 * i + 1 + first;
typedDst[6 * i + 2] = 4 * i + 2 + first;
// Second triangle
typedDst[6 * i + 3] = 4 * i + 2 + first;
typedDst[6 * i + 4] = 4 * i + 3 + first;
typedDst[6 * i + 5] = 4 * i + first;
}
return;
}
else
}
void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index)
{
u32 address = rsx::get_address(rsx::method_registers[NV4097_SET_INDEX_ARRAY_ADDRESS], rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] & 0xf);
u32 type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4;
u32 type_size = type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32 ? sizeof(u32) : sizeof(u16);
u32 base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET];
u32 base_index = 0;//rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX];
bool is_primitive_restart_enabled = !!rsx::method_registers[NV4097_SET_RESTART_INDEX_ENABLE];
switch (m_draw_mode)
{
unsigned short *typedDst = static_cast<unsigned short *>(bufferMap);
switch (m_draw_mode)
case CELL_GCM_PRIMITIVE_POINTS:
case CELL_GCM_PRIMITIVE_LINES:
case CELL_GCM_PRIMITIVE_LINE_LOOP:
case CELL_GCM_PRIMITIVE_LINE_STRIP:
case CELL_GCM_PRIMITIVE_TRIANGLES:
case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP:
case CELL_GCM_PRIMITIVE_QUAD_STRIP:
case CELL_GCM_PRIMITIVE_POLYGON:
switch (type)
{
case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
for (unsigned i = 0; i < (element_count - 2); i++)
{
typedDst[3 * i] = 0;
typedDst[3 * i + 1] = i + 2 - 1;
typedDst[3 * i + 2] = i + 2;
}
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
uploadAsIt<u32>(dst, address + (first + base_index) * sizeof(u32), count, is_primitive_restart_enabled, min_index, max_index);
return;
case CELL_GCM_PRIMITIVE_QUADS:
for (unsigned i = 0; i < element_count / 4; i++)
{
// First triangle
typedDst[6 * i] = 4 * i;
typedDst[6 * i + 1] = 4 * i + 1;
typedDst[6 * i + 2] = 4 * i + 2;
// Second triangle
typedDst[6 * i + 3] = 4 * i + 2;
typedDst[6 * i + 4] = 4 * i + 3;
typedDst[6 * i + 5] = 4 * i;
}
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
uploadAsIt<u16>(dst, address + (first + base_index) * sizeof(u16), count, is_primitive_restart_enabled, min_index, max_index);
return;
}
return;
case CELL_GCM_PRIMITIVE_TRIANGLE_FAN:
switch (type)
{
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
expandIndexedTriangleFan<u32>(dst, address + (first + base_index) * sizeof(u32), count, is_primitive_restart_enabled, min_index, max_index);
return;
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
expandIndexedTriangleFan<u16>(dst, address + (first + base_index) * sizeof(u16), count, is_primitive_restart_enabled, min_index, max_index);
return;
}
case CELL_GCM_PRIMITIVE_QUADS:
switch (type)
{
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
expandIndexedQuads<u32>(dst, address + (first + base_index) * sizeof(u32), count, is_primitive_restart_enabled, min_index, max_index);
return;
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
expandIndexedQuads<u16>(dst, address + (first + base_index) * sizeof(u16), count, is_primitive_restart_enabled, min_index, max_index);
return;
}
}

View File

@ -35,6 +35,13 @@ bool isNativePrimitiveMode(unsigned m_draw_mode);
size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count);
/*
* Write index information to bufferMap
* Write count indexes starting at first to dst buffer.
* Returns min/max index found during the process.
* The function expands index buffer for non native primitive type.
*/
void uploadIndexData(unsigned m_draw_mode, unsigned index_type, void* indexBuffer, void* bufferMap, unsigned element_count);
void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index);
/*
* Write index data needed to emulate non indexed non native primitive mode.
*/
void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count);

View File

@ -118,13 +118,17 @@ void D3D12GSRender::load_vertex_data(u32 first, u32 count)
vertex_draw_count += count;
}
void D3D12GSRender::upload_vertex_attributes()
void D3D12GSRender::upload_vertex_attributes(const std::vector<std::pair<u32, u32> > &vertex_ranges)
{
m_vertex_buffer_views.clear();
m_IASet.clear();
size_t inputSlot = 0;
size_t vertex_count = 0;
for (const auto &pair : vertex_ranges)
vertex_count += pair.second;
// First array attribute
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
@ -136,14 +140,14 @@ void D3D12GSRender::upload_vertex_attributes()
u32 type_size = rsx::get_vertex_type_size(info.type);
u32 element_size = type_size * info.size;
size_t subBufferSize = element_size * vertex_draw_count;
size_t subBufferSize = element_size * vertex_count;
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
for (const auto &range : m_first_count_pairs)
for (const auto &range : vertex_ranges)
{
write_vertex_array_data_to_buffer(bufferMap, range.first, range.second, index, info);
bufferMap = (char*)bufferMap + range.second * element_size;
@ -210,73 +214,11 @@ void D3D12GSRender::upload_vertex_attributes()
IAElement.InstanceDataStepRate = 1;
m_IASet.push_back(IAElement);
}
m_first_count_pairs.clear();
}
D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw)
void D3D12GSRender::load_vertex_index_data(u32 first, u32 count)
{
D3D12_INDEX_BUFFER_VIEW indexBufferView = {};
// No need for index buffer
if (!indexed_draw && isNativePrimitiveMode(draw_mode))
{
m_renderingInfo.m_indexed = false;
m_renderingInfo.m_count = vertex_draw_count;
m_renderingInfo.m_baseVertex = 0;
return indexBufferView;
}
m_renderingInfo.m_indexed = true;
u32 indexed_type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4;
// Index type
size_t indexSize;
if (!indexed_draw)
{
indexBufferView.Format = DXGI_FORMAT_R16_UINT;
indexSize = 2;
}
else
{
switch (indexed_type)
{
default: abort();
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16:
indexBufferView.Format = DXGI_FORMAT_R16_UINT;
indexSize = 2;
break;
case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32:
indexBufferView.Format = DXGI_FORMAT_R32_UINT;
indexSize = 4;
break;
}
}
// Index count
m_renderingInfo.m_count = getIndexCount(draw_mode, indexed_draw ? (u32)(vertex_index_array.size() / indexSize) : vertex_draw_count);
// Base vertex
if (!indexed_draw && isNativePrimitiveMode(draw_mode))
m_renderingInfo.m_baseVertex = 0;
else
m_renderingInfo.m_baseVertex = 0;
// Alloc
size_t subBufferSize = align(m_renderingInfo.m_count * indexSize, 64);
assert(m_vertexIndexData.canAlloc(subBufferSize));
size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);
void *buffer;
ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
void *bufferMap = (char*)buffer + heapOffset;
uploadIndexData(draw_mode, indexed_type, indexed_draw ? vertex_index_array.data() : nullptr, bufferMap, indexed_draw ? (u32)(vertex_index_array.size() / indexSize) : vertex_draw_count);
m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));
m_timers.m_bufferUploadSize += subBufferSize;
indexBufferView.SizeInBytes = (UINT)subBufferSize;
indexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset;
return indexBufferView;
}
void D3D12GSRender::setScaleOffset(size_t descriptorIndex)
@ -419,4 +361,83 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer(size_t descriptorIndex)
CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV));
}
/**
 * Uploads vertex attributes (and an index buffer when one is needed) for the
 * (first, count) pairs accumulated in m_first_count_pairs, and binds them on cmdlist.
 * - Non indexed draw, native primitive: only vertex buffers are set.
 * - Non indexed draw, non native primitive: a 16 bits index buffer emulating the
 *   primitive is generated and m_renderingInfo.m_indexed is switched to true.
 * - Indexed draw: indexes are copied/expanded and vertex attributes are uploaded
 *   for the range [0, max_index].
 * m_renderingInfo.m_count is set to the number of elements to draw.
 */
void D3D12GSRender::upload_vertex_index_data(ID3D12GraphicsCommandList *cmdlist)
{
    // Index count
    m_renderingInfo.m_count = 0;
    for (const auto &pair : m_first_count_pairs)
        m_renderingInfo.m_count += getIndexCount(draw_mode, pair.second);

    if (!m_renderingInfo.m_indexed)
    {
        // Non indexed
        upload_vertex_attributes(m_first_count_pairs);
        cmdlist->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data());
        if (isNativePrimitiveMode(draw_mode))
            return;

        // Handle non native primitive
        // Alloc
        size_t subBufferSize = align(m_renderingInfo.m_count * sizeof(u16), 64);
        assert(m_vertexIndexData.canAlloc(subBufferSize));
        size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);

        void *buffer;
        ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
        void *bufferMap = (char*)buffer + heapOffset;

        // Vertex attributes of all ranges were uploaded back to back, so the
        // generated indexes of a range start where the previous range ended.
        // NOTE(review): indexes are written as 16 bits values; presumably the
        // accumulated vertex count is expected to stay below 65536 - confirm.
        u32 first = 0;
        for (const auto &pair : m_first_count_pairs)
        {
            size_t element_count = getIndexCount(draw_mode, pair.second);
            write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)bufferMap, draw_mode, first, pair.second);
            bufferMap = (char*)bufferMap + element_count * sizeof(u16);
            first += pair.second;
        }
        m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));

        D3D12_INDEX_BUFFER_VIEW indexBufferView = {
            m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset,
            (UINT)subBufferSize,
            DXGI_FORMAT_R16_UINT
        };
        // Account for the generated index buffer in the upload statistics,
        // as is done for the indexed path below.
        m_timers.m_bufferUploadSize += subBufferSize;
        cmdlist->IASetIndexBuffer(&indexBufferView);
        // The draw call now goes through the generated index buffer
        m_renderingInfo.m_indexed = true;
    }
    else
    {
        u32 indexed_type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4;

        // Index type
        size_t indexSize = (indexed_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16) ? 2 : 4;

        // Alloc
        size_t subBufferSize = align(m_renderingInfo.m_count * indexSize, 64);
        assert(m_vertexIndexData.canAlloc(subBufferSize));
        size_t heapOffset = m_vertexIndexData.alloc(subBufferSize);

        void *buffer;
        ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer));
        void *bufferMap = (char*)buffer + heapOffset;

        // min_index/max_index are updated while indexes are written
        u32 min_index = (u32)-1, max_index = 0;
        for (const auto &pair : m_first_count_pairs)
        {
            size_t element_count = getIndexCount(draw_mode, pair.second);
            write_index_array_data_to_buffer((char*)bufferMap, draw_mode, pair.first, pair.second, min_index, max_index);
            bufferMap = (char*)bufferMap + element_count * indexSize;
        }
        m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize));

        D3D12_INDEX_BUFFER_VIEW indexBufferView = {
            m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset,
            (UINT)subBufferSize,
            (indexed_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16) ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT
        };
        m_timers.m_bufferUploadSize += subBufferSize;
        cmdlist->IASetIndexBuffer(&indexBufferView);
        m_renderingInfo.m_indexed = true;

        // Upload every vertex from 0 up to the highest index referenced
        upload_vertex_attributes({ std::make_pair(0, max_index + 1) });
        cmdlist->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data());
    }
}
#endif

View File

@ -244,7 +244,7 @@ D3D12GSRender::D3D12GSRender()
m_rtts.Init(m_device.Get());
m_constantsData.Init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
m_textureUploadData.Init(m_device.Get(), 1024 * 1024 * 512, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE);
if (rpcs3::config.rsx.d3d12.overlay.value())
@ -413,13 +413,7 @@ void D3D12GSRender::end()
std::chrono::time_point<std::chrono::system_clock> vertexIndexDurationStart = std::chrono::system_clock::now();
if (!vertex_index_array.empty() || vertex_draw_count)
{
upload_vertex_attributes();
const D3D12_INDEX_BUFFER_VIEW &indexBufferView = uploadIndexBuffers(!vertex_index_array.empty());
getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data());
if (m_renderingInfo.m_indexed)
getCurrentResourceStorage().m_commandList->IASetIndexBuffer(&indexBufferView);
}
upload_vertex_index_data(getCurrentResourceStorage().m_commandList.Get());
std::chrono::time_point<std::chrono::system_clock> vertexIndexDurationEnd = std::chrono::system_clock::now();
m_timers.m_vertexIndexDuration += std::chrono::duration_cast<std::chrono::microseconds>(vertexIndexDurationEnd - vertexIndexDurationStart).count();
@ -434,7 +428,7 @@ void D3D12GSRender::end()
std::chrono::time_point<std::chrono::system_clock> programLoadEnd = std::chrono::system_clock::now();
m_timers.m_programLoadDuration += std::chrono::duration_cast<std::chrono::microseconds>(programLoadEnd - programLoadStart).count();
getCurrentResourceStorage().m_commandList->SetGraphicsRootSignature(m_rootSignatures[m_PSO->second].Get());
getCurrentResourceStorage().m_commandList->SetGraphicsRootSignature(m_rootSignatures[std::get<2>(*m_PSO)].Get());
getCurrentResourceStorage().m_commandList->OMSetStencilRef(rsx::method_registers[NV4097_SET_STENCIL_FUNC_REF]);
std::chrono::time_point<std::chrono::system_clock> constantsDurationStart = std::chrono::system_clock::now();
@ -448,15 +442,15 @@ void D3D12GSRender::end()
std::chrono::time_point<std::chrono::system_clock> constantsDurationEnd = std::chrono::system_clock::now();
m_timers.m_constantsDuration += std::chrono::duration_cast<std::chrono::microseconds>(constantsDurationEnd - constantsDurationStart).count();
getCurrentResourceStorage().m_commandList->SetPipelineState(m_PSO->first);
getCurrentResourceStorage().m_commandList->SetPipelineState(std::get<0>(*m_PSO));
std::chrono::time_point<std::chrono::system_clock> textureDurationStart = std::chrono::system_clock::now();
if (m_PSO->second > 0)
if (std::get<2>(*m_PSO) > 0)
{
size_t usedTexture = UploadTextures(getCurrentResourceStorage().m_commandList.Get(), currentDescriptorIndex + 3);
// Fill empty slots
for (; usedTexture < m_PSO->second; usedTexture++)
for (; usedTexture < std::get<2>(*m_PSO); usedTexture++)
{
D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
@ -595,9 +589,9 @@ void D3D12GSRender::end()
}
if (m_renderingInfo.m_indexed)
getCurrentResourceStorage().m_commandList->DrawIndexedInstanced((UINT)m_renderingInfo.m_count, 1, 0, (UINT)m_renderingInfo.m_baseVertex, 0);
getCurrentResourceStorage().m_commandList->DrawIndexedInstanced((UINT)m_renderingInfo.m_count, 1, 0, 0, 0);
else
getCurrentResourceStorage().m_commandList->DrawInstanced((UINT)m_renderingInfo.m_count, 1, (UINT)m_renderingInfo.m_baseVertex, 0);
getCurrentResourceStorage().m_commandList->DrawInstanced((UINT)m_renderingInfo.m_count, 1, 0, 0);
vertex_index_array.clear();
std::chrono::time_point<std::chrono::system_clock> endDuration = std::chrono::system_clock::now();
@ -610,7 +604,8 @@ void D3D12GSRender::end()
m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().m_commandList.GetAddressOf());
getCurrentResourceStorage().setNewCommandList();
}
m_first_count_pairs.clear();
m_renderingInfo.m_indexed = false;
thread::end();
}

View File

@ -309,7 +309,7 @@ private:
RSXFragmentProgram fragment_program;
PipelineStateObjectCache m_cachePSO;
std::pair<ID3D12PipelineState *, size_t> *m_PSO;
std::tuple<ID3D12PipelineState *, std::vector<size_t>, size_t> *m_PSO;
std::unordered_map<u32, color4f> local_transform_constants;
struct
@ -405,19 +405,18 @@ private:
ResourceStorage &getNonCurrentResourceStorage();
// Constants storage
DataHeap<ID3D12Resource, 256> m_constantsData;
DataHeap<ID3D12Resource, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT> m_constantsData;
// Vertex storage
DataHeap<ID3D12Resource, 256> m_vertexIndexData;
DataHeap<ID3D12Resource, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT> m_vertexIndexData;
// Texture storage
DataHeap<ID3D12Resource, 65536> m_textureUploadData;
DataHeap<ID3D12Heap, 65536> m_UAVHeap;
DataHeap<ID3D12Heap, 65536> m_readbackResources;
DataHeap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> m_textureUploadData;
DataHeap<ID3D12Heap, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT> m_UAVHeap;
DataHeap<ID3D12Heap, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT> m_readbackResources;
struct
{
bool m_indexed; /*<! is draw call using an index buffer */
size_t m_count; /*<! draw call vertex count */
size_t m_baseVertex; /*<! Starting vertex for draw call */
} m_renderingInfo;
RenderTargets m_rtts;
@ -458,19 +457,19 @@ private:
bool LoadProgram();
/**
* Create vertex and index buffers (if needed) and set them to cmdlist.
* Non-native primitive types are emulated by index buffer expansion.
*/
void upload_vertex_index_data(ID3D12GraphicsCommandList *cmdlist);
std::vector<std::pair<u32, u32> > m_first_count_pairs;
/**
* Upload all vertex attribute whose (first, count) info were previously accumulated.
* Upload all enabled vertex attributes for the vertices in the ranges described by vertex_ranges.
* Each range in vertex_ranges is a pair whose first element is the index of the first vertex of the
* range, and whose second element is the number of vertices in this range.
*/
void upload_vertex_attributes();
/**
* Create index buffer for indexed rendering and non native primitive format if nedded, and
* update m_renderingInfo member accordingly. If m_renderingInfo::m_indexed is true,
* returns an index buffer view that can be passed to a command list.
*/
D3D12_INDEX_BUFFER_VIEW uploadIndexBuffers(bool indexed_draw = false);
void upload_vertex_attributes(const std::vector<std::pair<u32, u32> > &vertex_ranges);
void setScaleOffset(size_t descriptorIndex);
void FillVertexShaderConstantsBuffer(size_t descriptorIndex);
@ -504,4 +503,5 @@ protected:
virtual void flip(int buffer) override;
virtual void load_vertex_data(u32 first, u32 count) override;
virtual void load_vertex_index_data(u32 first, u32 count) override;
};

View File

@ -292,6 +292,9 @@ bool D3D12GSRender::LoadProgram()
prop.Blend.RenderTarget[i].RenderTargetWriteMask = mask;
prop.IASet = m_IASet;
if (!!rsx::method_registers[NV4097_SET_RESTART_INDEX_ENABLE])
prop.CutValue = ((rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4) == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32) ?
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF : D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
m_PSO = m_cachePSO.getGraphicPipelineState(&vertex_program, &fragment_program, prop, std::make_pair(m_device.Get(), m_rootSignatures));
return m_PSO != nullptr;

View File

@ -16,6 +16,7 @@ struct D3D12PipelineProperties
unsigned numMRT : 3;
D3D12_DEPTH_STENCIL_DESC DepthStencil;
D3D12_RASTERIZER_DESC Rasterization;
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE CutValue;
bool operator==(const D3D12PipelineProperties &in) const
{
@ -90,6 +91,7 @@ public:
u32 id;
ComPtr<ID3DBlob> bytecode;
std::vector<size_t> vertex_shader_inputs;
std::vector<size_t> FragmentConstantOffsetCache;
size_t m_textureCount;
@ -103,11 +105,40 @@ public:
void Compile(const std::string &code, enum class SHADER_TYPE st);
};
static
bool has_attribute(size_t attribute, const std::vector<D3D12_INPUT_ELEMENT_DESC> &desc)
{
for (const auto &attribute_desc : desc)
{
if (attribute_desc.SemanticIndex == attribute)
return true;
}
return false;
}
static
std::vector<D3D12_INPUT_ELEMENT_DESC> completes_IA_desc(const std::vector<D3D12_INPUT_ELEMENT_DESC> &desc, const std::vector<size_t> &inputs)
{
std::vector<D3D12_INPUT_ELEMENT_DESC> result(desc);
for (size_t attribute : inputs)
{
if (has_attribute(attribute, desc))
continue;
D3D12_INPUT_ELEMENT_DESC extra_ia_desc = {};
extra_ia_desc.SemanticIndex = (UINT)attribute;
extra_ia_desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
extra_ia_desc.SemanticName = "TEXCOORD";
extra_ia_desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
result.push_back(extra_ia_desc);
}
return result;
}
struct D3D12Traits
{
typedef Shader VertexProgramData;
typedef Shader FragmentProgramData;
typedef std::pair<ID3D12PipelineState *, size_t> PipelineData;
typedef std::tuple<ID3D12PipelineState *, std::vector<size_t>, size_t> PipelineData;
typedef D3D12PipelineProperties PipelineProperties;
typedef std::pair<ID3D12Device *, ComPtr<ID3D12RootSignature> *> ExtraData;
@ -144,7 +175,7 @@ struct D3D12Traits
D3D12VertexProgramDecompiler VS(RSXVP->data);
std::string shaderCode = VS.Decompile();
vertexProgramData.Compile(shaderCode, Shader::SHADER_TYPE::SHADER_TYPE_VERTEX);
vertexProgramData.vertex_shader_inputs = VS.input_slots;
// TODO: This shouldn't use current dir
std::string filename = "./VertexProgram" + std::to_string(ID) + ".hlsl";
fs::file(filename, fom::write | fom::create | fom::trunc).write(shaderCode.c_str(), shaderCode.size());
@ -155,7 +186,7 @@ struct D3D12Traits
PipelineData *BuildProgram(VertexProgramData &vertexProgramData, FragmentProgramData &fragmentProgramData, const PipelineProperties &pipelineProperties, const ExtraData& extraData)
{
std::pair<ID3D12PipelineState *, size_t> *result = new std::pair<ID3D12PipelineState *, size_t>();
std::tuple<ID3D12PipelineState *, std::vector<size_t>, size_t> *result = new std::tuple<ID3D12PipelineState *, std::vector<size_t>, size_t>();
D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {};
if (vertexProgramData.bytecode == nullptr)
@ -169,7 +200,7 @@ struct D3D12Traits
graphicPipelineStateDesc.PS.pShaderBytecode = fragmentProgramData.bytecode->GetBufferPointer();
graphicPipelineStateDesc.pRootSignature = extraData.second[fragmentProgramData.m_textureCount].Get();
result->second = fragmentProgramData.m_textureCount;
std::get<2>(*result) = fragmentProgramData.m_textureCount;
graphicPipelineStateDesc.BlendState = pipelineProperties.Blend;
graphicPipelineStateDesc.DepthStencilState = pipelineProperties.DepthStencil;
@ -181,20 +212,25 @@ struct D3D12Traits
graphicPipelineStateDesc.RTVFormats[i] = pipelineProperties.RenderTargetsFormat;
graphicPipelineStateDesc.DSVFormat = pipelineProperties.DepthStencilFormat;
graphicPipelineStateDesc.InputLayout.pInputElementDescs = pipelineProperties.IASet.data();
graphicPipelineStateDesc.InputLayout.NumElements = (UINT)pipelineProperties.IASet.size();
const std::vector<D3D12_INPUT_ELEMENT_DESC> &completed_IA_desc = completes_IA_desc(pipelineProperties.IASet, vertexProgramData.vertex_shader_inputs);
graphicPipelineStateDesc.InputLayout.pInputElementDescs = completed_IA_desc.data();
graphicPipelineStateDesc.InputLayout.NumElements = (UINT)completed_IA_desc.size();
graphicPipelineStateDesc.SampleDesc.Count = 1;
graphicPipelineStateDesc.SampleMask = UINT_MAX;
graphicPipelineStateDesc.NodeMask = 1;
extraData.first->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result->first));
graphicPipelineStateDesc.IBStripCutValue = pipelineProperties.CutValue;
extraData.first->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&std::get<0>(*result)));
std::get<1>(*result) = vertexProgramData.vertex_shader_inputs;
return result;
}
static
void DeleteProgram(PipelineData *ptr)
{
ptr->first->Release();
std::get<0>(*ptr)->Release();
delete ptr;
}
};

View File

@ -130,7 +130,7 @@ ComPtr<ID3D12Resource> uploadSingleTexture(
const rsx::texture &texture,
ID3D12Device *device,
ID3D12GraphicsCommandList *commandList,
DataHeap<ID3D12Resource, 65536> &textureBuffersHeap)
DataHeap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &textureBuffersHeap)
{
ComPtr<ID3D12Resource> vramTexture;
size_t w = texture.width(), h = texture.height();
@ -181,7 +181,7 @@ static
void updateExistingTexture(
const rsx::texture &texture,
ID3D12GraphicsCommandList *commandList,
DataHeap<ID3D12Resource, 65536> &textureBuffersHeap,
DataHeap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &textureBuffersHeap,
ID3D12Resource *existingTexture)
{
size_t w = texture.width(), h = texture.height();

View File

@ -43,9 +43,13 @@ void D3D12VertexProgramDecompiler::insertInputs(std::stringstream & OS, const st
for (const ParamType PT : inputs)
{
for (const ParamItem &PI : PT.items)
{
OS << " " << PT.type << " " << PI.name << ": TEXCOORD" << PI.location << ";" << std::endl;
input_slots.push_back(PI.location);
}
}
OS << "};" << std::endl;
}
void D3D12VertexProgramDecompiler::insertConstants(std::stringstream & OS, const std::vector<ParamType> & constants)
@ -132,6 +136,8 @@ void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS)
OS << " " << PT.type << " " << PI.name;
if (!PI.value.empty())
OS << " = " << PI.value;
else
OS << " = " << "float4(0., 0., 0., 0.);";
OS << ";" << std::endl;
}
}

View File

@ -19,5 +19,6 @@ protected:
virtual void insertMainStart(std::stringstream &OS);
virtual void insertMainEnd(std::stringstream &OS);
public:
std::vector<size_t> input_slots;
D3D12VertexProgramDecompiler(std::vector<u32>& data);
};

View File

@ -144,9 +144,9 @@ namespace rsx
return (method_registers[NV4097_SET_TEXTURE_CONTROL1 + (m_index * 8)]);
}
u16 texture::bias() const
float texture::bias() const
{
return ((method_registers[NV4097_SET_TEXTURE_FILTER + (m_index * 8)]) & 0x1fff);
return float(f16((method_registers[NV4097_SET_TEXTURE_FILTER + (m_index * 8)]) & 0x1fff));
}
u8 texture::min_filter() const

View File

@ -44,7 +44,7 @@ namespace rsx
u32 remap() const;
// Filter
u16 bias() const;
float bias() const;
u8 min_filter() const;
u8 mag_filter() const;
u8 convolution_filter() const;

View File

@ -164,7 +164,7 @@ namespace rsx
u32 transform_program[512 * 4] = {};
virtual void load_vertex_data(u32 first, u32 count);
void load_vertex_index_data(u32 first, u32 count);
virtual void load_vertex_index_data(u32 first, u32 count);
public:
u32 ioAddress, ioSize;