1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-25 04:02:42 +01:00

Use aligned stores in write_index_array_data_to_buffer

Ensure that target buffer is cache line aligned.
Improve stx::make_single to support alignment.
This commit is contained in:
Nekotekina 2021-12-15 17:47:02 +03:00
parent 76ccaf5e6f
commit 262ff01619
3 changed files with 19 additions and 25 deletions

View File

@ -831,7 +831,7 @@ namespace
const __m128i value = _mm_shuffle_epi8(raw, s_bswap_u16_mask);
max = _mm_max_epu16(max, value);
min = _mm_min_epu16(min, value);
_mm_storeu_si128(dst_stream++, value);
_mm_store_si128(dst_stream++, value);
}
const u16 min_index = sse41_hmin_epu16(min);
@ -857,7 +857,7 @@ namespace
const __m128i value = _mm_shuffle_epi8(raw, s_bswap_u32_mask);
max = _mm_max_epu32(max, value);
min = _mm_min_epu32(min, value);
_mm_storeu_si128(dst_stream++, value);
_mm_store_si128(dst_stream++, value);
}
__m128i tmp = _mm_srli_si128(min, 8);
@ -944,7 +944,7 @@ namespace
const __m256i value_with_max_restart = _mm256_or_si256(mask, value);
max = _mm256_max_epu16(max, value_with_min_restart);
min = _mm256_min_epu16(min, value_with_max_restart);
_mm256_storeu_si256(dst_stream++, value_with_max_restart);
_mm256_store_si256(dst_stream++, value_with_max_restart);
}
__m128i tmp = _mm256_extracti128_si256(min, 1);
@ -981,7 +981,7 @@ namespace
const __m128i value_with_max_restart = _mm_or_si128(mask, value);
max = _mm_max_epu16(max, value_with_min_restart);
min = _mm_min_epu16(min, value_with_max_restart);
_mm_storeu_si128(dst_stream++, value_with_max_restart);
_mm_store_si128(dst_stream++, value_with_max_restart);
}
const u16 min_index = sse41_hmin_epu16(min);
@ -1010,7 +1010,7 @@ namespace
const __m128i value_with_max_restart = _mm_or_si128(mask, value);
max = _mm_max_epu32(max, value_with_min_restart);
min = _mm_min_epu32(min, value_with_max_restart);
_mm_storeu_si128(dst_stream++, value_with_max_restart);
_mm_store_si128(dst_stream++, value_with_max_restart);
}
__m128i tmp = _mm_srli_si128(min, 8);

View File

@ -144,15 +144,15 @@ namespace
if (emulate_restart) upload_size *= 2;
VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<4>(upload_size);
VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<64>(upload_size);
void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
std::span<std::byte> dst;
std::vector<std::byte> tmp;
stx::single_ptr<std::byte[]> tmp;
if (emulate_restart)
{
tmp.resize(upload_size);
dst = tmp;
tmp = stx::make_single<std::byte[], false, 64>(upload_size);
dst = std::span<std::byte>(tmp.get(), upload_size);
}
else
{
@ -182,11 +182,11 @@ namespace
{
if (index_type == rsx::index_array_type::u16)
{
index_count = rsx::remove_restart_index(static_cast<u16*>(buf), reinterpret_cast<u16*>(tmp.data()), index_count, u16{umax});
index_count = rsx::remove_restart_index(static_cast<u16*>(buf), reinterpret_cast<u16*>(tmp.get()), index_count, u16{umax});
}
else
{
index_count = rsx::remove_restart_index(static_cast<u32*>(buf), reinterpret_cast<u32*>(tmp.data()), index_count, u32{umax});
index_count = rsx::remove_restart_index(static_cast<u32*>(buf), reinterpret_cast<u32*>(tmp.get()), index_count, u32{umax});
}
}

View File

@ -49,7 +49,7 @@ namespace stx
// Control block with data and reference counter
template <typename T>
class alignas(T) shared_data final : align_filler<sizeof(shared_counter), alignof(T)>
class shared_data final : align_filler<sizeof(shared_counter), alignof(T)>
{
public:
shared_counter m_ctr{};
@ -64,7 +64,7 @@ namespace stx
};
template <typename T>
class alignas(T) shared_data<T[]> final : align_filler<sizeof(shared_counter) + sizeof(usz), alignof(T)>
class shared_data<T[]> final : align_filler<sizeof(shared_counter) + sizeof(usz), alignof(T)>
{
public:
usz m_count{};
@ -98,8 +98,6 @@ namespace stx
friend class atomic_ptr;
public:
using pointer = T*;
using element_type = std::remove_extent_t<T>;
constexpr single_ptr() noexcept = default;
@ -109,7 +107,7 @@ namespace stx
// Default constructor or null_ptr should be used instead
[[deprecated("Use null_ptr")]] single_ptr(std::nullptr_t) = delete;
explicit single_ptr(shared_data<T>&, pointer ptr) noexcept
explicit single_ptr(shared_data<T>&, element_type* ptr) noexcept
: m_ptr(ptr)
{
}
@ -258,7 +256,7 @@ namespace stx
return single_ptr<T>(*ptr, &ptr->m_data);
}
template <typename T, bool Init = true>
template <typename T, bool Init = true, usz Align = alignof(std::remove_extent_t<T>)>
static std::enable_if_t<std::is_unbounded_array_v<T>, single_ptr<T>> make_single(usz count) noexcept
{
static_assert(sizeof(shared_data<T>) - offsetof(shared_data<T>, m_ctr) == sizeof(shared_counter));
@ -269,9 +267,9 @@ namespace stx
std::byte* bytes = nullptr;
if constexpr (alignof(etype) > (__STDCPP_DEFAULT_NEW_ALIGNMENT__))
if constexpr (Align > (__STDCPP_DEFAULT_NEW_ALIGNMENT__))
{
bytes = static_cast<std::byte*>(::operator new(size, std::align_val_t{alignof(etype)}));
bytes = static_cast<std::byte*>(::operator new(size, std::align_val_t{Align}));
}
else
{
@ -305,9 +303,9 @@ namespace stx
ptr->~shared_data<T>();
if constexpr (alignof(etype) > (__STDCPP_DEFAULT_NEW_ALIGNMENT__))
if constexpr (Align > (__STDCPP_DEFAULT_NEW_ALIGNMENT__))
{
::operator delete[](bytes, std::align_val_t{alignof(etype)});
::operator delete[](bytes, std::align_val_t{Align});
}
else
{
@ -347,8 +345,6 @@ namespace stx
friend class atomic_ptr;
public:
using pointer = T*;
using element_type = std::remove_extent_t<T>;
constexpr shared_ptr() noexcept = default;
@ -594,8 +590,6 @@ namespace stx
friend class atomic_ptr;
public:
using pointer = T*;
using element_type = std::remove_extent_t<T>;
using shared_type = shared_ptr<T>;