mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-25 04:02:42 +01:00
Use aligned stores in write_index_array_data_to_buffer
Ensure that the target buffer is cache-line aligned. Improve stx::make_single to support a caller-specified alignment.
This commit is contained in:
parent
76ccaf5e6f
commit
262ff01619
@ -831,7 +831,7 @@ namespace
|
||||
const __m128i value = _mm_shuffle_epi8(raw, s_bswap_u16_mask);
|
||||
max = _mm_max_epu16(max, value);
|
||||
min = _mm_min_epu16(min, value);
|
||||
_mm_storeu_si128(dst_stream++, value);
|
||||
_mm_store_si128(dst_stream++, value);
|
||||
}
|
||||
|
||||
const u16 min_index = sse41_hmin_epu16(min);
|
||||
@ -857,7 +857,7 @@ namespace
|
||||
const __m128i value = _mm_shuffle_epi8(raw, s_bswap_u32_mask);
|
||||
max = _mm_max_epu32(max, value);
|
||||
min = _mm_min_epu32(min, value);
|
||||
_mm_storeu_si128(dst_stream++, value);
|
||||
_mm_store_si128(dst_stream++, value);
|
||||
}
|
||||
|
||||
__m128i tmp = _mm_srli_si128(min, 8);
|
||||
@ -944,7 +944,7 @@ namespace
|
||||
const __m256i value_with_max_restart = _mm256_or_si256(mask, value);
|
||||
max = _mm256_max_epu16(max, value_with_min_restart);
|
||||
min = _mm256_min_epu16(min, value_with_max_restart);
|
||||
_mm256_storeu_si256(dst_stream++, value_with_max_restart);
|
||||
_mm256_store_si256(dst_stream++, value_with_max_restart);
|
||||
}
|
||||
|
||||
__m128i tmp = _mm256_extracti128_si256(min, 1);
|
||||
@ -981,7 +981,7 @@ namespace
|
||||
const __m128i value_with_max_restart = _mm_or_si128(mask, value);
|
||||
max = _mm_max_epu16(max, value_with_min_restart);
|
||||
min = _mm_min_epu16(min, value_with_max_restart);
|
||||
_mm_storeu_si128(dst_stream++, value_with_max_restart);
|
||||
_mm_store_si128(dst_stream++, value_with_max_restart);
|
||||
}
|
||||
|
||||
const u16 min_index = sse41_hmin_epu16(min);
|
||||
@ -1010,7 +1010,7 @@ namespace
|
||||
const __m128i value_with_max_restart = _mm_or_si128(mask, value);
|
||||
max = _mm_max_epu32(max, value_with_min_restart);
|
||||
min = _mm_min_epu32(min, value_with_max_restart);
|
||||
_mm_storeu_si128(dst_stream++, value_with_max_restart);
|
||||
_mm_store_si128(dst_stream++, value_with_max_restart);
|
||||
}
|
||||
|
||||
__m128i tmp = _mm_srli_si128(min, 8);
|
||||
|
@ -144,15 +144,15 @@ namespace
|
||||
|
||||
if (emulate_restart) upload_size *= 2;
|
||||
|
||||
VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<4>(upload_size);
|
||||
VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<64>(upload_size);
|
||||
void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
|
||||
|
||||
std::span<std::byte> dst;
|
||||
std::vector<std::byte> tmp;
|
||||
stx::single_ptr<std::byte[]> tmp;
|
||||
if (emulate_restart)
|
||||
{
|
||||
tmp.resize(upload_size);
|
||||
dst = tmp;
|
||||
tmp = stx::make_single<std::byte[], false, 64>(upload_size);
|
||||
dst = std::span<std::byte>(tmp.get(), upload_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -182,11 +182,11 @@ namespace
|
||||
{
|
||||
if (index_type == rsx::index_array_type::u16)
|
||||
{
|
||||
index_count = rsx::remove_restart_index(static_cast<u16*>(buf), reinterpret_cast<u16*>(tmp.data()), index_count, u16{umax});
|
||||
index_count = rsx::remove_restart_index(static_cast<u16*>(buf), reinterpret_cast<u16*>(tmp.get()), index_count, u16{umax});
|
||||
}
|
||||
else
|
||||
{
|
||||
index_count = rsx::remove_restart_index(static_cast<u32*>(buf), reinterpret_cast<u32*>(tmp.data()), index_count, u32{umax});
|
||||
index_count = rsx::remove_restart_index(static_cast<u32*>(buf), reinterpret_cast<u32*>(tmp.get()), index_count, u32{umax});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -49,7 +49,7 @@ namespace stx
|
||||
|
||||
// Control block with data and reference counter
|
||||
template <typename T>
|
||||
class alignas(T) shared_data final : align_filler<sizeof(shared_counter), alignof(T)>
|
||||
class shared_data final : align_filler<sizeof(shared_counter), alignof(T)>
|
||||
{
|
||||
public:
|
||||
shared_counter m_ctr{};
|
||||
@ -64,7 +64,7 @@ namespace stx
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class alignas(T) shared_data<T[]> final : align_filler<sizeof(shared_counter) + sizeof(usz), alignof(T)>
|
||||
class shared_data<T[]> final : align_filler<sizeof(shared_counter) + sizeof(usz), alignof(T)>
|
||||
{
|
||||
public:
|
||||
usz m_count{};
|
||||
@ -98,8 +98,6 @@ namespace stx
|
||||
friend class atomic_ptr;
|
||||
|
||||
public:
|
||||
using pointer = T*;
|
||||
|
||||
using element_type = std::remove_extent_t<T>;
|
||||
|
||||
constexpr single_ptr() noexcept = default;
|
||||
@ -109,7 +107,7 @@ namespace stx
|
||||
// Default constructor or null_ptr should be used instead
|
||||
[[deprecated("Use null_ptr")]] single_ptr(std::nullptr_t) = delete;
|
||||
|
||||
explicit single_ptr(shared_data<T>&, pointer ptr) noexcept
|
||||
explicit single_ptr(shared_data<T>&, element_type* ptr) noexcept
|
||||
: m_ptr(ptr)
|
||||
{
|
||||
}
|
||||
@ -258,7 +256,7 @@ namespace stx
|
||||
return single_ptr<T>(*ptr, &ptr->m_data);
|
||||
}
|
||||
|
||||
template <typename T, bool Init = true>
|
||||
template <typename T, bool Init = true, usz Align = alignof(std::remove_extent_t<T>)>
|
||||
static std::enable_if_t<std::is_unbounded_array_v<T>, single_ptr<T>> make_single(usz count) noexcept
|
||||
{
|
||||
static_assert(sizeof(shared_data<T>) - offsetof(shared_data<T>, m_ctr) == sizeof(shared_counter));
|
||||
@ -269,9 +267,9 @@ namespace stx
|
||||
|
||||
std::byte* bytes = nullptr;
|
||||
|
||||
if constexpr (alignof(etype) > (__STDCPP_DEFAULT_NEW_ALIGNMENT__))
|
||||
if constexpr (Align > (__STDCPP_DEFAULT_NEW_ALIGNMENT__))
|
||||
{
|
||||
bytes = static_cast<std::byte*>(::operator new(size, std::align_val_t{alignof(etype)}));
|
||||
bytes = static_cast<std::byte*>(::operator new(size, std::align_val_t{Align}));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -305,9 +303,9 @@ namespace stx
|
||||
|
||||
ptr->~shared_data<T>();
|
||||
|
||||
if constexpr (alignof(etype) > (__STDCPP_DEFAULT_NEW_ALIGNMENT__))
|
||||
if constexpr (Align > (__STDCPP_DEFAULT_NEW_ALIGNMENT__))
|
||||
{
|
||||
::operator delete[](bytes, std::align_val_t{alignof(etype)});
|
||||
::operator delete[](bytes, std::align_val_t{Align});
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -347,8 +345,6 @@ namespace stx
|
||||
friend class atomic_ptr;
|
||||
|
||||
public:
|
||||
using pointer = T*;
|
||||
|
||||
using element_type = std::remove_extent_t<T>;
|
||||
|
||||
constexpr shared_ptr() noexcept = default;
|
||||
@ -594,8 +590,6 @@ namespace stx
|
||||
friend class atomic_ptr;
|
||||
|
||||
public:
|
||||
using pointer = T*;
|
||||
|
||||
using element_type = std::remove_extent_t<T>;
|
||||
|
||||
using shared_type = shared_ptr<T>;
|
||||
|
Loading…
Reference in New Issue
Block a user