1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-25 20:22:30 +01:00

vk: Fix fconvert job issues

- Fix compilation bug caused by typo
- Invert to/from for consistent declarations
- Fix dst_swap when From == 2
This commit is contained in:
kd-11 2020-09-06 01:36:21 +03:00 committed by kd-11
parent 220e86bbd1
commit 85dd1b4ea9
2 changed files with 7 additions and 7 deletions

View File

@ -591,7 +591,7 @@ namespace vk
}
};
template<typename To, typename From, bool _SwapSrc = false, bool _SwapDst = false>
template<typename From, typename To, bool _SwapSrc = false, bool _SwapDst = false>
struct cs_fconvert_task : cs_shuffle_base
{
u32 m_ssbo_length = 0;
@ -629,7 +629,7 @@ namespace vk
" uint out_offset = params[0].z >> 2;\n"
" uvec4 tmp;\n";
work_kernel +=
work_kernel =
" if (index >= block_length)\n"
" return;\n";
@ -651,7 +651,7 @@ namespace vk
}
// Convert
work_kernel += " tmp.z = pack_e4m12_pack16(tmp);\n";
work_kernel += " tmp.z = pack_e4m12_pack16(tmp.xy);\n";
if constexpr (_SwapDst)
{
@ -681,7 +681,7 @@ namespace vk
if constexpr (_SwapDst)
{
work_kernel += " tmp.yz = bswap_u16(tmp.yz);\n";
work_kernel += " tmp.yz = bswap_u32(tmp.yz);\n";
}
work_kernel +=

View File

@ -108,12 +108,12 @@ namespace vk
// 3. Do conversion with optional byteswap [D32F->D16F]
if (!swap_bytes) [[likely]]
{
auto job = vk::get_compute_task<vk::cs_fconvert_task<u16, u32>>();
auto job = vk::get_compute_task<vk::cs_fconvert_task<f32, f16>>();
job->run(cmd, dst, z32_offset, packed32_length, data_offset);
}
else
{
auto job = vk::get_compute_task<vk::cs_fconvert_task<u16, u32, false, true>>();
auto job = vk::get_compute_task<vk::cs_fconvert_task<f32, f16, false, true>>();
job->run(cmd, dst, z32_offset, packed32_length, data_offset);
}
@ -237,7 +237,7 @@ namespace vk
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
// 2. Do conversion with byteswap [D16F->D32F]
auto job = vk::get_compute_task<vk::cs_fconvert_task<u32, u16>>();
auto job = vk::get_compute_task<vk::cs_fconvert_task<f16, f32>>();
job->run(cmd, src, data_offset, packed16_length, z32_offset);
// 4. Post-compute barrier