mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 02:32:36 +01:00
simd_builder: fixups
Fix resetting vmask in reduce() step. Fix AVX-512 loads in vec_load_unaligned(). Fix bzhi reg size in build_loop().
This commit is contained in:
parent
5d91caebe9
commit
a9437d69ab
@ -420,9 +420,10 @@ void asmjit::simd_builder::_init(uint new_vsize)
|
|||||||
vsize = new_vsize ? new_vsize : 16;
|
vsize = new_vsize ? new_vsize : 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!new_vsize && utils::has_avx512())
|
if (utils::has_avx512())
|
||||||
{
|
{
|
||||||
vmask = -1;
|
if (!new_vsize)
|
||||||
|
vmask = -1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -604,7 +605,7 @@ void asmjit::simd_builder::vec_load_unaligned(u32 esize, const Operand& v, const
|
|||||||
this->emit(x86::Inst::kIdVpinsrw, x86::Xmm(v.id()), x86::Xmm(v.id()), src, Imm(0));
|
this->emit(x86::Inst::kIdVpinsrw, x86::Xmm(v.id()), x86::Xmm(v.id()), src, Imm(0));
|
||||||
else if (vsize == 2)
|
else if (vsize == 2)
|
||||||
this->emit(x86::Inst::kIdPinsrw, v, src, Imm(0));
|
this->emit(x86::Inst::kIdPinsrw, v, src, Imm(0));
|
||||||
else if (vmask && vmask < 8)
|
else if ((vmask && vmask < 8) || vsize >= 64)
|
||||||
this->emit(x86::Inst::kIdVmovdqu16, v, src);
|
this->emit(x86::Inst::kIdVmovdqu16, v, src);
|
||||||
else
|
else
|
||||||
return vec_load_unaligned(vsize, v, src);
|
return vec_load_unaligned(vsize, v, src);
|
||||||
@ -616,7 +617,7 @@ void asmjit::simd_builder::vec_load_unaligned(u32 esize, const Operand& v, const
|
|||||||
this->emit(x86::Inst::kIdVmovd, x86::Xmm(v.id()), src);
|
this->emit(x86::Inst::kIdVmovd, x86::Xmm(v.id()), src);
|
||||||
else if (vsize == 4)
|
else if (vsize == 4)
|
||||||
this->emit(x86::Inst::kIdMovd, v, src);
|
this->emit(x86::Inst::kIdMovd, v, src);
|
||||||
else if (vmask && vmask < 8)
|
else if ((vmask && vmask < 8) || vsize >= 64)
|
||||||
this->emit(x86::Inst::kIdVmovdqu32, v, src);
|
this->emit(x86::Inst::kIdVmovdqu32, v, src);
|
||||||
else
|
else
|
||||||
return vec_load_unaligned(vsize, v, src);
|
return vec_load_unaligned(vsize, v, src);
|
||||||
@ -628,7 +629,7 @@ void asmjit::simd_builder::vec_load_unaligned(u32 esize, const Operand& v, const
|
|||||||
this->emit(x86::Inst::kIdVmovq, x86::Xmm(v.id()), src);
|
this->emit(x86::Inst::kIdVmovq, x86::Xmm(v.id()), src);
|
||||||
else if (vsize == 8)
|
else if (vsize == 8)
|
||||||
this->emit(x86::Inst::kIdMovq, v, src);
|
this->emit(x86::Inst::kIdMovq, v, src);
|
||||||
else if (vmask && vmask < 8)
|
else if ((vmask && vmask < 8) || vsize >= 64)
|
||||||
this->emit(x86::Inst::kIdVmovdqu64, v, src);
|
this->emit(x86::Inst::kIdVmovdqu64, v, src);
|
||||||
else
|
else
|
||||||
return vec_load_unaligned(vsize, v, src);
|
return vec_load_unaligned(vsize, v, src);
|
||||||
@ -636,7 +637,9 @@ void asmjit::simd_builder::vec_load_unaligned(u32 esize, const Operand& v, const
|
|||||||
else if (esize >= 16)
|
else if (esize >= 16)
|
||||||
{
|
{
|
||||||
ensure(vsize >= 16);
|
ensure(vsize >= 16);
|
||||||
if (utils::has_avx())
|
if ((vmask && vmask < 8) || vsize >= 64)
|
||||||
|
this->emit(x86::Inst::kIdVmovdqu64, v, src); // Not really needed
|
||||||
|
else if (utils::has_avx())
|
||||||
this->emit(x86::Inst::kIdVmovdqu, v, src);
|
this->emit(x86::Inst::kIdVmovdqu, v, src);
|
||||||
else
|
else
|
||||||
this->emit(x86::Inst::kIdMovups, v, src);
|
this->emit(x86::Inst::kIdMovups, v, src);
|
||||||
|
@ -323,11 +323,20 @@ namespace asmjit
|
|||||||
// Build single last iteration (masked)
|
// Build single last iteration (masked)
|
||||||
this->test(reg_cnt, reg_cnt);
|
this->test(reg_cnt, reg_cnt);
|
||||||
this->jz(exit);
|
this->jz(exit);
|
||||||
this->bzhi(reg_cnt, x86::Mem(consts[~u128()], 0), reg_cnt);
|
|
||||||
this->kmovq(x86::k7, reg_cnt);
|
if (esize == 1 && vsize == 64)
|
||||||
|
{
|
||||||
|
this->bzhi(reg_cnt.r64(), x86::Mem(consts[~u128()], 0), reg_cnt.r64());
|
||||||
|
this->kmovq(x86::k7, reg_cnt.r64());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
this->bzhi(reg_cnt.r32(), x86::Mem(consts[~u128()], 0), reg_cnt.r32());
|
||||||
|
this->kmovd(x86::k7, reg_cnt.r32());
|
||||||
|
}
|
||||||
|
|
||||||
vmask = 7;
|
vmask = 7;
|
||||||
build();
|
build();
|
||||||
vmask = -1;
|
|
||||||
|
|
||||||
// Rollout reduction step
|
// Rollout reduction step
|
||||||
this->bind(exit);
|
this->bind(exit);
|
||||||
|
Loading…
Reference in New Issue
Block a user