1
0
mirror of https://github.com/RPCS3/rpcs3.git synced 2024-11-23 03:02:53 +01:00

LLVM DSL: rewrite zshuffle, shuffle2, build

Add llvm_const_vector template.
This commit is contained in:
Nekotekina 2019-04-24 16:05:29 +03:00
parent b02503963e
commit 3e0b45719d
2 changed files with 136 additions and 100 deletions

View File

@ -395,6 +395,23 @@ struct llvm_const_float
} }
}; };
// DSL node holding the lane values of a constant vector.
// N is the element count the caller supplied (build() passes sizeof...(Args));
// T is the vector type, whose llvm_value_t<T>::is_vector must compare equal to N.
template <uint N, typename T>
struct llvm_const_vector
{
	using type = T;

	// Lane values, passed unchanged to llvm::ConstantDataVector::get()
	T data;

	// Holds when N is non-zero and llvm_value_t<T>::is_vector equals N;
	// build() uses this flag in its enable_if constraint
	static constexpr bool is_ok = N && llvm_value_t<T>::is_vector == N;

	// Produce the LLVM constant for `data` in the context owned by `ir`
	llvm::Value* eval(llvm::IRBuilder<>* ir) const
	{
		// Checked here rather than at class scope, so merely naming the type does not fail
		static_assert(is_ok, "llvm_const_vector<>: invalid type");

		auto& context = ir->getContext();
		return llvm::ConstantDataVector::get(context, data);
	}
};
template <typename A1, typename A2, typename T = llvm_common_t<A1, A2>> template <typename A1, typename A2, typename T = llvm_common_t<A1, A2>>
struct llvm_add struct llvm_add
{ {
@ -1497,6 +1514,48 @@ struct llvm_splat
} }
}; };
// DSL node: shuffle one vector against an all-zero vector of the same type.
// N is the number of result lanes; A1 is the operand expression type and
// T (llvm_common_t<A1>) is its vector type.
template <uint N, typename A1, typename T = llvm_common_t<A1>>
struct llvm_zshuffle
{
	// Result: N elements of the operand's element type
	using type = std::remove_extent_t<T>[N];

	// Source operand
	llvm_expr_t<A1> a1;

	// One shuffle-mask entry per result lane
	u32 index_array[N];

	static_assert(llvm_value_t<T>::is_vector, "llvm_zshuffle<>: invalid type");

	// Holds when T is a vector type (is_vector is non-zero)
	static constexpr bool is_ok = llvm_value_t<T>::is_vector != 0;

	// Evaluate the operand, then emit the shufflevector instruction
	llvm::Value* eval(llvm::IRBuilder<>* ir) const
	{
		const auto source = a1.eval(ir);

		// The second shuffle operand is all zeros, typed like the source
		const auto zeros = llvm::ConstantAggregateZero::get(source->getType());

		return ir->CreateShuffleVector(source, zeros, index_array);
	}
};
// DSL node: shuffle two vectors into an N-lane result.
// A1/A2 are the operand expression types; T (llvm_common_t<A1, A2>) is
// their common vector type.
template <uint N, typename A1, typename A2, typename T = llvm_common_t<A1, A2>>
struct llvm_shuffle2
{
	// Result: N elements of the operands' element type
	using type = std::remove_extent_t<T>[N];

	// First and second shuffle operands
	llvm_expr_t<A1> a1;
	llvm_expr_t<A2> a2;

	// One shuffle-mask entry per result lane
	u32 index_array[N];

	static_assert(llvm_value_t<T>::is_vector, "llvm_shuffle2<>: invalid type");

	// Holds when T is a vector type (is_vector is non-zero)
	static constexpr bool is_ok = llvm_value_t<T>::is_vector != 0;

	// Evaluate both operands (first, then second) and emit the shufflevector instruction
	llvm::Value* eval(llvm::IRBuilder<>* ir) const
	{
		const auto lhs = a1.eval(ir);
		const auto rhs = a2.eval(ir);

		return ir->CreateShuffleVector(lhs, rhs, index_array);
	}
};
class cpu_translator class cpu_translator
{ {
protected: protected:
@ -1693,6 +1752,24 @@ public:
return llvm_splat<T, U>{std::forward<U>(v)}; return llvm_splat<T, U>{std::forward<U>(v)};
} }
template <typename T, typename... Args, typename = std::enable_if_t<llvm_const_vector<sizeof...(Args), T>::is_ok>>
static auto build(Args... args)
{
return llvm_const_vector<sizeof...(Args), T>{static_cast<std::remove_extent_t<T>>(args)...};
}
template <typename T, typename... Args, typename = std::enable_if_t<llvm_zshuffle<sizeof...(Args), T>::is_ok>>
static auto zshuffle(T&& v, Args... indices)
{
return llvm_zshuffle<sizeof...(Args), T>{std::forward<T>(v), {static_cast<u32>(indices)...}};
}
template <typename T, typename U, typename... Args, typename = std::enable_if_t<llvm_shuffle2<sizeof...(Args), T, U>::is_ok>>
static auto shuffle2(T&& v1, U&& v2, Args... indices)
{
return llvm_shuffle2<sizeof...(Args), T, U>{std::forward<T>(v1), std::forward<U>(v2), {static_cast<u32>(indices)...}};
}
// Average: (a + b + 1) >> 1 // Average: (a + b + 1) >> 1
template <typename T> template <typename T>
inline auto avg(T a, T b) inline auto avg(T a, T b)
@ -1714,40 +1791,6 @@ public:
return result; return result;
} }
// Shuffle single vector using all zeros second vector of the same size
template <typename T, typename T1, typename... Args>
auto zshuffle(T1 a, Args... args)
{
static_assert(sizeof(T) / sizeof(std::remove_extent_t<T>) == sizeof...(Args), "zshuffle: unexpected result type");
const u32 values[]{static_cast<u32>(args)...};
value_t<T> result;
result.value = a.eval(m_ir);
result.value = m_ir->CreateShuffleVector(result.value, llvm::ConstantInt::get(result.value->getType(), 0), values);
return result;
}
template <typename T, typename T1, typename T2, typename... Args>
auto shuffle2(T1 a, T2 b, Args... args)
{
static_assert(sizeof(T) / sizeof(std::remove_extent_t<T>) == sizeof...(Args), "shuffle2: unexpected result type");
const u32 values[]{static_cast<u32>(args)...};
value_t<T> result;
result.value = a.eval(m_ir);
result.value = m_ir->CreateShuffleVector(result.value, b.eval(m_ir), values);
return result;
}
template <typename T, typename... Args>
auto build(Args... args)
{
using value_type = std::remove_extent_t<T>;
const value_type values[]{static_cast<value_type>(args)...};
static_assert(sizeof(T) / sizeof(value_type) == sizeof...(Args), "build: unexpected number of arguments");
value_t<T> result;
result.value = llvm::ConstantDataVector::get(m_context, values);
return result;
}
template <typename... Types> template <typename... Types>
llvm::Function* get_intrinsic(llvm::Intrinsic::ID id) llvm::Function* get_intrinsic(llvm::Intrinsic::ID id)
{ {

View File

@ -5027,22 +5027,22 @@ public:
void ROTQBYBI(spu_opcode_t op) void ROTQBYBI(spu_opcode_t op)
{ {
auto sh = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
sh = eval((sh - (zshuffle<u8[16]>(get_vr<u8[16]>(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3)) & 0xf); const auto sh = (sc - (zshuffle(get_vr<u8[16]>(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3)) & 0xf;
set_vr(op.rt, pshufb(get_vr<u8[16]>(op.ra), sh)); set_vr(op.rt, pshufb(get_vr<u8[16]>(op.ra), sh));
} }
void ROTQMBYBI(spu_opcode_t op) void ROTQMBYBI(spu_opcode_t op)
{ {
auto sh = build<u8[16]>(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); const auto sc = build<u8[16]>(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127);
sh = eval(sh + (-(zshuffle<u8[16]>(get_vr<u8[16]>(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3) & 0x1f)); const auto sh = sc + (-(zshuffle(get_vr<u8[16]>(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3) & 0x1f);
set_vr(op.rt, pshufb(get_vr<u8[16]>(op.ra), sh)); set_vr(op.rt, pshufb(get_vr<u8[16]>(op.ra), sh));
} }
void SHLQBYBI(spu_opcode_t op) void SHLQBYBI(spu_opcode_t op)
{ {
auto sh = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
sh = eval(sh - (zshuffle<u8[16]>(get_vr<u8[16]>(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3)); const auto sh = sc - (zshuffle(get_vr<u8[16]>(op.rb), 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) >> 3);
set_vr(op.rt, pshufb(get_vr<u8[16]>(op.ra), sh)); set_vr(op.rt, pshufb(get_vr<u8[16]>(op.ra), sh));
} }
@ -5081,30 +5081,30 @@ public:
void ROTQBI(spu_opcode_t op) void ROTQBI(spu_opcode_t op)
{ {
const auto a = get_vr(op.ra); const auto a = get_vr(op.ra);
const auto b = zshuffle<u32[4]>(get_vr(op.rb) & 0x7, 3, 3, 3, 3); const auto b = zshuffle(get_vr(op.rb) & 0x7, 3, 3, 3, 3);
set_vr(op.rt, fshl(a, zshuffle<u32[4]>(a, 3, 0, 1, 2), b)); set_vr(op.rt, fshl(a, zshuffle(a, 3, 0, 1, 2), b));
} }
void ROTQMBI(spu_opcode_t op) void ROTQMBI(spu_opcode_t op)
{ {
const auto a = get_vr(op.ra); const auto a = get_vr(op.ra);
const auto b = zshuffle<u32[4]>(-get_vr(op.rb) & 0x7, 3, 3, 3, 3); const auto b = zshuffle(-get_vr(op.rb) & 0x7, 3, 3, 3, 3);
set_vr(op.rt, fshr(zshuffle<u32[4]>(a, 1, 2, 3, 4), a, b)); set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, b));
} }
void SHLQBI(spu_opcode_t op) void SHLQBI(spu_opcode_t op)
{ {
const auto a = get_vr(op.ra); const auto a = get_vr(op.ra);
const auto b = zshuffle<u32[4]>(get_vr(op.rb) & 0x7, 3, 3, 3, 3); const auto b = zshuffle(get_vr(op.rb) & 0x7, 3, 3, 3, 3);
set_vr(op.rt, fshl(a, zshuffle<u32[4]>(a, 4, 0, 1, 2), b)); set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b));
} }
void ROTQBY(spu_opcode_t op) void ROTQBY(spu_opcode_t op)
{ {
const auto a = get_vr<u8[16]>(op.ra); const auto a = get_vr<u8[16]>(op.ra);
const auto b = get_vr<u8[16]>(op.rb); const auto b = get_vr<u8[16]>(op.rb);
auto sh = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
sh = eval((sh - zshuffle<u8[16]>(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12)) & 0xf); const auto sh = eval((sc - zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12)) & 0xf);
set_vr(op.rt, pshufb(a, sh)); set_vr(op.rt, pshufb(a, sh));
} }
@ -5112,8 +5112,8 @@ public:
{ {
const auto a = get_vr<u8[16]>(op.ra); const auto a = get_vr<u8[16]>(op.ra);
const auto b = get_vr<u8[16]>(op.rb); const auto b = get_vr<u8[16]>(op.rb);
auto sh = build<u8[16]>(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); const auto sc = build<u8[16]>(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127);
sh = eval(sh + (-zshuffle<u8[16]>(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) & 0x1f)); const auto sh = sc + (-zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) & 0x1f);
set_vr(op.rt, pshufb(a, sh)); set_vr(op.rt, pshufb(a, sh));
} }
@ -5121,17 +5121,17 @@ public:
{ {
const auto a = get_vr<u8[16]>(op.ra); const auto a = get_vr<u8[16]>(op.ra);
const auto b = get_vr<u8[16]>(op.rb); const auto b = get_vr<u8[16]>(op.rb);
auto sh = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
sh = eval(sh - (zshuffle<u8[16]>(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) & 0x1f)); const auto sh = sc - (zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) & 0x1f);
set_vr(op.rt, pshufb(a, sh)); set_vr(op.rt, pshufb(a, sh));
} }
void ORX(spu_opcode_t op) void ORX(spu_opcode_t op)
{ {
const auto a = get_vr(op.ra); const auto a = get_vr(op.ra);
const auto x = zshuffle<u32[4]>(a, 2, 3, 0, 1) | a; const auto x = zshuffle(a, 2, 3, 0, 1) | a;
const auto y = zshuffle<u32[4]>(x, 1, 0, 3, 2) | x; const auto y = zshuffle(x, 1, 0, 3, 2) | x;
set_vr(op.rt, zshuffle<u32[4]>(y, 4, 4, 4, 3)); set_vr(op.rt, zshuffle(y, 4, 4, 4, 3));
} }
void CBD(spu_opcode_t op) void CBD(spu_opcode_t op)
@ -5170,44 +5170,44 @@ public:
{ {
const auto a = get_vr(op.ra); const auto a = get_vr(op.ra);
const auto b = eval(get_imm(op.i7, false) & 0x7); const auto b = eval(get_imm(op.i7, false) & 0x7);
set_vr(op.rt, fshl(a, zshuffle<u32[4]>(a, 3, 0, 1, 2), b)); set_vr(op.rt, fshl(a, zshuffle(a, 3, 0, 1, 2), b));
} }
void ROTQMBII(spu_opcode_t op) void ROTQMBII(spu_opcode_t op)
{ {
const auto a = get_vr(op.ra); const auto a = get_vr(op.ra);
const auto b = eval(-get_imm(op.i7, false) & 0x7); const auto b = eval(-get_imm(op.i7, false) & 0x7);
set_vr(op.rt, fshr(zshuffle<u32[4]>(a, 1, 2, 3, 4), a, b)); set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, b));
} }
void SHLQBII(spu_opcode_t op) void SHLQBII(spu_opcode_t op)
{ {
const auto a = get_vr(op.ra); const auto a = get_vr(op.ra);
const auto b = eval(get_imm(op.i7, false) & 0x7); const auto b = eval(get_imm(op.i7, false) & 0x7);
set_vr(op.rt, fshl(a, zshuffle<u32[4]>(a, 4, 0, 1, 2), b)); set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b));
} }
void ROTQBYI(spu_opcode_t op) void ROTQBYI(spu_opcode_t op)
{ {
const auto a = get_vr<u8[16]>(op.ra); const auto a = get_vr<u8[16]>(op.ra);
auto sh = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
sh = eval((sh - get_imm<u8[16]>(op.i7, false)) & 0xf); const auto sh = (sc - get_imm<u8[16]>(op.i7, false)) & 0xf;
set_vr(op.rt, pshufb(a, sh)); set_vr(op.rt, pshufb(a, sh));
} }
void ROTQMBYI(spu_opcode_t op) void ROTQMBYI(spu_opcode_t op)
{ {
const auto a = get_vr<u8[16]>(op.ra); const auto a = get_vr<u8[16]>(op.ra);
auto sh = build<u8[16]>(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); const auto sc = build<u8[16]>(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127);
sh = eval(sh + (-get_imm<u8[16]>(op.i7, false) & 0x1f)); const auto sh = sc + (-get_imm<u8[16]>(op.i7, false) & 0x1f);
set_vr(op.rt, pshufb(a, sh)); set_vr(op.rt, pshufb(a, sh));
} }
void SHLQBYI(spu_opcode_t op) void SHLQBYI(spu_opcode_t op)
{ {
const auto a = get_vr<u8[16]>(op.ra); const auto a = get_vr<u8[16]>(op.ra);
auto sh = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
sh = eval(sh - (get_imm<u8[16]>(op.i7, false) & 0x1f)); const auto sh = sc - (get_imm<u8[16]>(op.i7, false) & 0x1f);
set_vr(op.rt, pshufb(a, sh)); set_vr(op.rt, pshufb(a, sh));
} }
@ -5242,8 +5242,8 @@ public:
const auto b = get_vr<u16[8]>(op.rb); const auto b = get_vr<u16[8]>(op.rb);
const auto ahs = eval((a >> 8) + (a & 0xff)); const auto ahs = eval((a >> 8) + (a & 0xff));
const auto bhs = eval((b >> 8) + (b & 0xff)); const auto bhs = eval((b >> 8) + (b & 0xff));
const auto lsh = shuffle2<u16[8]>(ahs, bhs, 0, 9, 2, 11, 4, 13, 6, 15); const auto lsh = shuffle2(ahs, bhs, 0, 9, 2, 11, 4, 13, 6, 15);
const auto hsh = shuffle2<u16[8]>(ahs, bhs, 1, 8, 3, 10, 5, 12, 7, 14); const auto hsh = shuffle2(ahs, bhs, 1, 8, 3, 10, 5, 12, 7, 14);
set_vr(op.rt, lsh + hsh); set_vr(op.rt, lsh + hsh);
} }
@ -5952,7 +5952,7 @@ public:
{ {
if (g_cfg.core.spu_accurate_xfloat) if (g_cfg.core.spu_accurate_xfloat)
{ {
const auto r = shuffle2<f64[2]>(get_vr<f64[4]>(op.ra), fsplat<f64[4]>(0.), 1, 3); const auto r = shuffle2(get_vr<f64[4]>(op.ra), fsplat<f64[4]>(0.), 1, 3);
const auto d = bitcast<s64[2]>(r); const auto d = bitcast<s64[2]>(r);
const auto a = eval(d & 0x7fffffffffffffff); const auto a = eval(d & 0x7fffffffffffffff);
const auto s = eval(d & 0x8000000000000000); const auto s = eval(d & 0x8000000000000000);
@ -5963,7 +5963,7 @@ public:
else else
{ {
value_t<f64[2]> r; value_t<f64[2]> r;
r.value = m_ir->CreateFPExt(shuffle2<f32[2]>(get_vr<f32[4]>(op.ra), fsplat<f32[4]>(0.), 1, 3).value, get_type<f64[2]>()); r.value = m_ir->CreateFPExt(shuffle2(get_vr<f32[4]>(op.ra), fsplat<f32[4]>(0.), 1, 3).eval(m_ir), get_type<f64[2]>());
set_vr(op.rt, r); set_vr(op.rt, r);
} }
} }
@ -5979,13 +5979,13 @@ public:
const auto i = select(a > 0x47f0000000000000, eval(s | 0x47f0000000000000), d); const auto i = select(a > 0x47f0000000000000, eval(s | 0x47f0000000000000), d);
const auto n = select(a > 0x7ff0000000000000, splat<s64[2]>(0x47f8000000000000), i); const auto n = select(a > 0x7ff0000000000000, splat<s64[2]>(0x47f8000000000000), i);
const auto z = select(a < 0x3810000000000000, s, n); const auto z = select(a < 0x3810000000000000, s, n);
set_vr(op.rt, shuffle2<f64[4]>(bitcast<f64[2]>(z), fsplat<f64[2]>(0.), 2, 0, 3, 1), false); set_vr(op.rt, shuffle2(bitcast<f64[2]>(z), fsplat<f64[2]>(0.), 2, 0, 3, 1), false);
} }
else else
{ {
value_t<f32[2]> r; value_t<f32[2]> r;
r.value = m_ir->CreateFPTrunc(get_vr<f64[2]>(op.ra).value, get_type<f32[2]>()); r.value = m_ir->CreateFPTrunc(get_vr<f64[2]>(op.ra).value, get_type<f32[2]>());
set_vr(op.rt, shuffle2<f32[4]>(r, fsplat<f32[2]>(0.), 2, 0, 3, 1)); set_vr(op.rt, shuffle2(r, fsplat<f32[2]>(0.), 2, 0, 3, 1));
} }
} }
@ -6214,7 +6214,7 @@ public:
if (auto ca = llvm::dyn_cast<llvm::Constant>(a.value)) if (auto ca = llvm::dyn_cast<llvm::Constant>(a.value))
{ {
v128 data = get_const_vector(ca, m_pos, 25971); v128 data = get_const_vector(ca, m_pos, 25971);
r = build<f64[4]>(data._s32[0], data._s32[1], data._s32[2], data._s32[3]); r.value = build<f64[4]>(data._s32[0], data._s32[1], data._s32[2], data._s32[3]).eval(m_ir);
} }
else else
{ {
@ -6255,7 +6255,7 @@ public:
if (auto ca = llvm::dyn_cast<llvm::Constant>(a.value)) if (auto ca = llvm::dyn_cast<llvm::Constant>(a.value))
{ {
v128 data = get_const_vector(ca, m_pos, 20971); v128 data = get_const_vector(ca, m_pos, 20971);
r = build<f64[4]>(data._u32[0], data._u32[1], data._u32[2], data._u32[3]); r.value = build<f64[4]>(data._u32[0], data._u32[1], data._u32[2], data._u32[3]).eval(m_ir);
} }
else else
{ {
@ -6286,38 +6286,41 @@ public:
} }
} }
// Emit a 16-byte store of `data` at offset `addr` from m_lsptr.
// The lanes are reversed (15..0) before the store.
// NOTE(review): presumably this converts between register and local-storage byte order - confirm.
void make_store_ls(value_t<u64> addr, value_t<u8[16]> data)
{
	const auto reversed = zshuffle(data, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);

	// Address m_lsptr + addr, reinterpreted as a pointer to u8[16]
	const auto ls_ptr = m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>());

	m_ir->CreateStore(reversed.eval(m_ir), ls_ptr);
}
// Emit a 16-byte load from offset `addr` relative to m_lsptr and return a
// shuffle expression that reverses the loaded lanes (15..0).
// NOTE(review): presumably this converts between local-storage and register byte order - confirm.
auto make_load_ls(value_t<u64> addr)
{
	// Address m_lsptr + addr, reinterpreted as a pointer to u8[16]
	const auto ls_ptr = m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>());

	value_t<u8[16]> loaded;
	loaded.value = m_ir->CreateLoad(ls_ptr);

	return zshuffle(loaded, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
}
void STQX(spu_opcode_t op) void STQX(spu_opcode_t op)
{ {
value_t<u64> addr = eval(zext<u64>((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0)); value_t<u64> addr = eval(zext<u64>((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0));
value_t<u8[16]> r = get_vr<u8[16]>(op.rt); make_store_ls(addr, get_vr<u8[16]>(op.rt));
r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
m_ir->CreateStore(r.value, m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()));
} }
void LQX(spu_opcode_t op) void LQX(spu_opcode_t op)
{ {
value_t<u64> addr = eval(zext<u64>((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0)); value_t<u64> addr = eval(zext<u64>((extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)) & 0x3fff0));
value_t<u8[16]> r; set_vr(op.rt, make_load_ls(addr));
r.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()));
r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
set_vr(op.rt, r);
} }
void STQA(spu_opcode_t op) void STQA(spu_opcode_t op)
{ {
value_t<u64> addr = eval((get_imm<u64>(op.i16, false) << 2) & 0x3fff0); value_t<u64> addr = eval((get_imm<u64>(op.i16, false) << 2) & 0x3fff0);
value_t<u8[16]> r = get_vr<u8[16]>(op.rt); make_store_ls(addr, get_vr<u8[16]>(op.rt));
r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
m_ir->CreateStore(r.value, m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()));
} }
void LQA(spu_opcode_t op) void LQA(spu_opcode_t op)
{ {
value_t<u64> addr = eval((get_imm<u64>(op.i16, false) << 2) & 0x3fff0); value_t<u64> addr = eval((get_imm<u64>(op.i16, false) << 2) & 0x3fff0);
value_t<u8[16]> r; set_vr(op.rt, make_load_ls(addr));
r.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()));
r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
set_vr(op.rt, r);
} }
void STQR(spu_opcode_t op) // void STQR(spu_opcode_t op) //
@ -6325,9 +6328,7 @@ public:
value_t<u64> addr; value_t<u64> addr;
addr.value = m_interp_magn ? m_ir->CreateZExt(m_interp_pc, get_type<u64>()) : m_ir->getInt64(m_pos); addr.value = m_interp_magn ? m_ir->CreateZExt(m_interp_pc, get_type<u64>()) : m_ir->getInt64(m_pos);
addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & 0x3fff0); addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & 0x3fff0);
value_t<u8[16]> r = get_vr<u8[16]>(op.rt); make_store_ls(addr, get_vr<u8[16]>(op.rt));
r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
m_ir->CreateStore(r.value, m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()));
} }
void LQR(spu_opcode_t op) // void LQR(spu_opcode_t op) //
@ -6335,27 +6336,19 @@ public:
value_t<u64> addr; value_t<u64> addr;
addr.value = m_interp_magn ? m_ir->CreateZExt(m_interp_pc, get_type<u64>()) : m_ir->getInt64(m_pos); addr.value = m_interp_magn ? m_ir->CreateZExt(m_interp_pc, get_type<u64>()) : m_ir->getInt64(m_pos);
addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & 0x3fff0); addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & 0x3fff0);
value_t<u8[16]> r; set_vr(op.rt, make_load_ls(addr));
r.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()));
r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
set_vr(op.rt, r);
} }
void STQD(spu_opcode_t op) void STQD(spu_opcode_t op)
{ {
value_t<u64> addr = eval(zext<u64>((extract(get_vr(op.ra), 3) + (get_imm<u32>(op.si10) << 4)) & 0x3fff0)); value_t<u64> addr = eval(zext<u64>((extract(get_vr(op.ra), 3) + (get_imm<u32>(op.si10) << 4)) & 0x3fff0));
value_t<u8[16]> r = get_vr<u8[16]>(op.rt); make_store_ls(addr, get_vr<u8[16]>(op.rt));
r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
m_ir->CreateStore(r.value, m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()));
} }
void LQD(spu_opcode_t op) void LQD(spu_opcode_t op)
{ {
value_t<u64> addr = eval(zext<u64>((extract(get_vr(op.ra), 3) + (get_imm<u32>(op.si10) << 4)) & 0x3fff0)); value_t<u64> addr = eval(zext<u64>((extract(get_vr(op.ra), 3) + (get_imm<u32>(op.si10) << 4)) & 0x3fff0));
value_t<u8[16]> r; set_vr(op.rt, make_load_ls(addr));
r.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()));
r.value = m_ir->CreateShuffleVector(r.value, r.value, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
set_vr(op.rt, r);
} }
void make_halt(value_t<bool> cond) void make_halt(value_t<bool> cond)