mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
d648be35e8
Specified in https://github.com/WebAssembly/simd/pull/237, these instructions load the first vector lane from memory and zero the other lanes. Since these instructions are not officially part of the SIMD proposal, they are only available on an opt-in basis via LLVM intrinsics and clang builtin functions. If these instructions are merged to the proposal, this implementation will change so that the instructions will be generated from normal IR. At that point the intrinsics and builtin functions would be removed. This PR also changes the opcodes for the experimental f32x4.qfm{a,s} instructions because their opcodes conflicted with those of the v128.load{32,64}_zero instructions. The new opcodes were chosen to match those used in V8. Differential Revision: https://reviews.llvm.org/D84820
1096 lines
46 KiB
TableGen
1096 lines
46 KiB
TableGen
// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// WebAssembly SIMD operand code-gen constructs.
///
//===----------------------------------------------------------------------===//

// Instructions requiring HasSIMD128 and the simd128 prefix byte.
//
// Thin wrapper around the `I` multiclass. All operand lists, the pattern and
// both assembly strings are forwarded unchanged (the `_r` / `_s` suffixes are
// presumably the register and stack forms -- confirm against `I`). Only the
// low 8 bits of `simdop` are kept and OR'ed into 0xfd00 to form the opcode
// handed to `I`, so the default `simdop = -1` yields 0xfdff.
multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                  list<dag> pattern_r, string asmstr_r = "",
                  string asmstr_s = "", bits<32> simdop = -1> {
  defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
              !or(0xfd00, !and(0xff, simdop))>,
            Requires<[HasSIMD128]>;
}

// Instantiate the ARGUMENT multiclass (defined outside this file chunk) once
// per 128-bit vector value type, each paired with the V128 register class.
defm "" : ARGUMENT<V128, v16i8>;
defm "" : ARGUMENT<V128, v8i16>;
defm "" : ARGUMENT<V128, v4i32>;
defm "" : ARGUMENT<V128, v2i64>;
defm "" : ARGUMENT<V128, v4f32>;
defm "" : ARGUMENT<V128, v2f64>;

// Constrained immediate argument types

// ImmI8 / ImmI16: i32 immediates that fit the signed SIZE-bit range
// [-2^(SIZE-1), 2^(SIZE-1)).
foreach SIZE = [8, 16] in
def ImmI#SIZE : ImmLeaf<i32,
  "return -(1 << ("#SIZE#" - 1)) <= Imm && Imm < (1 << ("#SIZE#" - 1));"
>;

// LaneIdx2 .. LaneIdx32: i32 immediates in the half-open range [0, SIZE).
foreach SIZE = [2, 4, 8, 16, 32] in
def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;

//===----------------------------------------------------------------------===//
// Load and store
//===----------------------------------------------------------------------===//

// Load: v128.load
//
// Two variants are defined: _A32 takes a 32-bit offset and an I32 address,
// _A64 takes a 64-bit offset and an I64 address. Both use simdop 0 and carry
// no selection pattern here; patterns are attached separately by name.
let mayLoad = 1, UseNamedOperandTable = 1 in {
defm LOAD_V128_A32 :
  SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
         "v128.load\t$dst, ${off}(${addr})$p2align",
         "v128.load\t$off$p2align", 0>;
defm LOAD_V128_A64 :
  SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
         "v128.load\t$dst, ${off}(${addr})$p2align",
         "v128.load\t$off$p2align", 0>;
}

// Def load patterns from WebAssemblyInstrMemory.td for vector types.
// Every vector value type selects the same LOAD_V128 instruction; the five
// pattern multiclasses cover the different address forms.
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm : LoadPatNoOffset<vec_t, load, "LOAD_V128">;
defm : LoadPatImmOff<vec_t, load, regPlusImm, "LOAD_V128">;
defm : LoadPatImmOff<vec_t, load, or_is_add, "LOAD_V128">;
defm : LoadPatOffsetOnly<vec_t, load, "LOAD_V128">;
defm : LoadPatGlobalAddrOffOnly<vec_t, load, "LOAD_V128">;
}

// vNxM.load_splat
//
// Defines LOAD_SPLAT_<vec>_A32 and LOAD_SPLAT_<vec>_A64. The two differ only
// in the offset operand type (offset32_op / offset64_op) and the address
// register class (I32 / I64). `vec` is also the assembly mnemonic prefix.
// No selection pattern is attached here; patterns are added by name below.
multiclass SIMDLoadSplat<string vec, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_SPLAT_#vec#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs),
           (ins P2Align:$p2align, offset32_op:$off), [],
           vec#".load_splat\t$dst, ${off}(${addr})$p2align",
           vec#".load_splat\t$off$p2align", simdop>;
  defm LOAD_SPLAT_#vec#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs),
           (ins P2Align:$p2align, offset64_op:$off), [],
           vec#".load_splat\t$dst, ${off}(${addr})$p2align",
           vec#".load_splat\t$off$p2align", simdop>;
  }
}

defm "" : SIMDLoadSplat<"v8x16", 7>;
defm "" : SIMDLoadSplat<"v16x8", 8>;
defm "" : SIMDLoadSplat<"v32x4", 9>;
defm "" : SIMDLoadSplat<"v64x2", 10>;

// DAG node for WebAssemblyISD::LOAD_SPLAT: one result, one operand that must
// be of pointer type. The node has a chain, may load, and carries a memory
// operand.
def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>;
def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t,
                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// PatFrag wrapper so the node can be passed to the LoadPat* multiclasses.
def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>;

// Attach load_splat selection patterns. Each entry is
// [value type name, LOAD_SPLAT_ instruction suffix]; float vectors reuse the
// instruction of the integer vector with the same lane width.
foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"],
                ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in {
defm : LoadPatNoOffset<!cast<ValueType>(args[0]),
                       load_splat,
                       "LOAD_SPLAT_"#args[1]>;
defm : LoadPatImmOff<!cast<ValueType>(args[0]),
                     load_splat,
                     regPlusImm,
                     "LOAD_SPLAT_"#args[1]>;
defm : LoadPatImmOff<!cast<ValueType>(args[0]),
                     load_splat,
                     or_is_add,
                     "LOAD_SPLAT_"#args[1]>;
defm : LoadPatOffsetOnly<!cast<ValueType>(args[0]),
                         load_splat,
                         "LOAD_SPLAT_"#args[1]>;
defm : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]),
                                load_splat,
                                "LOAD_SPLAT_"#args[1]>;
}

// Load and extend
//
// Defines four instructions per vector type: signed (_S) and unsigned (_U)
// variants, each in an _A32 and an _A64 addressing form. The signed variant
// uses `simdop`; the unsigned variant uses `simdop + 1`. `name` is the
// mnemonic without the "_s" / "_u" suffix.
multiclass SIMDLoadExtend<ValueType vec_t, string name, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_EXTEND_S_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
           name#"_s\t$dst, ${off}(${addr})$p2align",
           name#"_s\t$off$p2align", simdop>;
  defm LOAD_EXTEND_U_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
           name#"_u\t$dst, ${off}(${addr})$p2align",
           name#"_u\t$off$p2align", !add(simdop, 1)>;
  defm LOAD_EXTEND_S_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs), (ins P2Align:$p2align, offset64_op:$off), [],
           name#"_s\t$dst, ${off}(${addr})$p2align",
           name#"_s\t$off$p2align", simdop>;
  defm LOAD_EXTEND_U_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs), (ins P2Align:$p2align, offset64_op:$off), [],
           name#"_u\t$dst, ${off}(${addr})$p2align",
           name#"_u\t$off$p2align", !add(simdop, 1)>;
  }
}

defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 1>;
defm "" : SIMDLoadExtend<v4i32, "i32x4.load16x4", 3>;
defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 5>;

// Attach extending-load patterns. `types` is [result vector type, memory lane
// type]; `exts` is [PatFrag name prefix, instruction suffix]. The PatFrag is
// looked up by name as <prefix><lane type>, e.g. sextloadvi8. Any-extending
// loads (extloadv) select the unsigned (_U) instruction.
foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in
foreach exts = [["sextloadv", "_S"],
                ["zextloadv", "_U"],
                ["extloadv", "_U"]] in {
defm : LoadPatNoOffset<types[0], !cast<PatFrag>(exts[0]#types[1]),
                       "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), regPlusImm,
                     "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add,
                     "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
                         "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
                                "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
}

// Load lane into zero vector
//
// Defines LOAD_ZERO_<vec_t>_A32 and LOAD_ZERO_<vec_t>_A64 with the full
// mnemonic given by `name`. As with the other loads, the two forms differ
// only in offset operand type and address register class, and no selection
// pattern is attached here.
multiclass SIMDLoadZero<ValueType vec_t, string name, bits<32> simdop> {
  let mayLoad = 1, UseNamedOperandTable = 1 in {
  defm LOAD_ZERO_#vec_t#_A32 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
           name#"\t$dst, ${off}(${addr})$p2align",
           name#"\t$off$p2align", simdop>;
  defm LOAD_ZERO_#vec_t#_A64 :
    SIMD_I<(outs V128:$dst),
           (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
           (outs), (ins P2Align:$p2align, offset64_op:$off), [],
           name#"\t$dst, ${off}(${addr})$p2align",
           name#"\t$off$p2align", simdop>;
  } // mayLoad = 1, UseNamedOperandTable = 1
}

// TODO: Also support v4f32 and v2f64 once the instructions are merged
// to the proposal
defm "" : SIMDLoadZero<v4i32, "v128.load32_zero", 252>;
defm "" : SIMDLoadZero<v2i64, "v128.load64_zero", 253>;

// Attach selection patterns for the load-zero intrinsics. Each entry is
// [value type name, lane width in bits]; the intrinsic is looked up by name as
// int_wasm_load<bits>_zero and the instruction as LOAD_ZERO_<value type>.
// Written as a loop, like the load_splat patterns, so the five address forms
// cannot drift apart between the two types.
foreach args = [["v4i32", "32"], ["v2i64", "64"]] in {
defm : LoadPatNoOffset<!cast<ValueType>(args[0]),
                       !cast<Intrinsic>("int_wasm_load"#args[1]#"_zero"),
                       "LOAD_ZERO_"#args[0]>;
defm : LoadPatImmOff<!cast<ValueType>(args[0]),
                     !cast<Intrinsic>("int_wasm_load"#args[1]#"_zero"),
                     regPlusImm,
                     "LOAD_ZERO_"#args[0]>;
defm : LoadPatImmOff<!cast<ValueType>(args[0]),
                     !cast<Intrinsic>("int_wasm_load"#args[1]#"_zero"),
                     or_is_add,
                     "LOAD_ZERO_"#args[0]>;
defm : LoadPatOffsetOnly<!cast<ValueType>(args[0]),
                         !cast<Intrinsic>("int_wasm_load"#args[1]#"_zero"),
                         "LOAD_ZERO_"#args[0]>;
defm : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]),
                                !cast<Intrinsic>("int_wasm_load"#args[1]#"_zero"),
                                "LOAD_ZERO_"#args[0]>;
}

// Store: v128.store
//
// Mirrors LOAD_V128: an _A32 and an _A64 form that differ only in offset
// operand type and address register class. The stored vector is the last
// input operand; there are no outputs. Both forms use simdop 11.
let mayStore = 1, UseNamedOperandTable = 1 in {
defm STORE_V128_A32 :
  SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
         "v128.store\t${off}(${addr})$p2align, $vec",
         "v128.store\t$off$p2align", 11>;
defm STORE_V128_A64 :
  SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, V128:$vec),
         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
         "v128.store\t${off}(${addr})$p2align, $vec",
         "v128.store\t$off$p2align", 11>;
}
// Def store patterns from WebAssemblyInstrMemory.td for vector types.
// Every vector value type selects the same STORE_V128 instruction.
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm : StorePatNoOffset<vec_t, store, "STORE_V128">;
defm : StorePatImmOff<vec_t, store, regPlusImm, "STORE_V128">;
defm : StorePatImmOff<vec_t, store, or_is_add, "STORE_V128">;
defm : StorePatOffsetOnly<vec_t, store, "STORE_V128">;
defm : StorePatGlobalAddrOffOnly<vec_t, store, "STORE_V128">;
}

//===----------------------------------------------------------------------===//
// Constructing SIMD values
//===----------------------------------------------------------------------===//

// Constant: v128.const
//
// Defines CONST_V128_<vec_t>. `ops` is used as the input operand list of both
// instruction forms, `pat` is the build_vector DAG matched as a `vec_t`
// value, and `args` is the operand text appended to the mnemonic.
// NOTE(review): the enclosing `let` sets Predicates to
// [HasUnimplementedSIMD128]; this presumably replaces the HasSIMD128
// requirement that SIMD_I adds -- confirm.
multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
  let isMoveImm = 1, isReMaterializable = 1,
      Predicates = [HasUnimplementedSIMD128] in
  defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
                                  [(set V128:$dst, (vec_t pat))],
                                  "v128.const\t$dst, "#args,
                                  "v128.const\t"#args, 12>;
}

// One v128.const definition per vector value type. The i8 and i16 lane
// immediates are range-checked with ImmI8 / ImmI16; the wider integer lanes
// use plain `imm` and the float lanes use `fpimm`. Only the v4i32 form is
// marked IsCanonical.
defm "" : ConstVec<v16i8,
                   (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
                        vec_i8imm_op:$i2, vec_i8imm_op:$i3,
                        vec_i8imm_op:$i4, vec_i8imm_op:$i5,
                        vec_i8imm_op:$i6, vec_i8imm_op:$i7,
                        vec_i8imm_op:$i8, vec_i8imm_op:$i9,
                        vec_i8imm_op:$iA, vec_i8imm_op:$iB,
                        vec_i8imm_op:$iC, vec_i8imm_op:$iD,
                        vec_i8imm_op:$iE, vec_i8imm_op:$iF),
                   (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
                                 ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
                                 ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
                                 ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
                   !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
                              "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
defm "" : ConstVec<v8i16,
                   (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
                        vec_i16imm_op:$i2, vec_i16imm_op:$i3,
                        vec_i16imm_op:$i4, vec_i16imm_op:$i5,
                        vec_i16imm_op:$i6, vec_i16imm_op:$i7),
                   (build_vector
                     ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
                     ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
                   "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
let IsCanonical = 1 in
defm "" : ConstVec<v4i32,
                   (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
                        vec_i32imm_op:$i2, vec_i32imm_op:$i3),
                   (build_vector (i32 imm:$i0), (i32 imm:$i1),
                                 (i32 imm:$i2), (i32 imm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<v2i64,
                   (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
                   (build_vector (i64 imm:$i0), (i64 imm:$i1)),
                   "$i0, $i1">;
defm "" : ConstVec<v4f32,
                   (ins f32imm_op:$i0, f32imm_op:$i1,
                        f32imm_op:$i2, f32imm_op:$i3),
                   (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
                                 (f32 fpimm:$i2), (f32 fpimm:$i3)),
                   "$i0, $i1, $i2, $i3">;
defm "" : ConstVec<v2f64,
                   (ins f64imm_op:$i0, f64imm_op:$i1),
                   (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
                   "$i0, $i1">;

// Shuffle lanes: shuffle
//
// Takes two vectors and sixteen 8-bit immediates $m0..$mF. The second operand
// list keeps only the sixteen immediates. No pattern is attached here; the
// custom-lowered shuffle node is mapped onto this instruction separately.
defm SHUFFLE :
  SIMD_I<(outs V128:$dst),
         (ins V128:$x, V128:$y,
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         (outs),
         (ins
           vec_i8imm_op:$m0, vec_i8imm_op:$m1,
           vec_i8imm_op:$m2, vec_i8imm_op:$m3,
           vec_i8imm_op:$m4, vec_i8imm_op:$m5,
           vec_i8imm_op:$m6, vec_i8imm_op:$m7,
           vec_i8imm_op:$m8, vec_i8imm_op:$m9,
           vec_i8imm_op:$mA, vec_i8imm_op:$mB,
           vec_i8imm_op:$mC, vec_i8imm_op:$mD,
           vec_i8imm_op:$mE, vec_i8imm_op:$mF),
         [],
         "v8x16.shuffle\t$dst, $x, $y, "#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         "v8x16.shuffle\t"#
           "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
           "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
         13>;

// Shuffles after custom lowering
//
// WebAssemblyISD::SHUFFLE has one result and 18 operands: the two input
// vectors followed by sixteen lane indices. Each index must satisfy LaneIdx32
// (0 <= idx < 32). The same SHUFFLE instruction is selected for every vector
// value type.
def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
          (vec_t (SHUFFLE (vec_t V128:$x), (vec_t V128:$y),
            (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
            (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
            (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
            (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
            (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
            (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
            (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
            (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>;
}

// Swizzle lanes: v8x16.swizzle
//
// WebAssemblyISD::SWIZZLE has one result and two operands with no further
// type constraints in its profile. The instruction pattern matches it on
// v16i8 source and mask vectors.
def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
defm SWIZZLE :
  SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
         [(set (v16i8 V128:$dst),
           (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
         "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 14>;

// The swizzle intrinsic selects the same instruction.
def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)),
          (SWIZZLE V128:$src, V128:$mask)>;

// Create vector with identical lanes: splat
//
// splatN matches a build_vector whose N operands are all the same node $x.
def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>;
def splat4 : PatFrag<(ops node:$x), (build_vector
                       node:$x, node:$x, node:$x, node:$x)>;
def splat8 : PatFrag<(ops node:$x), (build_vector
                       node:$x, node:$x, node:$x, node:$x,
                       node:$x, node:$x, node:$x, node:$x)>;
def splat16 : PatFrag<(ops node:$x), (build_vector
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x,
                        node:$x, node:$x, node:$x, node:$x)>;

// Defines SPLAT_<vec_t>: takes one scalar in register class `reg_t` and
// produces a V128. `splat_pat` is the splatN PatFrag whose lane count matches
// `vec_t`, and `vec` is the mnemonic prefix.
multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
                 PatFrag splat_pat, bits<32> simdop> {
  defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins),
                             [(set (vec_t V128:$dst), (splat_pat reg_t:$x))],
                             vec#".splat\t$dst, $x", vec#".splat", simdop>;
}

// The 8-, 16- and 32-bit integer lanes all take their scalar from I32.
defm "" : Splat<v16i8, "i8x16", I32, splat16, 15>;
defm "" : Splat<v8i16, "i16x8", I32, splat8, 16>;
defm "" : Splat<v4i32, "i32x4", I32, splat4, 17>;
defm "" : Splat<v2i64, "i64x2", I64, splat2, 18>;
defm "" : Splat<v4f32, "f32x4", F32, splat4, 19>;
defm "" : Splat<v2f64, "f64x2", F64, splat2, 20>;

// scalar_to_vector leaves high lanes undefined, so can be a splat
//
// Selects SPLAT_<vec_t> (looked up by name) for a scalar_to_vector of a
// `lane_t` value held in register class `reg_t`.
class ScalarSplatPat<ValueType vec_t, ValueType lane_t,
                     WebAssemblyRegClass reg_t> :
  Pat<(vec_t (scalar_to_vector (lane_t reg_t:$x))),
      (!cast<Instruction>("SPLAT_"#vec_t) reg_t:$x)>;

def : ScalarSplatPat<v16i8, i32, I32>;
def : ScalarSplatPat<v8i16, i32, I32>;
def : ScalarSplatPat<v4i32, i32, I32>;
def : ScalarSplatPat<v2i64, i64, I64>;
def : ScalarSplatPat<v4f32, f32, F32>;
def : ScalarSplatPat<v2f64, f64, F64>;

//===----------------------------------------------------------------------===//
// Accessing lanes
//===----------------------------------------------------------------------===//

// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
//
// Defines EXTRACT_LANE_<vec_t><suffix>. The lane index is an 8-bit immediate
// operand and the result lands in register class `reg_t`. `suffix` is appended
// to both the record name and the mnemonic ("_s" / "_u" for the narrow integer
// lanes, empty otherwise). No pattern is attached here.
multiclass ExtractLane<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
                       bits<32> simdop, string suffix = ""> {
  defm EXTRACT_LANE_#vec_t#suffix :
      SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
             (outs), (ins vec_i8imm_op:$idx), [],
             vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
             vec#".extract_lane"#suffix#"\t$idx", simdop>;
}

defm "" : ExtractLane<v16i8, "i8x16", I32, 21, "_s">;
defm "" : ExtractLane<v16i8, "i8x16", I32, 22, "_u">;
defm "" : ExtractLane<v8i16, "i16x8", I32, 24, "_s">;
defm "" : ExtractLane<v8i16, "i16x8", I32, 25, "_u">;
defm "" : ExtractLane<v4i32, "i32x4", I32, 27>;
defm "" : ExtractLane<v2i64, "i64x2", I64, 29>;
defm "" : ExtractLane<v4f32, "f32x4", F32, 31>;
defm "" : ExtractLane<v2f64, "f64x2", F64, 33>;

// Plain vector_extract with an in-range constant lane index. The 8- and
// 16-bit lanes select the unsigned (_u) extract.
def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
          (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
          (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
          (EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
          (EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
          (EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
          (EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>;

// Fold an explicit sign extension (sext_inreg) into the _s extract and an
// explicit zero-extending mask (and 0xff / 0xffff) into the _u extract.
def : Pat<
  (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
  (EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>;
def : Pat<
  (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
  (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
def : Pat<
  (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
  (EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>;
def : Pat<
  (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
  (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;

// Replace lane value: replace_lane
//
// Defines REPLACE_LANE_<vec_t>, matched from vector_insert. `imm_t` is the
// LaneIdxN leaf that bounds the constant lane index, `reg_t` / `lane_t` are
// the register class and value type of the inserted scalar, and `vec` is the
// mnemonic prefix.
multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
                       WebAssemblyRegClass reg_t, ValueType lane_t,
                       bits<32> simdop> {
  defm REPLACE_LANE_#vec_t :
      SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, reg_t:$x),
             (outs), (ins vec_i8imm_op:$idx),
             [(set V128:$dst, (vector_insert
               (vec_t V128:$vec), (lane_t reg_t:$x), (i32 imm_t:$idx)))],
             vec#".replace_lane\t$dst, $vec, $idx, $x",
             vec#".replace_lane\t$idx", simdop>;
}

defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 23>;
defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 26>;
defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 28>;
defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 30>;
defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 32>;
defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 34>;

// Lower undef lane indices to zero: a vector_insert whose index is undef is
// selected as a replace_lane of lane 0.
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
          (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>;
def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
          (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>;
def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
          (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>;
def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
          (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;

//===----------------------------------------------------------------------===//
// Comparisons
//===----------------------------------------------------------------------===//

// One lane-wise comparison instruction. The record is named
// <defm prefix>_<vec_t>. It matches a setcc with condition `cond` on two
// `vec_t` operands and produces an `out_t` vector. `vec` and `name` form the
// mnemonic "<vec>.<name>".
multiclass SIMDCondition<ValueType vec_t, ValueType out_t, string vec,
                         string name, CondCode cond, bits<32> simdop> {
  defm _#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
           [(set (out_t V128:$dst),
             (setcc (vec_t V128:$lhs), (vec_t V128:$rhs), cond)
           )],
           vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, simdop>;
}

// Integer comparison on i8x16, i16x8 and i32x4. The result type equals the
// operand type. Opcodes are baseInst, baseInst + 10 and baseInst + 20.
multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> {
  defm "" : SIMDCondition<v16i8, v16i8, "i8x16", name, cond, baseInst>;
  defm "" : SIMDCondition<v8i16, v8i16, "i16x8", name, cond,
                          !add(baseInst, 10)>;
  defm "" : SIMDCondition<v4i32, v4i32, "i32x4", name, cond,
                          !add(baseInst, 20)>;
}

// Floating-point comparison on f32x4 and f64x2. The result is the integer
// vector with the same lane width (v4i32 / v2i64). Opcodes are baseInst and
// baseInst + 6.
multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
  defm "" : SIMDCondition<v4f32, v4i32, "f32x4", name, cond, baseInst>;
  defm "" : SIMDCondition<v2f64, v2i64, "f64x2", name, cond,
                          !add(baseInst, 6)>;
}

// The integer and floating-point forms of eq / ne share a defm prefix; the
// record names stay distinct because SIMDCondition appends the value type.

// Equality: eq
let isCommutable = 1 in {
defm EQ : SIMDConditionInt<"eq", SETEQ, 35>;
defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>;
} // isCommutable = 1

// Non-equality: ne
let isCommutable = 1 in {
defm NE : SIMDConditionInt<"ne", SETNE, 36>;
defm NE : SIMDConditionFP<"ne", SETUNE, 66>;
} // isCommutable = 1

// Less than: lt_s / lt_u / lt
defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>;
defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>;
defm LT : SIMDConditionFP<"lt", SETOLT, 67>;

// Greater than: gt_s / gt_u / gt
defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>;
defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>;
defm GT : SIMDConditionFP<"gt", SETOGT, 68>;

// Less than or equal: le_s / le_u / le
defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>;
defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>;
defm LE : SIMDConditionFP<"le", SETOLE, 69>;

// Greater than or equal: ge_s / ge_u / ge
defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>;
defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>;
defm GE : SIMDConditionFP<"ge", SETOGE, 70>;

// Lower float comparisons that don't care about NaN to standard WebAssembly
// float comparisons. These instructions are generated with nnan and in the
// target-independent expansion of unordered comparisons and ordered ne.
//
// Each entry pairs a NaN-agnostic setcc fragment with the instruction to
// select for it.
foreach nodes = [[seteq, EQ_v4f32], [setne, NE_v4f32], [setlt, LT_v4f32],
                 [setgt, GT_v4f32], [setle, LE_v4f32], [setge, GE_v4f32]] in
def : Pat<(v4i32 (nodes[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
          (v4i32 (nodes[1] (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;

foreach nodes = [[seteq, EQ_v2f64], [setne, NE_v2f64], [setlt, LT_v2f64],
                 [setgt, GT_v2f64], [setle, LE_v2f64], [setge, GE_v2f64]] in
def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
          (v2i64 (nodes[1] (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;


//===----------------------------------------------------------------------===//
// Bitwise operations
//===----------------------------------------------------------------------===//

// Generic two-operand vector instruction named <defm prefix>_<vec_t>. It
// matches `node` applied to two `vec_t` operands; the mnemonic is
// "<vec>.<name>".
multiclass SIMDBinary<ValueType vec_t, string vec, SDNode node, string name,
                      bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
                        (outs), (ins),
                        [(set (vec_t V128:$dst),
                          (node (vec_t V128:$lhs), (vec_t V128:$rhs))
                        )],
                        vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name,
                        simdop>;
}

// Bitwise operations are lane-agnostic: all four integer value types get the
// same "v128" mnemonic and the same opcode.
multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop> {
  defm "" : SIMDBinary<v16i8, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v8i16, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v4i32, "v128", node, name, simdop>;
  defm "" : SIMDBinary<v2i64, "v128", node, name, simdop>;
}

// Generic one-operand vector instruction named <defm prefix>_<vec_t>. It
// matches `node` applied to one `vec_t` operand and produces a `vec_t`.
multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name,
                     bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
                        [(set (vec_t V128:$dst),
                          (vec_t (node (vec_t V128:$vec)))
                        )],
                        vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
}

// Bitwise logic: v128.not
foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 77>;

// Bitwise logic: v128.and / v128.or / v128.xor
let isCommutable = 1 in {
defm AND : SIMDBitwise<and, "and", 78>;
defm OR : SIMDBitwise<or, "or", 80>;
defm XOR : SIMDBitwise<xor, "xor", 81>;
} // isCommutable = 1

// Bitwise logic: v128.andnot
// andnot(left, right) matches left & ~right. It is defined outside the
// isCommutable block above, so it is not marked commutable.
def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>;
defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>;

// Bitwise select: v128.bitselect
//
// One BITSELECT_<vec_t> record per vector value type, all with opcode 82. Each
// matches the bitselect intrinsic with all three operands typed `vec_t`.
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
  defm BITSELECT_#vec_t :
    SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins),
           [(set (vec_t V128:$dst),
             (vec_t (int_wasm_bitselect
               (vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c)
             ))
           )],
           "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>;

// Bitselect is equivalent to (c & v1) | (~c & v2), so select BITSELECT for
// that expression on the integer vector types.
foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
  def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)),
              (and (vnot V128:$c), (vec_t V128:$v2)))),
            (!cast<Instruction>("BITSELECT_"#vec_t)
              V128:$v1, V128:$v2, V128:$c)>;

// Also implement vselect in terms of bitselect.
// `types` is [value type, condition type]; for the float vectors the
// condition is the integer vector with the same lane width.
foreach types = [[v16i8, v16i8], [v8i16, v8i16], [v4i32, v4i32], [v2i64, v2i64],
                 [v4f32, v4i32], [v2f64, v2i64]] in
  def : Pat<(types[0] (vselect
              (types[1] V128:$c), (types[0] V128:$v1), (types[0] V128:$v2)
            )),
            (!cast<Instruction>("BITSELECT_"#types[0])
              V128:$v1, V128:$v2, V128:$c
            )>;

// MVP select on v128 values
//
// These records instantiate `I` directly with opcode 0x1b rather than going
// through SIMD_I, so this definition adds neither the 0xfd prefix nor the
// HasSIMD128 requirement.
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm SELECT_#vec_t : I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond),
                       (outs), (ins),
                       [(set V128:$dst,
                         (select I32:$cond,
                           (vec_t V128:$lhs), (vec_t V128:$rhs)
                         )
                       )],
                       "v128.select\t$dst, $lhs, $rhs, $cond",
                       "v128.select", 0x1b>;

// ISD::SELECT requires its operand to conform to getBooleanContents, but
// WebAssembly's select interprets any non-zero value as true, so we can fold
// a setne with 0 into a select.
def : Pat<(select
            (i32 (setne I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs)
          ),
          (!cast<Instruction>("SELECT_"#vec_t)
            V128:$lhs, V128:$rhs, I32:$cond
          )>;

// And again, this time with seteq instead of setne and the arms reversed.
def : Pat<(select
            (i32 (seteq I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs)
          ),
          (!cast<Instruction>("SELECT_"#vec_t)
            V128:$rhs, V128:$lhs, I32:$cond
          )>;
} // foreach vec_t

//===----------------------------------------------------------------------===//
// Integer unary arithmetic
//===----------------------------------------------------------------------===//

// Unary operation on all four integer vector types. Opcodes are spaced 32
// apart: baseInst, +32, +64, +96 for i8x16, i16x8, i32x4, i64x2.
multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
  defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
  defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// Reduction of one `vec_t` vector to an i32 held in I32, named
// <defm prefix>_<vec_t>.
multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name,
                         bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
                        [(set I32:$dst, (i32 (op (vec_t V128:$vec))))],
                        vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
}

// Reduction on all four integer vector types, with the same opcode spacing
// of 32 as SIMDUnaryInt.
multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> {
  defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>;
  defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 32)>;
  defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 64)>;
  defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 96)>;
}

// Integer vector negation, expressed as (0 - in) on an all-zeros vector.
def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;

// Integer absolute value: abs
defm ABS : SIMDUnaryInt<abs, "abs", 96>;

// Integer negation: neg
defm NEG : SIMDUnaryInt<ivneg, "neg", 97>;

// Any lane true: any_true
defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 98>;

// All lanes true: all_true
defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 99>;

// Reductions already return 0 or 1, so and 1, setne 0, and seteq 1
// can be folded out.
//
// `reduction` is [intrinsic name, instruction prefix]; both records are
// looked up by name, the instruction as <prefix>_<ty>.
foreach reduction =
  [["int_wasm_anytrue", "ANYTRUE"], ["int_wasm_alltrue", "ALLTRUE"]] in
foreach ty = [v16i8, v8i16, v4i32, v2i64] in {
// (reduction & 1) -> reduction
def : Pat<(i32 (and
            (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
            (i32 1)
          )),
          (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
// (reduction != 0) -> reduction
def : Pat<(i32 (setne
            (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
            (i32 0)
          )),
          (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
// (reduction == 1) -> reduction
def : Pat<(i32 (seteq
            (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
            (i32 1)
          )),
          (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
}

// Defines <defm prefix>_<vec_t>, matched from the bitmask intrinsic on a
// `vec_t` vector and producing an i32 in I32.
multiclass SIMDBitmask<ValueType vec_t, string vec, bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
                        [(set I32:$dst,
                          (i32 (int_wasm_bitmask (vec_t V128:$vec)))
                        )],
                        vec#".bitmask\t$dst, $vec", vec#".bitmask", simdop>;
}

// Defined for i8x16, i16x8 and i32x4 only; opcodes are 32 apart.
defm BITMASK : SIMDBitmask<v16i8, "i8x16", 100>;
defm BITMASK : SIMDBitmask<v8i16, "i16x8", 132>;
defm BITMASK : SIMDBitmask<v4i32, "i32x4", 164>;

//===----------------------------------------------------------------------===//
// Bit shifts
//===----------------------------------------------------------------------===//

// Shift instruction named <defm prefix>_<vec_t>: a V128 vector shifted by a
// scalar amount held in I32.
multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, string name,
                     bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
                        (outs), (ins),
                        [(set (vec_t V128:$dst), (node V128:$vec, I32:$x))],
                        vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>;
}

// Shift on all four integer vector types. Opcodes are spaced 32 apart:
// baseInst, +32, +64, +96 for i8x16, i16x8, i32x4, i64x2.
multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
  defm "" : SIMDShift<v16i8, "i8x16", node, name, baseInst>;
  defm "" : SIMDShift<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
  defm "" : SIMDShift<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
  defm "" : SIMDShift<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}

// WebAssembly SIMD shifts are nonstandard: the shift amount is a scalar i32
// rather than a vector, so they are modeled with custom target nodes.
// Type profile: one result and two operands; the result is a vector, operand 1
// has the same type as the result, and operand 2 (the shift amount) is i32.
def wasm_shift_t :
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def wasm_shl   : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;

// Left shift by scalar: shl
defm SHL : SIMDShiftInt<wasm_shl, "shl", 107>;

// Right shift by scalar: shr_s / shr_u
defm SHR_S : SIMDShiftInt<wasm_shr_s, "shr_s", 108>;
defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Integer binary arithmetic
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Instantiates SIMDBinary for the i16x8, i32x4 and i64x2 shapes (no i8x16
// variant). The opcodes are `base_opc + 32`, `+ 64` and `+ 96` respectively;
// `base_opc` itself is not assigned to any instruction here.
multiclass SIMDBinaryIntNoI8x16<SDNode bin_op, string mnemonic,
                                bits<32> base_opc> {
  defm "" : SIMDBinary<v8i16, "i16x8", bin_op, mnemonic, !add(base_opc, 32)>;
  defm "" : SIMDBinary<v4i32, "i32x4", bin_op, mnemonic, !add(base_opc, 64)>;
  defm "" : SIMDBinary<v2i64, "i64x2", bin_op, mnemonic, !add(base_opc, 96)>;
}
|
|
|
|
// Instantiates SIMDBinary for the two narrow integer shapes only:
// i8x16 at `base_opc` and i16x8 at `base_opc + 32`.
multiclass SIMDBinaryIntSmall<SDNode bin_op, string mnemonic,
                              bits<32> base_opc> {
  defm "" : SIMDBinary<v16i8, "i8x16", bin_op, mnemonic, base_opc>;
  defm "" : SIMDBinary<v8i16, "i16x8", bin_op, mnemonic, !add(base_opc, 32)>;
}
|
|
|
|
// Instantiates a binary integer instruction for every shape except i64x2:
// the i8x16 and i16x8 variants come from SIMDBinaryIntSmall, and i32x4 is
// added at `base_opc + 64`.
multiclass SIMDBinaryIntNoI64x2<SDNode bin_op, string mnemonic,
                                bits<32> base_opc> {
  defm "" : SIMDBinaryIntSmall<bin_op, mnemonic, base_opc>;
  defm "" : SIMDBinary<v4i32, "i32x4", bin_op, mnemonic, !add(base_opc, 64)>;
}
|
|
|
|
// Instantiates a binary integer instruction for all four integer shapes:
// everything from SIMDBinaryIntNoI64x2 plus i64x2 at `base_opc + 96`.
multiclass SIMDBinaryInt<SDNode bin_op, string mnemonic, bits<32> base_opc> {
  defm "" : SIMDBinaryIntNoI64x2<bin_op, mnemonic, base_opc>;
  defm "" : SIMDBinary<v2i64, "i64x2", bin_op, mnemonic, !add(base_opc, 96)>;
}
|
|
|
|
// Integer addition: add / add_saturate_s / add_saturate_u
let isCommutable = 1 in {
  defm ADD       : SIMDBinaryInt<add, "add", 110>;
  defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 111>;
  defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 112>;
} // isCommutable = 1

// Integer subtraction: sub / sub_saturate_s / sub_saturate_u
// (not commutable; the saturating forms are selected from wasm intrinsics)
defm SUB       : SIMDBinaryInt<sub, "sub", 113>;
defm SUB_SAT_S :
  SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 114>;
defm SUB_SAT_U :
  SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 115>;

// Integer multiplication: mul (no i8x16 variant)
let isCommutable = 1 in {
  defm MUL : SIMDBinaryIntNoI8x16<mul, "mul", 117>;
} // isCommutable = 1

// Integer min_s / min_u / max_s / max_u (no i64x2 variants)
let isCommutable = 1 in {
  defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 118>;
  defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 119>;
  defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 120>;
  defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>;
} // isCommutable = 1

// Integer unsigned rounding average: avgr_u (i8x16 and i16x8 only)
let isCommutable = 1 in {
  defm AVGR_U :
    SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 123>;
  defm AVGR_U :
    SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 155>;
} // isCommutable = 1
|
|
|
|
// Pattern fragment matching an `add` node only when the node carries the
// no-unsigned-wrap flag (checked by the C++ predicate below).
def add_nuw :
  PatFrag<(ops node:$lhs, node:$rhs),
          (add node:$lhs, node:$rhs),
          "return N->getFlags().hasNoUnsignedWrap();">;
|
|
|
|
// Select avgr_u for the open-coded form
//   wasm_shr_u(add_nuw(add_nuw(lhs, rhs), splat(1)), 1)
// Each entry pairs a vector type with the splat fragment used for the
// constant-one vector of that type.
foreach ty_splat = [[v16i8, splat16], [v8i16, splat8]] in
def : Pat<
  (wasm_shr_u
    (add_nuw
      (add_nuw (ty_splat[0] V128:$lhs), (ty_splat[0] V128:$rhs)),
      (ty_splat[1] (i32 1))),
    (i32 1)),
  (!cast<NI>("AVGR_U_"#ty_splat[0]) V128:$lhs, V128:$rhs)>;
|
|
|
|
// Widening dot product: i32x4.dot_i16x8_s
// Takes two V128 operands and yields a V128; selected from int_wasm_dot.
let isCommutable = 1 in {
  defm DOT :
    SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
           [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
           "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
           186>;
} // isCommutable = 1
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Floating-point unary arithmetic
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Instantiates a unary floating-point instruction for both float shapes:
// f32x4 at `base_opc` and f64x2 at `base_opc + 12`.
multiclass SIMDUnaryFP<SDNode fp_op, string mnemonic, bits<32> base_opc> {
  defm "" : SIMDUnary<v4f32, "f32x4", fp_op, mnemonic, base_opc>;
  defm "" : SIMDUnary<v2f64, "f64x2", fp_op, mnemonic, !add(base_opc, 12)>;
}

// Absolute value: abs
defm ABS : SIMDUnaryFP<fabs, "abs", 224>;

// Negation: neg
defm NEG : SIMDUnaryFP<fneg, "neg", 225>;

// Square root: sqrt
defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>;

// Rounding: ceil, floor, trunc, nearest
// These are selected from wasm intrinsics. Their f64x2 opcodes are 4 above the
// f32x4 ones rather than 12, so each shape is instantiated explicitly instead
// of going through SIMDUnaryFP.
defm CEIL    : SIMDUnary<v4f32, "f32x4", int_wasm_ceil, "ceil", 216>;
defm FLOOR   : SIMDUnary<v4f32, "f32x4", int_wasm_floor, "floor", 217>;
defm TRUNC   : SIMDUnary<v4f32, "f32x4", int_wasm_trunc, "trunc", 218>;
defm NEAREST : SIMDUnary<v4f32, "f32x4", int_wasm_nearest, "nearest", 219>;
defm CEIL    : SIMDUnary<v2f64, "f64x2", int_wasm_ceil, "ceil", 220>;
defm FLOOR   : SIMDUnary<v2f64, "f64x2", int_wasm_floor, "floor", 221>;
defm TRUNC   : SIMDUnary<v2f64, "f64x2", int_wasm_trunc, "trunc", 222>;
defm NEAREST : SIMDUnary<v2f64, "f64x2", int_wasm_nearest, "nearest", 223>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Floating-point binary arithmetic
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Instantiates a binary floating-point instruction for both float shapes:
// f32x4 at `base_opc` and f64x2 at `base_opc + 12`.
multiclass SIMDBinaryFP<SDNode fp_op, string mnemonic, bits<32> base_opc> {
  defm "" : SIMDBinary<v4f32, "f32x4", fp_op, mnemonic, base_opc>;
  defm "" : SIMDBinary<v2f64, "f64x2", fp_op, mnemonic, !add(base_opc, 12)>;
}

// Addition: add
let isCommutable = 1 in {
  defm ADD : SIMDBinaryFP<fadd, "add", 228>;
} // isCommutable = 1

// Subtraction: sub
defm SUB : SIMDBinaryFP<fsub, "sub", 229>;

// Multiplication: mul
let isCommutable = 1 in {
  defm MUL : SIMDBinaryFP<fmul, "mul", 230>;
} // isCommutable = 1

// Division: div
defm DIV : SIMDBinaryFP<fdiv, "div", 231>;

// NaN-propagating minimum: min
defm MIN : SIMDBinaryFP<fminimum, "min", 232>;

// NaN-propagating maximum: max
defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;

// Pseudo-minimum: pmin (selected from the int_wasm_pmin intrinsic)
defm PMIN : SIMDBinaryFP<int_wasm_pmin, "pmin", 234>;

// Pseudo-maximum: pmax (selected from the int_wasm_pmax intrinsic)
defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Conversions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Defines one V128 -> V128 conversion instruction whose record is named
// `<conv_op>_<res_ty>_<src_ty>` (e.g. fp_to_sint_v4i32_v4f32). It is selected
// for `conv_op` applied to a `src_ty` vector and producing a `res_ty` vector.
//   res_ty   - result ValueType
//   src_ty   - operand ValueType
//   conv_op  - SDNode matched by the pattern; also the record-name prefix
//   asm_name - full assembly mnemonic
//   opc      - SIMD opcode forwarded to SIMD_I
multiclass SIMDConvert<ValueType res_ty, ValueType src_ty, SDNode conv_op,
                       string asm_name, bits<32> opc> {
  defm conv_op#_#res_ty#_#src_ty :
    SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
           [(set (res_ty V128:$dst), (res_ty (conv_op (src_ty V128:$vec))))],
           asm_name#"\t$dst, $vec", asm_name, opc>;
}
|
|
|
|
// Floating point to integer with saturation: trunc_sat
defm "" :
  SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 248>;
defm "" :
  SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 249>;

// Integer to floating point: convert
defm "" :
  SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 250>;
defm "" :
  SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 251>;

// Lower llvm.wasm.trunc.saturate.* to the same saturating instructions that
// fp_to_sint / fp_to_uint select above.
def : Pat<
  (v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
  (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
def : Pat<
  (v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
  (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
|
|
|
|
// Widening operations
// Type profile: one result and one operand, both constrained to be vectors.
def widen_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

// Target nodes for widening the low / high half, signed / unsigned.
def widen_low_s  : SDNode<"WebAssemblyISD::WIDEN_LOW_S", widen_t>;
def widen_high_s : SDNode<"WebAssemblyISD::WIDEN_HIGH_S", widen_t>;
def widen_low_u  : SDNode<"WebAssemblyISD::WIDEN_LOW_U", widen_t>;
def widen_high_u : SDNode<"WebAssemblyISD::WIDEN_HIGH_U", widen_t>;
|
|
|
|
// Instantiates the four widening conversions from `src_ty` to `res_ty` at
// consecutive opcodes: low_s at `base_opc`, high_s at +1, low_u at +2 and
// high_u at +3.
//   res_ty / res_shape - result ValueType and its assembly shape name
//   src_ty / src_shape - operand ValueType and its assembly shape name
multiclass SIMDWiden<ValueType res_ty, string res_shape, ValueType src_ty,
                     string src_shape, bits<32> base_opc> {
  defm "" : SIMDConvert<res_ty, src_ty, widen_low_s,
                        res_shape#".widen_low_"#src_shape#"_s", base_opc>;
  defm "" : SIMDConvert<res_ty, src_ty, widen_high_s,
                        res_shape#".widen_high_"#src_shape#"_s",
                        !add(base_opc, 1)>;
  defm "" : SIMDConvert<res_ty, src_ty, widen_low_u,
                        res_shape#".widen_low_"#src_shape#"_u",
                        !add(base_opc, 2)>;
  defm "" : SIMDConvert<res_ty, src_ty, widen_high_u,
                        res_shape#".widen_high_"#src_shape#"_u",
                        !add(base_opc, 3)>;
}

// i8x16 -> i16x8 and i16x8 -> i32x4 widening.
defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 135>;
defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 167>;
|
|
|
|
// Narrowing operations
// Defines the signed and unsigned narrowing instructions producing `res_ty`.
// Each takes two V128 operands ($low, $high) of type `src_ty` and is selected
// from int_wasm_narrow_signed / int_wasm_narrow_unsigned. The signed form
// uses `base_opc`, the unsigned form `base_opc + 1`.
//   res_ty / res_shape - result ValueType and its assembly shape name
//   src_ty / src_shape - operand ValueType and its assembly shape name
multiclass SIMDNarrow<ValueType res_ty, string res_shape, ValueType src_ty,
                      string src_shape, bits<32> base_opc> {
  defm NARROW_S_#res_ty :
    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
           [(set (res_ty V128:$dst),
                 (res_ty (int_wasm_narrow_signed
                   (src_ty V128:$low), (src_ty V128:$high))))],
           res_shape#".narrow_"#src_shape#"_s\t$dst, $low, $high",
           res_shape#".narrow_"#src_shape#"_s",
           base_opc>;
  defm NARROW_U_#res_ty :
    SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
           [(set (res_ty V128:$dst),
                 (res_ty (int_wasm_narrow_unsigned
                   (src_ty V128:$low), (src_ty V128:$high))))],
           res_shape#".narrow_"#src_shape#"_u\t$dst, $low, $high",
           res_shape#".narrow_"#src_shape#"_u",
           !add(base_opc, 1)>;
}

// i16x8 -> i8x16 and i32x4 -> i16x8 narrowing.
defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 101>;
defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 133>;
|
|
|
|
// Truncating stores are implemented with the narrowing operations. Because
// narrowing saturates rather than truncates, the value is masked first so
// that every lane is already in range.
// TODO: Use consts instead of splats
//
// Output fragment for a v8i16 -> v8i8 truncating store: AND $val with a splat
// of 0x00ff00ff, narrow (unsigned) to i8x16, and extract i64 lane 0 as the
// value to be stored.
def store_v8i8_trunc_v8i16 :
  OutPatFrag<(ops node:$val),
             (EXTRACT_LANE_v2i64
               (NARROW_U_v16i8
                 (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x00ff00ff)), node:$val),
                 node:$val), // Second narrow input is unused
               0)>;
|
|
|
|
// Output fragment for a v4i32 -> v4i16 truncating store: AND $val with a splat
// of 0x0000ffff, narrow (unsigned) to i16x8, and extract i64 lane 0 as the
// value to be stored.
def store_v4i16_trunc_v4i32 :
  OutPatFrag<(ops node:$val),
             (EXTRACT_LANE_v2i64
               (NARROW_U_v8i16
                 (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x0000ffff)), node:$val),
                 node:$val), // Second narrow input is unused
               0)>;
|
|
|
|
// Store patterns adapted from WebAssemblyInstrMemory.td
//
// Truncating store to a plain register address. The narrowed value is written
// with a 64-bit integer store whose two leading immediates are both 0; one
// pattern is emitted per address width.
//   src_ty     - vector type of the value being stored
//   store_frag - truncating-store fragment to match
//   narrowed   - output fragment turning the vector into the i64 to store
multiclass NarrowingStorePatNoOffset<ValueType src_ty, PatFrag store_frag,
                                     OutPatFrag narrowed> {
  def : Pat<(store_frag src_ty:$val, I32:$addr),
            (STORE_I64_A32 0, 0, I32:$addr, (i64 (narrowed src_ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(store_frag src_ty:$val, I64:$addr),
            (STORE_I64_A64 0, 0, I64:$addr, (i64 (narrowed src_ty:$val)))>,
        Requires<[HasAddr64]>;
}

defm : NarrowingStorePatNoOffset<v8i16, truncstorevi8,
                                 store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatNoOffset<v4i32, truncstorevi16,
                                 store_v4i16_trunc_v4i32>;
|
|
|
|
// Truncating store to `addr_frag(register, immediate)`. The immediate is
// folded into the store's offset operand and the register becomes the base
// address; one pattern is emitted per address width.
//   src_ty     - vector type of the value being stored
//   store_frag - truncating-store fragment to match
//   addr_frag  - fragment combining base register and immediate offset
//   narrowed   - output fragment turning the vector into the i64 to store
multiclass NarrowingStorePatImmOff<ValueType src_ty, PatFrag store_frag,
                                   PatFrag addr_frag, OutPatFrag narrowed> {
  def : Pat<(store_frag src_ty:$val, (addr_frag I32:$addr, imm:$off)),
            (STORE_I64_A32 0, imm:$off, I32:$addr,
                           (i64 (narrowed src_ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(store_frag src_ty:$val, (addr_frag I64:$addr, imm:$off)),
            (STORE_I64_A64 0, imm:$off, I64:$addr,
                           (i64 (narrowed src_ty:$val)))>,
        Requires<[HasAddr64]>;
}

// Address formed with regPlusImm.
defm : NarrowingStorePatImmOff<v8i16, truncstorevi8, regPlusImm,
                               store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatImmOff<v4i32, truncstorevi16, regPlusImm,
                               store_v4i16_trunc_v4i32>;
// Address formed with or_is_add.
defm : NarrowingStorePatImmOff<v8i16, truncstorevi8, or_is_add,
                               store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatImmOff<v4i32, truncstorevi16, or_is_add,
                               store_v4i16_trunc_v4i32>;
|
|
|
|
// Truncating store whose address is just an immediate. The immediate becomes
// the store's offset operand and a constant 0 of the matching address width
// is materialized as the base address.
//   src_ty     - vector type of the value being stored
//   store_frag - truncating-store fragment to match
//   narrowed   - output fragment turning the vector into the i64 to store
multiclass NarrowingStorePatOffsetOnly<ValueType src_ty, PatFrag store_frag,
                                       OutPatFrag narrowed> {
  def : Pat<(store_frag src_ty:$val, imm:$off),
            (STORE_I64_A32 0, imm:$off, (CONST_I32 0),
                           (i64 (narrowed src_ty:$val)))>,
        Requires<[HasAddr32]>;
  def : Pat<(store_frag src_ty:$val, imm:$off),
            (STORE_I64_A64 0, imm:$off, (CONST_I64 0),
                           (i64 (narrowed src_ty:$val)))>,
        Requires<[HasAddr64]>;
}

defm : NarrowingStorePatOffsetOnly<v8i16, truncstorevi8,
                                   store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatOffsetOnly<v4i32, truncstorevi16,
                                   store_v4i16_trunc_v4i32>;
|
|
|
|
// Truncating store whose address is a wrapped target global address. The
// global becomes the store's offset operand and a constant 0 of the matching
// address width is the base address. Both patterns additionally require
// IsNotPIC.
//   src_ty     - vector type of the value being stored
//   store_frag - truncating-store fragment to match
//   narrowed   - output fragment turning the vector into the i64 to store
multiclass NarrowingStorePatGlobalAddrOffOnly<ValueType src_ty,
                                              PatFrag store_frag,
                                              OutPatFrag narrowed> {
  def : Pat<(store_frag src_ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
            (STORE_I64_A32 0, tglobaladdr:$off, (CONST_I32 0),
                           (i64 (narrowed src_ty:$val)))>,
        Requires<[IsNotPIC, HasAddr32]>;
  def : Pat<(store_frag src_ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
            (STORE_I64_A64 0, tglobaladdr:$off, (CONST_I64 0),
                           (i64 (narrowed src_ty:$val)))>,
        Requires<[IsNotPIC, HasAddr64]>;
}

defm : NarrowingStorePatGlobalAddrOffOnly<v8i16, truncstorevi8,
                                          store_v8i8_trunc_v8i16>;
defm : NarrowingStorePatGlobalAddrOffOnly<v4i32, truncstorevi16,
                                          store_v4i16_trunc_v4i32>;
|
|
|
|
// Bitcasts are nops: a bitconvert between two different 128-bit vector types
// selects to the source register itself.
// Matching a bitcast from a type to itself causes strange errors, so for each
// destination type the source list is the full type list with that one type
// filtered out (compared by record name).
foreach dst_ty = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
foreach src_ty = !foldl(
    []<ValueType>, [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
    kept, cand,
    !if(!eq(!cast<string>(dst_ty), !cast<string>(cand)),
        kept, !listconcat(kept, [cand]))
  ) in
def : Pat<(dst_ty (bitconvert (src_ty V128:$v))), (dst_ty V128:$v)>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Quasi-Fused Multiply-Add and Multiply-Subtract (QFMA/QFMS)
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Defines the `<shape>.qfma` and `<shape>.qfms` instructions for one float
// vector type. Each takes three V128 operands ($a, $b, $c) and yields a V128,
// and is selected from int_wasm_qfma / int_wasm_qfms on `lane_ty` vectors.
//   lane_ty - vector ValueType of operands and result; also the record suffix
//   shape   - assembly shape prefix, e.g. "f32x4"
//   opc_add - SIMD opcode of the qfma instruction
//   opc_sub - SIMD opcode of the qfms instruction
multiclass SIMDQFM<ValueType lane_ty, string shape, bits<32> opc_add,
                   bits<32> opc_sub> {
  defm QFMA_#lane_ty :
    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
           [(set (lane_ty V128:$dst),
                 (int_wasm_qfma (lane_ty V128:$a), (lane_ty V128:$b),
                                (lane_ty V128:$c)))],
           shape#".qfma\t$dst, $a, $b, $c", shape#".qfma", opc_add>;
  defm QFMS_#lane_ty :
    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
           [(set (lane_ty V128:$dst),
                 (int_wasm_qfms (lane_ty V128:$a), (lane_ty V128:$b),
                                (lane_ty V128:$c)))],
           shape#".qfms\t$dst, $a, $b, $c", shape#".qfms", opc_sub>;
}

defm "" : SIMDQFM<v4f32, "f32x4", 180, 212>;
defm "" : SIMDQFM<v2f64, "f64x2", 254, 255>;
|