From a397d1a74f852e9205b170862fb1df91706ffdf5 Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Sun, 11 Apr 2021 11:13:16 -0700
Subject: [PATCH] [WebAssembly] Update v128.any_true

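In the final SIMD spec, there is only a single v128.any_true instruction rather
than one for each lane interpretation, because the semantics do not depend on
the lane interpretation. Replace the per-lane any_true instructions with that
single instruction (opcode 0x53) and select it for every integer vector type;
all_true remains a per-lane instruction.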

Differential Revision: https://reviews.llvm.org/D100241
---
 .../WebAssembly/WebAssemblyInstrSIMD.td       | 54 +++++++++++--------
 test/CodeGen/WebAssembly/simd-intrinsics.ll   |  8 +--
 test/CodeGen/WebAssembly/simd-reductions.ll   | 24 ++++-----
 test/MC/Disassembler/WebAssembly/wasm.txt     |  3 +-
 test/MC/WebAssembly/simd-encodings.s          |  3 +-
 5 files changed, 50 insertions(+), 42 deletions(-)
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 41607a6385d..4f1f9b8b91b 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -800,20 +800,6 @@ multiclass SIMDUnaryInt<SDPatternOperator node, string name, bits<32> baseInst>
   defm "" : SIMDUnary<I64x2, node, name, !add(baseInst, 96)>;
 }
 
-multiclass SIMDReduceVec<Vec vec, SDPatternOperator op, string name, bits<32> simdop> {
-  defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
-                      [(set I32:$dst, (i32 (op (vec.vt V128:$vec))))],
-                      vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name,
-                      simdop>;
-}
-
-multiclass SIMDReduce<SDPatternOperator op, string name, bits<32> baseInst> {
-  defm "" : SIMDReduceVec<I8x16, op, name, baseInst>;
-  defm "" : SIMDReduceVec<I16x8, op, name, !add(baseInst, 32)>;
-  defm "" : SIMDReduceVec<I32x4, op, name, !add(baseInst, 64)>;
-  defm "" : SIMDReduceVec<I64x2, op, name, !add(baseInst, 96)>;
-}
-
 // Integer vector negation
 def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, $in)>;
 
@@ -823,22 +809,44 @@ defm ABS : SIMDUnaryInt<abs, "abs", 96>;
 // Integer negation: neg
 defm NEG : SIMDUnaryInt<ivneg, "neg", 97>;
 
-// Any lane true: any_true
-defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 98>;
-
-// All lanes true: all_true
-defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 99>;
-
 // Population count: popcnt
 defm POPCNT : SIMDUnary<I8x16, int_wasm_popcnt, "popcnt", 0x62>;
 
+// Any lane true: any_true (single v128 op selected for every vec in IntVecs)
+defm ANYTRUE : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), [],
+                      "v128.any_true\t$dst, $vec", "v128.any_true", 0x53>;
+
+foreach vec = IntVecs in
+def : Pat<(int_wasm_anytrue (vec.vt V128:$vec)), (ANYTRUE V128:$vec)>;
+
+// All lanes true: all_true (one instruction per integer lane shape)
+multiclass SIMDAllTrue<Vec vec, bits<32> simdop> {
+  defm ALLTRUE_#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
+                             [(set I32:$dst,
+                               (i32 (int_wasm_alltrue (vec.vt V128:$vec))))],
+                             vec.prefix#".all_true\t$dst, $vec",
+                             vec.prefix#".all_true", simdop>;
+}
+
+defm "" : SIMDAllTrue<I8x16, 0x63>;
+defm "" : SIMDAllTrue<I16x8, 0x83>;
+defm "" : SIMDAllTrue<I32x4, 0xa3>;
+defm "" : SIMDAllTrue<I64x2, 0xc3>;
+
 // Reductions already return 0 or 1, so and 1, setne 0, and seteq 1
 // can be folded out
 foreach reduction =
-  [["int_wasm_anytrue", "ANYTRUE"], ["int_wasm_alltrue", "ALLTRUE"]] in
-foreach vec = IntVecs in {
+  [["int_wasm_anytrue", "ANYTRUE", "I8x16"],
+   ["int_wasm_anytrue", "ANYTRUE", "I16x8"],
+   ["int_wasm_anytrue", "ANYTRUE", "I32x4"],
+   ["int_wasm_anytrue", "ANYTRUE", "I64x2"],
+   ["int_wasm_alltrue", "ALLTRUE_I8x16", "I8x16"],
+   ["int_wasm_alltrue", "ALLTRUE_I16x8", "I16x8"],
+   ["int_wasm_alltrue", "ALLTRUE_I32x4", "I32x4"],
+   ["int_wasm_alltrue", "ALLTRUE_I64x2", "I64x2"]] in {
 defvar intrinsic = !cast<Intrinsic>(reduction[0]);
-defvar inst = !cast<NI>(reduction[1]#"_"#vec);
+defvar inst = !cast<NI>(reduction[1]);
+defvar vec = !cast<Vec>(reduction[2]);
 def : Pat<(i32 (and (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
 def : Pat<(i32 (setne (i32 (intrinsic (vec.vt V128:$x))), (i32 0))), (inst $x)>;
 def : Pat<(i32 (seteq (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
diff --git a/test/CodeGen/WebAssembly/simd-intrinsics.ll b/test/CodeGen/WebAssembly/simd-intrinsics.ll
index e78b167ed0e..5d98f2b5637 100644
--- a/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -87,7 +87,7 @@ define <16 x i8> @popcnt_v16i8(<16 x i8> %x) {
 
 ; CHECK-LABEL: any_v16i8:
 ; CHECK-NEXT: .functype any_v16i8 (v128) -> (i32){{$}}
-; CHECK-NEXT: i8x16.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 declare i32 @llvm.wasm.anytrue.v16i8(<16 x i8>)
 define i32 @any_v16i8(<16 x i8> %x) {
@@ -319,7 +319,7 @@ define <8 x i16> @extadd_pairwise_u_v8i16(<16 x i8> %x) {
 
 ; CHECK-LABEL: any_v8i16:
 ; CHECK-NEXT: .functype any_v8i16 (v128) -> (i32){{$}}
-; CHECK-NEXT: i16x8.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 declare i32 @llvm.wasm.anytrue.v8i16(<8 x i16>)
 define i32 @any_v8i16(<8 x i16> %x) {
@@ -468,7 +468,7 @@ define <4 x i32> @extadd_pairwise_u_v4i32(<8 x i16> %x) {
 
 ; CHECK-LABEL: any_v4i32:
 ; CHECK-NEXT: .functype any_v4i32 (v128) -> (i32){{$}}
-; CHECK-NEXT: i32x4.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 declare i32 @llvm.wasm.anytrue.v4i32(<4 x i32>)
 define i32 @any_v4i32(<4 x i32> %x) {
@@ -643,7 +643,7 @@ define <2 x i64> @extmul_high_u_v2i64(<4 x i32> %x, <4 x i32> %y) {
 
 ; CHECK-LABEL: any_v2i64:
 ; CHECK-NEXT: .functype any_v2i64 (v128) -> (i32){{$}}
-; CHECK-NEXT: i64x2.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 declare i32 @llvm.wasm.anytrue.v2i64(<2 x i64>)
 define i32 @any_v2i64(<2 x i64> %x) {
diff --git a/test/CodeGen/WebAssembly/simd-reductions.ll b/test/CodeGen/WebAssembly/simd-reductions.ll
index 259ef3b3a81..500a4495028 100644
--- a/test/CodeGen/WebAssembly/simd-reductions.ll
+++ b/test/CodeGen/WebAssembly/simd-reductions.ll
@@ -14,7 +14,7 @@ declare i32 @llvm.wasm.alltrue.v16i8(<16 x i8>)
 
 ; CHECK-LABEL: any_v16i8_trunc:
 ; CHECK-NEXT: .functype any_v16i8_trunc (v128) -> (i32){{$}}
-; CHECK-NEXT: i8x16.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v16i8_trunc(<16 x i8> %x) {
   %a = call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> %x)
@@ -25,7 +25,7 @@ define i32 @any_v16i8_trunc(<16 x i8> %x) {
 
 ; CHECK-LABEL: any_v16i8_ne:
 ; CHECK-NEXT: .functype any_v16i8_ne (v128) -> (i32){{$}}
-; CHECK-NEXT: i8x16.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v16i8_ne(<16 x i8> %x) {
   %a = call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> %x)
@@ -36,7 +36,7 @@ define i32 @any_v16i8_ne(<16 x i8> %x) {
 
 ; CHECK-LABEL: any_v16i8_eq:
 ; CHECK-NEXT: .functype any_v16i8_eq (v128) -> (i32){{$}}
-; CHECK-NEXT: i8x16.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v16i8_eq(<16 x i8> %x) {
   %a = call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> %x)
@@ -86,7 +86,7 @@ declare i32 @llvm.wasm.alltrue.v8i16(<8 x i16>)
 
 ; CHECK-LABEL: any_v8i16_trunc:
 ; CHECK-NEXT: .functype any_v8i16_trunc (v128) -> (i32){{$}}
-; CHECK-NEXT: i16x8.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v8i16_trunc(<8 x i16> %x) {
   %a = call i32 @llvm.wasm.anytrue.v8i16(<8 x i16> %x)
@@ -97,7 +97,7 @@ define i32 @any_v8i16_trunc(<8 x i16> %x) {
 
 ; CHECK-LABEL: any_v8i16_ne:
 ; CHECK-NEXT: .functype any_v8i16_ne (v128) -> (i32){{$}}
-; CHECK-NEXT: i16x8.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v8i16_ne(<8 x i16> %x) {
   %a = call i32 @llvm.wasm.anytrue.v8i16(<8 x i16> %x)
@@ -108,7 +108,7 @@ define i32 @any_v8i16_ne(<8 x i16> %x) {
 
 ; CHECK-LABEL: any_v8i16_eq:
 ; CHECK-NEXT: .functype any_v8i16_eq (v128) -> (i32){{$}}
-; CHECK-NEXT: i16x8.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v8i16_eq(<8 x i16> %x) {
   %a = call i32 @llvm.wasm.anytrue.v8i16(<8 x i16> %x)
@@ -158,7 +158,7 @@ declare i32 @llvm.wasm.alltrue.v4i32(<4 x i32>)
 
 ; CHECK-LABEL: any_v4i32_trunc:
 ; CHECK-NEXT: .functype any_v4i32_trunc (v128) -> (i32){{$}}
-; CHECK-NEXT: i32x4.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v4i32_trunc(<4 x i32> %x) {
   %a = call i32 @llvm.wasm.anytrue.v4i32(<4 x i32> %x)
@@ -169,7 +169,7 @@ define i32 @any_v4i32_trunc(<4 x i32> %x) {
 
 ; CHECK-LABEL: any_v4i32_ne:
 ; CHECK-NEXT: .functype any_v4i32_ne (v128) -> (i32){{$}}
-; CHECK-NEXT: i32x4.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v4i32_ne(<4 x i32> %x) {
   %a = call i32 @llvm.wasm.anytrue.v4i32(<4 x i32> %x)
@@ -180,7 +180,7 @@ define i32 @any_v4i32_ne(<4 x i32> %x) {
 
 ; CHECK-LABEL: any_v4i32_eq:
 ; CHECK-NEXT: .functype any_v4i32_eq (v128) -> (i32){{$}}
-; CHECK-NEXT: i32x4.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v4i32_eq(<4 x i32> %x) {
   %a = call i32 @llvm.wasm.anytrue.v4i32(<4 x i32> %x)
@@ -230,7 +230,7 @@ declare i32 @llvm.wasm.alltrue.v2i64(<2 x i64>)
 
 ; CHECK-LABEL: any_v2i64_trunc:
 ; CHECK-NEXT: .functype any_v2i64_trunc (v128) -> (i32){{$}}
-; CHECK-NEXT: i64x2.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v2i64_trunc(<2 x i64> %x) {
   %a = call i32 @llvm.wasm.anytrue.v2i64(<2 x i64> %x)
@@ -241,7 +241,7 @@ define i32 @any_v2i64_trunc(<2 x i64> %x) {
 
 ; CHECK-LABEL: any_v2i64_ne:
 ; CHECK-NEXT: .functype any_v2i64_ne (v128) -> (i32){{$}}
-; CHECK-NEXT: i64x2.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v2i64_ne(<2 x i64> %x) {
   %a = call i32 @llvm.wasm.anytrue.v2i64(<2 x i64> %x)
@@ -252,7 +252,7 @@ define i32 @any_v2i64_ne(<2 x i64> %x) {
 
 ; CHECK-LABEL: any_v2i64_eq:
 ; CHECK-NEXT: .functype any_v2i64_eq (v128) -> (i32){{$}}
-; CHECK-NEXT: i64x2.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 define i32 @any_v2i64_eq(<2 x i64> %x) {
   %a = call i32 @llvm.wasm.anytrue.v2i64(<2 x i64> %x)
diff --git a/test/MC/Disassembler/WebAssembly/wasm.txt b/test/MC/Disassembler/WebAssembly/wasm.txt
index 783d59416f9..0cbf584d968 100644
--- a/test/MC/Disassembler/WebAssembly/wasm.txt
+++ b/test/MC/Disassembler/WebAssembly/wasm.txt
@@ -43,8 +43,7 @@
 0xFD 0x83 0x01
 
 # Including non-canonical LEB128 encodings
-# CHECK: i16x8.any_true
-# CHECK-NOT: i16x8.neg
+# CHECK: i16x8.q15mulr_sat_s
 0xFD 0x82 0x81 0x80 0x80 0x80 0x80 0x00
 
 # Check br_table, which has its own operand type.
diff --git a/test/MC/WebAssembly/simd-encodings.s b/test/MC/WebAssembly/simd-encodings.s
index 2ce4eb62290..c1047add02b 100644
--- a/test/MC/WebAssembly/simd-encodings.s
+++ b/test/MC/WebAssembly/simd-encodings.s
@@ -280,7 +280,8 @@ main:
     # CHECK: v128.bitselect # encoding: [0xfd,0x52]
     v128.bitselect
 
-    # TODO: v128.any_true # encoding: [0xfd,0x53]
+    # CHECK: v128.any_true # encoding: [0xfd,0x53]
+    v128.any_true
 
     # CHECK: v128.load8_lane 32, 1 # encoding: [0xfd,0x54,0x00,0x20,0x01]
     v128.load8_lane 32, 1