[WebAssembly] Implement SIMD {i8x16,i16x8}.avgr_u instructions

Summary: These instructions were added to the spec proposal in https://github.com/WebAssembly/simd/pull/126. Their semantics are equivalent to `(a + b + 1) / 2`. The opcode for the experimental i32x4.dot_i16x8_s is also bumped due to a collision with the i8x16.avgr_u opcode. Reviewers: aheejin Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71628
2024-11-23 03:02:36 +01:00 · 2019-12-17 13:58:39 -08:00 · 2019-12-17 13:58:39 -08:00 · a355453a29
commit a355453a29
parent 1024f30fcf
3 changed files with 53 additions and 2 deletions
--- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@ -738,12 +738,31 @@ defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 96>;
 defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 97>;
 } // isCommutable = 1

+// Integer unsigned rounding average: avgr_u
+//
+// Each PatFrag below matches the vector DAG for (lhs + rhs + 1) >> 1: an add
+// of the two operands, an add of a splat of 1, and then a logical shift right
+// (srl) by a splat of 1. One fragment is written per lane type because the
+// splat helper differs (splat16 for v16i8, splat8 for v8i16).
+//
+// NOTE(review): plain `add` nodes are matched, with no no-unsigned-wrap
+// requirement, so sums that wrap in the lane type are selected as well.
+// Confirm that this agrees with the intended avgr_u semantics of
+// (a + b + 1) / 2.
+def avgr_u_v16i8 :
+  PatFrag<(ops node:$lhs, node:$rhs),
+          (srl
+            (add (add node:$lhs, node:$rhs), (splat16 (i32 1))),
+            (v16i8 (splat16 (i32 1)))
+          )>;
+def avgr_u_v8i16 :
+  PatFrag<(ops node:$lhs, node:$rhs),
+          (srl
+            (add (add node:$lhs, node:$rhs), (splat8 (i32 1))),
+            (v8i16 (splat8 (i32 1)))
+          )>;
+
+// Both instructions are marked commutable and are gated on the
+// HasUnimplementedSIMD128 predicate. The trailing numbers are the SIMD
+// opcodes: 217 (0xd9) for i8x16.avgr_u and 218 (0xda) for i16x8.avgr_u.
+let isCommutable = 1, Predicates = [HasUnimplementedSIMD128] in {
+defm AVGR_U : SIMDBinary<v16i8, "i8x16", avgr_u_v16i8, "avgr_u", 217>;
+defm AVGR_U : SIMDBinary<v8i16, "i16x8", avgr_u_v8i16, "avgr_u", 218>;
+}
+
 // Widening dot product: i32x4.dot_i16x8_s
 let isCommutable = 1 in
 defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
                   [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
                   "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
-                  217>;
+                  219>; // 217 and 218 now encode i8x16/i16x8 avgr_u

 //===----------------------------------------------------------------------===//
 // Floating-point unary arithmetic
--- a/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/test/CodeGen/WebAssembly/simd-arith.ll
@ -91,6 +91,20 @@ define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
  ret <16 x i8> %a
 }

+; Rounding average on sixteen unsigned i8 lanes, written as (x + y + 1) / 2
+; with two adds and a udiv by a constant vector of 2s. The SIMD128 checks
+; require exactly one i8x16.avgr_u between the functype line and the return;
+; the NO-SIMD128 check requires that no i8x16 instruction is emitted.
+; NOTE(review): the selection pattern matches a logical shift right by 1, so
+; the udiv by 2 is presumably rewritten to a shift before selection; confirm.
+; CHECK-LABEL: avgr_u_v16i8:
+; NO-SIMD128-NOT: i8x16
+; SIMD128-NEXT: .functype avgr_u_v16i8 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i8x16.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
+  %a = add <16 x i8> %x, %y
+  %b = add <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %c = udiv <16 x i8> %b, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2,
+                           i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <16 x i8> %c
+}
+
 ; CHECK-LABEL: neg_v16i8:
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype neg_v16i8 (v128) -> (v128){{$}}
@ -381,6 +395,18 @@ define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
  ret <8 x i16> %a
 }

+; Rounding average on eight unsigned i16 lanes, written as (x + y + 1) / 2
+; with two adds and a udiv by a constant vector of 2s. The SIMD128 checks
+; require exactly one i16x8.avgr_u between the functype line and the return;
+; the NO-SIMD128 check requires that no i16x8 instruction is emitted.
+; NOTE(review): the selection pattern matches a logical shift right by 1, so
+; the udiv by 2 is presumably rewritten to a shift before selection; confirm.
+; CHECK-LABEL: avgr_u_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .functype avgr_u_v8i16 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i16x8.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
+  %a = add <8 x i16> %x, %y
+  %b = add <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %c = udiv <8 x i16> %b, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  ret <8 x i16> %c
+}
+
 ; CHECK-LABEL: neg_v8i16:
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype neg_v8i16 (v128) -> (v128){{$}}
--- a/test/MC/WebAssembly/simd-encodings.s
+++ b/test/MC/WebAssembly/simd-encodings.s
@ -571,7 +571,13 @@ main:
    # CHECK: v128.andnot # encoding: [0xfd,0xd8,0x01]
    v128.andnot

-    # CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xd9,0x01]
+    # CHECK: i8x16.avgr_u # encoding: [0xfd,0xd9,0x01]
+    i8x16.avgr_u
+
+    # CHECK: i16x8.avgr_u # encoding: [0xfd,0xda,0x01]
+    i16x8.avgr_u
+
+    # CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xdb,0x01]
    i32x4.dot_i16x8_s

    end_function