[WebAssembly] Support for atomic fences

Summary: This adds support for translation of LLVM IR fence instruction. We convert a singlethread fence to a pseudo compiler barrier which becomes 0 instructions in final binary, and a thread fence to an idempotent atomicrmw instruction to a memory address. Reviewers: dschuff, jfb, sunfish, tlively Subscribers: sbc100, jgravelle-google, llvm-commits Differential Revision: https://reviews.llvm.org/D50277 llvm-svn: 361884
2024-11-23 19:23:23 +01:00 · 2019-05-28 22:09:12 +00:00 · 2019-05-28 22:09:12 +00:00 · 674e8fe80d
commit 674e8fe80d
parent e52eb7c7d0
4 changed files with 154 additions and 4 deletions
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@ -369,6 +369,10 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
      OutStreamer->AddBlankLine();
    }
    break;
+  case WebAssembly::COMPILER_FENCE:
+    // This is a compiler barrier that prevents instruction reordering during
+    // backend compilation, and should not be emitted.
+    break;
  case WebAssembly::EXTRACT_EXCEPTION_I32:
  case WebAssembly::EXTRACT_EXCEPTION_I32_S:
    // These are pseudo instructions that simulates popping values from stack.
--- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@ -77,14 +77,103 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
    return;
  }

-  // Few custom selection stuff. If we need WebAssembly-specific selection,
-  // uncomment this block add corresponding case statements.
-  /*
+  // Few custom selection stuff.
+  SDLoc DL(Node);
+  MachineFunction &MF = CurDAG->getMachineFunction();
  switch (Node->getOpcode()) {
+  case ISD::ATOMIC_FENCE: {
+    if (!MF.getSubtarget<WebAssemblySubtarget>().hasAtomics())
+      break;
+
+    uint64_t SyncScopeID =
+        cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
+    switch (SyncScopeID) {
+    case SyncScope::SingleThread: {
+      // We lower a single-thread fence to a pseudo compiler barrier instruction
+      // preventing instruction reordering. This will not be emitted in final
+      // binary.
+      MachineSDNode *Fence =
+          CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
+                                 DL,                 // debug loc
+                                 MVT::Other,         // outchain type
+                                 Node->getOperand(0) // inchain
+          );
+      ReplaceNode(Node, Fence);
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+
+    case SyncScope::System: {
+      // For non-emscripten systems, we have not decided on what we should
+      // traslate fences to yet.
+      if (!Subtarget->getTargetTriple().isOSEmscripten())
+        report_fatal_error(
+            "ATOMIC_FENCE is not yet supported in non-emscripten OSes");
+
+      // Wasm does not have a fence instruction, but because all atomic
+      // instructions in wasm are sequentially consistent, we translate a
+      // fence to an idempotent atomic RMW instruction to a linear memory
+      // address. All atomic instructions in wasm are sequentially consistent,
+      // but this is to ensure a fence also prevents reordering of non-atomic
+      // instructions in the VM. Even though LLVM IR's fence instruction does
+      // not say anything about its relationship with non-atomic instructions,
+      // we think this is more user-friendly.
+      //
+      // While any address can work, here we use a value stored in
+      // __stack_pointer wasm global because there's high chance that area is
+      // in cache.
+      //
+      // So the selected instructions will be in the form of:
+      //   %addr = get_global $__stack_pointer
+      //   %0 = i32.const 0
+      //   i32.atomic.rmw.or %addr, %0
+      SDValue StackPtrSym = CurDAG->getTargetExternalSymbol(
+          "__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
+      MachineSDNode *GetGlobal =
+          CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode
+                                 DL,                          // debug loc
+                                 MVT::i32,                    // result type
+                                 StackPtrSym // __stack_pointer symbol
+          );
+
+      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+      auto *MMO = MF.getMachineMemOperand(
+          MachinePointerInfo::getUnknownStack(MF),
+          // FIXME Volatile isn't really correct, but currently all LLVM
+          // atomic instructions are treated as volatiles in the backend, so
+          // we should be consistent.
+          MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
+              MachineMemOperand::MOStore,
+          4, 4, AAMDNodes(), nullptr, SyncScope::System,
+          AtomicOrdering::SequentiallyConsistent);
+      MachineSDNode *Const0 =
+          CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero);
+      MachineSDNode *AtomicRMW = CurDAG->getMachineNode(
+          WebAssembly::ATOMIC_RMW_OR_I32, // opcode
+          DL,                             // debug loc
+          MVT::i32,                       // result type
+          MVT::Other,                     // outchain type
+          {
+              Zero,                  // alignment
+              Zero,                  // offset
+              SDValue(GetGlobal, 0), // __stack_pointer
+              SDValue(Const0, 0),    // OR with 0 to make it idempotent
+              Node->getOperand(0)    // inchain
+          });
+
+      CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
+      ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+    default:
+      llvm_unreachable("Unknown scope!");
+    }
+  }
+
  default:
    break;
  }
-  */

  // Select the default instruction.
  SelectCode(Node);
--- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@ -887,3 +887,13 @@ defm : TerRMWTruncExtPattern<
  ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
  ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
  ATOMIC_RMW32_U_CMPXCHG_I64>;
+
+//===----------------------------------------------------------------------===//
+// Atomic fences
+//===----------------------------------------------------------------------===//
+
+// A compiler fence instruction that prevents reordering of instructions.
+let Defs = [ARGUMENTS] in {
+let isPseudo = 1, hasSideEffects = 1 in
+defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
+} // Defs = [ARGUMENTS]
--- a/test/CodeGen/WebAssembly/atomic-fence.ll
+++ b/test/CodeGen/WebAssembly/atomic-fence.ll
@ -0,0 +1,47 @@
+; RUN: llc < %s | FileCheck %s --check-prefix NOATOMIC
+; RUN: not llc < %s -mtriple=wasm32-unknown-unknown -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
+; RUN: not llc < %s -mtriple=wasm32-unknown-wasi -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
+; RUN: llc < %s -mtriple=wasm32-unknown-emscripten -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; NOEMSCRIPTEN: LLVM ERROR: ATOMIC_FENCE is not yet supported in non-emscripten OSes
+
+; A multithread fence turns into 'global.get $__stack_pointer' followed by an
+; idempotent atomicrmw instruction.
+; CHECK-LABEL: multithread_fence:
+; CHECK:      global.get  $push[[SP:[0-9]+]]=, __stack_pointer
+; CHECK-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0
+; CHECK-NEXT: i32.atomic.rmw.or  $drop=, 0($pop[[SP]]), $pop[[ZERO]]
+; NOATOMIC-NOT: i32.atomic.rmw.or
+define void @multithread_fence() {
+  fence seq_cst
+  ret void
+}
+
+; Fences with weaker memory orderings than seq_cst should be treated the same
+; because atomic memory access in wasm are sequentially consistent.
+; CHECK-LABEL: multithread_weak_fence:
+; CHECK:  global.get  $push{{.+}}=, __stack_pointer
+; CHECK:  i32.atomic.rmw.or
+; CHECK:  i32.atomic.rmw.or
+; CHECK:  i32.atomic.rmw.or
+define void @multithread_weak_fence() {
+  fence acquire
+  fence release
+  fence acq_rel
+  ret void
+}
+
+; A singlethread fence becomes compiler_fence instruction, a pseudo instruction
+; that acts as a compiler barrier. The barrier should not be emitted to .s file.
+; CHECK-LABEL: singlethread_fence:
+; CHECK-NOT:  compiler_fence
+define void @singlethread_fence() {
+  fence syncscope("singlethread") seq_cst
+  fence syncscope("singlethread") acquire
+  fence syncscope("singlethread") release
+  fence syncscope("singlethread") acq_rel
+  ret void
+}