[WebAssembly] Use named operands to identify loads and stores

Summary: Uses the named operands tablegen feature to look up the indices of offset, address, and p2align operands for all load and store instructions. This replaces brittle, incorrect logic for identifying loads and stores when eliminating frame indices, which previously crashed on bulk-memory ops. It also cleans up the SetP2Alignment pass. Reviewers: aheejin, dschuff Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, jfb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59007 llvm-svn: 355770
2024-11-23 03:02:36 +01:00 · 2019-03-09 04:31:37 +00:00 · 2019-03-09 04:31:37 +00:00 · f98161801a
commit f98161801a
parent 7ca29a3649
9 changed files with 107 additions and 135 deletions
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@ -331,14 +331,6 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
  }
 }

-/// The operand number of the load or store address in load/store instructions.
-static const unsigned LoadAddressOperandNo = 3;
-static const unsigned StoreAddressOperandNo = 2;
-
-/// The operand number of the load or store p2align in load/store instructions.
-static const unsigned LoadP2AlignOperandNo = 1;
-static const unsigned StoreP2AlignOperandNo = 0;
-
 /// This is used to indicate block signatures.
 enum class ExprType : unsigned {
  Void = 0x40,
--- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@ -11,6 +11,7 @@
 ///
 //===----------------------------------------------------------------------===//

+let UseNamedOperandTable = 1 in
 multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                    list<dag> pattern_r, string asmstr_r = "",
                    string asmstr_s = "", bits<32> atomic_op = -1> {
@ -810,9 +811,9 @@ multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string name,
  defm "" :
    ATOMIC_I<(outs rc:$dst),
             (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
-                  rc:$new),
+                  rc:$new_),
             (outs), (ins P2Align:$p2align, offset32_op:$off), [],
-             !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new"),
+             !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"),
             !strconcat(name, "\t${off}${p2align}"), atomic_op>;
 }

--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@ -27,6 +27,10 @@ using namespace llvm;
 #define GET_INSTRINFO_CTOR_DTOR
 #include "WebAssemblyGenInstrInfo.inc"

+// defines WebAssembly::getNamedOperandIdx
+#define GET_INSTRINFO_NAMED_OPS
+#include "WebAssemblyGenInstrInfo.inc"
+
 WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
    : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN,
                              WebAssembly::ADJCALLSTACKUP,
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@ -21,8 +21,17 @@
 #define GET_INSTRINFO_HEADER
 #include "WebAssemblyGenInstrInfo.inc"

+#define GET_INSTRINFO_OPERAND_ENUM
+#include "WebAssemblyGenInstrInfo.inc"
+
 namespace llvm {

+namespace WebAssembly {
+
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
+
+}
+
 class WebAssemblySubtarget;

 class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo {
--- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@ -52,7 +52,7 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off),

 // Defines atomic and non-atomic loads, regular and extending.
 multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> {
-  let mayLoad = 1 in
+  let mayLoad = 1, UseNamedOperandTable = 1 in
  defm "": I<(outs rc:$dst),
             (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
             (outs), (ins P2Align:$p2align, offset32_op:$off),
@ -294,7 +294,7 @@ def : LoadPatExternSymOffOnly<i64, extloadi32, LOAD32_U_I64>;

 // Defines atomic and non-atomic stores, regular and truncating
 multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> {
-  let mayStore = 1 in
+  let mayStore = 1, UseNamedOperandTable = 1 in
  defm "" : I<(outs),
              (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
              (outs),
--- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@ -41,12 +41,12 @@ def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;

 // Load: v128.load
 multiclass SIMDLoad<ValueType vec_t> {
-  let mayLoad = 1 in
+  let mayLoad = 1, UseNamedOperandTable = 1 in
  defm LOAD_#vec_t :
-    SIMD_I<(outs V128:$dst), (ins P2Align:$align, offset32_op:$off, I32:$addr),
-           (outs), (ins P2Align:$align, offset32_op:$off), [],
-           "v128.load\t$dst, ${off}(${addr})$align",
-           "v128.load\t$off$align", 0>;
+    SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+           "v128.load\t$dst, ${off}(${addr})$p2align",
+           "v128.load\t$off$p2align", 0>;
 }

 foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
@ -65,12 +65,12 @@ def : LoadPatExternSymOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;

 // Store: v128.store
 multiclass SIMDStore<ValueType vec_t> {
-  let mayStore = 1 in
+  let mayStore = 1, UseNamedOperandTable = 1 in
  defm STORE_#vec_t :
-    SIMD_I<(outs), (ins P2Align:$align, offset32_op:$off, I32:$addr, V128:$vec),
-           (outs), (ins P2Align:$align, offset32_op:$off), [],
-           "v128.store\t${off}(${addr})$align, $vec",
-           "v128.store\t$off$align", 1>;
+    SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
+           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+           "v128.store\t${off}(${addr})$p2align, $vec",
+           "v128.store\t$off$p2align", 1>;
 }

 foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@ -70,13 +70,16 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(

  // If this is the address operand of a load or store, make it relative to SP
  // and fold the frame offset directly in.
-  if ((MI.mayLoad() && FIOperandNum == WebAssembly::LoadAddressOperandNo) ||
-      (MI.mayStore() && FIOperandNum == WebAssembly::StoreAddressOperandNo)) {
-    assert(FrameOffset >= 0 && MI.getOperand(FIOperandNum - 1).getImm() >= 0);
-    int64_t Offset = MI.getOperand(FIOperandNum - 1).getImm() + FrameOffset;
+  unsigned AddrOperandNum = WebAssembly::getNamedOperandIdx(
+      MI.getOpcode(), WebAssembly::OpName::addr);
+  if (AddrOperandNum == FIOperandNum) {
+    unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx(
+        MI.getOpcode(), WebAssembly::OpName::off);
+    assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0);
+    int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset;

    if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) {
-      MI.getOperand(FIOperandNum - 1).setImm(Offset);
+      MI.getOperand(OffsetOperandNum).setImm(Offset);
      MI.getOperand(FIOperandNum)
          .ChangeToRegister(FrameRegister, /*IsDef=*/false);
      return;
--- a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
@ -13,6 +13,7 @@

 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "WebAssembly.h"
+#include "WebAssemblyInstrInfo.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@ -83,114 +84,11 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {

  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
-      switch (MI.getOpcode()) {
-      case WebAssembly::LOAD_I32:
-      case WebAssembly::LOAD_I64:
-      case WebAssembly::LOAD_F32:
-      case WebAssembly::LOAD_F64:
-      case WebAssembly::LOAD_v16i8:
-      case WebAssembly::LOAD_v8i16:
-      case WebAssembly::LOAD_v4i32:
-      case WebAssembly::LOAD_v2i64:
-      case WebAssembly::LOAD_v4f32:
-      case WebAssembly::LOAD_v2f64:
-      case WebAssembly::LOAD8_S_I32:
-      case WebAssembly::LOAD8_U_I32:
-      case WebAssembly::LOAD16_S_I32:
-      case WebAssembly::LOAD16_U_I32:
-      case WebAssembly::LOAD8_S_I64:
-      case WebAssembly::LOAD8_U_I64:
-      case WebAssembly::LOAD16_S_I64:
-      case WebAssembly::LOAD16_U_I64:
-      case WebAssembly::LOAD32_S_I64:
-      case WebAssembly::LOAD32_U_I64:
-      case WebAssembly::ATOMIC_LOAD_I32:
-      case WebAssembly::ATOMIC_LOAD8_U_I32:
-      case WebAssembly::ATOMIC_LOAD16_U_I32:
-      case WebAssembly::ATOMIC_LOAD_I64:
-      case WebAssembly::ATOMIC_LOAD8_U_I64:
-      case WebAssembly::ATOMIC_LOAD16_U_I64:
-      case WebAssembly::ATOMIC_LOAD32_U_I64:
-      case WebAssembly::ATOMIC_RMW8_U_ADD_I32:
-      case WebAssembly::ATOMIC_RMW8_U_ADD_I64:
-      case WebAssembly::ATOMIC_RMW8_U_SUB_I32:
-      case WebAssembly::ATOMIC_RMW8_U_SUB_I64:
-      case WebAssembly::ATOMIC_RMW8_U_AND_I32:
-      case WebAssembly::ATOMIC_RMW8_U_AND_I64:
-      case WebAssembly::ATOMIC_RMW8_U_OR_I32:
-      case WebAssembly::ATOMIC_RMW8_U_OR_I64:
-      case WebAssembly::ATOMIC_RMW8_U_XOR_I32:
-      case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
-      case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
-      case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
-      case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
-      case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
-      case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
-      case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
-      case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
-      case WebAssembly::ATOMIC_RMW16_U_SUB_I64:
-      case WebAssembly::ATOMIC_RMW16_U_AND_I32:
-      case WebAssembly::ATOMIC_RMW16_U_AND_I64:
-      case WebAssembly::ATOMIC_RMW16_U_OR_I32:
-      case WebAssembly::ATOMIC_RMW16_U_OR_I64:
-      case WebAssembly::ATOMIC_RMW16_U_XOR_I32:
-      case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
-      case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
-      case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
-      case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
-      case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
-      case WebAssembly::ATOMIC_RMW_ADD_I32:
-      case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
-      case WebAssembly::ATOMIC_RMW_SUB_I32:
-      case WebAssembly::ATOMIC_RMW32_U_SUB_I64:
-      case WebAssembly::ATOMIC_RMW_AND_I32:
-      case WebAssembly::ATOMIC_RMW32_U_AND_I64:
-      case WebAssembly::ATOMIC_RMW_OR_I32:
-      case WebAssembly::ATOMIC_RMW32_U_OR_I64:
-      case WebAssembly::ATOMIC_RMW_XOR_I32:
-      case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
-      case WebAssembly::ATOMIC_RMW_XCHG_I32:
-      case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
-      case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
-      case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
-      case WebAssembly::ATOMIC_RMW_ADD_I64:
-      case WebAssembly::ATOMIC_RMW_SUB_I64:
-      case WebAssembly::ATOMIC_RMW_AND_I64:
-      case WebAssembly::ATOMIC_RMW_OR_I64:
-      case WebAssembly::ATOMIC_RMW_XOR_I64:
-      case WebAssembly::ATOMIC_RMW_XCHG_I64:
-      case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
-      case WebAssembly::ATOMIC_NOTIFY:
-      case WebAssembly::ATOMIC_WAIT_I32:
-      case WebAssembly::ATOMIC_WAIT_I64:
-        rewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo);
-        break;
-      case WebAssembly::STORE_I32:
-      case WebAssembly::STORE_I64:
-      case WebAssembly::STORE_F32:
-      case WebAssembly::STORE_F64:
-      case WebAssembly::STORE_v16i8:
-      case WebAssembly::STORE_v8i16:
-      case WebAssembly::STORE_v4i32:
-      case WebAssembly::STORE_v2i64:
-      case WebAssembly::STORE_v4f32:
-      case WebAssembly::STORE_v2f64:
-      case WebAssembly::STORE8_I32:
-      case WebAssembly::STORE16_I32:
-      case WebAssembly::STORE8_I64:
-      case WebAssembly::STORE16_I64:
-      case WebAssembly::STORE32_I64:
-      case WebAssembly::ATOMIC_STORE_I32:
-      case WebAssembly::ATOMIC_STORE8_I32:
-      case WebAssembly::ATOMIC_STORE16_I32:
-      case WebAssembly::ATOMIC_STORE_I64:
-      case WebAssembly::ATOMIC_STORE8_I64:
-      case WebAssembly::ATOMIC_STORE16_I64:
-      case WebAssembly::ATOMIC_STORE32_I64:
-        rewriteP2Align(MI, WebAssembly::StoreP2AlignOperandNo);
-        break;
-      default:
-        break;
+      int16_t P2AlignOpNum = WebAssembly::getNamedOperandIdx(
+          MI.getOpcode(), WebAssembly::OpName::p2align);
+      if (P2AlignOpNum != -1) {
+        rewriteP2Align(MI, P2AlignOpNum);
+        Changed = true;
      }
    }
  }
--- a/test/CodeGen/WebAssembly/bulk-memory.ll
+++ b/test/CodeGen/WebAssembly/bulk-memory.ll
@ -140,3 +140,68 @@ define void @memset_1024(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 0)
  ret void
 }
+
+; The following tests check that frame index elimination works for
+; bulk memory instructions. The stack pointer is bumped by 16 instead
+; of 10 because the stack pointer in WebAssembly is currently always
+; 16-byte aligned, even in leaf functions, although it is not written
+; back to the global in this case.
+
+; TODO: Change TransientStackAlignment to 1 to avoid this extra
+; arithmetic. This will require forcing the use of StackAlignment in
+; PrologEpilogInserter.cpp when
+; WebAssemblyFrameLowering::needsSPWriteback would be true.
+
+; CHECK-LABEL: memcpy_alloca_src:
+; NO-BULK-MEM-NOT: memory.copy
+; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
+; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
+; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
+; BULK-MEM-NEXT: return
+define void @memcpy_alloca_src(i8* %dst) {
+  %a = alloca [10 x i8]
+  %p = bitcast [10 x i8]* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %p, i32 10, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: memcpy_alloca_dst:
+; NO-BULK-MEM-NOT: memory.copy
+; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
+; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
+; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
+; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
+; BULK-MEM-NEXT: return
+define void @memcpy_alloca_dst(i8* %src) {
+  %a = alloca [10 x i8]
+  %p = bitcast [10 x i8]* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %src, i32 10, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: memset_alloca:
+; NO-BULK-MEM-NOT: memory.fill
+; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
+; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
+; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
+; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
+; BULK-MEM-NEXT: return
+define void @memset_alloca(i8 %val) {
+  %a = alloca [10 x i8]
+  %p = bitcast [10 x i8]* %a to i8*
+  call void @llvm.memset.p0i8.i32(i8* %p, i8 %val, i32 10, i1 false)
+  ret void
+}