[WebAssembly] Adding 64-bit versions of __stack_pointer and other globals

There are six such globals; under wasm64 all of them are 64-bit except __table_base, which remains 32-bit. Differential Revision: https://reviews.llvm.org/D82130
2025-01-31 20:51:52 +01:00 · 2020-06-15 15:31:10 -07:00 · 2020-06-15 15:31:10 -07:00 · 3e722aec8c
commit 3e722aec8c
parent 3c623abe2f
14 changed files with 269 additions and 182 deletions
--- a/include/llvm/BinaryFormat/Wasm.h
+++ b/include/llvm/BinaryFormat/Wasm.h
@ -254,11 +254,13 @@ enum : unsigned {
  WASM_OPCODE_GLOBAL_GET = 0x23,
  WASM_OPCODE_GLOBAL_SET = 0x24,
  WASM_OPCODE_I32_STORE = 0x36,
+  WASM_OPCODE_I64_STORE = 0x37,
  WASM_OPCODE_I32_CONST = 0x41,
  WASM_OPCODE_I64_CONST = 0x42,
  WASM_OPCODE_F32_CONST = 0x43,
  WASM_OPCODE_F64_CONST = 0x44,
  WASM_OPCODE_I32_ADD = 0x6a,
+  WASM_OPCODE_I64_ADD = 0x7c,
  WASM_OPCODE_REF_NULL = 0xd0,
 };

--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@ -434,9 +434,12 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
        // GetExternalSymbolSymbol does, since if there's no code that
        // refers to this symbol, we have to set it here.
        SPSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
-        // FIXME: need to check subtarget to see if its wasm64, but we
-        // can't cast to WebAssemblySubtarget here.
-        SPSym->setGlobalType(wasm::WasmGlobalType{wasm::WASM_TYPE_I32, true});
+        SPSym->setGlobalType(wasm::WasmGlobalType{
+            uint8_t(Asm->getSubtargetInfo().getTargetTriple().getArch() ==
+                            Triple::wasm64
+                        ? wasm::WASM_TYPE_I64
+                        : wasm::WASM_TYPE_I32),
+            true});
        DIELoc *Loc = new (DIEValueAllocator) DIELoc;
        addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
        addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.WasmLoc.Kind);
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@ -87,8 +87,8 @@ bool WebAssemblyFrameLowering::needsSPForLocalFrame(
 }

 // In function with EH pads, we need to make a copy of the value of
-// __stack_pointer global in SP32 register, in order to use it when restoring
-// __stack_pointer after an exception is caught.
+// __stack_pointer global in SP32/64 register, in order to use it when
+// restoring __stack_pointer after an exception is caught.
 bool WebAssemblyFrameLowering::needsPrologForEH(
    const MachineFunction &MF) const {
  auto EHType = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType();
@ -123,6 +123,57 @@ bool WebAssemblyFrameLowering::needsSPWriteback(
  return needsSPForLocalFrame(MF) && !CanUseRedZone;
 }

+unsigned WebAssemblyFrameLowering::getSPReg(const MachineFunction &MF) {
+  return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+             ? WebAssembly::SP64  // hasAddr64(): 64-bit stack pointer reg
+             : WebAssembly::SP32; // otherwise: 32-bit stack pointer reg
+}
+
+unsigned WebAssemblyFrameLowering::getFPReg(const MachineFunction &MF) {
+  return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+             ? WebAssembly::FP64  // hasAddr64(): 64-bit frame pointer reg
+             : WebAssembly::FP32; // otherwise: 32-bit frame pointer reg
+}
+
+unsigned
+WebAssemblyFrameLowering::getOpcConst(const MachineFunction &MF) {
+  return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+             ? WebAssembly::CONST_I64  // hasAddr64(): 64-bit constant
+             : WebAssembly::CONST_I32; // otherwise: 32-bit constant
+}
+
+unsigned WebAssemblyFrameLowering::getOpcAdd(const MachineFunction &MF) {
+  return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+             ? WebAssembly::ADD_I64  // hasAddr64(): 64-bit add
+             : WebAssembly::ADD_I32; // otherwise: 32-bit add
+}
+
+unsigned WebAssemblyFrameLowering::getOpcSub(const MachineFunction &MF) {
+  return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+             ? WebAssembly::SUB_I64  // hasAddr64(): 64-bit subtract
+             : WebAssembly::SUB_I32; // otherwise: 32-bit subtract
+}
+
+unsigned WebAssemblyFrameLowering::getOpcAnd(const MachineFunction &MF) {
+  return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+             ? WebAssembly::AND_I64  // hasAddr64(): 64-bit bitwise and
+             : WebAssembly::AND_I32; // otherwise: 32-bit bitwise and
+}
+
+unsigned
+WebAssemblyFrameLowering::getOpcGlobGet(const MachineFunction &MF) {
+  return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+             ? WebAssembly::GLOBAL_GET_I64  // hasAddr64(): i64 global.get
+             : WebAssembly::GLOBAL_GET_I32; // otherwise: i32 global.get
+}
+
+unsigned
+WebAssemblyFrameLowering::getOpcGlobSet(const MachineFunction &MF) {
+  return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+             ? WebAssembly::GLOBAL_SET_I64  // hasAddr64(): i64 global.set
+             : WebAssembly::GLOBAL_SET_I32; // otherwise: i32 global.set
+}
+
 void WebAssemblyFrameLowering::writeSPToGlobal(
    unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const {
@ -130,7 +181,8 @@ void WebAssemblyFrameLowering::writeSPToGlobal(

  const char *ES = "__stack_pointer";
  auto *SPSymbol = MF.createExternalSymbolName(ES);
-  BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32))
+
+  BuildMI(MBB, InsertStore, DL, TII->get(getOpcGlobSet(MF)))
      .addExternalSymbol(SPSymbol)
      .addReg(SrcReg);
 }
@ -141,11 +193,12 @@ WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
    MachineBasicBlock::iterator I) const {
  assert(!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) &&
         "Call frame pseudos should only be used for dynamic stack adjustment");
-  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  const auto *TII = ST.getInstrInfo();
  if (I->getOpcode() == TII->getCallFrameDestroyOpcode() &&
      needsSPWriteback(MF)) {
    DebugLoc DL = I->getDebugLoc();
-    writeSPToGlobal(WebAssembly::SP32, MF, MBB, I, DL);
+    writeSPToGlobal(getSPReg(MF), MF, MBB, I, DL);
  }
  return MBB.erase(I);
 }
@ -161,7 +214,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
    return;
  uint64_t StackSize = MFI.getStackSize();

-  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  const auto *TII = ST.getInstrInfo();
  auto &MRI = MF.getRegInfo();

  auto InsertPt = MBB.begin();
@ -172,13 +226,13 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,

  const TargetRegisterClass *PtrRC =
      MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
-  unsigned SPReg = WebAssembly::SP32;
+  unsigned SPReg = getSPReg(MF);
  if (StackSize)
    SPReg = MRI.createVirtualRegister(PtrRC);

  const char *ES = "__stack_pointer";
  auto *SPSymbol = MF.createExternalSymbolName(ES);
-  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg)
+  BuildMI(MBB, InsertPt, DL, TII->get(getOpcGlobGet(MF)), SPReg)
      .addExternalSymbol(SPSymbol);

  bool HasBP = hasBP(MF);
@ -192,32 +246,30 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
  if (StackSize) {
    // Subtract the frame size
    Register OffsetReg = MRI.createVirtualRegister(PtrRC);
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg)
        .addImm(StackSize);
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32),
-            WebAssembly::SP32)
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcSub(MF)), getSPReg(MF))
        .addReg(SPReg)
        .addReg(OffsetReg);
  }
  if (HasBP) {
    Register BitmaskReg = MRI.createVirtualRegister(PtrRC);
    Align Alignment = MFI.getMaxAlign();
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg)
-        .addImm((int)~(Alignment.value() - 1));
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32),
-            WebAssembly::SP32)
-        .addReg(WebAssembly::SP32)
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), BitmaskReg)
+        .addImm((int64_t) ~(Alignment.value() - 1));
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcAnd(MF)), getSPReg(MF))
+        .addReg(getSPReg(MF))
        .addReg(BitmaskReg);
  }
  if (hasFP(MF)) {
    // Unlike most conventional targets (where FP points to the saved FP),
    // FP points to the bottom of the fixed-size locals, so we can use positive
    // offsets in load/store instructions.
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32)
-        .addReg(WebAssembly::SP32);
+    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), getFPReg(MF))
+        .addReg(getSPReg(MF));
  }
  if (StackSize && needsSPWriteback(MF)) {
-    writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPt, DL);
+    writeSPToGlobal(getSPReg(MF), MF, MBB, InsertPt, DL);
  }
 }

@ -226,7 +278,8 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
  uint64_t StackSize = MF.getFrameInfo().getStackSize();
  if (!needsSP(MF) || !needsSPWriteback(MF))
    return;
-  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+  const auto *TII = ST.getInstrInfo();
  auto &MRI = MF.getRegInfo();
  auto InsertPt = MBB.getFirstTerminator();
  DebugLoc DL;
@ -237,6 +290,7 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
  // Restore the stack pointer. If we had fixed-size locals, add the offset
  // subtracted in the prolog.
  unsigned SPReg = 0;
+  unsigned SPFPReg = hasFP(MF) ? getFPReg(MF) : getSPReg(MF);
  if (hasBP(MF)) {
    auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
    SPReg = FI->getBasePointerVreg();
@ -244,16 +298,17 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
    const TargetRegisterClass *PtrRC =
        MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
    Register OffsetReg = MRI.createVirtualRegister(PtrRC);
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg)
        .addImm(StackSize);
-    // In the epilog we don't need to write the result back to the SP32 physreg
-    // because it won't be used again. We can use a stackified register instead.
+    // In the epilog we don't need to write the result back to the SP32/64
+    // physreg because it won't be used again. We can use a stackified register
+    // instead.
    SPReg = MRI.createVirtualRegister(PtrRC);
-    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg)
-        .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32)
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcAdd(MF)), SPReg)
+        .addReg(SPFPReg)
        .addReg(OffsetReg);
  } else {
-    SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32;
+    SPReg = SPFPReg;
  }

  writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL);
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@ -53,6 +53,15 @@ public:
                       MachineBasicBlock::iterator &InsertStore,
                       const DebugLoc &DL) const;

+  static unsigned getSPReg(const MachineFunction &MF);      // SP32/64
+  static unsigned getFPReg(const MachineFunction &MF);      // FP32/64
+  static unsigned getOpcConst(const MachineFunction &MF);   // CONST_I32/64
+  static unsigned getOpcAdd(const MachineFunction &MF);     // ADD_I32/64
+  static unsigned getOpcSub(const MachineFunction &MF);     // SUB_I32/64
+  static unsigned getOpcAnd(const MachineFunction &MF);     // AND_I32/64
+  static unsigned getOpcGlobGet(const MachineFunction &MF); // GLOBAL_GET_I32/64
+  static unsigned getOpcGlobSet(const MachineFunction &MF); // GLOBAL_SET_I32/64
+
 private:
  bool hasBP(const MachineFunction &MF) const;
  bool needsSPForLocalFrame(const MachineFunction &MF) const;
--- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@ -77,6 +77,13 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
    return;
  }

+  MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+  auto GlobalGetIns = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
+                                        : WebAssembly::GLOBAL_GET_I32;
+  auto ConstIns =
+      PtrVT == MVT::i64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
+  auto AddIns = PtrVT == MVT::i64 ? WebAssembly::ADD_I64 : WebAssembly::ADD_I32;
+
  // Few custom selection stuff.
  SDLoc DL(Node);
  MachineFunction &MF = CurDAG->getMachineFunction();
@ -140,20 +147,16 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
                         false);
    }

-    MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
-    assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
    SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT);
    SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress(
        GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0);

-    MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32,
-                                                    DL, MVT::i32, TLSBaseSym);
-    MachineSDNode *TLSOffset = CurDAG->getMachineNode(
-        WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym);
-    MachineSDNode *TLSAddress =
-        CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32,
-                               SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
+    MachineSDNode *TLSBase =
+        CurDAG->getMachineNode(GlobalGetIns, DL, PtrVT, TLSBaseSym);
+    MachineSDNode *TLSOffset =
+        CurDAG->getMachineNode(ConstIns, DL, PtrVT, TLSOffsetSym);
+    MachineSDNode *TLSAddress = CurDAG->getMachineNode(
+        AddIns, DL, PtrVT, SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
    ReplaceNode(Node, TLSAddress);
    return;
  }
@ -162,22 +165,16 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
    switch (IntNo) {
    case Intrinsic::wasm_tls_size: {
-      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
-      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
      MachineSDNode *TLSSize = CurDAG->getMachineNode(
-          WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
-          CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32));
+          GlobalGetIns, DL, PtrVT,
+          CurDAG->getTargetExternalSymbol("__tls_size", PtrVT));
      ReplaceNode(Node, TLSSize);
      return;
    }
    case Intrinsic::wasm_tls_align: {
-      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
-      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
      MachineSDNode *TLSAlign = CurDAG->getMachineNode(
-          WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
-          CurDAG->getTargetExternalSymbol("__tls_align", MVT::i32));
+          GlobalGetIns, DL, PtrVT,
+          CurDAG->getTargetExternalSymbol("__tls_align", PtrVT));
      ReplaceNode(Node, TLSAlign);
      return;
    }
@ -188,11 +185,8 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    case Intrinsic::wasm_tls_base: {
-      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
-      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
      MachineSDNode *TLSBase = CurDAG->getMachineNode(
-          WebAssembly::GLOBAL_GET_I32, DL, MVT::i32, MVT::Other,
+          GlobalGetIns, DL, PtrVT, MVT::Other,
          CurDAG->getTargetExternalSymbol("__tls_base", PtrVT),
          Node->getOperand(0));
      ReplaceNode(Node, TLSBase);
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@ -209,6 +209,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+  setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
@ -613,7 +614,11 @@ EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

-  return TargetLowering::getSetCCResultType(DL, C, VT);
+  // So far, all branch instructions in Wasm take an I32 condition.
+  // The default TargetLowering::getSetCCResultType returns the pointer size,
+  // which would be useful to reduce instruction counts when testing
+  // against 64-bit pointers/values if at some point Wasm supports that.
+  return EVT::getIntegerVT(C, 32);
 }

 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
--- a/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@ -408,8 +408,8 @@ bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) {
      ++InsertPos;
    if (InsertPos->getOpcode() == WebAssembly::CATCH)
      ++InsertPos;
-    FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos,
-                                   MBB.begin()->getDebugLoc());
+    FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB,
+                                   InsertPos, MBB.begin()->getDebugLoc());
  }
  return Changed;
 }
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@ -81,8 +81,9 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
        strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0;
    WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
    WasmSym->setGlobalType(wasm::WasmGlobalType{
-        uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64
-                                      : wasm::WASM_TYPE_I32),
+        uint8_t(Subtarget.hasAddr64() && strcmp(Name, "__table_base") != 0
+                    ? wasm::WASM_TYPE_I64
+                    : wasm::WASM_TYPE_I32),
        Mutable});
    return WasmSym;
  }
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@ -248,7 +248,8 @@ static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
  }

  // Check for writes to __stack_pointer global.
-  if (MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 &&
+  if ((MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 ||
+       MI.getOpcode() == WebAssembly::GLOBAL_SET_I64) &&
      strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
    StackPointer = true;

--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@ -88,16 +88,17 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(

  // If this is an address being added to a constant, fold the frame offset
  // into the constant.
-  if (MI.getOpcode() == WebAssembly::ADD_I32) {
+  if (MI.getOpcode() == WebAssemblyFrameLowering::getOpcAdd(MF)) {
    MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum);
    if (OtherMO.isReg()) {
      Register OtherMOReg = OtherMO.getReg();
      if (Register::isVirtualRegister(OtherMOReg)) {
        MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg);
        // TODO: For now we just opportunistically do this in the case where
-        // the CONST_I32 happens to have exactly one def and one use. We
+        // the CONST_I32/64 happens to have exactly one def and one use. We
        // should generalize this to optimize in more cases.
-        if (Def && Def->getOpcode() == WebAssembly::CONST_I32 &&
+        if (Def && Def->getOpcode() ==
+              WebAssemblyFrameLowering::getOpcConst(MF) &&
            MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) {
          MachineOperand &ImmMO = Def->getOperand(1);
          ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
@ -109,20 +110,22 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
    }
  }

-  // Otherwise create an i32.add SP, offset and make it the operand.
+  // Otherwise create an i32/64.add SP, offset and make it the operand.
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();

  unsigned FIRegOperand = FrameRegister;
  if (FrameOffset) {
-    // Create i32.add SP, offset and make it the operand.
+    // Create i32/64.add SP, offset and make it the operand.
    const TargetRegisterClass *PtrRC =
        MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
    Register OffsetOp = MRI.createVirtualRegister(PtrRC);
-    BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32),
+    BuildMI(MBB, *II, II->getDebugLoc(),
+            TII->get(WebAssemblyFrameLowering::getOpcConst(MF)),
            OffsetOp)
        .addImm(FrameOffset);
    FIRegOperand = MRI.createVirtualRegister(PtrRC);
-    BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32),
+    BuildMI(MBB, *II, II->getDebugLoc(),
+            TII->get(WebAssemblyFrameLowering::getOpcAdd(MF)),
            FIRegOperand)
        .addReg(FrameRegister)
        .addReg(OffsetOp);
--- a/test/CodeGen/WebAssembly/stack-alignment.ll
+++ b/test/CodeGen/WebAssembly/stack-alignment.ll
@ -1,22 +1,20 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
-
-target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-target triple = "wasm32-unknown-unknown"
+; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=32 %s
+; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=64 %s

 declare void @somefunc(i32*)

 ; CHECK-LABEL: underalign:
 ; CHECK:      global.get $push[[L1:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: i32.const $push[[L2:.+]]=, 16
-; CHECK-NEXT: i32.sub   $push[[L10:.+]]=, $pop[[L1]], $pop[[L2]]
+; CHECK-NEXT: i[[PTR]].const $push[[L2:.+]]=, 16
+; CHECK-NEXT: i[[PTR]].sub $push[[L10:.+]]=, $pop[[L1]], $pop[[L2]]
 ; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L10]]

 ; CHECK:      local.get $push[[L3:.+]]=, [[SP]]{{$}}
-; CHECK:      i32.add   $push[[underaligned:.+]]=, $pop[[L3]], $pop{{.+}}
-; CHECK-NEXT: call      somefunc, $pop[[underaligned]]
+; CHECK:      i[[PTR]].add $push[[underaligned:.+]]=, $pop[[L3]], $pop{{.+}}
+; CHECK-NEXT: call somefunc, $pop[[underaligned]]

 ; CHECK:      local.get $push[[M4:.+]]=, [[SP]]{{$}}
-; CHECK:      i32.add   $push[[L5:.+]]=, $pop[[M4]], $pop{{.+}}
+; CHECK:      i[[PTR]].add $push[[L5:.+]]=, $pop[[M4]], $pop{{.+}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L5]]
 define void @underalign() {
 entry:
@ -27,17 +25,17 @@ entry:

 ; CHECK-LABEL: overalign:
 ; CHECK:      global.get $push[[L10:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: local.tee  $push[[L9:.+]]=, [[BP:.+]], $pop[[L10]]
-; CHECK-NEXT: i32.const  $push[[L2:.+]]=, 32
-; CHECK-NEXT: i32.sub    $push[[L8:.+]]=, $pop[[L9]], $pop[[L2]]
-; CHECK-NEXT: i32.const  $push[[L3:.+]]=, -32
-; CHECK-NEXT: i32.and    $push[[L7:.+]]=, $pop[[L8]], $pop[[L3]]
-; CHECK-NEXT: local.tee  $push{{.+}}=, [[SP:.+]], $pop[[L7]]
+; CHECK-NEXT: local.tee $push[[L9:.+]]=, [[BP:.+]], $pop[[L10]]
+; CHECK-NEXT: i[[PTR]].const $push[[L2:.+]]=, 32
+; CHECK-NEXT: i[[PTR]].sub $push[[L8:.+]]=, $pop[[L9]], $pop[[L2]]
+; CHECK-NEXT: i[[PTR]].const $push[[L3:.+]]=, -32
+; CHECK-NEXT: i[[PTR]].and $push[[L7:.+]]=, $pop[[L8]], $pop[[L3]]
+; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L7]]

-; CHECK:      local.get  $push[[M5:.+]]=, [[SP]]{{$}}
-; CHECK:      call       somefunc, $pop[[M5]]{{$}}
+; CHECK:      local.get $push[[M5:.+]]=, [[SP]]{{$}}
+; CHECK:      call somefunc, $pop[[M5]]{{$}}

-; CHECK:      local.get  $push[[M6:.+]]=, [[BP]]{{$}}
+; CHECK:      local.get $push[[M6:.+]]=, [[BP]]{{$}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[M6]]
 define void @overalign() {
 entry:
@ -48,19 +46,19 @@ entry:

 ; CHECK-LABEL: over_and_normal_align:
 ; CHECK:      global.get $push[[L14:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: local.tee  $push[[L13:.+]]=, [[BP:.+]], $pop[[L14]]
-; CHECK:      i32.sub    $push[[L12:.+]]=, $pop[[L13]], $pop{{.+}}
-; CHECK:      i32.and    $push[[L11:.+]]=, $pop[[L12]], $pop{{.+}}
-; CHECK-NEXT: local.tee  $push{{.+}}=, [[SP:.+]], $pop[[L11]]
+; CHECK-NEXT: local.tee $push[[L13:.+]]=, [[BP:.+]], $pop[[L14]]
+; CHECK:      i[[PTR]].sub $push[[L12:.+]]=, $pop[[L13]], $pop{{.+}}
+; CHECK:      i[[PTR]].and $push[[L11:.+]]=, $pop[[L12]], $pop{{.+}}
+; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L11]]

 ; CHECK:      local.get  $push[[M6:.+]]=, [[SP]]{{$}}
-; CHECK:      i32.add    $push[[L6:.+]]=, $pop[[M6]], $pop{{.+}}
-; CHECK-NEXT: call       somefunc, $pop[[L6]]
-; CHECK:      local.get  $push[[M7:.+]]=, [[SP]]{{$}}
-; CHECK:      i32.add    $push[[L8:.+]]=, $pop[[M7]], $pop{{.+}}
-; CHECK-NEXT: call       somefunc, $pop[[L8]]
+; CHECK:      i[[PTR]].add $push[[L6:.+]]=, $pop[[M6]], $pop{{.+}}
+; CHECK-NEXT: call somefunc, $pop[[L6]]
+; CHECK:      local.get $push[[M7:.+]]=, [[SP]]{{$}}
+; CHECK:      i[[PTR]].add $push[[L8:.+]]=, $pop[[M7]], $pop{{.+}}
+; CHECK-NEXT: call somefunc, $pop[[L8]]

-; CHECK:      local.get  $push[[L6:.+]]=, [[BP]]{{$}}
+; CHECK:      local.get $push[[L6:.+]]=, [[BP]]{{$}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L6]]
 define void @over_and_normal_align() {
 entry:
@ -73,14 +71,14 @@ entry:

 ; CHECK-LABEL: dynamic_overalign:
 ; CHECK:      global.get $push[[L18:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: local.tee  $push[[L17:.+]]=, [[SP:.+]], $pop[[L18]]
-; CHECK-NEXT: local.set  [[BP:.+]], $pop[[L17]]
-; CHECK:      local.tee  $push{{.+}}=, [[SP_2:.+]], $pop{{.+}}
+; CHECK-NEXT: local.tee $push[[L17:.+]]=, [[SP:.+]], $pop[[L18]]
+; CHECK-NEXT: local.set [[BP:.+]], $pop[[L17]]
+; CHECK:      local.tee $push{{.+}}=, [[SP_2:.+]], $pop{{.+}}

-; CHECK:      local.get  $push[[M8:.+]]=, [[SP_2]]{{$}}
-; CHECK:      call       somefunc, $pop[[M8]]
+; CHECK:      local.get $push[[M8:.+]]=, [[SP_2]]{{$}}
+; CHECK:      call somefunc, $pop[[M8]]

-; CHECK:      local.get  $push[[M9:.+]]=, [[BP]]{{$}}
+; CHECK:      local.get $push[[M9:.+]]=, [[BP]]{{$}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[M9]]
 define void @dynamic_overalign(i32 %num) {
 entry:
@ -91,18 +89,18 @@ entry:

 ; CHECK-LABEL: overalign_and_dynamic:
 ; CHECK:      global.get $push[[L21:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: local.tee  $push[[L20:.+]]=, [[BP:.+]], $pop[[L21]]
-; CHECK:      i32.sub    $push[[L19:.+]]=, $pop[[L20]], $pop{{.+}}
-; CHECK:      i32.and    $push[[L18:.+]]=, $pop[[L19]], $pop{{.+}}
-; CHECK:      local.tee  $push{{.+}}=, [[FP:.+]], $pop[[L18]]
-; CHECK:      local.get  $push[[M10:.+]]=, [[FP]]{{$}}
-; CHECK:      i32.sub    $push[[L16:.+]]=, $pop[[M10]], $pop{{.+}}
-; CHECK-NEXT: local.tee  $push{{.+}}=, [[SP:.+]], $pop[[L16]]
+; CHECK-NEXT: local.tee $push[[L20:.+]]=, [[BP:.+]], $pop[[L21]]
+; CHECK:      i[[PTR]].sub $push[[L19:.+]]=, $pop[[L20]], $pop{{.+}}
+; CHECK:      i[[PTR]].and $push[[L18:.+]]=, $pop[[L19]], $pop{{.+}}
+; CHECK:      local.tee $push{{.+}}=, [[FP:.+]], $pop[[L18]]
+; CHECK:      local.get $push[[M10:.+]]=, [[FP]]{{$}}
+; CHECK:      i[[PTR]].sub $push[[L16:.+]]=, $pop[[M10]], $pop{{.+}}
+; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L16]]

-; CHECK:      local.get  $push[[over:.+]]=, [[FP]]
-; CHECK-NEXT: call       somefunc, $pop[[over]]
-; CHECK:      local.get  $push[[another:.+]]=, [[SP]]
-; CHECK-NEXT: call       somefunc, $pop[[another]]
+; CHECK:      local.get $push[[over:.+]]=, [[FP]]
+; CHECK-NEXT: call somefunc, $pop[[over]]
+; CHECK:      local.get $push[[another:.+]]=, [[SP]]
+; CHECK-NEXT: call somefunc, $pop[[another]]

 ; CHECK:      local.get  $push[[M11:.+]]=, [[BP]]{{$}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[M11]]
@ -117,23 +115,23 @@ entry:

 ; CHECK-LABEL: overalign_static_and_dynamic:
 ; CHECK:      global.get $push[[L26:.+]]=, __stack_pointer{{$}}
-; CHECK-NEXT: local.tee  $push[[L25:.+]]=, [[BP:.+]], $pop[[L26]]
-; CHECK:      i32.sub    $push[[L24:.+]]=, $pop[[L25]], $pop{{.+}}
-; CHECK:      i32.and    $push[[L23:.+]]=, $pop[[L24]], $pop{{.+}}
-; CHECK:      local.tee  $push{{.+}}=, [[FP:.+]], $pop[[L23]]
-; CHECK:      local.get  $push[[M12:.+]]=, [[FP]]{{$}}
-; CHECK:      i32.sub    $push[[L21:.+]]=, $pop[[M12]], $pop{{.+}}
-; CHECK-NEXT: local.tee  $push{{.+}}=, [[SP:.+]], $pop[[L21]]
+; CHECK-NEXT: local.tee $push[[L25:.+]]=, [[BP:.+]], $pop[[L26]]
+; CHECK:      i[[PTR]].sub $push[[L24:.+]]=, $pop[[L25]], $pop{{.+}}
+; CHECK:      i[[PTR]].and $push[[L23:.+]]=, $pop[[L24]], $pop{{.+}}
+; CHECK:      local.tee $push{{.+}}=, [[FP:.+]], $pop[[L23]]
+; CHECK:      local.get $push[[M12:.+]]=, [[FP]]{{$}}
+; CHECK:      i[[PTR]].sub $push[[L21:.+]]=, $pop[[M12]], $pop{{.+}}
+; CHECK-NEXT: local.tee $push{{.+}}=, [[SP:.+]], $pop[[L21]]

-; CHECK:      local.get  $push[[L19:.+]]=, [[FP]]
-; CHECK:      local.tee  $push[[L18:.+]]=, [[FP_2:.+]], $pop[[L19]]
-; CHECK:      i32.add    $push[[over:.+]]=, $pop[[L18]], $pop{{.+}}
-; CHECK-NEXT: call       somefunc, $pop[[over]]
-; CHECK:      local.get  $push[[M12:.+]]=, [[SP]]
-; CHECK:      call       somefunc, $pop[[M12]]
-; CHECK:      local.get  $push[[M13:.+]]=, [[FP_2]]
-; CHECK:      i32.add    $push[[static:.+]]=, $pop[[M13]], $pop{{.+}}
-; CHECK-NEXT: call       somefunc, $pop[[static]]
+; CHECK:      local.get $push[[L19:.+]]=, [[FP]]
+; CHECK:      local.tee $push[[L18:.+]]=, [[FP_2:.+]], $pop[[L19]]
+; CHECK:      i[[PTR]].add $push[[over:.+]]=, $pop[[L18]], $pop{{.+}}
+; CHECK-NEXT: call somefunc, $pop[[over]]
+; CHECK:      local.get $push[[M12:.+]]=, [[SP]]
+; CHECK:      call somefunc, $pop[[M12]]
+; CHECK:      local.get $push[[M13:.+]]=, [[FP_2]]
+; CHECK:      i[[PTR]].add $push[[static:.+]]=, $pop[[M13]], $pop{{.+}}
+; CHECK-NEXT: call somefunc, $pop[[static]]

 ; CHECK:      local.get  $push[[M14:.+]]=, [[BP]]{{$}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[M14]]
--- a/test/CodeGen/WebAssembly/userstack.ll
+++ b/test/CodeGen/WebAssembly/userstack.ll
@ -1,18 +1,16 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
-
-target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-target triple = "wasm32-unknown-unknown"
+; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=32 %s
+; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=64 %s

 declare void @ext_func(i64* %ptr)
 declare void @ext_func_i32(i32* %ptr)

 ; CHECK-LABEL: alloca32:
 ; Check that there is an extra local for the stack pointer.
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @alloca32() noredzone {
 ; CHECK-NEXT: global.get $push[[L2:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L9:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L3:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L9:.+]]=, $pop[[L2]], $pop[[L3]]
 ; CHECK-NEXT: local.tee $push[[L8:.+]]=, [[SP:.+]], $pop[[L9]]{{$}}
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L8]]{{$}}
 %retval = alloca i32
@ -21,18 +19,18 @@ define void @alloca32() noredzone {
 ; CHECK: i32.store 12($pop[[L4]]), $pop[[L0]]
 store i32 0, i32* %retval
 ; CHECK: local.get $push[[L6:.+]]=, [[SP]]{{$}}
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $pop[[L6]], $pop[[L5]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L5:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].add $push[[L7:.+]]=, $pop[[L6]], $pop[[L5]]
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L7]]
 ret void
 }

 ; CHECK-LABEL: alloca3264:
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @alloca3264() {
 ; CHECK: global.get $push[[L3:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L4:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]]
 ; CHECK-NEXT: local.tee $push[[L5:.+]]=, [[SP:.+]], $pop[[L6]]
 %r1 = alloca i32
 %r2 = alloca double
@ -48,17 +46,17 @@ define void @alloca3264() {
 }

 ; CHECK-LABEL: allocarray:
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @allocarray() {
 ; CHECK-NEXT: global.get $push[[L4:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 144{{$}}
- ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, $pop[[L4]], $pop[[L5]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L5:.+]]=, 144{{$}}
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L12:.+]]=, $pop[[L4]], $pop[[L5]]
 ; CHECK-NEXT: local.tee $push[[L11:.+]]=, 0, $pop[[L12]]
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L11]]
 %r = alloca [33 x i32]

- ; CHECK:      i32.const $push{{.+}}=, 24
- ; CHECK-NEXT: i32.add $push[[L3:.+]]=, $pop{{.+}}, $pop{{.+}}
+ ; CHECK:      i[[PTR]].const $push{{.+}}=, 24
+ ; CHECK-NEXT: i[[PTR]].add $push[[L3:.+]]=, $pop{{.+}}, $pop{{.+}}
 ; CHECK-NEXT: i32.const $push[[L1:.+]]=, 1{{$}}
 ; CHECK-NEXT: i32.store 0($pop[[L3]]), $pop[[L1]]{{$}}
 ; CHECK-NEXT: local.get $push[[L4:.+]]=, 0{{$}}
@ -70,16 +68,16 @@ define void @allocarray() {
 store i32 1, i32* %p2

 ; CHECK-NEXT: local.get $push[[L2:.+]]=, [[SP]]{{$}}
- ; CHECK-NEXT: i32.const $push[[L7:.+]]=, 144
- ; CHECK-NEXT: i32.add $push[[L8:.+]]=, $pop[[L2]], $pop[[L7]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L7:.+]]=, 144
+ ; CHECK-NEXT: i[[PTR]].add $push[[L8:.+]]=, $pop[[L2]], $pop[[L7]]
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L8]]
 ret void
 }

 ; CHECK-LABEL: non_mem_use
 define void @non_mem_use(i8** %addr) {
- ; CHECK: i32.const $push[[L2:.+]]=, 48
- ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, {{.+}}, $pop[[L2]]
+ ; CHECK: i[[PTR]].const $push[[L2:.+]]=, 48
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L12:.+]]=, {{.+}}, $pop[[L2]]
 ; CHECK-NEXT: local.tee $push[[L11:.+]]=, [[SP:.+]], $pop[[L12]]
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L11]]
 %buf = alloca [27 x i8], align 16
@ -87,8 +85,8 @@ define void @non_mem_use(i8** %addr) {
 %r2 = alloca i64
 ; %r is at SP+8
 ; CHECK: local.get $push[[L3:.+]]=, [[SP]]
- ; CHECK: i32.const $push[[OFF:.+]]=, 8
- ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, $pop[[L3]], $pop[[OFF]]
+ ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 8
+ ; CHECK-NEXT: i[[PTR]].add $push[[ARG1:.+]]=, $pop[[L3]], $pop[[OFF]]
 ; CHECK-NEXT: call ext_func, $pop[[ARG1]]
 call void @ext_func(i64* %r)
 ; %r2 is at SP+0, no add needed
@ -98,20 +96,20 @@ define void @non_mem_use(i8** %addr) {
 ; Use as a value, but in a store
 ; %buf is at SP+16
 ; CHECK: local.get $push[[L5:.+]]=, [[SP]]
- ; CHECK: i32.const $push[[OFF:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, $pop[[L5]], $pop[[OFF]]
- ; CHECK-NEXT: i32.store 0($pop{{.+}}), $pop[[VAL]]
+ ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].add $push[[VAL:.+]]=, $pop[[L5]], $pop[[OFF]]
+ ; CHECK-NEXT: i[[PTR]].store 0($pop{{.+}}), $pop[[VAL]]
 %gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0
 store i8* %gep, i8** %addr
 ret void
 }

 ; CHECK-LABEL: allocarray_inbounds:
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @allocarray_inbounds() {
 ; CHECK: global.get $push[[L3:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 32{{$}}
- ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, $pop[[L3]], $pop[[L4]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L4:.+]]=, 32{{$}}
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L11:.+]]=, $pop[[L3]], $pop[[L4]]
 ; CHECK-NEXT: local.tee $push[[L10:.+]]=, [[SP:.+]], $pop[[L11]]
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L10]]{{$}}
 %r = alloca [5 x i32]
@ -125,8 +123,8 @@ define void @allocarray_inbounds() {
 store i32 1, i32* %p2
 call void @ext_func(i64* null);
 ; CHECK: call ext_func
- ; CHECK: i32.const $push[[L5:.+]]=, 32{{$}}
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, ${{.+}}, $pop[[L5]]
+ ; CHECK: i[[PTR]].const $push[[L5:.+]]=, 32{{$}}
+ ; CHECK-NEXT: i[[PTR]].add $push[[L7:.+]]=, ${{.+}}, $pop[[L5]]
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L7]]
 ret void
 }
@ -136,7 +134,7 @@ define void @dynamic_alloca(i32 %alloc) {
 ; CHECK: global.get $push[[L13:.+]]=, __stack_pointer{{$}}
 ; CHECK-NEXT: local.tee $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}}
 ; Target independent codegen bumps the stack pointer.
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
 ; Check that SP is written back to memory after decrement
 ; CHECK: global.set __stack_pointer,
 %r = alloca i32, i32 %alloc
@ -152,12 +150,12 @@ define void @dynamic_alloca_redzone(i32 %alloc) {
 ; CHECK: global.get $push[[L13:.+]]=, __stack_pointer{{$}}
 ; CHECK-NEXT: local.tee $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}}
 ; Target independent codegen bumps the stack pointer
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
 %r = alloca i32, i32 %alloc
- ; CHECK-NEXT: local.tee       $push[[L8:.+]]=, {{.+}}, $pop
- ; CHECK: local.get $push[[L7:.+]]=, 0{{$}}
- ; CHECK-NEXT: i32.const       $push[[L6:.+]]=, 0{{$}}
- ; CHECK-NEXT: i32.store       0($pop[[L7]]), $pop[[L6]]{{$}}
+ ; CHECK-NEXT: local.tee $push[[L8:.+]]=, [[SP2:.+]], $pop
+ ; CHECK: local.get $push[[L7:.+]]=, [[SP2]]{{$}}
+ ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 0{{$}}
+ ; CHECK-NEXT: i32.store 0($pop[[L7]]), $pop[[L6]]{{$}}
 store i32 0, i32* %r
 ; CHECK-NEXT: return
 ret void
@ -167,8 +165,8 @@ define void @dynamic_alloca_redzone(i32 %alloc) {
 define void @dynamic_static_alloca(i32 %alloc) noredzone {
 ; Decrement SP in the prolog by the static amount and writeback to memory.
 ; CHECK: global.get $push[[L11:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L12:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L23:.+]]=, $pop[[L11]], $pop[[L12]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L12:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L23:.+]]=, $pop[[L11]], $pop[[L12]]
 ; CHECK-NEXT: local.tee $push[[L22:.+]]=, [[SP:.+]], $pop[[L23]]
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L22]]

@ -181,7 +179,7 @@ define void @dynamic_static_alloca(i32 %alloc) noredzone {
 store volatile i32 101, i32* %static

 ; Decrement SP in the body by the dynamic amount.
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
 ; CHECK: local.tee $push[[L16:.+]]=, [[dynamic_local:.+]], $pop{{.+}}
 ; CHECK: local.tee $push[[L15:.+]]=, [[other:.+]], $pop[[L16]]{{$}}
 ; CHECK: global.set __stack_pointer, $pop[[L15]]{{$}}
@ -201,7 +199,7 @@ define void @dynamic_static_alloca(i32 %alloc) noredzone {
 store volatile i32 103, i32* %dynamic

 ; Decrement SP in the body by the dynamic amount.
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
 ; CHECK: local.tee $push{{.+}}=, [[dynamic2_local:.+]], $pop{{.+}}
 %dynamic.2 = alloca i32, i32 %alloc

@ -224,8 +222,8 @@ define void @dynamic_static_alloca(i32 %alloc) noredzone {

 ; Writeback to memory.
 ; CHECK: local.get $push[[L24:.+]]=, [[FP]]{{$}}
- ; CHECK: i32.const $push[[L18:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L19:.+]]=, $pop[[L24]], $pop[[L18]]
+ ; CHECK: i[[PTR]].const $push[[L18:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].add $push[[L19:.+]]=, $pop[[L24]], $pop[[L18]]
 ; CHECK-NEXT: global.set __stack_pointer, $pop[[L19]]
 ret void
 }
@ -273,11 +271,11 @@ define void @dynamic_alloca_nouse(i32 %alloc) noredzone {
 ; CHECK-LABEL: copytoreg_fi:
 define void @copytoreg_fi(i1 %cond, i32* %b) {
 entry:
- ; CHECK: i32.const $push[[L1:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK: i[[PTR]].const $push[[L1:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]]
 %addr = alloca i32
- ; CHECK: i32.const $push[[OFF:.+]]=, 12
- ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]]
+ ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 12
+ ; CHECK-NEXT: i[[PTR]].add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]]
 ; CHECK-NEXT: local.set [[COPY:.+]], $pop[[ADDR]]
 br label %body
 body:
@ -309,7 +307,7 @@ define void @frameaddress_0() {
 ; Test __builtin_frame_address(1).

 ; CHECK-LABEL: frameaddress_1:
-; CHECK:      i32.const $push0=, 0{{$}}
+; CHECK:      i[[PTR]].const $push0=, 0{{$}}
 ; CHECK-NEXT: call use_i8_star, $pop0{{$}}
 ; CHECK-NEXT: return{{$}}
 define void @frameaddress_1() {
@ -330,6 +328,6 @@ define void @inline_asm() {
  ret void
 }

-; CHECK: .globaltype	__stack_pointer, i32{{$}}
+; CHECK: .globaltype	__stack_pointer, i[[PTR]]{{$}}

 ; TODO: test over-aligned alloca
--- a/test/MC/WebAssembly/stack-ptr.ll
+++ b/test/MC/WebAssembly/stack-ptr.ll
@ -1,6 +1,5 @@
-; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s
-
-target triple = "wasm32-unknown-unknown"
+; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o - | obj2yaml | FileCheck --check-prefixes CHECK,CHK32 %s
+; RUN: llc --mtriple=wasm64-unknown-unknown -filetype=obj %s -o - | obj2yaml | FileCheck --check-prefixes CHECK,CHK64 %s

 ; Function that uses explicit stack, and should generate a reference to
 ; __stack_pointer, along with the corresponding relocation entry.
@ -15,7 +14,8 @@ entry:
 ; CHECK:       - Module:          env
 ; CHECK:         Field:           __stack_pointer
 ; CHECK:         Kind:            GLOBAL
-; CHECK:         GlobalType:      I32
+; CHK32:         GlobalType:      I32
+; CHK64:         GlobalType:      I64
 ; CHECK:         GlobalMutable:   true
 ; CHECK:   - Type:            CODE
 ; CHECK:     Relocations:
--- a/test/MC/WebAssembly/wasm64.s
+++ b/test/MC/WebAssembly/wasm64.s
@ -51,6 +51,11 @@ test:
    i64.const   0
    f32.store   .L.str    # relocatable offset!

+    ### 64-bit SP
+
+    global.get  __stack_pointer
+    drop
+
    end_function

    .section    .rodata..L.str,"",@
@ -62,7 +67,7 @@ test:
    .size       .L.str, 24

    .globaltype myglob64, i64
-
+    .globaltype __stack_pointer, i64


 # CHECK:              .functype       test (i64) -> ()
@ -155,6 +160,11 @@ test:
 # BIN-NEXT:         Kind:            GLOBAL
 # BIN-NEXT:         GlobalType:      I64
 # BIN-NEXT:         GlobalMutable:   true
+# BIN-NEXT:       - Module:          env
+# BIN-NEXT:         Field:           __stack_pointer
+# BIN-NEXT:         Kind:            GLOBAL
+# BIN-NEXT:         GlobalType:      I64
+# BIN-NEXT:         GlobalMutable:   true
 # BIN-NEXT:   - Type:            FUNCTION
 # BIN-NEXT:     FunctionTypes:   [ 0 ]
 # BIN-NEXT:   - Type:            DATACOUNT
@ -179,12 +189,15 @@ test:
 # BIN-NEXT:       - Type:            R_WASM_MEMORY_ADDR_LEB64
 # BIN-NEXT:         Index:           1
 # BIN-NEXT:         Offset:          0x00000078
+# BIN-NEXT:       - Type: R_WASM_GLOBAL_INDEX_LEB
+# BIN-NEXT:         Index: 3
+# BIN-NEXT:         Offset: 0x00000083
 # BIN-NEXT:     Functions:
 # BIN-NEXT:       - Index:           0
 # BIN-NEXT:         Locals:
 # BIN-NEXT:           - Type:            I64
 # BIN-NEXT:             Count:           1
-# BIN-NEXT:         Body:            42002A02001A20002A02001A42808080808080808080002A02001A2380808080002A02001A42002A02808080808080808080001A4300000000420038020043000000002000380200430000000042808080808080808080003802004300000000238080808000380200430000000042003802808080808080808080000B
+# BIN-NEXT:         Body:            42002A02001A20002A02001A42808080808080808080002A02001A2380808080002A02001A42002A02808080808080808080001A4300000000420038020043000000002000380200430000000042808080808080808080003802004300000000238080808000380200430000000042003802808080808080808080002381808080001A0B
 # BIN-NEXT:   - Type:            DATA
 # BIN-NEXT:     Relocations:
 # BIN-NEXT:       - Type:            R_WASM_MEMORY_ADDR_I64
@ -217,6 +230,11 @@ test:
 # BIN-NEXT:         Name:            myglob64
 # BIN-NEXT:         Flags:           [ UNDEFINED ]
 # BIN-NEXT:         Global:          0
+# BIN-NEXT:       - Index:           3
+# BIN-NEXT:         Kind:            GLOBAL
+# BIN-NEXT:         Name:            __stack_pointer
+# BIN-NEXT:         Flags:           [ UNDEFINED ]
+# BIN-NEXT:         Global:          1
 # BIN-NEXT:     SegmentInfo:
 # BIN-NEXT:       - Index:           0
 # BIN-NEXT:         Name:            .rodata..L.str