
[SystemZ] Support stackmaps and patchpoints

This adds back-end support for the @llvm.experimental.stackmap and
@llvm.experimental.patchpoint intrinsics.
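
For illustration, a minimal sketch of how a front end invokes the stackmap intrinsic, mirroring the tests added below (the function name @record_values and the constants are made up):

declare void @llvm.experimental.stackmap(i64, i32, ...)

define void @record_values(i64 %a, i64 %b) {
entry:
  ; Record %a and %b under stackmap ID 3, reserving a 12-byte nop shadow.
  call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
  ret void
}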

llvm-svn: 326611
Ulrich Weigand 2018-03-02 20:39:30 +00:00
parent 68f7d9ae66
commit 6a6a68334a
9 changed files with 1028 additions and 1 deletion


@@ -460,6 +460,14 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    }
    break;

  case TargetOpcode::STACKMAP:
    LowerSTACKMAP(*MI);
    return;

  case TargetOpcode::PATCHPOINT:
    LowerPATCHPOINT(*MI, Lower);
    return;

  default:
    Lower.lower(MI, LoweredMI);
    break;
@@ -467,6 +475,123 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  EmitToStreamer(*OutStreamer, LoweredMI);
}

// Emit the largest nop instruction smaller than or equal to NumBytes
// bytes. Return the size of nop emitted.
static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
                        unsigned NumBytes, const MCSubtargetInfo &STI) {
  if (NumBytes < 2) {
    llvm_unreachable("Zero nops?");
    return 0;
  }
  else if (NumBytes < 4) {
    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCRAsm)
                                  .addImm(0).addReg(SystemZ::R0D), STI);
    return 2;
  }
  else if (NumBytes < 6) {
    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCAsm)
                                  .addImm(0).addReg(0).addImm(0).addReg(0),
                                STI);
    return 4;
  }
  else {
    MCSymbol *DotSym = OutContext.createTempSymbol();
    const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext);
    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BRCLAsm)
                                  .addImm(0).addExpr(Dot), STI);
    OutStreamer.EmitLabel(DotSym);
    return 6;
  }
}

void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
  const SystemZInstrInfo *TII =
    static_cast<const SystemZInstrInfo *>(MF->getSubtarget().getInstrInfo());

  unsigned NumNOPBytes = MI.getOperand(1).getImm();

  SM.recordStackMap(MI);
  assert(NumNOPBytes % 2 == 0 && "Invalid number of NOP bytes requested!");

  // Scan ahead to trim the shadow.
  unsigned ShadowBytes = 0;
  const MachineBasicBlock &MBB = *MI.getParent();
  MachineBasicBlock::const_iterator MII(MI);
  ++MII;
  while (ShadowBytes < NumNOPBytes) {
    if (MII == MBB.end() ||
        MII->getOpcode() == TargetOpcode::PATCHPOINT ||
        MII->getOpcode() == TargetOpcode::STACKMAP)
      break;
    ShadowBytes += TII->getInstSizeInBytes(*MII);
    if (MII->isCall())
      break;
    ++MII;
  }

  // Emit nops.
  while (ShadowBytes < NumNOPBytes)
    ShadowBytes += EmitNop(OutContext, *OutStreamer, NumNOPBytes - ShadowBytes,
                           getSubtargetInfo());
}

// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>
void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                        SystemZMCInstLower &Lower) {
  SM.recordPatchPoint(MI);
  PatchPointOpers Opers(&MI);

  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = Opers.getCallTarget();

  if (CalleeMO.isImm()) {
    uint64_t CallTarget = CalleeMO.getImm();
    if (CallTarget) {
      unsigned ScratchIdx = -1;
      unsigned ScratchReg = 0;
      do {
        ScratchIdx = Opers.getNextScratchIdx(ScratchIdx + 1);
        ScratchReg = MI.getOperand(ScratchIdx).getReg();
      } while (ScratchReg == SystemZ::R0D);

      // Materialize the call target address
      EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::LLILF)
                                     .addReg(ScratchReg)
                                     .addImm(CallTarget & 0xFFFFFFFF));
      EncodedBytes += 6;
      if (CallTarget >> 32) {
        EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::IIHF)
                                       .addReg(ScratchReg)
                                       .addImm(CallTarget >> 32));
        EncodedBytes += 6;
      }

      EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BASR)
                                     .addReg(SystemZ::R14D)
                                     .addReg(ScratchReg));
      EncodedBytes += 2;
    }
  } else if (CalleeMO.isGlobal()) {
    const MCExpr *Expr = Lower.getExpr(CalleeMO, MCSymbolRefExpr::VK_PLT);
    EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BRASL)
                                   .addReg(SystemZ::R14D)
                                   .addExpr(Expr));
    EncodedBytes += 6;
  }

  // Emit padding.
  unsigned NumBytes = Opers.getNumPatchBytes();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call.");
  assert((NumBytes - EncodedBytes) % 2 == 0 &&
         "Invalid number of NOP bytes requested!");
  while (EncodedBytes < NumBytes)
    EncodedBytes += EmitNop(OutContext, *OutStreamer, NumBytes - EncodedBytes,
                            getSubtargetInfo());
}

// Convert a SystemZ-specific constant pool modifier into the associated
// MCSymbolRefExpr variant kind.
static MCSymbolRefExpr::VariantKind
@@ -521,6 +646,10 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
  return false;
}

void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
  SM.serializeToStackMapSection();
}

// Force static initialization.
extern "C" void LLVMInitializeSystemZAsmPrinter() {
  RegisterAsmPrinter<SystemZAsmPrinter> X(getTheSystemZTarget());
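
The "[<def>], <id>, <numBytes>, <target>, <numArgs>" layout named in the LowerPATCHPOINT comment above maps directly onto the operands of the patchpoint intrinsic call. A minimal sketch, mirroring the trivial_patchpoint_codegen test added below (the function name @example and the target address are illustrative):

declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)

define i64 @example(i64 %p1, i64 %p2) {
entry:
  ; <id> = 2, <numBytes> = 10, <target> = %target, <numArgs> = 2;
  ; the two trailing operands (%p1, %p2) are the arguments passed to the target.
  %target = inttoptr i64 559038736 to i8*
  %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 10, i8* %target, i32 2, i64 %p1, i64 %p2)
  ret i64 %result
}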


@@ -11,7 +11,9 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
#include "SystemZTargetMachine.h"
#include "SystemZMCInstLower.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -22,20 +24,33 @@ class Module;
class raw_ostream;

class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
private:
  StackMaps SM;

public:
  SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
    : AsmPrinter(TM, std::move(Streamer)) {}
    : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}

  // Override AsmPrinter.
  StringRef getPassName() const override { return "SystemZ Assembly Printer"; }
  void EmitInstruction(const MachineInstr *MI) override;
  void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
  void EmitEndOfAsmFile(Module &M) override;
  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                       unsigned AsmVariant, const char *ExtraCode,
                       raw_ostream &OS) override;
  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                             unsigned AsmVariant, const char *ExtraCode,
                             raw_ostream &OS) override;
  bool doInitialization(Module &M) override {
    SM.reset();
    return AsmPrinter::doInitialization(M);
  }

private:
  void LowerSTACKMAP(const MachineInstr &MI);
  void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower);
};
} // end namespace llvm


@@ -983,6 +983,13 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
#include "SystemZGenCallingConv.inc"

const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
  CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
                                           SystemZ::R14D, 0 };
  return ScratchRegs;
}

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
@@ -6806,6 +6813,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
  case SystemZ::LTXBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, MBB);

  default:
    llvm_unreachable("Unexpected instr type to insert");
  }


@@ -470,6 +470,7 @@ public:
                             SelectionDAG &DAG) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                          SelectionDAG &DAG) const override;
  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  bool allowTruncateForTailCall(Type *, Type *) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,


@@ -0,0 +1,65 @@
; RUN: llc -mtriple=s390x-linux-gnu < %s | FileCheck %s
; Test invoking of patchpoints
;
define i64 @patchpoint_invoke(i64 %p1, i64 %p2) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
; CHECK-LABEL: patchpoint_invoke:
; CHECK-NEXT: [[FUNC_BEGIN:.L.*]]:
; CHECK-NEXT: .cfi_startproc
; CHECK: .cfi_lsda 0, [[EXCEPTION_LABEL:.L[^ ]*]]
; CHECK: aghi %r15, -160
; Unfortunately, we have to hardcode the name of the label that begins the patchpoint:
; CHECK: .Ltmp0:
; CHECK: llilf %r1, 559038736
; CHECK-NEXT: basr %r14, %r1
; CHECK-NEXT: bcr 0, %r0
; CHECK-NEXT: [[PP_END:.L.*]]:
; CHECK: br %r14
%resolveCall = inttoptr i64 559038736 to i8*
%result = invoke i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 10, i8* %resolveCall, i32 1, i64 %p1, i64 %p2)
to label %success unwind label %threw
success:
ret i64 %result
threw:
%0 = landingpad { i8*, i32 }
catch i8* null
ret i64 0
}
; Verify that the exception table was emitted:
; CHECK: [[EXCEPTION_LABEL]]:
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .uleb128 .Lttbase{{[0-9]+}}-[[TTBASEREF:.Lttbaseref[0-9]+]]
; CHECK-NEXT: [[TTBASEREF]]:
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .uleb128 .Lcst_end{{[0-9]+}}-[[CST_BEGIN:.Lcst_begin[0-9]+]]
; CHECK-NEXT: [[CST_BEGIN]]:
; Verify that the unwind data covers the entire patchpoint region:
; CHECK-NEXT: .uleb128 .Ltmp0-[[FUNC_BEGIN]]
; CHECK-NEXT: .uleb128 [[PP_END]]-.Ltmp0
; Verify that the stackmap section got emitted:
; CHECK-LABEL: __LLVM_StackMaps:
; Header
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 0
; Num Functions
; CHECK-NEXT: .long 1
; Num LargeConstants
; CHECK-NEXT: .long 0
; Num Callsites
; CHECK-NEXT: .long 1
; CHECK-NEXT: .quad patchpoint_invoke
declare void @llvm.experimental.stackmap(i64, i32, ...)
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
declare i32 @__gxx_personality_v0(...)


@@ -0,0 +1,102 @@
; RUN: llc -mtriple=s390x-linux-gnu < %s | FileCheck %s
; Trivial patchpoint codegen
;
define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: trivial_patchpoint_codegen:
; CHECK: llilf %r1, 559038736
; CHECK-NEXT: basr %r14, %r1
; CHECK-NEXT: bcr 0, %r0
; CHECK: lgr [[REG0:%r[0-9]+]], %r2
; CHECK: llilf %r1, 559038737
; CHECK-NEXT: basr %r14, %r1
; CHECK-NEXT: bcr 0, %r0
; CHECK: lgr %r2, [[REG0:%r[0-9]+]]
; CHECK: br %r14
%resolveCall2 = inttoptr i64 559038736 to i8*
%result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 10, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
%resolveCall3 = inttoptr i64 559038737 to i8*
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 10, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
ret i64 %result
}
; Trivial symbolic patchpoint codegen.
;
declare i64 @foo(i64 %p1, i64 %p2)
define i64 @trivial_symbolic_patchpoint_codegen(i64 %p1, i64 %p2) {
entry:
; CHECK-LABEL: trivial_symbolic_patchpoint_codegen:
; CHECK: brasl %r14, foo@PLT
; CHECK-NEXT: bcr 0, %r0
; CHECK: br %r14
%result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 9, i32 8, i8* bitcast (i64 (i64, i64)* @foo to i8*), i32 2, i64 %p1, i64 %p2)
ret i64 %result
}
; Caller frame metadata with stackmaps. This should not be optimized
; as a leaf function.
;
; CHECK-LABEL: caller_meta_leaf
; CHECK: aghi %r15, -184
; CHECK: .Ltmp
; CHECK: lmg %r14, %r15, 296(%r15)
; CHECK: br %r14
define void @caller_meta_leaf() {
entry:
%metadata = alloca i64, i32 3, align 8
store i64 11, i64* %metadata
store i64 12, i64* %metadata
store i64 13, i64* %metadata
call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
ret void
}
; Test patchpoints reusing the same TargetConstant.
; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4)
; There is no way to verify this, since it depends on memory allocation.
; But I think it's useful to include as a working example.
define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64 %tmp79) {
entry:
%tmp80 = add i64 %tmp79, -16
%tmp81 = inttoptr i64 %tmp80 to i64*
%tmp82 = load i64, i64* %tmp81, align 8
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 6, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
%tmp83 = load i64, i64* %tmp33, align 8
%tmp84 = add i64 %tmp83, -24
%tmp85 = inttoptr i64 %tmp84 to i64*
%tmp86 = load i64, i64* %tmp85, align 8
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 17, i32 6, i64 %arg, i64 %tmp10, i64 %tmp86)
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
ret i64 10
}
; Test small patchpoints that don't emit calls.
define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: small_patchpoint_codegen:
; CHECK: .Ltmp
; CHECK: bcr 0, %r0
; CHECK: br %r14
%result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 2, i8* null, i32 2, i64 %p1, i64 %p2)
ret void
}
; Test large target address.
define i64 @large_target_address_patchpoint_codegen() {
entry:
; CHECK-LABEL: large_target_address_patchpoint_codegen:
; CHECK: llilf %r1, 2566957755
; CHECK-NEXT: iihf %r1, 1432778632
; CHECK-NEXT: basr %r14, %r1
%resolveCall2 = inttoptr i64 6153737369414576827 to i8*
%result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 14, i8* %resolveCall2, i32 0)
ret i64 %result
}
declare void @llvm.experimental.stackmap(i64, i32, ...)
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)


@@ -0,0 +1,140 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
define void @nop_test() {
entry:
; CHECK-LABEL: nop_test:
; 2
; CHECK: bcr 0, %r0
; 4
; CHECK: bc 0, 0
; 6
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; 8
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bcr 0, %r0
; 10
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bc 0, 0
; 12
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; 14
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bcr 0, %r0
; 16
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bc 0, 0
; 18
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; 20
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bcr 0, %r0
; 22
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bc 0, 0
; 24
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; 26
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bcr 0, %r0
; 28
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bc 0, 0
; 30
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 2, i32 2)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 4)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 6, i32 6)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 8, i32 8)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 10, i32 10)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 12)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 14)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 16)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 18, i32 18)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 20, i32 20)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 22, i32 22)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 24, i32 24)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 26, i32 26)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 28, i32 28)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 30, i32 30)
; Add an extra stackmap with a zero-length shadow to thwart the shadow
; optimization. This will force all bytes of the previous shadow to be
; padded with nops.
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 31, i32 0)
ret void
}
declare void @llvm.experimental.stackmap(i64, i32, ...)


@@ -0,0 +1,27 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; Check that the stackmap shadow optimization only emits a 2-byte nop here.
; 8 bytes are requested, but 6 are covered by the code for the call to bar.
; However, the frame teardown and the return do not count towards the
; stackmap shadow, since the call return site counts as a branch target and
; therefore must flush the shadow.
; Note that in order for a thread not to return into the patched space,
; the call must be at the end of the shadow, so the required nop must be
; before the call, not after.
define void @shadow_optimization_test() {
entry:
; CHECK-LABEL: shadow_optimization_test:
; CHECK: brasl %r14, bar@PLT
; CHECK-NEXT: .Ltmp
; CHECK-NEXT: bcr 0, %r0
; CHECK-NEXT: brasl %r14, bar@PLT
; CHECK-NEXT: brasl %r14, bar@PLT
call void @bar()
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 8)
call void @bar()
call void @bar()
ret void
}
declare void @bar()
declare void @llvm.experimental.stackmap(i64, i32, ...)


@@ -0,0 +1,537 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
;
; Note: Print verbose stackmaps using -debug-only=stackmaps.
; CHECK: .section .llvm_stackmaps
; CHECK-NEXT: __LLVM_StackMaps:
; Header
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 0
; Num Functions
; CHECK-NEXT: .long 15
; Num LargeConstants
; CHECK-NEXT: .long 3
; Num Callsites
; CHECK-NEXT: .long 19
; Functions and stack size
; CHECK-NEXT: .quad constantargs
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad osrinline
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad osrcold
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad propertyRead
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad propertyWrite
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad jsVoidCall
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad jsIntCall
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad spilledValue
; CHECK-NEXT: .quad 240
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad spilledStackMapValue
; CHECK-NEXT: .quad 200
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad spillSubReg
; CHECK-NEXT: .quad 168
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad liveConstant
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad directFrameIdx
; CHECK-NEXT: .quad 200
; CHECK-NEXT: .quad 2
; CHECK-NEXT: .quad longid
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 4
; CHECK-NEXT: .quad clobberScratch
; CHECK-NEXT: .quad 168
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad needsStackRealignment
; CHECK-NEXT: .quad -1
; CHECK-NEXT: .quad 1
; Large Constants
; CHECK-NEXT: .quad 2147483648
; CHECK-NEXT: .quad 4294967295
; CHECK-NEXT: .quad 4294967296
; Callsites
; Constant arguments
;
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .long .L{{.*}}-constantargs
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 12
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 65536
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 2000000000
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 2147483647
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; LargeConstant at index 0
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; LargeConstant at index 1
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 1
; LargeConstant at index 2
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 2
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long -1
define void @constantargs() {
entry:
%0 = inttoptr i64 12345 to i8*
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 14, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
ret void
}
; Inline OSR Exit
;
; CHECK: .long .L{{.*}}-osrinline
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define void @osrinline(i64 %a, i64 %b) {
entry:
; Runtime void->void call.
call void inttoptr (i64 -559038737 to void ()*)()
; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
ret void
}
; Cold OSR Exit
;
; 2 live variables in register.
;
; CHECK: .long .L{{.*}}-osrcold
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define void @osrcold(i64 %a, i64 %b) {
entry:
%test = icmp slt i64 %a, %b
br i1 %test, label %ret, label %cold
cold:
; OSR patchpoint with 12-byte nop-slide and 2 live vars.
%thunk = inttoptr i64 -559038737 to i8*
call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 14, i8* %thunk, i32 0, i64 %a, i64 %b)
unreachable
ret:
ret void
}
; Property Read
; CHECK: .long .L{{.*}}-propertyRead
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define i64 @propertyRead(i64* %obj) {
entry:
%resolveRead = inttoptr i64 -559038737 to i8*
%result = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 14, i8* %resolveRead, i32 1, i64* %obj)
%add = add i64 %result, 3
ret i64 %add
}
; Property Write
; CHECK: .long .L{{.*}}-propertyWrite
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
entry:
%resolveWrite = inttoptr i64 -559038737 to i8*
call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 14, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
ret void
}
; Void JS Call
;
; 2 live variables in registers.
;
; CHECK: .long .L{{.*}}-jsVoidCall
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 -559038737 to i8*
call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 7, i32 14, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
ret void
}
; i64 JS Call
;
; 2 live variables in registers.
;
; CHECK: .long .L{{.*}}-jsIntCall
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 -559038737 to i8*
%result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 8, i32 14, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
%add = add i64 %result, 3
ret i64 %add
}
; Spilled stack map values.
;
; Verify 17 stack map entries.
;
; CHECK: .long .L{{.*}}-spilledValue
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 17
;
; Check that at least one is a spilled entry from the parameter area.
; Location: Indirect r15 + XX
; CHECK: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long
define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
entry:
call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 14, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
ret void
}
; Spilled stack map values.
;
; Verify 17 stack map entries.
;
; CHECK: .long .L{{.*}}-spilledStackMapValue
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 17
;
; Check that at least one is a spilled entry from the parameter area.
; Location: Indirect r15 + XX
; CHECK: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long
define void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
entry:
call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
ret void
}
; Spill a subregister stackmap operand.
;
; CHECK: .long .L{{.*}}-spillSubReg
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
;
; Check that the subregister operand is a 4-byte spill.
; Location: Indirect, 4-byte, %r15 + 164
; CHECK: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 4
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 164
define void @spillSubReg(i64 %arg) #0 {
bb:
br i1 undef, label %bb1, label %bb2
bb1:
unreachable
bb2:
%tmp = load i64, i64* inttoptr (i64 140685446136880 to i64*)
br i1 undef, label %bb16, label %bb17
bb16:
unreachable
bb17:
%tmp32 = trunc i64 %tmp to i32
br i1 undef, label %bb60, label %bb61
bb60:
tail call void asm sideeffect "nopr %r0", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14}"() nounwind
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 13, i32 6, i32 %tmp32)
unreachable
bb61:
unreachable
}
; Map a constant value.
;
; CHECK: .long .L{{.*}}-liveConstant
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
; Loc 0: SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 33
define void @liveConstant() {
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 6, i32 33)
ret void
}
; Directly map an alloca's address.
;
; Callsite 16
; CHECK: .long .L{{.*}}-directFrameIdx
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
; Loc 0: Direct %r15 + ofs
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long
; Callsite 17
; CHECK: .long .L{{.*}}-directFrameIdx
; CHECK-NEXT: .short 0
; 2 locations
; CHECK-NEXT: .short 2
; Loc 0: Direct %r15 + ofs
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long
; Loc 1: Direct %r15 + ofs
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long
define void @directFrameIdx() {
entry:
%metadata1 = alloca i64, i32 3, align 8
store i64 11, i64* %metadata1
store i64 12, i64* %metadata1
store i64 13, i64* %metadata1
call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
%metadata2 = alloca i8, i32 4, align 8
%metadata3 = alloca i16, i32 4, align 8
call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 17, i32 6, i8* null, i32 0, i8* %metadata2, i16* %metadata3)
ret void
}
; Test a 64-bit ID.
;
; CHECK: .quad 4294967295
; CHECK: .long .L{{.*}}-longid
; CHECK: .quad 4294967296
; CHECK: .long .L{{.*}}-longid
; CHECK: .quad 9223372036854775807
; CHECK: .long .L{{.*}}-longid
; CHECK: .quad -1
; CHECK: .long .L{{.*}}-longid
define void @longid() {
entry:
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4294967295, i32 0, i8* null, i32 0)
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4294967296, i32 0, i8* null, i32 0)
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 9223372036854775807, i32 0, i8* null, i32 0)
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 -1, i32 0, i8* null, i32 0)
ret void
}
; Map a value when %r0 and %r1 are the only free registers.
; The scratch registers should not be used for a live stackmap value.
;
; CHECK: .long .L{{.*}}-clobberScratch
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
; Loc 0: Indirect %r15 + offset
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 4
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 164
define void @clobberScratch(i32 %a) {
tail call void asm sideeffect "nopr %r0", "~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14}"() nounwind
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
ret void
}
; A stack frame which needs to be realigned at runtime (to meet alignment
; criteria for values on the stack) does not have a fixed frame size.
; CHECK: .long .L{{.*}}-needsStackRealignment
; CHECK-NEXT: .short 0
; 0 locations
; CHECK-NEXT: .short 0
define void @needsStackRealignment() {
%val = alloca i64, i32 3, align 128
tail call void (...) @escape_values(i64* %val)
; Note: Adding any non-constant to the stackmap would fail because we
; expected to be able to address off the frame pointer. In a realigned
; frame, we must use the stack pointer instead. This is a separate bug.
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0)
ret void
}
declare void @escape_values(...)
declare void @llvm.experimental.stackmap(i64, i32, ...)
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)