
[SystemZ] Support stackmaps and patchpoints

This adds back-end support for the @llvm.experimental.stackmap and
@llvm.experimental.patchpoint intrinsics.
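
For illustration, a minimal sketch of how a front end invokes the stackmap intrinsic, mirroring the tests added below (the function name @record_values and the constants are made up):

declare void @llvm.experimental.stackmap(i64, i32, ...)

define void @record_values(i64 %a, i64 %b) {
entry:
  ; Record %a and %b under stackmap ID 3, reserving a 12-byte nop shadow.
  call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
  ret void
}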

llvm-svn: 326611
Ulrich Weigand 2018-03-02 20:39:30 +00:00
parent 68f7d9ae66
commit 6a6a68334a
9 changed files with 1028 additions and 1 deletion


@@ -460,6 +460,14 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    }
    break;

  case TargetOpcode::STACKMAP:
    LowerSTACKMAP(*MI);
    return;

  case TargetOpcode::PATCHPOINT:
    LowerPATCHPOINT(*MI, Lower);
    return;

  default:
    Lower.lower(MI, LoweredMI);
    break;
@@ -467,6 +475,123 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  EmitToStreamer(*OutStreamer, LoweredMI);
}

// Emit the largest nop instruction smaller than or equal to NumBytes
// bytes. Return the size of nop emitted.
static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
                        unsigned NumBytes, const MCSubtargetInfo &STI) {
  if (NumBytes < 2) {
    llvm_unreachable("Zero nops?");
    return 0;
  }
  else if (NumBytes < 4) {
    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCRAsm)
                                  .addImm(0).addReg(SystemZ::R0D), STI);
    return 2;
  }
  else if (NumBytes < 6) {
    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCAsm)
                                  .addImm(0).addReg(0).addImm(0).addReg(0),
                                STI);
    return 4;
  }
  else {
    MCSymbol *DotSym = OutContext.createTempSymbol();
    const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext);
    OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BRCLAsm)
                                  .addImm(0).addExpr(Dot), STI);
    OutStreamer.EmitLabel(DotSym);
    return 6;
  }
}

void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
  const SystemZInstrInfo *TII =
    static_cast<const SystemZInstrInfo *>(MF->getSubtarget().getInstrInfo());

  unsigned NumNOPBytes = MI.getOperand(1).getImm();

  SM.recordStackMap(MI);
  assert(NumNOPBytes % 2 == 0 && "Invalid number of NOP bytes requested!");

  // Scan ahead to trim the shadow.
  unsigned ShadowBytes = 0;
  const MachineBasicBlock &MBB = *MI.getParent();
  MachineBasicBlock::const_iterator MII(MI);
  ++MII;
  while (ShadowBytes < NumNOPBytes) {
    if (MII == MBB.end() ||
        MII->getOpcode() == TargetOpcode::PATCHPOINT ||
        MII->getOpcode() == TargetOpcode::STACKMAP)
      break;
    ShadowBytes += TII->getInstSizeInBytes(*MII);
    if (MII->isCall())
      break;
    ++MII;
  }

  // Emit nops.
  while (ShadowBytes < NumNOPBytes)
    ShadowBytes += EmitNop(OutContext, *OutStreamer, NumNOPBytes - ShadowBytes,
                           getSubtargetInfo());
}

// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>
void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                        SystemZMCInstLower &Lower) {
  SM.recordPatchPoint(MI);
  PatchPointOpers Opers(&MI);

  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = Opers.getCallTarget();

  if (CalleeMO.isImm()) {
    uint64_t CallTarget = CalleeMO.getImm();
    if (CallTarget) {
      unsigned ScratchIdx = -1;
      unsigned ScratchReg = 0;
      do {
        ScratchIdx = Opers.getNextScratchIdx(ScratchIdx + 1);
        ScratchReg = MI.getOperand(ScratchIdx).getReg();
      } while (ScratchReg == SystemZ::R0D);

      // Materialize the call target address
      EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::LLILF)
                                     .addReg(ScratchReg)
                                     .addImm(CallTarget & 0xFFFFFFFF));
      EncodedBytes += 6;
      if (CallTarget >> 32) {
        EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::IIHF)
                                       .addReg(ScratchReg)
                                       .addImm(CallTarget >> 32));
        EncodedBytes += 6;
      }

      EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BASR)
                                     .addReg(SystemZ::R14D)
                                     .addReg(ScratchReg));
      EncodedBytes += 2;
    }
  } else if (CalleeMO.isGlobal()) {
    const MCExpr *Expr = Lower.getExpr(CalleeMO, MCSymbolRefExpr::VK_PLT);
    EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BRASL)
                                   .addReg(SystemZ::R14D)
                                   .addExpr(Expr));
    EncodedBytes += 6;
  }

  // Emit padding.
  unsigned NumBytes = Opers.getNumPatchBytes();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call.");
  assert((NumBytes - EncodedBytes) % 2 == 0 &&
         "Invalid number of NOP bytes requested!");
  while (EncodedBytes < NumBytes)
    EncodedBytes += EmitNop(OutContext, *OutStreamer, NumBytes - EncodedBytes,
                            getSubtargetInfo());
}

// Convert a SystemZ-specific constant pool modifier into the associated
// MCSymbolRefExpr variant kind.
static MCSymbolRefExpr::VariantKind
@@ -521,6 +646,10 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
  return false;
}

void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
  SM.serializeToStackMapSection();
}

// Force static initialization.
extern "C" void LLVMInitializeSystemZAsmPrinter() {
  RegisterAsmPrinter<SystemZAsmPrinter> X(getTheSystemZTarget());
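
The "[<def>], <id>, <numBytes>, <target>, <numArgs>" layout named in the LowerPATCHPOINT comment above maps directly onto the operands of the patchpoint intrinsic call. A minimal sketch, mirroring the trivial_patchpoint_codegen test added below (the function name @example and the target address are illustrative):

declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)

define i64 @example(i64 %p1, i64 %p2) {
entry:
  ; <id> = 2, <numBytes> = 10, <target> = %target, <numArgs> = 2;
  ; the two trailing operands (%p1, %p2) are the arguments passed to the target.
  %target = inttoptr i64 559038736 to i8*
  %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 10, i8* %target, i32 2, i64 %p1, i64 %p2)
  ret i64 %result
}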


@@ -11,7 +11,9 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
#include "SystemZTargetMachine.h"
#include "SystemZMCInstLower.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -22,20 +24,33 @@ class Module;
class raw_ostream;

class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
private:
  StackMaps SM;

public:
  SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
    : AsmPrinter(TM, std::move(Streamer)) {}
    : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}

  // Override AsmPrinter.
  StringRef getPassName() const override { return "SystemZ Assembly Printer"; }
  void EmitInstruction(const MachineInstr *MI) override;
  void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
  void EmitEndOfAsmFile(Module &M) override;
  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                       unsigned AsmVariant, const char *ExtraCode,
                       raw_ostream &OS) override;
  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                             unsigned AsmVariant, const char *ExtraCode,
                             raw_ostream &OS) override;
  bool doInitialization(Module &M) override {
    SM.reset();
    return AsmPrinter::doInitialization(M);
  }

private:
  void LowerSTACKMAP(const MachineInstr &MI);
  void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower);
};
} // end namespace llvm


@@ -983,6 +983,13 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
#include "SystemZGenCallingConv.inc"

const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
  CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
                                           SystemZ::R14D, 0 };
  return ScratchRegs;
}

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
@@ -6806,6 +6813,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
  case SystemZ::LTXBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, MBB);

  default:
    llvm_unreachable("Unexpected instr type to insert");
  }


@@ -470,6 +470,7 @@ public:
                             SelectionDAG &DAG) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                          SelectionDAG &DAG) const override;
  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  bool allowTruncateForTailCall(Type *, Type *) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,


@@ -0,0 +1,65 @@
; RUN: llc -mtriple=s390x-linux-gnu < %s | FileCheck %s
; Test invoking of patchpoints
;
define i64 @patchpoint_invoke(i64 %p1, i64 %p2) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
; CHECK-LABEL: patchpoint_invoke:
; CHECK-NEXT: [[FUNC_BEGIN:.L.*]]:
; CHECK-NEXT: .cfi_startproc
; CHECK: .cfi_lsda 0, [[EXCEPTION_LABEL:.L[^ ]*]]
; CHECK: aghi %r15, -160
; Unfortunately, we have to hardcode the name of the label that begins the patchpoint:
; CHECK: .Ltmp0:
; CHECK: llilf %r1, 559038736
; CHECK-NEXT: basr %r14, %r1
; CHECK-NEXT: bcr 0, %r0
; CHECK-NEXT: [[PP_END:.L.*]]:
; CHECK: br %r14
%resolveCall = inttoptr i64 559038736 to i8*
%result = invoke i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 10, i8* %resolveCall, i32 1, i64 %p1, i64 %p2)
to label %success unwind label %threw
success:
ret i64 %result
threw:
%0 = landingpad { i8*, i32 }
catch i8* null
ret i64 0
}
; Verify that the exception table was emitted:
; CHECK: [[EXCEPTION_LABEL]]:
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .uleb128 .Lttbase{{[0-9]+}}-[[TTBASEREF:.Lttbaseref[0-9]+]]
; CHECK-NEXT: [[TTBASEREF]]:
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .uleb128 .Lcst_end{{[0-9]+}}-[[CST_BEGIN:.Lcst_begin[0-9]+]]
; CHECK-NEXT: [[CST_BEGIN]]:
; Verify that the unwind data covers the entire patchpoint region:
; CHECK-NEXT: .uleb128 .Ltmp0-[[FUNC_BEGIN]]
; CHECK-NEXT: .uleb128 [[PP_END]]-.Ltmp0
; Verify that the stackmap section got emitted:
; CHECK-LABEL: __LLVM_StackMaps:
; Header
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 0
; Num Functions
; CHECK-NEXT: .long 1
; Num LargeConstants
; CHECK-NEXT: .long 0
; Num Callsites
; CHECK-NEXT: .long 1
; CHECK-NEXT: .quad patchpoint_invoke
declare void @llvm.experimental.stackmap(i64, i32, ...)
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
declare i32 @__gxx_personality_v0(...)


@@ -0,0 +1,102 @@
; RUN: llc -mtriple=s390x-linux-gnu < %s | FileCheck %s
; Trivial patchpoint codegen
;
define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: trivial_patchpoint_codegen:
; CHECK: llilf %r1, 559038736
; CHECK-NEXT: basr %r14, %r1
; CHECK-NEXT: bcr 0, %r0
; CHECK: lgr [[REG0:%r[0-9]+]], %r2
; CHECK: llilf %r1, 559038737
; CHECK-NEXT: basr %r14, %r1
; CHECK-NEXT: bcr 0, %r0
; CHECK: lgr %r2, [[REG0:%r[0-9]+]]
; CHECK: br %r14
%resolveCall2 = inttoptr i64 559038736 to i8*
%result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 10, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
%resolveCall3 = inttoptr i64 559038737 to i8*
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 10, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
ret i64 %result
}
; Trivial symbolic patchpoint codegen.
;
declare i64 @foo(i64 %p1, i64 %p2)
define i64 @trivial_symbolic_patchpoint_codegen(i64 %p1, i64 %p2) {
entry:
; CHECK-LABEL: trivial_symbolic_patchpoint_codegen:
; CHECK: brasl %r14, foo@PLT
; CHECK-NEXT: bcr 0, %r0
; CHECK: br %r14
%result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 9, i32 8, i8* bitcast (i64 (i64, i64)* @foo to i8*), i32 2, i64 %p1, i64 %p2)
ret i64 %result
}
; Caller frame metadata with stackmaps. This should not be optimized
; as a leaf function.
;
; CHECK-LABEL: caller_meta_leaf
; CHECK: aghi %r15, -184
; CHECK: .Ltmp
; CHECK: lmg %r14, %r15, 296(%r15)
; CHECK: br %r14
define void @caller_meta_leaf() {
entry:
%metadata = alloca i64, i32 3, align 8
store i64 11, i64* %metadata
store i64 12, i64* %metadata
store i64 13, i64* %metadata
call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
ret void
}
; Test patchpoints reusing the same TargetConstant.
; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4)
; There is no way to verify this, since it depends on memory allocation.
; But I think it's useful to include as a working example.
define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64 %tmp79) {
entry:
%tmp80 = add i64 %tmp79, -16
%tmp81 = inttoptr i64 %tmp80 to i64*
%tmp82 = load i64, i64* %tmp81, align 8
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 6, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
%tmp83 = load i64, i64* %tmp33, align 8
%tmp84 = add i64 %tmp83, -24
%tmp85 = inttoptr i64 %tmp84 to i64*
%tmp86 = load i64, i64* %tmp85, align 8
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 17, i32 6, i64 %arg, i64 %tmp10, i64 %tmp86)
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
ret i64 10
}
; Test small patchpoints that don't emit calls.
define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: small_patchpoint_codegen:
; CHECK: .Ltmp
; CHECK: bcr 0, %r0
; CHECK: br %r14
%result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 2, i8* null, i32 2, i64 %p1, i64 %p2)
ret void
}
; Test large target address.
define i64 @large_target_address_patchpoint_codegen() {
entry:
; CHECK-LABEL: large_target_address_patchpoint_codegen:
; CHECK: llilf %r1, 2566957755
; CHECK-NEXT: iihf %r1, 1432778632
; CHECK-NEXT: basr %r14, %r1
%resolveCall2 = inttoptr i64 6153737369414576827 to i8*
%result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 14, i8* %resolveCall2, i32 0)
ret i64 %result
}
declare void @llvm.experimental.stackmap(i64, i32, ...)
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)


@@ -0,0 +1,140 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
define void @nop_test() {
entry:
; CHECK-LABEL: nop_test:
; 2
; CHECK: bcr 0, %r0
; 4
; CHECK: bc 0, 0
; 6
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; 8
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bcr 0, %r0
; 10
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bc 0, 0
; 12
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; 14
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bcr 0, %r0
; 16
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bc 0, 0
; 18
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; 20
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bcr 0, %r0
; 22
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bc 0, 0
; 24
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; 26
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bcr 0, %r0
; 28
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: bc 0, 0
; 30
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
; CHECK: brcl 0, [[LAB:.Ltmp[0-9]+]]
; CHECK-NEXT: [[LAB]]:
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 2, i32 2)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 4)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 6, i32 6)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 8, i32 8)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 10, i32 10)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 12)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 14)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 16)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 18, i32 18)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 20, i32 20)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 22, i32 22)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 24, i32 24)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 26, i32 26)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 28, i32 28)
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 30, i32 30)
; Add an extra stackmap with a zero-length shadow to thwart the shadow
; optimization. This will force all bytes of the previous shadow to be
; padded with nops.
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 31, i32 0)
ret void
}
declare void @llvm.experimental.stackmap(i64, i32, ...)


@@ -0,0 +1,27 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; Check that the stackmap shadow optimization only emits a 2-byte nop here.
; 8 bytes are requested, but 6 are covered by the code for the call to bar.
; However, the frame teardown and the return do not count towards the
; stackmap shadow, since the call return site counts as a branch target and
; therefore must flush the shadow.
; Note that in order for a thread not to return into the patched space,
; the call must be at the end of the shadow, so the required nop must be
; before the call, not after.
define void @shadow_optimization_test() {
entry:
; CHECK-LABEL: shadow_optimization_test:
; CHECK: brasl %r14, bar@PLT
; CHECK-NEXT: .Ltmp
; CHECK-NEXT: bcr 0, %r0
; CHECK-NEXT: brasl %r14, bar@PLT
; CHECK-NEXT: brasl %r14, bar@PLT
call void @bar()
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 8)
call void @bar()
call void @bar()
ret void
}
declare void @bar()
declare void @llvm.experimental.stackmap(i64, i32, ...)


@@ -0,0 +1,537 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
;
; Note: Print verbose stackmaps using -debug-only=stackmaps.
; CHECK: .section .llvm_stackmaps
; CHECK-NEXT: __LLVM_StackMaps:
; Header
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 0
; Num Functions
; CHECK-NEXT: .long 15
; Num LargeConstants
; CHECK-NEXT: .long 3
; Num Callsites
; CHECK-NEXT: .long 19
; Functions and stack size
; CHECK-NEXT: .quad constantargs
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad osrinline
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad osrcold
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad propertyRead
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad propertyWrite
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad jsVoidCall
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad jsIntCall
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad spilledValue
; CHECK-NEXT: .quad 240
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad spilledStackMapValue
; CHECK-NEXT: .quad 200
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad spillSubReg
; CHECK-NEXT: .quad 168
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad liveConstant
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad directFrameIdx
; CHECK-NEXT: .quad 200
; CHECK-NEXT: .quad 2
; CHECK-NEXT: .quad longid
; CHECK-NEXT: .quad 160
; CHECK-NEXT: .quad 4
; CHECK-NEXT: .quad clobberScratch
; CHECK-NEXT: .quad 168
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .quad needsStackRealignment
; CHECK-NEXT: .quad -1
; CHECK-NEXT: .quad 1
; Large Constants
; CHECK-NEXT: .quad 2147483648
; CHECK-NEXT: .quad 4294967295
; CHECK-NEXT: .quad 4294967296
; Callsites
; Constant arguments
;
; CHECK-NEXT: .quad 1
; CHECK-NEXT: .long .L{{.*}}-constantargs
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 12
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 65536
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 2000000000
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 2147483647
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long -1
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; LargeConstant at index 0
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; LargeConstant at index 1
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 1
; LargeConstant at index 2
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 2
; SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long -1
define void @constantargs() {
entry:
%0 = inttoptr i64 12345 to i8*
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 14, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
ret void
}
; Inline OSR Exit
;
; CHECK: .long .L{{.*}}-osrinline
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define void @osrinline(i64 %a, i64 %b) {
entry:
; Runtime void->void call.
call void inttoptr (i64 -559038737 to void ()*)()
; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
ret void
}
; Cold OSR Exit
;
; 2 live variables in register.
;
; CHECK: .long .L{{.*}}-osrcold
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define void @osrcold(i64 %a, i64 %b) {
entry:
%test = icmp slt i64 %a, %b
br i1 %test, label %ret, label %cold
cold:
; OSR patchpoint with 12-byte nop-slide and 2 live vars.
%thunk = inttoptr i64 -559038737 to i8*
call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 14, i8* %thunk, i32 0, i64 %a, i64 %b)
unreachable
ret:
ret void
}
; Property Read
; CHECK: .long .L{{.*}}-propertyRead
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define i64 @propertyRead(i64* %obj) {
entry:
%resolveRead = inttoptr i64 -559038737 to i8*
%result = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 14, i8* %resolveRead, i32 1, i64* %obj)
%add = add i64 %result, 3
ret i64 %add
}
; Property Write
; CHECK: .long .L{{.*}}-propertyWrite
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
entry:
%resolveWrite = inttoptr i64 -559038737 to i8*
call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 14, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
ret void
}
; Void JS Call
;
; 2 live variables in registers.
;
; CHECK: .long .L{{.*}}-jsVoidCall
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 -559038737 to i8*
call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 7, i32 14, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
ret void
}
; i64 JS Call
;
; 2 live variables in registers.
;
; CHECK: .long .L{{.*}}-jsIntCall
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short {{[0-9]+}}
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 0
define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 -559038737 to i8*
%result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 8, i32 14, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
%add = add i64 %result, 3
ret i64 %add
}
; Spilled stack map values.
;
; Verify 17 stack map entries.
;
; CHECK: .long .L{{.*}}-spilledValue
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 17
;
; Check that at least one is a spilled entry from the parameter area.
; Location: Indirect r15 + XX
; CHECK: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long
define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
entry:
call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 14, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
ret void
}
; Spilled stack map values.
;
; Verify 17 stack map entries.
;
; CHECK: .long .L{{.*}}-spilledStackMapValue
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 17
;
; Check that at least one is a spilled entry from the parameter area.
; Location: Indirect r15 + XX
; CHECK: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long
define void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
entry:
call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
ret void
}
; Spill a subregister stackmap operand.
;
; CHECK: .long .L{{.*}}-spillSubReg
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
;
; Check that the subregister operand is a 4-byte spill.
; Location: Indirect, 4-byte, %r15 + 164
; CHECK: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 4
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 164
define void @spillSubReg(i64 %arg) #0 {
bb:
br i1 undef, label %bb1, label %bb2
bb1:
unreachable
bb2:
%tmp = load i64, i64* inttoptr (i64 140685446136880 to i64*)
br i1 undef, label %bb16, label %bb17
bb16:
unreachable
bb17:
%tmp32 = trunc i64 %tmp to i32
br i1 undef, label %bb60, label %bb61
bb60:
tail call void asm sideeffect "nopr %r0", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14}"() nounwind
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 13, i32 6, i32 %tmp32)
unreachable
bb61:
unreachable
}
; Map a constant value.
;
; CHECK: .long .L{{.*}}-liveConstant
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
; Loc 0: SmallConstant
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 33
define void @liveConstant() {
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 6, i32 33)
ret void
}
; Directly map an alloca's address.
;
; Callsite 16
; CHECK: .long .L{{.*}}-directFrameIdx
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
; Loc 0: Direct %r15 + ofs
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long
; Callsite 17
; CHECK: .long .L{{.*}}-directFrameIdx
; CHECK-NEXT: .short 0
; 2 locations
; CHECK-NEXT: .short 2
; Loc 0: Direct %r15 + ofs
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long
; Loc 1: Direct %r15 + ofs
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 8
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long
define void @directFrameIdx() {
entry:
%metadata1 = alloca i64, i32 3, align 8
store i64 11, i64* %metadata1
store i64 12, i64* %metadata1
store i64 13, i64* %metadata1
call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
%metadata2 = alloca i8, i32 4, align 8
%metadata3 = alloca i16, i32 4, align 8
call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 17, i32 6, i8* null, i32 0, i8* %metadata2, i16* %metadata3)
ret void
}
; Test a 64-bit ID.
;
; CHECK: .quad 4294967295
; CHECK: .long .L{{.*}}-longid
; CHECK: .quad 4294967296
; CHECK: .long .L{{.*}}-longid
; CHECK: .quad 9223372036854775807
; CHECK: .long .L{{.*}}-longid
; CHECK: .quad -1
; CHECK: .long .L{{.*}}-longid
define void @longid() {
entry:
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4294967295, i32 0, i8* null, i32 0)
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4294967296, i32 0, i8* null, i32 0)
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 9223372036854775807, i32 0, i8* null, i32 0)
tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 -1, i32 0, i8* null, i32 0)
ret void
}
; Map a value when %r0 and %r1 are the only free registers.
; The scratch registers should not be used for a live stackmap value.
;
; CHECK: .long .L{{.*}}-clobberScratch
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
; Loc 0: Indirect %r15 + offset
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 4
; CHECK-NEXT: .short 15
; CHECK-NEXT: .short 0
; CHECK-NEXT: .long 164
define void @clobberScratch(i32 %a) {
tail call void asm sideeffect "nopr %r0", "~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14}"() nounwind
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
ret void
}
; A stack frame which needs to be realigned at runtime (to meet alignment
; criteria for values on the stack) does not have a fixed frame size.
; CHECK: .long .L{{.*}}-needsStackRealignment
; CHECK-NEXT: .short 0
; 0 locations
; CHECK-NEXT: .short 0
define void @needsStackRealignment() {
%val = alloca i64, i32 3, align 128
tail call void (...) @escape_values(i64* %val)
; Note: Adding any non-constant to the stackmap would fail because we
; expected to be able to address off the frame pointer. In a realigned
; frame, we must use the stack pointer instead. This is a separate bug.
tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0)
ret void
}
declare void @escape_values(...)
declare void @llvm.experimental.stackmap(i64, i32, ...)
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)