diff --git a/docs/FaultMaps.rst b/docs/FaultMaps.rst new file mode 100644 index 00000000000..d6274fa0a2c --- /dev/null +++ b/docs/FaultMaps.rst @@ -0,0 +1,54 @@ +============================== +FaultMaps and implicit checks +============================== + +.. contents:: + :local: + :depth: 2 + +Motivation +========== + +Code generated by managed language runtimes tends to have checks that +are required for safety but never fail in practice. In such cases, it +is profitable to make the non-failing case cheaper even if it makes +the failing case significantly more expensive. This asymmetry can be +exploited by folding such safety checks into operations that can be +made to fault reliably if the check would have failed, and recovering +from such a fault by using a signal handler. + +For example, Java requires null checks on objects before they are read +from or written to. If the object is ``null`` then a +``NullPointerException`` has to be thrown, interrupting normal +execution. In practice, however, dereferencing a ``null`` pointer is +extremely rare in well-behaved Java programs, and typically the null +check can be folded into a nearby memory operation that operates on +the same memory location. + +The Fault Map Section +===================== + +Information about implicit checks generated by LLVM is put in a +special "fault map" section. On Darwin this section is named +``__llvm_faultmaps``. + +The format of this section is + +.. 
code-block:: none + + Header { + uint8 : Fault Map Version (current version is 1) + uint8 : Reserved (expected to be 0) + uint16 : Reserved (expected to be 0) + } + uint32 : NumFunctions + FunctionInfo[NumFunctions] { + uint64 : FunctionAddress + uint32 : NumFaultingPCs + uint32 : Reserved (expected to be 0) + FunctionFaultInfo[NumFaultingPCs] { + uint32 : FaultType = FaultMaps::FaultingLoad (only legal value currently) + uint32 : FaultingPCOffset + uint32 : HandlerPCOffset + } + } diff --git a/include/llvm/CodeGen/FaultMaps.h b/include/llvm/CodeGen/FaultMaps.h new file mode 100644 index 00000000000..7127ecb124b --- /dev/null +++ b/include/llvm/CodeGen/FaultMaps.h @@ -0,0 +1,73 @@ +//===------------------- FaultMaps.h - FaultMaps ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_FAULTMAPS_H +#define LLVM_CODEGEN_FAULTMAPS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCSymbol.h" + +#include +#include + +namespace llvm { + +class AsmPrinter; +class MCExpr; +class MCSymbol; +class MCStreamer; + +class FaultMaps { +public: + enum FaultType { FaultingLoad = 1, FaultTypeMax }; + + static const char *faultTypeToString(FaultType); + + explicit FaultMaps(AsmPrinter &AP); + + void recordFaultingOp(FaultType FaultTy, const MCSymbol *HandlerLabel); + void serializeToFaultMapSection(); + +private: + static const char *WFMP; + + struct FaultInfo { + FaultType FaultType; + const MCExpr *FaultingOffsetExpr; + const MCExpr *HandlerOffsetExpr; + + FaultInfo() + : FaultType(FaultTypeMax), FaultingOffsetExpr(nullptr), + HandlerOffsetExpr(nullptr) {} + + explicit FaultInfo(FaultMaps::FaultType FType, const MCExpr *FaultingOffset, + const MCExpr *HandlerOffset) + : FaultType(FType), FaultingOffsetExpr(FaultingOffset), + 
HandlerOffsetExpr(HandlerOffset) {} + }; + + typedef std::vector FunctionFaultInfos; + + // We'd like to keep a stable iteration order for FunctionInfos to help + // FileCheck based testing. + struct MCSymbolComparator { + bool operator()(const MCSymbol *LHS, const MCSymbol *RHS) const { + return LHS->getName() < RHS->getName(); + } + }; + + std::map + FunctionInfos; + AsmPrinter &AP; + + void emitFunctionInfo(const MCSymbol *FnLabel, const FunctionFaultInfos &FFI); +}; +} + +#endif diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h index 0515f1cd738..3ffd38f2c58 100644 --- a/include/llvm/MC/MCObjectFileInfo.h +++ b/include/llvm/MC/MCObjectFileInfo.h @@ -139,6 +139,9 @@ protected: /// StackMap section. MCSection *StackMapSection; + /// FaultMap section. + MCSection *FaultMapSection; + /// EH frame section. /// /// It is initialized on demand so it can be overwritten (with uniquing). @@ -262,6 +265,7 @@ public: MCSection *getTLSBSSSection() const { return TLSBSSSection; } MCSection *getStackMapSection() const { return StackMapSection; } + MCSection *getFaultMapSection() const { return FaultMapSection; } // ELF specific sections. 
MCSection *getDataRelSection() const { return DataRelSection; } diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index d99f0e1c5dc..61234991be4 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -881,6 +881,12 @@ def FRAME_ALLOC : Instruction { let hasSideEffects = 0; let hasCtrlDep = 1; } +def FAULTING_LOAD_OP : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins variable_ops); + let usesCustomInserter = 1; + let mayLoad = 1; +} } //===----------------------------------------------------------------------===// diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h index afc22365eba..1f9a5d4ecaf 100644 --- a/include/llvm/Target/TargetOpcodes.h +++ b/include/llvm/Target/TargetOpcodes.h @@ -122,6 +122,12 @@ enum { /// label. Created by the llvm.frameallocate intrinsic. It has two arguments: /// the symbol for the label and the frame index of the stack allocation. FRAME_ALLOC = 21, + + /// Loading instruction that may page fault, bundled with associated + /// information on how to handle such a page fault. It is intended to support + /// "zero cost" null checks in managed languages by allowing LLVM to fold + /// comparisons into existing memory operations. 
+ FAULTING_LOAD_OP = 22, }; } // end namespace TargetOpcode } // end namespace llvm diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 6d2af900350..2f65253c7c5 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMCodeGen ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp + FaultMaps.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCRootLowering.cpp diff --git a/lib/CodeGen/FaultMaps.cpp b/lib/CodeGen/FaultMaps.cpp new file mode 100644 index 00000000000..2f849511d69 --- /dev/null +++ b/lib/CodeGen/FaultMaps.cpp @@ -0,0 +1,114 @@ +//===---------------------------- FaultMaps.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/FaultMaps.h" + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "faultmaps" + +static const int FaultMapVersion = 1; +const char *FaultMaps::WFMP = "Fault Maps: "; + +FaultMaps::FaultMaps(AsmPrinter &AP) : AP(AP) {} + +void FaultMaps::recordFaultingOp(FaultType FaultTy, + const MCSymbol *HandlerLabel) { + MCContext &OutContext = AP.OutStreamer->getContext(); + MCSymbol *FaultingLabel = OutContext.createTempSymbol(); + + AP.OutStreamer->EmitLabel(FaultingLabel); + + const MCExpr *FaultingOffset = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(FaultingLabel, OutContext), + MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext); + + const MCExpr *HandlerOffset = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(HandlerLabel, OutContext), + 
MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext); + + FunctionInfos[AP.CurrentFnSym].emplace_back(FaultTy, FaultingOffset, + HandlerOffset); +} + +void FaultMaps::serializeToFaultMapSection() { + if (FunctionInfos.empty()) + return; + + MCContext &OutContext = AP.OutStreamer->getContext(); + MCStreamer &OS = *AP.OutStreamer; + + // Create the section. + MCSection *FaultMapSection = + OutContext.getObjectFileInfo()->getFaultMapSection(); + OS.SwitchSection(FaultMapSection); + + // Emit a dummy symbol to force section inclusion. + OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps"))); + + DEBUG(dbgs() << "********** Fault Map Output **********\n"); + + // Header + OS.EmitIntValue(FaultMapVersion, 1); // Version. + OS.EmitIntValue(0, 1); // Reserved. + OS.EmitIntValue(0, 2); // Reserved. + + DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n"); + OS.EmitIntValue(FunctionInfos.size(), 4); + + DEBUG(dbgs() << WFMP << "functions:\n"); + + for (const auto &FFI : FunctionInfos) + emitFunctionInfo(FFI.first, FFI.second); +} + +void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel, + const FunctionFaultInfos &FFI) { + MCStreamer &OS = *AP.OutStreamer; + + DEBUG(dbgs() << WFMP << " function addr: " << *FnLabel << "\n"); + OS.EmitSymbolValue(FnLabel, 8); + + DEBUG(dbgs() << WFMP << " #faulting PCs: " << FFI.size() << "\n"); + OS.EmitIntValue(FFI.size(), 4); + + OS.EmitIntValue(0, 4); // Reserved + + for (auto &Fault : FFI) { + DEBUG(dbgs() << WFMP << " fault type: " + << faultTypeToString(Fault.FaultType) << "\n"); + OS.EmitIntValue(Fault.FaultType, 4); + + DEBUG(dbgs() << WFMP << " faulting PC offset: " + << *Fault.FaultingOffsetExpr << "\n"); + OS.EmitValue(Fault.FaultingOffsetExpr, 4); + + DEBUG(dbgs() << WFMP << " fault handler PC offset: " + << *Fault.HandlerOffsetExpr << "\n"); + OS.EmitValue(Fault.HandlerOffsetExpr, 4); + } +} + + +const char *FaultMaps::faultTypeToString(FaultMaps::FaultType FT) { + switch 
(FT) { + default: + llvm_unreachable("unhandled fault type!"); + + case FaultMaps::FaultingLoad: + return "FaultingLoad"; + } +} diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 83a08e28a81..3ef87b52b34 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -238,6 +238,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { StackMapSection = Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps", 0, SectionKind::getMetadata()); + FaultMapSection = Ctx->getMachOSection("__LLVM_FAULTMAPS", "__llvm_faultmaps", + 0, SectionKind::getMetadata()); + TLSExtraDataSection = TLSTLVSection; } @@ -518,6 +521,9 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) { StackMapSection = Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + FaultMapSection = + Ctx->getELFSection(".llvm_faultmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); } void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) { diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 64fc6d0d7e5..3b11e694ecb 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -674,6 +674,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } SM.serializeToStackMapSection(); + FM.serializeToFaultMapSection(); // Funny Darwin hack: This flag tells the linker that no global symbols // contain code that falls through to other global symbols (e.g. 
the obvious @@ -726,8 +727,10 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } - if (TT.isOSBinFormatELF()) + if (TT.isOSBinFormatELF()) { SM.serializeToStackMapSection(); + FM.serializeToFaultMapSection(); + } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 3beeb1752bf..acba21169c9 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -12,6 +12,7 @@ #include "X86Subtarget.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/FaultMaps.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/Target/TargetMachine.h" @@ -27,6 +28,7 @@ class MCSymbol; class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; StackMaps SM; + FaultMaps FM; void GenerateExportDirective(const MCSymbol *Sym, bool IsData); @@ -83,13 +85,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void LowerSTACKMAP(const MachineInstr &MI); void LowerPATCHPOINT(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL); + void LowerFAULTING_LOAD_OP(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI); public: explicit X86AsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) - : AsmPrinter(TM, std::move(Streamer)), SM(*this), SMShadowTracker(TM) {} + : AsmPrinter(TM, std::move(Streamer)), SM(*this), FM(*this), + SMShadowTracker(TM) {} const char *getPassName() const override { return "X86 Assembly / Object Emitter"; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index e47ab1738d3..655e06a8c17 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -864,6 +864,28 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, SM.recordStatepoint(MI); } +void 
X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI, + X86MCInstLower &MCIL) { + // FAULTING_LOAD_OP <def>, <handler label>, <load opcode>, <load operands> + + unsigned LoadDefRegister = MI.getOperand(0).getReg(); + MCSymbol *HandlerLabel = MI.getOperand(1).getMCSymbol(); + unsigned LoadOpcode = MI.getOperand(2).getImm(); + unsigned LoadOperandsBeginIdx = 3; + + FM.recordFaultingOp(FaultMaps::FaultingLoad, HandlerLabel); + + MCInst LoadMI; + LoadMI.setOpcode(LoadOpcode); + LoadMI.addOperand(MCOperand::createReg(LoadDefRegister)); + for (auto I = MI.operands_begin() + LoadOperandsBeginIdx, + E = MI.operands_end(); + I != E; ++I) + if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, *I)) + LoadMI.addOperand(MaybeOperand.getValue()); + + OutStreamer->EmitInstruction(LoadMI, getSubtargetInfo()); +} // Lower a stackmap of the form: // , , ... @@ -1119,6 +1141,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case TargetOpcode::STATEPOINT: return LowerSTATEPOINT(*MI, MCInstLowering); + case TargetOpcode::FAULTING_LOAD_OP: + return LowerFAULTING_LOAD_OP(*MI, MCInstLowering); + case TargetOpcode::STACKMAP: return LowerSTACKMAP(*MI); diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index e79a809b6d1..47d68fc339a 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -297,7 +297,7 @@ void CodeGenTarget::ComputeInstrsByEnum() const { "IMPLICIT_DEF", "SUBREG_TO_REG", "COPY_TO_REGCLASS", "DBG_VALUE", "REG_SEQUENCE", "COPY", "BUNDLE", "LIFETIME_START", "LIFETIME_END", "STACKMAP", "PATCHPOINT", "LOAD_STACK_GUARD", - "STATEPOINT", "FRAME_ALLOC", + "STATEPOINT", "FRAME_ALLOC", "FAULTING_LOAD_OP", nullptr}; const auto &Insts = getInstructions(); for (const char *const *p = FixedInstrs; *p; ++p) {