diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 3f6eae6bb20..8871c5714c5 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -805,6 +805,7 @@ def STACKMAP : Instruction {
   let InOperandList = (ins i32imm:$id, i32imm:$nbytes, variable_ops);
   let isCall = 1;
   let mayLoad = 1;
+  let usesCustomInserter = 1;
 }
 def PATCHPOINT : Instruction {
   let OutOperandList = (outs unknown:$dst);
@@ -812,6 +813,7 @@ def PATCHPOINT : Instruction {
                        i32imm:$nargs, i32imm:$cc, variable_ops);
   let isCall = 1;
   let mayLoad = 1;
+  let usesCustomInserter = 1;
 }
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7bb1929401a..b1d247e74fd 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6783,6 +6783,21 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
 
 /// \brief Add a stack map intrinsic call's live variable operands to a stackmap
 /// or patchpoint target node's operand list.
+///
+/// Constants are converted to TargetConstants purely as an optimization to
+/// avoid constant materialization and register allocation.
+///
+/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
+/// generate address computation nodes, and so ExpandISelPseudo can convert the
+/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
+/// address materialization and register allocation, but may also be required
+/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
+/// alloca in the entry block, then the runtime may assume that the alloca's
+/// StackMap location can be read immediately after compilation and that the
+/// location is valid at any point during execution (this is similar to the
+/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
+/// only available in a register, then the runtime would need to trap when
+/// execution reaches the StackMap in order to read the alloca's location.
 static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx,
                                 SmallVectorImpl<SDValue> &Ops,
                                 SelectionDAGBuilder &Builder) {
@@ -6793,6 +6808,10 @@ static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx,
         Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
       Ops.push_back(
         Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+    } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
+      const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+      Ops.push_back(
+          Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy()));
     } else
       Ops.push_back(OpVal);
   }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f4c926ec8b6..fc52c0cd669 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -15811,6 +15811,51 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
   return MBB;
 }
 
+/// Convert any TargetFrameIndex operands into the x86-specific pattern of five
+/// memory operands that is recognized by PrologEpilogInserter.
+MachineBasicBlock *
+X86TargetLowering::emitPatchPoint(MachineInstr *MI,
+                                  MachineBasicBlock *MBB) const {
+  const TargetMachine &TM = getTargetMachine();
+  const X86InstrInfo *TII = static_cast<const X86InstrInfo*>(TM.getInstrInfo());
+
+  // MI changes inside this loop as we grow operands.
+  for (unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) {
+    MachineOperand &MO = MI->getOperand(OperIdx);
+    if (!MO.isFI())
+      continue;
+
+    // foldMemoryOperand builds a new MI after replacing a single FI operand
+    // with the canonical set of five x86 addressing-mode operands.
+    int FI = MO.getIndex();
+    MachineFunction &MF = *MBB->getParent();
+    SmallVector<unsigned, 3> FIOps(1, OperIdx);
+    MachineInstr *NewMI = TII->foldMemoryOperandImpl(MF, MI, FIOps, FI);
+    assert(NewMI && "Cannot fold frame index operand into stackmap.");
+
+    // Inherit previous memory operands.
+    NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    assert(NewMI->mayLoad() && "Folded a stackmap use to a non-load!");
+
+    // Add a new memory operand for this FI.
+    const MachineFrameInfo &MFI = *MF.getFrameInfo();
+    assert(MFI.getObjectOffset(FI) != -1);
+    MachineMemOperand *MMO =
+        MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                                MachineMemOperand::MOLoad,
+                                TM.getDataLayout()->getPointerSize(),
+                                MFI.getObjectAlignment(FI));
+    NewMI->addMemOperand(MF, MMO);
+
+    // Replace the instruction and update the operand index.
+    MBB->insert(MachineBasicBlock::iterator(MI), NewMI);
+    OperIdx += (NewMI->getNumOperands() - MI->getNumOperands()) - 1;
+    MI->eraseFromParent();
+    MI = NewMI;
+  }
+  return MBB;
+}
+
 MachineBasicBlock *
 X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
@@ -16038,6 +16083,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::EH_SjLj_LongJmp32:
   case X86::EH_SjLj_LongJmp64:
     return emitEHSjLjLongJmp(MI, BB);
+
+  case TargetOpcode::STACKMAP:
+  case TargetOpcode::PATCHPOINT:
+    return emitPatchPoint(MI, BB);
   }
 }
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index bc3dd608da5..6231e253d21 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -973,6 +973,9 @@ namespace llvm {
     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
                                          MachineBasicBlock *MBB) const;
 
+    MachineBasicBlock *emitPatchPoint(MachineInstr *MI,
+                                      MachineBasicBlock *MBB) const;
+
     /// Emit nodes that will be selected as "test Op0,Op0", or something
     /// equivalent, for use with the given x86 condition code.
     SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 24617737420..ad46c10c7d0 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -4237,18 +4237,27 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF,
   for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) {
     MachineOperand &MO = MI->getOperand(i);
     if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) {
-      assert(MO.getReg() && "patchpoint can only fold a vreg operand");
-      // Compute the spill slot size and offset.
-      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg());
       unsigned SpillSize;
       unsigned SpillOffset;
-      bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize,
-                                         SpillOffset, &MF.getTarget());
-      if (!Valid)
-        report_fatal_error("cannot spill patchpoint subregister operand");
-
-      MIB.addOperand(MachineOperand::CreateImm(StackMaps::IndirectMemRefOp));
-      MIB.addOperand(MachineOperand::CreateImm(SpillSize));
+      if (MO.isReg()) {
+        // Compute the spill slot size and offset.
+        const TargetRegisterClass *RC =
+            MF.getRegInfo().getRegClass(MO.getReg());
+        bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize,
+                                           SpillOffset, &MF.getTarget());
+        if (!Valid)
+          report_fatal_error("cannot spill patchpoint subregister operand");
+        MIB.addOperand(MachineOperand::CreateImm(StackMaps::IndirectMemRefOp));
+        MIB.addOperand(MachineOperand::CreateImm(SpillSize));
+      }
+      else {
+        // ExpandISelPseudos is converting a simple frame index into a
+        // 5-operand frame index.
+        assert(MO.isFI() && MO.getIndex() == FrameIndex &&
+               "patchpoint can only fold a vreg operand or frame index");
+        SpillOffset = 0;
+        MIB.addOperand(MachineOperand::CreateImm(StackMaps::DirectMemRefOp));
+      }
       MIB.addOperand(MachineOperand::CreateFI(FrameIndex));
       addOffset(MIB, SpillOffset);
     }
diff --git a/test/CodeGen/X86/anyregcc.ll b/test/CodeGen/X86/anyregcc.ll
index 8109f879f21..300db34b975 100644
--- a/test/CodeGen/X86/anyregcc.ll
+++ b/test/CodeGen/X86/anyregcc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck %s
 
 ; Stackmap Header: no constants - 6 callsites
 ; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps
@@ -95,11 +95,11 @@ entry:
 ; CHECK-NEXT:   .byte 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
 ; CHECK-NEXT:   .long 0
-; Loc 1: Register <-- this will be folded once folding for FI is implemented
-; CHECK-NEXT:   .byte 1
+; Loc 1: Direct RBP - ofs
+; CHECK-NEXT:   .byte 2
 ; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short {{[0-9]+}}
-; CHECK-NEXT:   .long 0
+; CHECK-NEXT:   .short 6
+; CHECK-NEXT:   .long
 define i64 @property_access3() nounwind ssp uwtable {
 entry:
   %obj = alloca i64, align 8
@@ -330,13 +330,13 @@ entry:
 ; Loc 3: Arg2 spilled to RBP +
 ; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short 7
-; CHECK-NEXT:   .long {{[0-9]+}}
+; CHECK-NEXT:   .short 6
+; CHECK-NEXT:   .long
 ; Loc 4: Arg3 spilled to RBP +
 ; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short 7
-; CHECK-NEXT:   .long {{[0-9]+}}
+; CHECK-NEXT:   .short 6
+; CHECK-NEXT:   .long
 define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 entry:
   tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
diff --git a/test/CodeGen/X86/stackmap.ll b/test/CodeGen/X86/stackmap.ll
index 2cc198db174..55532e96ace 100644
--- a/test/CodeGen/X86/stackmap.ll
+++ b/test/CodeGen/X86/stackmap.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT:   .long 1
 ; CHECK-NEXT:   .quad 4294967296
 ; Num Callsites
-; CHECK-NEXT:   .long 12
+; CHECK-NEXT:   .long 14
 
 ; Constant arguments
 ;
@@ -305,6 +305,48 @@ define void @liveConstant() {
   ret void
 }
 
+; Directly map an alloca's address.
+; +; Callsite 16 +; CHECK: .long 16 +; CHECK-LABEL: .long L{{.*}}-_directFrameIdx +; CHECK-NEXT: .short 0 +; 1 location +; CHECK-NEXT: .short 1 +; Loc 0: Direct RBP - ofs +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .long +; Callsite 17 +; CHECK-NEXT: .long 17 +; CHECK-NEXT: .long L{{.*}}-_directFrameIdx +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Direct RBP - ofs +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .long +; Loc 1: Direct RBP - ofs +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 6 +; CHECK-NEXT: .long +define void @directFrameIdx() { +entry: + %metadata1 = alloca i64, i32 3, align 8 + store i64 11, i64* %metadata1 + store i64 12, i64* %metadata1 + store i64 13, i64* %metadata1 + call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 16, i32 0, i64* %metadata1) + %metadata2 = alloca i8, i32 4, align 8 + %metadata3 = alloca i16, i32 4, align 8 + call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 17, i32 5, i8* null, i32 0, i8* %metadata2, i16* %metadata3) + ret void +} + declare void @llvm.experimental.stackmap(i32, i32, ...) declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...) declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...)