//===-- SelectionDAGBuild.cpp - Selection-DAG building --------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This implements routines for translating from LLVM IR into SelectionDAG IR. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "isel" #include "SelectionDAGBuild.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Constants.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; /// LimitFloatPrecision - Generate low-precision inline sequences for /// some float libcalls (6, 8 or 12 bits). static unsigned LimitFloatPrecision; static cl::opt LimitFPPrecision("limit-float-precision", cl::desc("Generate low-precision inline sequences " "for some float libcalls"), cl::location(LimitFloatPrecision), cl::init(0)); /// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence /// of insertvalue or extractvalue indices that identify a member, return /// the linearized index of the start of the member. /// static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, const unsigned *Indices, const unsigned *IndicesEnd, unsigned CurIndex = 0) { // Base case: We're done. if (Indices && Indices == IndicesEnd) return CurIndex; // Given a struct type, recursively traverse the elements. if (const StructType *STy = dyn_cast(Ty)) { for (StructType::element_iterator EB = STy->element_begin(), EI = EB, EE = STy->element_end(); EI != EE; ++EI) { if (Indices && *Indices == unsigned(EI - EB)) return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); } return CurIndex; } // Given an array type, recursively traverse the elements. else if (const ArrayType *ATy = dyn_cast(Ty)) { const Type *EltTy = ATy->getElementType(); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { if (Indices && *Indices == i) return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); } return CurIndex; } // We haven't found the type we're looking for, so keep searching. return CurIndex + 1; } /// ComputeValueVTs - Given an LLVM IR type, compute a sequence of /// EVTs that represent all the individual underlying /// non-aggregate types that comprise it. /// /// If Offsets is non-null, it points to a vector to be filled in /// with the in-memory offsets of each of the individual values. /// static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, SmallVectorImpl &ValueVTs, SmallVectorImpl *Offsets = 0, uint64_t StartingOffset = 0) { // Given a struct type, recursively traverse the elements. if (const StructType *STy = dyn_cast(Ty)) { const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); for (StructType::element_iterator EB = STy->element_begin(), EI = EB, EE = STy->element_end(); EI != EE; ++EI) ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, StartingOffset + SL->getElementOffset(EI - EB)); return; } // Given an array type, recursively traverse the elements. if (const ArrayType *ATy = dyn_cast(Ty)) { const Type *EltTy = ATy->getElementType(); uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, StartingOffset + i * EltSize); return; } // Interpret void as zero return values. if (Ty == Type::getVoidTy(Ty->getContext())) return; // Base case: we can get an EVT for this LLVM IR type. ValueVTs.push_back(TLI.getValueType(Ty)); if (Offsets) Offsets->push_back(StartingOffset); } namespace llvm { /// RegsForValue - This struct represents the registers (physical or virtual) /// that a particular set of values is assigned, and the type information about /// the value. The most common situation is to represent one value at a time, /// but struct or array values are handled element-wise as multiple values. /// The splitting of aggregates is performed recursively, so that we never /// have aggregate-typed registers. The values at this point do not necessarily /// have legal types, so each value may require one or more registers of some /// legal type. /// struct VISIBILITY_HIDDEN RegsForValue { /// TLI - The TargetLowering object. /// const TargetLowering *TLI; /// ValueVTs - The value types of the values, which may not be legal, and /// may need be promoted or synthesized from one or more registers. /// SmallVector ValueVTs; /// RegVTs - The value types of the registers. This is the same size as /// ValueVTs and it records, for each value, what the type of the assigned /// register or registers are. (Individual values are never synthesized /// from more than one type of register.) /// /// With virtual registers, the contents of RegVTs is redundant with TLI's /// getRegisterType member function, however when with physical registers /// it is necessary to have a separate record of the types. /// SmallVector RegVTs; /// Regs - This list holds the registers assigned to the values. /// Each legal or promoted value requires one register, and each /// expanded value requires multiple registers. /// SmallVector Regs; RegsForValue() : TLI(0) {} RegsForValue(const TargetLowering &tli, const SmallVector ®s, EVT regvt, EVT valuevt) : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} RegsForValue(const TargetLowering &tli, const SmallVector ®s, const SmallVector ®vts, const SmallVector &valuevts) : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg, const Type *Ty) : TLI(&tli) { ComputeValueVTs(tli, Ty, ValueVTs); for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT); EVT RegisterVT = TLI->getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); Reg += NumRegs; } } /// append - Add the specified values to this one. void append(const RegsForValue &RHS) { TLI = RHS.TLI; ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); Regs.append(RHS.Regs.begin(), RHS.Regs.end()); } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from /// this value and returns the result as a ValueVTs value. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, SDValue &Chain, SDValue *Flag) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, SDValue &Chain, SDValue *Flag) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index /// (if applicable), and includes the number of values added into it. void AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned MatchingIdx, SelectionDAG &DAG, std::vector &Ops) const; }; } /// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by /// PHI nodes or outside of the basic block that defines it, or used by a /// switch or atomic instruction, which may expand to multiple basic blocks. static bool isUsedOutsideOfDefiningBlock(Instruction *I) { if (isa(I)) return true; BasicBlock *BB = I->getParent(); for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) if (cast(*UI)->getParent() != BB || isa(*UI)) return true; return false; } /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. // Don't force virtual registers for byval arguments though, because // fast-isel can't handle those in all cases. if (EnableFastISel && !A->hasByValAttr()) return A->use_empty(); BasicBlock *Entry = A->getParent()->begin(); for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI) if (cast(*UI)->getParent() != Entry || isa(*UI)) return false; // Use not in entry block. return true; } FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli) : TLI(tli) { } void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, SelectionDAG &DAG, bool EnableFastISel) { Fn = &fn; MF = &mf; RegInfo = &MF->getRegInfo(); // Create a vreg for each argument register that is not dead and is used // outside of the entry block for the function. for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); AI != E; ++AI) if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) InitializeRegForValue(AI); // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. Function::iterator BB = Fn->begin(), EB = Fn->end(); for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (AllocaInst *AI = dyn_cast(I)) if (ConstantInt *CUI = dyn_cast(AI->getArraySize())) { const Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false); } for (; BB != EB; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) if (!isa(I) || !StaticAllocaMap.count(cast(I))) InitializeRegForValue(I); // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) { MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); MBBMap[BB] = MBB; MF->push_back(MBB); // Transfer the address-taken flag. This is necessary because there could // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only // the first one should be marked. if (BB->hasAddressTaken()) MBB->setHasAddressTaken(); // Create Machine PHI nodes for LLVM PHI nodes, lowering them as // appropriate. PHINode *PN; DebugLoc DL; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { PN = dyn_cast(I); if (!PN || PN->use_empty()) continue; unsigned PHIReg = ValueMap[PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); SmallVector ValueVTs; ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); PHIReg += NumRegisters; } } } } unsigned FunctionLoweringInfo::MakeReg(EVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } /// CreateRegForValue - Allocate the appropriate number of virtual registers of /// the correctly promoted or expanded types. Assign these registers /// consecutive vreg numbers and return the first assigned number. /// /// In the case that the given value has struct or array type, this function /// will assign registers for each member or element. /// unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { SmallVector ValueVTs; ComputeValueVTs(TLI, V->getType(), ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT); unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { unsigned R = MakeReg(RegisterVT); if (!FirstReg) FirstReg = R; } } return FirstReg; } /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger then ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT, ISD::NodeType AssertOp = ISD::DELETED_NODE) { assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; if (NumParts > 1) { // Assemble the value from multiple parts. if (!ValueVT.isVector() && ValueVT.isInteger()) { unsigned PartBits = PartVT.getSizeInBits(); unsigned ValueBits = ValueVT.getSizeInBits(); // Assemble the power of 2 part. unsigned RoundParts = NumParts & (NumParts - 1) ? 1 << Log2_32(NumParts) : NumParts; unsigned RoundBits = PartBits * RoundParts; EVT RoundVT = RoundBits == ValueBits ? ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); SDValue Lo, Hi; EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT); Hi = getCopyFromParts(DAG, dl, Parts+RoundParts/2, RoundParts/2, PartVT, HalfVT); } else { Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]); } if (TLI.isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi); if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, dl, Parts+RoundParts, OddParts, PartVT, OddVT); // Combine the round and odd parts. Lo = Val; if (TLI.isBigEndian()) std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, DAG.getConstant(Lo.getValueType().getSizeInBits(), TLI.getPointerTy())); Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo); Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi); } } else if (ValueVT.isVector()) { // Handle a multi-element vector. EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, RegisterVT); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); assert(RegisterVT == Parts[0].getValueType() && "Part type doesn't match part!"); // Assemble the parts into intermediate operands. SmallVector Ops(NumIntermediates); if (NumIntermediates == NumParts) { // If the register was not expanded, truncate or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1, PartVT, IntermediateVT); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate operands // from the parts. assert(NumParts % NumIntermediates == 0 && "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor, PartVT, IntermediateVT); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate // operands. Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl, ValueVT, &Ops[0], NumIntermediates); } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && "Unexpected split"); SDValue Lo, Hi; Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]); if (TLI.isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); } else { // FP split into integer parts (soft fp) assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); } } // There is now one part, held in Val. Correct it to match ValueVT. PartVT = Val.getValueType(); if (PartVT == ValueVT) return Val; if (PartVT.isVector()) { assert(ValueVT.isVector() && "Unknown vector conversion!"); return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); } if (ValueVT.isVector()) { assert(ValueVT.getVectorElementType() == PartVT && ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val); } if (PartVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartVT)) { // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. if (AssertOp != ISD::DELETED_NODE) Val = DAG.getNode(AssertOp, dl, PartVT, Val, DAG.getValueType(ValueVT)); return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); } else { return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val); } } if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { if (ValueVT.bitsLT(Val.getValueType())) // FP_ROUND's are always exact here. return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val, DAG.getIntPtrConstant(1)); return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val); } if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); return SDValue(); } /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrVT = TLI.getPointerTy(); EVT ValueVT = Val.getValueType(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); if (!NumParts) return; if (!ValueVT.isVector()) { if (PartVT == ValueVT) { assert(NumParts == 1 && "No-op copy with multiple parts!"); Parts[0] = Val; return; } if (NumParts * PartBits > ValueVT.getSizeInBits()) { // If the parts cover more bits than the value has, promote the value. if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val); } else if (PartVT.isInteger() && ValueVT.isInteger()) { ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, dl, ValueVT, Val); } else { llvm_unreachable("Unknown mismatch!"); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. assert(NumParts == 1 && PartVT != ValueVT); Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. if (PartVT.isInteger() && ValueVT.isInteger()) { ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); } else { llvm_unreachable("Unknown mismatch!"); } } // The value may have changed - recompute ValueVT. ValueVT = Val.getValueType(); assert(NumParts * PartBits == ValueVT.getSizeInBits() && "Failed to tile the value with PartVT!"); if (NumParts == 1) { assert(PartVT == ValueVT && "Type conversion failed!"); Parts[0] = Val; return; } // Expand the value into multiple parts. if (NumParts & (NumParts - 1)) { // The number of parts is not a power of 2. Split off and copy the tail. assert(PartVT.isInteger() && ValueVT.isInteger() && "Do not know what to expand to!"); unsigned RoundParts = 1 << Log2_32(NumParts); unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val, DAG.getConstant(RoundBits, TLI.getPointerTy())); getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT); if (TLI.isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. std::reverse(Parts + RoundParts, Parts + NumParts); NumParts = RoundParts; ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); } // The number of parts is a power of 2. Repeatedly bisect the value using // EXTRACT_ELEMENT. Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl, EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()), Val); for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { for (unsigned i = 0; i < NumParts; i += StepSize) { unsigned ThisBits = StepSize * PartBits / 2; EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); SDValue &Part0 = Parts[i]; SDValue &Part1 = Parts[i+StepSize/2]; Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ThisVT, Part0, DAG.getConstant(1, PtrVT)); Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ThisVT, Part0, DAG.getConstant(0, PtrVT)); if (ThisBits == PartBits && ThisVT != PartVT) { Part0 = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Part0); Part1 = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Part1); } } } if (TLI.isBigEndian()) std::reverse(Parts, Parts + OrigNumParts); return; } // Vector ValueVT. if (NumParts == 1) { if (PartVT != ValueVT) { if (PartVT.isVector()) { Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); } else { assert(ValueVT.getVectorElementType() == PartVT && ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, PartVT, Val, DAG.getConstant(0, PtrVT)); } } Parts[0] = Val; return; } // Handle a multi-element vector. EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); // Split the vector into intermediate operands. SmallVector Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, IntermediateVT, Val, DAG.getConstant(i * (NumElements / NumIntermediates), PtrVT)); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, IntermediateVT, Val, DAG.getConstant(i, PtrVT)); // Split the intermediate operands into legal parts. if (NumParts == NumIntermediates) { // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. assert(NumParts % NumIntermediates == 0 && "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) getCopyToParts(DAG, dl, Ops[i], &Parts[i * Factor], Factor, PartVT); } } void SelectionDAGLowering::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { AA = &aa; GFI = gfi; TD = DAG.getTarget().getTargetData(); } /// clear - Clear out the curret SelectionDAG and the associated /// state and prepare this SelectionDAGLowering object to be used /// for a new block. This doesn't clear out information about /// additional blocks that are needed to complete switch lowering /// or PHI node updating; that information is cleared out as it is /// consumed. void SelectionDAGLowering::clear() { NodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); EdgeMapping.clear(); DAG.clear(); CurDebugLoc = DebugLoc::getUnknownLoc(); HasTailCall = false; } /// getRoot - Return the current virtual root of the Selection DAG, /// flushing any PendingLoad items. This must be done before emitting /// a store or any other node that may need to be ordered after any /// prior load instructions. /// SDValue SelectionDAGLowering::getRoot() { if (PendingLoads.empty()) return DAG.getRoot(); if (PendingLoads.size() == 1) { SDValue Root = PendingLoads[0]; DAG.setRoot(Root); PendingLoads.clear(); return Root; } // Otherwise, we have to make a token factor node. SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &PendingLoads[0], PendingLoads.size()); PendingLoads.clear(); DAG.setRoot(Root); return Root; } /// getControlRoot - Similar to getRoot, but instead of flushing all the /// PendingLoad items, flush all the PendingExports items. It is necessary /// to do this before emitting a terminator instruction. /// SDValue SelectionDAGLowering::getControlRoot() { SDValue Root = DAG.getRoot(); if (PendingExports.empty()) return Root; // Turn all of the CopyToReg chains into one factored node. if (Root.getOpcode() != ISD::EntryToken) { unsigned i = 0, e = PendingExports.size(); for (; i != e; ++i) { assert(PendingExports[i].getNode()->getNumOperands() > 1); if (PendingExports[i].getNode()->getOperand(0) == Root) break; // Don't add the root if we already indirectly depend on it. } if (i == e) PendingExports.push_back(Root); } Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &PendingExports[0], PendingExports.size()); PendingExports.clear(); DAG.setRoot(Root); return Root; } void SelectionDAGLowering::visit(Instruction &I) { visit(I.getOpcode(), I); } void SelectionDAGLowering::visit(unsigned Opcode, User &I) { // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE:return visit##OPCODE((CLASS&)I); #include "llvm/Instruction.def" } } SDValue SelectionDAGLowering::getValue(const Value *V) { SDValue &N = NodeMap[V]; if (N.getNode()) return N; if (Constant *C = const_cast(dyn_cast(V))) { EVT VT = TLI.getValueType(V->getType(), true); if (ConstantInt *CI = dyn_cast(C)) return N = DAG.getConstant(*CI, VT); if (GlobalValue *GV = dyn_cast(C)) return N = DAG.getGlobalAddress(GV, VT); if (isa(C)) return N = DAG.getConstant(0, TLI.getPointerTy()); if (ConstantFP *CFP = dyn_cast(C)) return N = DAG.getConstantFP(*CFP, VT); if (isa(C) && !V->getType()->isAggregateType()) return N = DAG.getUNDEF(VT); if (ConstantExpr *CE = dyn_cast(C)) { visit(CE->getOpcode(), *CE); SDValue N1 = NodeMap[V]; assert(N1.getNode() && "visit didn't populate the ValueMap!"); return N1; } if (isa(C) || isa(C)) { SmallVector Constants; for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); OI != OE; ++OI) { SDNode *Val = getValue(*OI).getNode(); // If the operand is an empty aggregate, there are no values. if (!Val) continue; // Add each leaf value from the operand to the Constants list // to form a flattened list of all the values. for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) Constants.push_back(SDValue(Val, i)); } return DAG.getMergeValues(&Constants[0], Constants.size(), getCurDebugLoc()); } if (isa(C->getType()) || isa(C->getType())) { assert((isa(C) || isa(C)) && "Unknown struct or array constant!"); SmallVector ValueVTs; ComputeValueVTs(TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct SmallVector Constants(NumElts); for (unsigned i = 0; i != NumElts; ++i) { EVT EltVT = ValueVTs[i]; if (isa(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) Constants[i] = DAG.getConstantFP(0, EltVT); else Constants[i] = DAG.getConstant(0, EltVT); } return DAG.getMergeValues(&Constants[0], NumElts, getCurDebugLoc()); } if (BlockAddress *BA = dyn_cast(C)) return DAG.getBlockAddress(BA, VT); const VectorType *VecTy = cast(V->getType()); unsigned NumElements = VecTy->getNumElements(); // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. SmallVector Ops; if (ConstantVector *CP = dyn_cast(C)) { for (unsigned i = 0; i != NumElements; ++i) Ops.push_back(getValue(CP->getOperand(i))); } else { assert(isa(C) && "Unknown vector constant!"); EVT EltVT = TLI.getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) Op = DAG.getConstantFP(0, EltVT); else Op = DAG.getConstant(0, EltVT); Ops.assign(NumElements, Op); } // Create a BUILD_VECTOR node. return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), VT, &Ops[0], Ops.size()); } // If this is a static alloca, generate it as the frameindex instead of // computation. if (const AllocaInst *AI = dyn_cast(V)) { DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); } unsigned InReg = FuncInfo.ValueMap[V]; assert(InReg && "Value not in map!"); RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); } /// Get the EVTs and ArgFlags collections that represent the return type /// of the given function. This does not require a DAG or a return value, and /// is suitable for use before any DAGs for the function are constructed. static void getReturnInfo(const Type* ReturnType, Attributes attr, SmallVectorImpl &OutVTs, SmallVectorImpl &OutFlags, TargetLowering &TLI, SmallVectorImpl *Offsets = 0) { SmallVector ValueVTs; ComputeValueVTs(TLI, ReturnType, ValueVTs, Offsets); unsigned NumValues = ValueVTs.size(); if ( NumValues == 0 ) return; for (unsigned j = 0, f = NumValues; j != f; ++j) { EVT VT = ValueVTs[j]; ISD::NodeType ExtendKind = ISD::ANY_EXTEND; if (attr & Attribute::SExt) ExtendKind = ISD::SIGN_EXTEND; else if (attr & Attribute::ZExt) ExtendKind = ISD::ZERO_EXTEND; // FIXME: C calling convention requires the return type to be promoted to // at least 32-bit. But this is not necessary for non-C calling // conventions. The frontend should mark functions whose return values // require promoting with signext or zeroext attributes. if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); if (VT.bitsLT(MinVT)) VT = MinVT; } unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); if (attr & Attribute::InReg) Flags.setInReg(); // Propagate extension type if any if (attr & Attribute::SExt) Flags.setSExt(); else if (attr & Attribute::ZExt) Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) { OutVTs.push_back(PartVT); OutFlags.push_back(Flags); } } } void SelectionDAGLowering::visitRet(ReturnInst &I) { SDValue Chain = getControlRoot(); SmallVector Outs; FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); if (!FLI.CanLowerReturn) { unsigned DemoteReg = FLI.DemoteRegister; const Function *F = I.getParent()->getParent(); // Emit a store of the return value through the virtual register. // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector PtrValueVTs; ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); SDValue RetOp = getValue(I.getOperand(0)); SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector Chains(NumValues); EVT PtrVT = PtrValueVTs[0]; for (unsigned i = 0; i != NumValues; ++i) Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr, DAG.getConstant(Offsets[i], PtrVT)), NULL, Offsets[i], false, 0); Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues); } else { for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) continue; SDValue RetOp = getValue(I.getOperand(i)); for (unsigned j = 0, f = NumValues; j != f; ++j) { EVT VT = ValueVTs[j]; ISD::NodeType ExtendKind = ISD::ANY_EXTEND; const Function *F = I.getParent()->getParent(); if (F->paramHasAttr(0, Attribute::SExt)) ExtendKind = ISD::SIGN_EXTEND; else if (F->paramHasAttr(0, Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; // FIXME: C calling convention requires the return type to be promoted to // at least 32-bit. But this is not necessary for non-C calling // conventions. The frontend should mark functions whose return values // require promoting with signext or zeroext attributes. if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); if (VT.bitsLT(MinVT)) VT = MinVT; } unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); SmallVector Parts(NumParts); getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), &Parts[0], NumParts, PartVT, ExtendKind); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); if (F->paramHasAttr(0, Attribute::InReg)) Flags.setInReg(); // Propagate extension type if any if (F->paramHasAttr(0, Attribute::SExt)) Flags.setSExt(); else if (F->paramHasAttr(0, Attribute::ZExt)) Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); } } } bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, Outs, getCurDebugLoc(), DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && "LowerReturn didn't return a valid chain!"); // Update the DAG with the new chain value resulting from return lowering. DAG.setRoot(Chain); } /// CopyToExportRegsIfNeeded - If the given value has virtual registers /// created for it, emit nodes to copy the value into the virtual /// registers. void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) { if (!V->use_empty()) { DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) CopyValueToVirtualRegister(V, VMI->second); } } /// ExportFromCurrentBlock - If this condition isn't known to be exported from /// the current basic block, add it to ValueMap now so that we'll get a /// CopyTo/FromReg. void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) { // No need to export constants. if (!isa(V) && !isa(V)) return; // Already exported? if (FuncInfo.isExportedInst(V)) return; unsigned Reg = FuncInfo.InitializeRegForValue(V); CopyValueToVirtualRegister(V, Reg); } bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB) { // The operands of the setcc have to be in this block. We don't know // how to export them from some other block. if (Instruction *VI = dyn_cast(V)) { // Can export from current BB. if (VI->getParent() == FromBB) return true; // Is already exported, noop. return FuncInfo.isExportedInst(V); } // If this is an argument, we can export it if the BB is the entry block or // if it is already exported. if (isa(V)) { if (FromBB == &FromBB->getParent()->getEntryBlock()) return true; // Otherwise, can only export this if it is already exported. return FuncInfo.isExportedInst(V); } // Otherwise, constants can always be exported. return true; } static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast(V)) return I->getParent() == BB; return true; } /// getFCmpCondCode - Return the ISD condition code corresponding to /// the given LLVM IR floating-point condition code. This includes /// consideration of global floating-point math flags. /// static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) { ISD::CondCode FPC, FOC; switch (Pred) { case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; default: llvm_unreachable("Invalid FCmp predicate opcode!"); FOC = FPC = ISD::SETFALSE; break; } if (FiniteOnlyFPMath()) return FOC; else return FPC; } /// getICmpCondCode - Return the ISD condition code corresponding to /// the given LLVM IR integer condition code. /// static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) { switch (Pred) { case ICmpInst::ICMP_EQ: return ISD::SETEQ; case ICmpInst::ICMP_NE: return ISD::SETNE; case ICmpInst::ICMP_SLE: return ISD::SETLE; case ICmpInst::ICMP_ULE: return ISD::SETULE; case ICmpInst::ICMP_SGE: return ISD::SETGE; case ICmpInst::ICMP_UGE: return ISD::SETUGE; case ICmpInst::ICMP_SLT: return ISD::SETLT; case ICmpInst::ICMP_ULT: return ISD::SETULT; case ICmpInst::ICMP_SGT: return ISD::SETGT; case ICmpInst::ICMP_UGT: return ISD::SETUGT; default: llvm_unreachable("Invalid ICmp predicate opcode!"); return ISD::SETNE; } } /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. /// This function emits a branch and is used at the leaves of an OR or an /// AND operator tree. /// void SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into // the caseblock. if (CmpInst *BOp = dyn_cast(Cond)) { // The operands of the cmp have to be in this block. We don't know // how to export them from some other block. If this is the first block // of the sequence, no exporting is needed. if (CurBB == CurMBB || (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { ISD::CondCode Condition; if (ICmpInst *IC = dyn_cast(Cond)) { Condition = getICmpCondCode(IC->getPredicate()); } else if (FCmpInst *FC = dyn_cast(Cond)) { Condition = getFCmpCondCode(FC->getPredicate()); } else { Condition = ISD::SETEQ; // silence warning. llvm_unreachable("Unknown compare instruction"); } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), NULL, TBB, FBB, CurBB); SwitchCases.push_back(CB); return; } } // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), NULL, TBB, FBB, CurBB); SwitchCases.push_back(CB); } /// FindMergedConditions - If Cond is an expression like void SelectionDAGLowering::FindMergedConditions(Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, unsigned Opc) { // If this node is not part of the or/and tree, emit it as a branch. Instruction *BOp = dyn_cast(Cond); if (!BOp || !(isa(BOp) || isa(BOp)) || (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB); return; } // Create TmpBB after CurBB. MachineFunction::iterator BBI = CurBB; MachineFunction &MF = DAG.getMachineFunction(); MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); CurBB->getParent()->insert(++BBI, TmpBB); if (Opc == Instruction::Or) { // Codegen X | Y as: // jmp_if_X TBB // jmp TmpBB // TmpBB: // jmp_if_Y TBB // jmp FBB // // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: // jmp_if_X TmpBB // jmp FBB // TmpBB: // jmp_if_Y TBB // jmp FBB // // This requires creation of TmpBB after CurBB. // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); } } /// If the set of cases should be emitted as a series of branches, return true. /// If we should emit this as a bunch of and/or'd together conditions, return /// false. bool SelectionDAGLowering::ShouldEmitAsBranches(const std::vector &Cases){ if (Cases.size() != 2) return true; // If this is two comparisons of the same values or'd or and'd together, they // will get folded into a single comparison, so don't emit two blocks. if ((Cases[0].CmpLHS == Cases[1].CmpLHS && Cases[0].CmpRHS == Cases[1].CmpRHS) || (Cases[0].CmpRHS == Cases[1].CmpLHS && Cases[0].CmpLHS == Cases[1].CmpRHS)) { return false; } return true; } void SelectionDAGLowering::visitBr(BranchInst &I) { // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; if (I.isUnconditional()) { // Update machine-CFG edges. CurMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch, emit the branch. if (Succ0MBB != NextBlock) DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); return; } // If this condition is one of the special cases we handle, do special stuff // now. Value *CondVal = I.getCondition(); MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; // If this is a series of conditions that are or'd or and'd together, emit // this as a sequence of branches instead of setcc's with and/or operations. // For example, instead of something like: // cmp A, B // C = seteq // cmp D, E // F = setle // or C, F // jnz foo // Emit: // cmp A, B // je foo // cmp D, E // jle foo // if (BinaryOperator *BOp = dyn_cast(CondVal)) { if (BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode()); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!"); // Allow some cases to be rejected. if (ShouldEmitAsBranches(SwitchCases)) { for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { ExportFromCurrentBlock(SwitchCases[i].CmpLHS); ExportFromCurrentBlock(SwitchCases[i].CmpRHS); } // Emit the branch for this block. visitSwitchCase(SwitchCases[0]); SwitchCases.erase(SwitchCases.begin()); return; } // Okay, we decided not to do this, remove any inserted MBB's and clear // SwitchCases. for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) FuncInfo.MF->erase(SwitchCases[i].ThisBB); SwitchCases.clear(); } } // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), NULL, Succ0MBB, Succ1MBB, CurMBB); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. visitSwitchCase(CB); } /// visitSwitchCase - Emits the necessary code to represent a single node in /// the binary search tree resulting from lowering a switch instruction. void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); DebugLoc dl = getCurDebugLoc(); // Build the setcc now. if (CB.CmpMHS == NULL) { // Fold "(X == true)" to X and "(X == false)" to !X to // handle common cases produced by branch lowering. if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && CB.CC == ISD::SETEQ) Cond = CondLHS; else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && CB.CC == ISD::SETEQ) { SDValue True = DAG.getConstant(1, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); } else { assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast(CB.CmpLHS)->getValue(); const APInt& High = cast(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); if (cast(CB.CmpLHS)->isMinValue(true)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, VT, CmpOp, DAG.getConstant(Low, VT)); Cond = DAG.getSetCC(dl, MVT::i1, SUB, DAG.getConstant(High-Low, VT), ISD::SETULE); } } // Update successor info CurMBB->addSuccessor(CB.TrueBB); CurMBB->addSuccessor(CB.FalseBB); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. if (CB.TrueBB == NextBlock) { std::swap(CB.TrueBB, CB.FalseBB); SDValue True = DAG.getConstant(1, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); } SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); // If the branch was constant folded, fix up the CFG. if (BrCond.getOpcode() == ISD::BR) { CurMBB->removeSuccessor(CB.FalseBB); DAG.setRoot(BrCond); } else { // Otherwise, go ahead and insert the false branch. if (BrCond == getControlRoot()) CurMBB->removeSuccessor(CB.TrueBB); if (CB.FalseBB == NextBlock) DAG.setRoot(BrCond); else DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(CB.FalseBB))); } } /// visitJumpTable - Emit JumpTable node in the current MBB void SelectionDAGLowering::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); EVT PTy = TLI.getPointerTy(); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); DAG.setRoot(DAG.getNode(ISD::BR_JT, getCurDebugLoc(), MVT::Other, Index.getValue(1), Table, Index)); } /// visitJumpTableHeader - This function emits necessary code to produce index /// in the JumpTable from switch case. void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH) { // Subtract the lowest switch case value from the value being switched on and // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. SDValue SwitchOp = getValue(JTH.SValue); EVT VT = SwitchOp.getValueType(); SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, DAG.getConstant(JTH.First, VT)); // The SDNode we just created, which holds the value being switched on minus // the the smallest case value, needs to be copied to a virtual register so it // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. SwitchOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy()); unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the largest // case in the switch. SDValue CMP = DAG.getSetCC(getCurDebugLoc(), TLI.getSetCCResultType(SUB.getValueType()), SUB, DAG.getConstant(JTH.Last-JTH.First,VT), ISD::SETUGT); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); if (JT.MBB == NextBlock) DAG.setRoot(BrCond); else DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB))); } /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, DAG.getConstant(B.First, VT)); // Check range SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(), TLI.getSetCCResultType(SUB.getValueType()), SUB, DAG.getConstant(B.Range, VT), ISD::SETUGT); SDValue ShiftOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy()); B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), B.Reg, ShiftOp); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; MachineBasicBlock* MBB = B.Cases[0].ThisBB; CurMBB->addSuccessor(B.Default); CurMBB->addSuccessor(MBB); SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); if (MBB == NextBlock) DAG.setRoot(BrRange); else DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, DAG.getBasicBlock(MBB))); } /// visitBitTestCase - this function produces one "bit test" void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB, unsigned Reg, BitTestCase &B) { // Make desired shift SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, TLI.getPointerTy()); SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), TLI.getPointerTy(), DAG.getConstant(1, TLI.getPointerTy()), ShiftOp); // Emit bit tests and jumps SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), TLI.getPointerTy(), SwitchVal, DAG.getConstant(B.Mask, TLI.getPointerTy())); SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(), TLI.getSetCCResultType(AndOp.getValueType()), AndOp, DAG.getConstant(0, TLI.getPointerTy()), ISD::SETNE); CurMBB->addSuccessor(B.TargetBB); CurMBB->addSuccessor(NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), AndCmp, DAG.getBasicBlock(B.TargetBB)); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; if (NextMBB == NextBlock) DAG.setRoot(BrAnd); else DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB))); } void SelectionDAGLowering::visitInvoke(InvokeInst &I) { // Retrieve successors. MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; const Value *Callee(I.getCalledValue()); if (isa(Callee)) visitInlineAsm(&I); else LowerCallTo(&I, getValue(Callee), false, LandingPad); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. CopyToExportRegsIfNeeded(&I); // Update successor info CurMBB->addSuccessor(Return); CurMBB->addSuccessor(LandingPad); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Return))); } void SelectionDAGLowering::visitUnwind(UnwindInst &I) { } /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for /// small case ranges). bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR, CaseRecVector& WorkList, Value* SV, MachineBasicBlock* Default) { Case& BackCase = *(CR.Range.second-1); // Size is the number of Cases represented by this range. size_t Size = CR.Range.second - CR.Range.first; if (Size > 3) return false; // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CR.CaseBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; // TODO: If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" // Rearrange the case blocks so that the last one falls through if possible. if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { // The last case block won't fall through into 'NextBlock' if we emit the // branches in this order. See if rearranging a case value would help. for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) { if (I->BB == NextBlock) { std::swap(*I, BackCase); break; } } } // Create a CaseBlock record representing a conditional branch to // the Case's target mbb if the value being switched on SV is equal // to C. MachineBasicBlock *CurBlock = CR.CaseBB; for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { MachineBasicBlock *FallThrough; if (I != E-1) { FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); CurMF->insert(BBI, FallThrough); // Put SV in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(SV); } else { // If the last case doesn't match, go to the default block. FallThrough = Default; } Value *RHS, *LHS, *MHS; ISD::CondCode CC; if (I->High == I->Low) { // This is just small small case range :) containing exactly 1 case CC = ISD::SETEQ; LHS = SV; RHS = I->High; MHS = NULL; } else { CC = ISD::SETLE; LHS = I->Low; MHS = SV; RHS = I->High; } CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock); // If emitting the first comparison, just call visitSwitchCase to emit the // code into the current block. Otherwise, push the CaseBlock onto the // vector to be later processed by SDISel, and insert the node's MBB // before the next MBB. if (CurBlock == CurMBB) visitSwitchCase(CB); else SwitchCases.push_back(CB); CurBlock = FallThrough; } return true; } static inline bool areJTsAllowed(const TargetLowering &TLI) { return !DisableJumpTables && (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } static APInt ComputeRange(const APInt &First, const APInt &Last) { APInt LastExt(Last), FirstExt(First); uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; LastExt.sext(BitWidth); FirstExt.sext(BitWidth); return (LastExt - FirstExt + 1ULL); } /// handleJTSwitchCase - Emit jumptable for current switch case range bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR, CaseRecVector& WorkList, Value* SV, MachineBasicBlock* Default) { Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); const APInt &First = cast(FrontCase.Low)->getValue(); const APInt &Last = cast(BackCase.High)->getValue(); APInt TSize(First.getBitWidth(), 0); for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) TSize += I->size(); if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4))) return false; APInt Range = ComputeRange(First, Last); double Density = TSize.roundToDouble() / Range.roundToDouble(); if (Density < 0.4) return false; DEBUG(errs() << "Lowering jump table\n" << "First entry: " << First << ". Last entry: " << Last << '\n' << "Range: " << Range << "Size: " << TSize << ". Density: " << Density << "\n\n"); // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. MachineFunction::iterator BBI = CR.CaseBB; ++BBI; const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); // Create a new basic block to hold the code for loading the address // of the jump table, and jumping to it. Update successor information; // we will either branch to the default case for the switch, or the jump // table. MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, JumpTableBB); CR.CaseBB->addSuccessor(Default); CR.CaseBB->addSuccessor(JumpTableBB); // Build a vector of destination BBs, corresponding to each target // of the jump table. If the value of the jump table slot corresponds to // a case statement, push the case's BB onto the vector, otherwise, push // the default BB. std::vector DestBBs; APInt TEI = First; for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { const APInt& Low = cast(I->Low)->getValue(); const APInt& High = cast(I->High)->getValue(); if (Low.sle(TEI) && TEI.sle(High)) { DestBBs.push_back(I->BB); if (TEI==High) ++I; } else { DestBBs.push_back(Default); } } // Update successor info. Add one edge to each unique successor. BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); for (std::vector::iterator I = DestBBs.begin(), E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; JumpTableBB->addSuccessor(*I); } } // Create a jump table index for this jump table, or return an existing // one. unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs); // Set the jump table information so that we can codegen it as a second // MachineBasicBlock JumpTable JT(-1U, JTI, JumpTableBB, Default); JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB)); if (CR.CaseBB == CurMBB) visitJumpTableHeader(JT, JTH); JTCases.push_back(JumpTableBlock(JTH, JT)); return true; } /// handleBTSplitSwitchCase - emit comparison and split binary search tree into /// 2 subtrees. bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, Value* SV, MachineBasicBlock* Default) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. MachineFunction::iterator BBI = CR.CaseBB; ++BBI; Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); // Size is the number of Cases represented by this range. unsigned Size = CR.Range.second - CR.Range.first; const APInt &First = cast(FrontCase.Low)->getValue(); const APInt &Last = cast(BackCase.High)->getValue(); double FMetric = 0; CaseItr Pivot = CR.Range.first + Size/2; // Select optimal pivot, maximizing sum density of LHS and RHS. This will // (heuristically) allow us to emit JumpTable's later. APInt TSize(First.getBitWidth(), 0); for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) TSize += I->size(); APInt LSize = FrontCase.size(); APInt RSize = TSize-LSize; DEBUG(errs() << "Selecting best pivot: \n" << "First: " << First << ", Last: " << Last <<'\n' << "LSize: " << LSize << ", RSize: " << RSize << '\n'); for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; J!=E; ++I, ++J) { const APInt &LEnd = cast(I->High)->getValue(); const APInt &RBegin = cast(J->Low)->getValue(); APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); double LDensity = (double)LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); double RDensity = (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place DEBUG(errs() <<"=>Step\n" << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' << "LDensity: " << LDensity << ", RDensity: " << RDensity << '\n' << "Metric: " << Metric << '\n'); if (FMetric < Metric) { Pivot = J; FMetric = Metric; DEBUG(errs() << "Current metric set to: " << FMetric << '\n'); } LSize += J->size(); RSize -= J->size(); } if (areJTsAllowed(TLI)) { // If our case is dense we *really* should handle it earlier! assert((FMetric > 0) && "Should handle dense range earlier!"); } else { Pivot = CR.Range.first + Size/2; } CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); Constant *C = Pivot->Low; MachineBasicBlock *FalseBB = 0, *TrueBB = 0; // We know that we branch to the LHS if the Value being switched on is // less than the Pivot value, C. We use this to optimize our binary // tree a bit, by recognizing that if SV is greater than or equal to the // LHS's Case Value, and that Case Value is exactly one less than the // Pivot's Value, then we can branch directly to the LHS's Target, // rather than creating a leaf node for it. if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE && cast(C)->getValue() == (cast(CR.GE)->getValue() + 1LL)) { TrueBB = LHSR.first->BB; } else { TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, TrueBB); WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); // Put SV in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(SV); } // Similar to the optimization above, if the Value being switched on is // known to be less than the Constant CR.LT, and the current Case Value // is CR.LT - 1, then we can branch directly to the target block for // the current Case Value, rather than emitting a RHS leaf node for it. if ((RHSR.second - RHSR.first) == 1 && CR.LT && cast(RHSR.first->Low)->getValue() == (cast(CR.LT)->getValue() - 1LL)) { FalseBB = RHSR.first->BB; } else { FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, FalseBB); WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); // Put SV in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(SV); } // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to LHS. CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); if (CR.CaseBB == CurMBB) visitSwitchCase(CB); else SwitchCases.push_back(CB); return true; } /// handleBitTestsSwitchCase - if current case range has few destination and /// range span less, than machine word bitwidth, encode case range into series /// of masks and emit bit tests with these masks. bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, Value* SV, MachineBasicBlock* Default){ EVT PTy = TLI.getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) return false; size_t numCmps = 0; for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { // Single case counts one, case range - two. numCmps += (I->Low == I->High ? 1 : 2); } // Count unique destinations SmallSet Dests; for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { Dests.insert(I->BB); if (Dests.size() > 3) // Don't bother the code below, if there are too much unique destinations return false; } DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n' << "Total number of comparisons: " << numCmps << '\n'); // Compute span of values. const APInt& minValue = cast(FrontCase.Low)->getValue(); const APInt& maxValue = cast(BackCase.High)->getValue(); APInt cmpRange = maxValue - minValue; DEBUG(errs() << "Compare range: " << cmpRange << '\n' << "Low bound: " << minValue << '\n' << "High bound: " << maxValue << '\n'); if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) || (!(Dests.size() == 1 && numCmps >= 3) && !(Dests.size() == 2 && numCmps >= 5) && !(Dests.size() >= 3 && numCmps >= 6))) return false; DEBUG(errs() << "Emitting bit tests\n"); APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); // Optimize the case where all the case values fit in a // word without having to subtract minValue. In this case, // we can optimize away the subtraction. if (minValue.isNonNegative() && maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) { cmpRange = maxValue; } else { lowBound = minValue; } CaseBitsVector CasesBits; unsigned i, count = 0; for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { MachineBasicBlock* Dest = I->BB; for (i = 0; i < count; ++i) if (Dest == CasesBits[i].BB) break; if (i == count) { assert((count < 3) && "Too much destinations to test!"); CasesBits.push_back(CaseBits(0, Dest, 0)); count++; } const APInt& lowValue = cast(I->Low)->getValue(); const APInt& highValue = cast(I->High)->getValue(); uint64_t lo = (lowValue - lowBound).getZExtValue(); uint64_t hi = (highValue - lowBound).getZExtValue(); for (uint64_t j = lo; j <= hi; j++) { CasesBits[i].Mask |= 1ULL << j; CasesBits[i].Bits++; } } std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); BitTestInfo BTC; // Figure out which block is immediately after the current one. MachineFunction::iterator BBI = CR.CaseBB; ++BBI; const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); DEBUG(errs() << "Cases:\n"); for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { DEBUG(errs() << "Mask: " << CasesBits[i].Mask << ", Bits: " << CasesBits[i].Bits << ", BB: " << CasesBits[i].BB << '\n'); MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, CaseBB); BTC.push_back(BitTestCase(CasesBits[i].Mask, CaseBB, CasesBits[i].BB)); // Put SV in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(SV); } BitTestBlock BTB(lowBound, cmpRange, SV, -1U, (CR.CaseBB == CurMBB), CR.CaseBB, Default, BTC); if (CR.CaseBB == CurMBB) visitBitTestHeader(BTB); BitTestCases.push_back(BTB); return true; } /// Clusterify - Transform simple list of Cases into list of CaseRange's size_t SelectionDAGLowering::Clusterify(CaseVector& Cases, const SwitchInst& SI) { size_t numCmps = 0; // Start with "simple" cases for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)]; Cases.push_back(Case(SI.getSuccessorValue(i), SI.getSuccessorValue(i), SMBB)); } std::sort(Cases.begin(), Cases.end(), CaseCmp()); // Merge case into clusters if (Cases.size() >= 2) // Must recompute end() each iteration because it may be // invalidated by erase if we hold on to it for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { const APInt& nextValue = cast(J->Low)->getValue(); const APInt& currentValue = cast(I->High)->getValue(); MachineBasicBlock* nextBB = J->BB; MachineBasicBlock* currentBB = I->BB; // If the two neighboring cases go to the same destination, merge them // into a single case. if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { I->High = J->High; J = Cases.erase(J); } else { I = J++; } } for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { if (I->Low != I->High) // A range counts double, since it requires two compares. ++numCmps; } return numCmps; } void SelectionDAGLowering::visitSwitch(SwitchInst &SI) { // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; // If there is only the default destination, branch to it if it is not the // next basic block. Otherwise, just fall through. if (SI.getNumOperands() == 2) { // Update machine-CFG edges. // If this is not a fall-through branch, emit the branch. CurMBB->addSuccessor(Default); if (Default != NextBlock) DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Default))); return; } // If there are any non-default case statements, create a vector of Cases // representing each one, and sort the vector so that we can efficiently // create a binary search tree from them. CaseVector Cases; size_t numCmps = Clusterify(Cases, SI); DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size() << ". Total compares: " << numCmps << '\n'); numCmps = 0; // Get the Value to be switched on and default basic blocks, which will be // inserted into CaseBlock records, representing basic blocks in the binary // search tree. Value *SV = SI.getOperand(0); // Push the initial CaseRec onto the worklist CaseRecVector WorkList; WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end()))); while (!WorkList.empty()) { // Grab a record representing a case range to process off the worklist CaseRec CR = WorkList.back(); WorkList.pop_back(); if (handleBitTestsSwitchCase(CR, WorkList, SV, Default)) continue; // If the range has few cases (two or less) emit a series of specific // tests. if (handleSmallSwitchRange(CR, WorkList, SV, Default)) continue; // If the switch has more than 5 blocks, and at least 40% dense, and the // target supports indirect branches, then emit a jump table rather than // lowering the switch to a binary tree of conditional branches. if (handleJTSwitchCase(CR, WorkList, SV, Default)) continue; // Emit binary tree. We need to pick a pivot, and push left and right ranges // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. handleBTSplitSwitchCase(CR, WorkList, SV, Default); } } void SelectionDAGLowering::visitIndirectBr(IndirectBrInst &I) { // Update machine-CFG edges. for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), getValue(I.getAddress()))); } void SelectionDAGLowering::visitFSub(User &I) { // -0.0 - X --> fneg const Type *Ty = I.getType(); if (isa(Ty)) { if (ConstantVector *CV = dyn_cast(I.getOperand(0))) { const VectorType *DestTy = cast(I.getType()); const Type *ElTy = DestTy->getElementType(); unsigned VL = DestTy->getNumElements(); std::vector NZ(VL, ConstantFP::getNegativeZero(ElTy)); Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); if (CV == CNZ) { SDValue Op2 = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), Op2.getValueType(), Op2)); return; } } } if (ConstantFP *CFP = dyn_cast(I.getOperand(0))) if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { SDValue Op2 = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), Op2.getValueType(), Op2)); return; } visitBinary(I, ISD::FSUB); } void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), Op1.getValueType(), Op1, Op2)); } void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); if (!isa(I.getType()) && Op2.getValueType() != TLI.getShiftAmountTy()) { // If the operand is smaller than the shift count type, promote it. EVT PTy = TLI.getPointerTy(); EVT STy = TLI.getShiftAmountTy(); if (STy.bitsGT(Op2.getValueType())) Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), TLI.getShiftAmountTy(), Op2); // If the operand is larger than the shift count type but the shift // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to // optimization early. else if (STy.getSizeInBits() >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getShiftAmountTy(), Op2); // Otherwise we'll need to temporarily settle for some other // convenient type; type legalization will make adjustments as // needed. else if (PTy.bitsLT(Op2.getValueType())) Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getPointerTy(), Op2); else if (PTy.bitsGT(Op2.getValueType())) Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), Op2); } setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Op1.getValueType(), Op1, Op2)); } void SelectionDAGLowering::visitICmp(User &I) { ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; if (ICmpInst *IC = dyn_cast(&I)) predicate = IC->getPredicate(); else if (ConstantExpr *IC = dyn_cast(&I)) predicate = ICmpInst::Predicate(IC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); } void SelectionDAGLowering::visitFCmp(User &I) { FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; if (FCmpInst *FC = dyn_cast(&I)) predicate = FC->getPredicate(); else if (ConstantExpr *FC = dyn_cast(&I)) predicate = FCmpInst::Predicate(FC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGLowering::visitSelect(User &I) { SmallVector ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues != 0) { SmallVector Values(NumValues); SDValue Cond = getValue(I.getOperand(0)); SDValue TrueVal = getValue(I.getOperand(1)); SDValue FalseVal = getValue(I.getOperand(2)); for (unsigned i = 0; i != NumValues; ++i) Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), TrueVal.getValueType(), Cond, SDValue(TrueVal.getNode(), TrueVal.getResNo() + i), SDValue(FalseVal.getNode(), FalseVal.getResNo() + i)); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&ValueVTs[0], NumValues), &Values[0], NumValues)); } } void SelectionDAGLowering::visitTrunc(User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitZExt(User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitSExt(User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPTrunc(User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), DestVT, N, DAG.getIntPtrConstant(0))); } void SelectionDAGLowering::visitFPExt(User &I){ // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPToUI(User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPToSI(User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitUIToFP(User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitSIToFP(User &I){ // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitPtrToInt(User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); SDValue Result = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT); setValue(&I, Result); } void SelectionDAGLowering::visitIntToPtr(User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } void SelectionDAGLowering::visitBitCast(User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this // is either a BIT_CONVERT or a no-op. if (DestVT != N.getValueType()) setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), DestVT, N)); // convert types else setValue(&I, N); // noop cast. } void SelectionDAGLowering::visitInsertElement(User &I) { SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), getValue(I.getOperand(2))); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), TLI.getValueType(I.getType()), InVec, InVal, InIdx)); } void SelectionDAGLowering::visitExtractElement(User &I) { SDValue InVec = getValue(I.getOperand(0)); SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), getValue(I.getOperand(1))); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), TLI.getValueType(I.getType()), InVec, InIdx)); } // Utility for visitShuffleVector - Returns true if the mask is mask starting // from SIndx and increasing to the element length (undefs are allowed). static bool SequentialMask(SmallVectorImpl &Mask, unsigned SIndx) { unsigned MaskNumElts = Mask.size(); for (unsigned i = 0; i != MaskNumElts; ++i) if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx))) return false; return true; } void SelectionDAGLowering::visitShuffleVector(User &I) { SmallVector Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); // Convert the ConstantVector mask operand into an array of ints, with -1 // representing undef values. SmallVector MaskElts; cast(I.getOperand(2))->getVectorElements(*DAG.getContext(), MaskElts); unsigned MaskNumElts = MaskElts.size(); for (unsigned i = 0; i != MaskNumElts; ++i) { if (isa(MaskElts[i])) Mask.push_back(-1); else Mask.push_back(cast(MaskElts[i])->getSExtValue()); } EVT VT = TLI.getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, &Mask[0])); return; } // Normalize the shuffle vector since mask and vector length don't match. if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { // Mask is longer than the source vectors and is a multiple of the source // vectors. We can use concatenate vector to make the mask and vectors // lengths match. if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { // The shuffle is concatenating two vectors together. setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), VT, Src1, Src2)); return; } // Pad both vectors with undefs to make them the same length as the mask. unsigned NumConcat = MaskNumElts / SrcNumElts; bool Src1U = Src1.getOpcode() == ISD::UNDEF; bool Src2U = Src2.getOpcode() == ISD::UNDEF; SDValue UndefVal = DAG.getUNDEF(SrcVT); SmallVector MOps1(NumConcat, UndefVal); SmallVector MOps2(NumConcat, UndefVal); MOps1[0] = Src1; MOps2[0] = Src2; Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), VT, &MOps1[0], NumConcat); Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), VT, &MOps2[0], NumConcat); // Readjust mask for new input vector length. SmallVector MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx < (int)SrcNumElts) MappedOps.push_back(Idx); else MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, &MappedOps[0])); return; } if (SrcNumElts > MaskNumElts) { // Analyze the access pattern of the vector to see if we can extract // two subvectors and do the shuffle. The analysis is done by calculating // the range of elements the mask access on both vectors. int MinRange[2] = { SrcNumElts+1, SrcNumElts+1}; int MaxRange[2] = {-1, -1}; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; int Input = 0; if (Idx < 0) continue; if (Idx >= (int)SrcNumElts) { Input = 1; Idx -= SrcNumElts; } if (Idx > MaxRange[Input]) MaxRange[Input] = Idx; if (Idx < MinRange[Input]) MinRange[Input] = Idx; } // Check if the access is smaller than the vector size and can we find // a reasonable extract index. int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not Extract. int StartIdx[2]; // StartIdx to extract from for (int Input=0; Input < 2; ++Input) { if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) { RangeUse[Input] = 0; // Unused StartIdx[Input] = 0; } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) { // Fits within range but we should see if we can find a good // start index that is a multiple of the mask length. if (MaxRange[Input] < (int)MaskNumElts) { RangeUse[Input] = 1; // Extract from beginning of the vector StartIdx[Input] = 0; } else { StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && StartIdx[Input] + MaskNumElts < SrcNumElts) RangeUse[Input] = 1; // Extract from a multiple of the mask length. } } } if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } else if (RangeUse[0] < 2 && RangeUse[1] < 2) { // Extract appropriate subvector and generate a vector shuffle for (int Input=0; Input < 2; ++Input) { SDValue& Src = Input == 0 ? Src1 : Src2; if (RangeUse[Input] == 0) { Src = DAG.getUNDEF(VT); } else { Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT, Src, DAG.getIntPtrConstant(StartIdx[Input])); } } // Calculate new mask. SmallVector MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx < 0) MappedOps.push_back(Idx); else if (Idx < (int)SrcNumElts) MappedOps.push_back(Idx - StartIdx[0]); else MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, &MappedOps[0])); return; } } // We can't use either concat vectors or extract subvectors so fall back to // replacing the shuffle with extract and build vector. // to insert and build vector. EVT EltVT = VT.getVectorElementType(); EVT PtrVT = TLI.getPointerTy(); SmallVector Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { if (Mask[i] < 0) { Ops.push_back(DAG.getUNDEF(EltVT)); } else { int Idx = Mask[i]; if (Idx < (int)SrcNumElts) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), EltVT, Src1, DAG.getConstant(Idx, PtrVT))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), EltVT, Src2, DAG.getConstant(Idx - SrcNumElts, PtrVT))); } } setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), VT, &Ops[0], Ops.size())); } void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) { const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); const Type *AggTy = I.getType(); const Type *ValTy = Op1->getType(); bool IntoUndef = isa(Op0); bool FromUndef = isa(Op1); unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, I.idx_begin(), I.idx_end()); SmallVector AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); SmallVector ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); SmallVector Values(NumAggValues); SDValue Agg = getValue(Op0); SDValue Val = getValue(Op1); unsigned i = 0; // Copy the beginning value(s) from the original aggregate. for (; i != LinearIndex; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); // Copy values from the inserted value(s). for (; i != LinearIndex + NumValValues; ++i) Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); // Copy remaining value(s) from the original aggregate. for (; i != NumAggValues; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&AggValueVTs[0], NumAggValues), &Values[0], NumAggValues)); } void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) { const Value *Op0 = I.getOperand(0); const Type *AggTy = Op0->getType(); const Type *ValTy = I.getType(); bool OutOfUndef = isa(Op0); unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, I.idx_begin(), I.idx_end()); SmallVector ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); SmallVector Values(NumValValues); SDValue Agg = getValue(Op0); // Copy out the selected value(s). for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) Values[i - LinearIndex] = OutOfUndef ? DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&ValValueVTs[0], NumValValues), &Values[0], NumValValues)); } void SelectionDAGLowering::visitGetElementPtr(User &I) { SDValue N = getValue(I.getOperand(0)); const Type *Ty = I.getOperand(0)->getType(); for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { Value *Idx = *OI; if (const StructType *StTy = dyn_cast(Ty)) { unsigned Field = cast(Idx)->getZExtValue(); if (Field) { // N = N + Offset uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, DAG.getIntPtrConstant(Offset)); } Ty = StTy->getElementType(Field); } else { Ty = cast(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. if (ConstantInt *CI = dyn_cast(Idx)) { if (CI->getZExtValue() == 0) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); SDValue OffsVal; EVT PTy = TLI.getPointerTy(); unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) { OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getPointerTy(), DAG.getConstant(Offs, MVT::i64)); } else OffsVal = DAG.getIntPtrConstant(Offs); N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, OffsVal); continue; } // N = N + Idx * ElementSize; APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(), TD->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { unsigned Amt = ElementSize.logBase2(); IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), N.getValueType(), IdxN, DAG.getConstant(Amt, TLI.getPointerTy())); } else { SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy()); IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), N.getValueType(), IdxN, Scale); } } N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, IdxN); } } setValue(&I, N); } void SelectionDAGLowering::visitAlloca(AllocaInst &I) { // If this is a fixed sized alloca in the entry block of the function, // allocate it statically on the stack. if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. const Type *Ty = I.getAllocatedType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(), AllocSize, DAG.getConstant(TySize, AllocSize.getValueType())); EVT IntPtr = TLI.getPointerTy(); AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. unsigned StackAlign = TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); if (Align <= StackAlign) Align = 0; // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(StackAlign-1)); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(), VTs, Ops, 3); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); // Inform the Frame Information that we have just allocated a variable-sized // object. FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); } void SelectionDAGLowering::visitLoad(LoadInst &I) { const Value *SV = I.getOperand(0); SDValue Ptr = getValue(SV); const Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); unsigned Alignment = I.getAlignment(); SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; SDValue Root; bool ConstantMemory = false; if (I.isVolatile()) // Serialize volatile loads with other side effects. Root = getRoot(); else if (AA->pointsToConstantMemory(SV)) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; } else { // Do not serialize non-volatile loads against each other. Root = DAG.getRoot(); } SmallVector Values(NumValues); SmallVector Chains(NumValues); EVT PtrVT = Ptr.getValueType(); for (unsigned i = 0; i != NumValues; ++i) { SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)), SV, Offsets[i], isVolatile, Alignment); Values[i] = L; Chains[i] = L.getValue(1); } if (!ConstantMemory) { SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues); if (isVolatile) DAG.setRoot(Chain); else PendingLoads.push_back(Chain); } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&ValueVTs[0], NumValues), &Values[0], NumValues)); } void SelectionDAGLowering::visitStore(StoreInst &I) { Value *SrcV = I.getOperand(0); Value *PtrV = I.getOperand(1); SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; // Get the lowered operands. Note that we do this after // checking if NumResults is zero, because with zero results // the operands won't have values in the map. SDValue Src = getValue(SrcV); SDValue Ptr = getValue(PtrV); SDValue Root = getRoot(); SmallVector Chains(NumValues); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); unsigned Alignment = I.getAlignment(); for (unsigned i = 0; i != NumValues; ++i) Chains[i] = DAG.getStore(Root, getCurDebugLoc(), SDValue(Src.getNode(), Src.getResNo() + i), DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)), PtrV, Offsets[i], isVolatile, Alignment); DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues)); } /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC /// node. void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, unsigned Intrinsic) { bool HasChain = !I.doesNotAccessMemory(); bool OnlyLoad = HasChain && I.onlyReadsMemory(); // Build the operand list. SmallVector Ops; if (HasChain) { // If this intrinsic has side-effects, chainify it. if (OnlyLoad) { // We don't need to serialize loads against other loads. Ops.push_back(DAG.getRoot()); } else { Ops.push_back(getRoot()); } } // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic) Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { SDValue Op = getValue(I.getOperand(i)); assert(TLI.isTypeLegal(Op.getValueType()) && "Intrinsic uses a non-legal type?"); Ops.push_back(Op); } SmallVector ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); #ifndef NDEBUG for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { assert(TLI.isTypeLegal(ValueVTs[Val]) && "Intrinsic uses a non-legal type?"); } #endif // NDEBUG if (HasChain) ValueVTs.push_back(MVT::Other); SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); // Create the node. SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), VTs, &Ops[0], Ops.size(), Info.memVT, Info.ptrVal, Info.offset, Info.align, Info.vol, Info.readMem, Info.writeMem); } else if (!HasChain) Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); else if (I.getType() != Type::getVoidTy(*DAG.getContext())) Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); else Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); if (HasChain) { SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); if (OnlyLoad) PendingLoads.push_back(Chain); else DAG.setRoot(Chain); } if (I.getType() != Type::getVoidTy(*DAG.getContext())) { if (const VectorType *PTy = dyn_cast(I.getType())) { EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); } setValue(&I, Result); } } /// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. static GlobalVariable *ExtractTypeInfo(Value *V) { V = V->stripPointerCasts(); GlobalVariable *GV = dyn_cast(V); assert ((GV || isa(V)) && "TypeInfo must be a global variable or NULL"); return GV; } namespace llvm { /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB) { // Inform the MachineModuleInfo of the personality for this landing pad. ConstantExpr *CE = cast(I.getOperand(2)); assert(CE->getOpcode() == Instruction::BitCast && isa(CE->getOperand(0)) && "Personality should be a function"); MMI->addPersonality(MBB, cast(CE->getOperand(0))); // Gather all the type infos for this landing pad and pass them along to // MachineModuleInfo. std::vector TyInfo; unsigned N = I.getNumOperands(); for (unsigned i = N - 1; i > 2; --i) { if (ConstantInt *CI = dyn_cast(I.getOperand(i))) { unsigned FilterLength = CI->getZExtValue(); unsigned FirstCatch = i + FilterLength + !FilterLength; assert (FirstCatch <= N && "Invalid filter length"); if (FirstCatch < N) { TyInfo.reserve(N - FirstCatch); for (unsigned j = FirstCatch; j < N; ++j) TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); MMI->addCatchTypeInfo(MBB, TyInfo); TyInfo.clear(); } if (!FilterLength) { // Cleanup. MMI->addCleanup(MBB); } else { // Filter. TyInfo.reserve(FilterLength - 1); for (unsigned j = i + 1; j < FirstCatch; ++j) TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); MMI->addFilterTypeInfo(MBB, TyInfo); TyInfo.clear(); } N = i; } } if (N > 3) { TyInfo.reserve(N - 3); for (unsigned j = 3; j < N; ++j) TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); MMI->addCatchTypeInfo(MBB, TyInfo); } } } /// GetSignificand - Get the significand and build it into a floating-point /// number with exponent of 1: /// /// Op = (Op & 0x007fffff) | 0x3f800000; /// /// where Op is the hexidecimal representation of floating point value. static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x007fffff, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, DAG.getConstant(0x3f800000, MVT::i32)); return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); } /// GetExponent - Get the exponent: /// /// (float)(int)(((Op & 0x7f800000) >> 23) - 127); /// /// where Op is the hexidecimal representation of floating point value. static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, DebugLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, MVT::i32)); SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, DAG.getConstant(23, TLI.getPointerTy())); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, DAG.getConstant(127, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); } /// getF32Constant - Get 32-bit floating point constant. static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt) { return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); } /// Inlined utility function to implement binary input atomic intrinsics for /// visitIntrinsicCall: I is a call instruction /// Op is the associated NodeType for I const char * SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { SDValue Root = getRoot(); SDValue L = DAG.getAtomic(Op, getCurDebugLoc(), getValue(I.getOperand(2)).getValueType().getSimpleVT(), Root, getValue(I.getOperand(1)), getValue(I.getOperand(2)), I.getOperand(1)); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; } // implVisitAluOverflow - Lower arithmetic overflow instrinsics. const char * SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { SDValue Op1 = getValue(I.getOperand(1)); SDValue Op2 = getValue(I.getOperand(2)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2); setValue(&I, Result); return 0; } /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. void SelectionDAGLowering::visitExp(CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getOperand(1)); // Put the exponent in the right bit position for later addition to the // final result: // // #define LOG2OFe 1.4426950f // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, getF32Constant(DAG, 0x3fb8aa3b)); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // TwoToFractionalPartOfX = // 0.997535578f + // (0.735607626f + 0.252464424f * x) * x; // // error 0.0144103317, which is 6 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3e814304)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f3c50c8)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5); // Add the exponent into the result in integer domain. SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // TwoToFractionalPartOfX = // 0.999892986f + // (0.696457318f + // (0.224338339f + 0.792043434e-1f * x) * x) * x; // // 0.000107046256 error, which is 13 to 14 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3da235e3)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3e65b8f3)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f324b07)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7); // Add the exponent into the result in integer domain. SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // // TwoToFractionalPartOfX = // 0.999999982f + // (0.693148872f + // (0.240227044f + // (0.554906021e-1f + // (0.961591928e-2f + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; // // error 2.47208000*10^(-7), which is better than 18 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3924b03e)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3ab24b87)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3c1d8c17)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3d634a1d)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x3e75fe14)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, getF32Constant(DAG, 0x3f317234)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); // Add the exponent into the result in integer domain. SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); } } else { // No special expansion. result = DAG.getNode(ISD::FEXP, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); } setValue(&I, result); } /// visitLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. void SelectionDAGLowering::visitLog(CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getOperand(1)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3f317218)); // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // LogofMantissa = // -1.1609546f + // (1.4034025f - 0.23903021f * x) * x; // // error 0.0034276066, which is better than 8 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbe74c456)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3fb3a2b1)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f949a29)); result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // LogOfMantissa = // -1.7417939f + // (2.8212026f + // (-1.4699568f + // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; // // error 0.000061011436, which is 14 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbd67b6d6)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3ee4f4b8)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3fbc278b)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40348e95)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x3fdef31a)); result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // // LogOfMantissa = // -2.1072184f + // (4.2372794f + // (-3.7029485f + // (2.2781945f + // (-0.87823314f + // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; // // error 0.0000023660568, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbc91e5ac)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3e4350aa)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f60d3e3)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x4011cdf0)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x406cfd1c)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x408797cb)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, getF32Constant(DAG, 0x4006dcab)); result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); } } else { // No special expansion. result = DAG.getNode(ISD::FLOG, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); } setValue(&I, result); } /// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. void SelectionDAGLowering::visitLog2(CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getOperand(1)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Get the exponent. SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); // Different possible minimax approximations of significand in // floating-point for various degrees of accuracy over [1,2]. if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; // // error 0.0049451742, which is more than 7 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbeb08fe0)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x40019463)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3fd6633d)); result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // Log2ofMantissa = // -2.51285454f + // (4.07009056f + // (-2.12067489f + // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; // // error 0.0000876136000, which is better than 13 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbda7262e)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3f25280b)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x4007b923)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40823e2f)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x4020d29c)); result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // // Log2ofMantissa = // -3.0400495f + // (6.1129976f + // (-5.3420409f + // (3.2865683f + // (-1.2669343f + // (0.27515199f - // 0.25691327e-1f * x) * x) * x) * x) * x) * x; // // error 0.0000018516, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbcd2769e)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3e8ce0b9)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3fa22ae7)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40525723)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x40aaf200)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x40c39dad)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, getF32Constant(DAG, 0x4042902c)); result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); } } else { // No special expansion. result = DAG.getNode(ISD::FLOG2, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); } setValue(&I, result); } /// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. void SelectionDAGLowering::visitLog10(CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getOperand(1)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3e9a209a)); // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // Log10ofMantissa = // -0.50419619f + // (0.60948995f - 0.10380950f * x) * x; // // error 0.0014886165, which is 6 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbdd49a13)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3f1c0789)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f011300)); result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // Log10ofMantissa = // -0.64831180f + // (0.91751397f + // (-0.31664806f + 0.47637168e-1f * x) * x) * x; // // error 0.00019228036, which is better than 12 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3d431f31)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, getF32Constant(DAG, 0x3ea21fb2)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f6ae232)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f25f7c3)); result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // // Log10ofMantissa = // -0.84299375f + // (1.5327582f + // (-1.0688956f + // (0.49102474f + // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; // // error 0.0000037995730, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3c5d51ce)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, getF32Constant(DAG, 0x3e00685a)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3efb6798)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f88d192)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3fc4316c)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, getF32Constant(DAG, 0x3f57ce70)); result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); } } else { // No special expansion. result = DAG.getNode(ISD::FLOG10, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); } setValue(&I, result); } /// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. void SelectionDAGLowering::visitExp2(CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getOperand(1)); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); // FractionalPartOfX = x - (float)IntegerPartOfX; SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // TwoToFractionalPartOfX = // 0.997535578f + // (0.735607626f + 0.252464424f * x) * x; // // error 0.0144103317, which is 6 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3e814304)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f3c50c8)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // TwoToFractionalPartOfX = // 0.999892986f + // (0.696457318f + // (0.224338339f + 0.792043434e-1f * x) * x) * x; // // error 0.000107046256, which is 13 to 14 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3da235e3)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3e65b8f3)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f324b07)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // // TwoToFractionalPartOfX = // 0.999999982f + // (0.693148872f + // (0.240227044f + // (0.554906021e-1f + // (0.961591928e-2f + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; // error 2.47208000*10^(-7), which is better than 18 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3924b03e)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3ab24b87)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3c1d8c17)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3d634a1d)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x3e75fe14)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, getF32Constant(DAG, 0x3f317234)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); } } else { // No special expansion. result = DAG.getNode(ISD::FEXP2, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); } setValue(&I, result); } /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. void SelectionDAGLowering::visitPow(CallInst &I) { SDValue result; Value *Val = I.getOperand(1); DebugLoc dl = getCurDebugLoc(); bool IsExp10 = false; if (getValue(Val).getValueType() == MVT::f32 && getValue(I.getOperand(2)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (Constant *C = const_cast(dyn_cast(Val))) { if (ConstantFP *CFP = dyn_cast(C)) { APFloat Ten(10.0f); IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten); } } } if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getOperand(2)); // Put the exponent in the right bit position for later addition to the // final result: // // #define LOG2OF10 3.3219281f // IntegerPartOfX = (int32_t)(x * LOG2OF10); SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, getF32Constant(DAG, 0x40549a78)); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); // FractionalPartOfX = x - (float)IntegerPartOfX; SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // twoToFractionalPartOfX = // 0.997535578f + // (0.735607626f + 0.252464424f * x) * x; // // error 0.0144103317, which is 6 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3e814304)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f3c50c8)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // TwoToFractionalPartOfX = // 0.999892986f + // (0.696457318f + // (0.224338339f + 0.792043434e-1f * x) * x) * x; // // error 0.000107046256, which is 13 to 14 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3da235e3)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3e65b8f3)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f324b07)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // // TwoToFractionalPartOfX = // 0.999999982f + // (0.693148872f + // (0.240227044f + // (0.554906021e-1f + // (0.961591928e-2f + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; // error 2.47208000*10^(-7), which is better than 18 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3924b03e)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3ab24b87)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3c1d8c17)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3d634a1d)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x3e75fe14)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, getF32Constant(DAG, 0x3f317234)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); } } else { // No special expansion. result = DAG.getNode(ISD::FPOW, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1)), getValue(I.getOperand(2))); } setValue(&I, result); } /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If /// we want to emit this as a call to a named external function, return the name /// otherwise lower it and return null. const char * SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DebugLoc dl = getCurDebugLoc(); switch (Intrinsic) { default: // By default, turn this into a target intrinsic node. visitTargetIntrinsic(I, Intrinsic); return 0; case Intrinsic::vastart: visitVAStart(I); return 0; case Intrinsic::vaend: visitVAEnd(I); return 0; case Intrinsic::vacopy: visitVACopy(I); return 0; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), getValue(I.getOperand(1)))); return 0; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), getValue(I.getOperand(1)))); return 0; case Intrinsic::setjmp: return "_setjmp"+!TLI.usesUnderscoreSetJmp(); break; case Intrinsic::longjmp: return "_longjmp"+!TLI.usesUnderscoreLongJmp(); break; case Intrinsic::memcpy: { SDValue Op1 = getValue(I.getOperand(1)); SDValue Op2 = getValue(I.getOperand(2)); SDValue Op3 = getValue(I.getOperand(3)); unsigned Align = cast(I.getOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } case Intrinsic::memset: { SDValue Op1 = getValue(I.getOperand(1)); SDValue Op2 = getValue(I.getOperand(2)); SDValue Op3 = getValue(I.getOperand(3)); unsigned Align = cast(I.getOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, I.getOperand(1), 0)); return 0; } case Intrinsic::memmove: { SDValue Op1 = getValue(I.getOperand(1)); SDValue Op2 = getValue(I.getOperand(2)); SDValue Op3 = getValue(I.getOperand(3)); unsigned Align = cast(I.getOperand(4))->getZExtValue(); // If the source and destination are known to not be aliases, we can // lower memmove as memcpy. uint64_t Size = -1ULL; if (ConstantSDNode *C = dyn_cast(Op3)) Size = C->getZExtValue(); if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == AliasAnalysis::NoAlias) { DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } case Intrinsic::dbg_stoppoint: case Intrinsic::dbg_region_start: case Intrinsic::dbg_region_end: case Intrinsic::dbg_func_start: // FIXME - Remove this instructions once the dust settles. return 0; case Intrinsic::dbg_declare: { if (OptLevel != CodeGenOpt::None) // FIXME: Variable debug info is not supported here. return 0; DwarfWriter *DW = DAG.getDwarfWriter(); if (!DW) return 0; DbgDeclareInst &DI = cast(I); if (!isValidDebugInfoIntrinsic(DI, CodeGenOpt::None)) return 0; MDNode *Variable = DI.getVariable(); Value *Address = DI.getAddress(); if (BitCastInst *BCI = dyn_cast(Address)) Address = BCI->getOperand(0); AllocaInst *AI = dyn_cast(Address); // Don't handle byval struct arguments or VLAs, for example. if (!AI) return 0; DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI == FuncInfo.StaticAllocaMap.end()) return 0; // VLAs. int FI = SI->second; MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); if (MMI) { MetadataContext &TheMetadata = DI.getParent()->getContext().getMetadata(); unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &DI); MMI->setVariableDbgInfo(Variable, FI, Dbg); } return 0; } case Intrinsic::eh_exception: { // Insert the EXCEPTIONADDR instruction. assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!"); SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[1]; Ops[0] = DAG.getRoot(); SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); setValue(&I, Op); DAG.setRoot(Op.getValue(1)); return 0; } case Intrinsic::eh_selector: { MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); if (CurMBB->isLandingPad()) AddCatchInfo(I, MMI, CurMBB); else { #ifndef NDEBUG FuncInfo.CatchInfoLost.insert(&I); #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); if (Reg) CurMBB->addLiveIn(Reg); } // Insert the EHSELECTION instruction. SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[2]; Ops[0] = getValue(I.getOperand(1)); Ops[1] = getRoot(); SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); DAG.setRoot(Op.getValue(1)); setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); return 0; } case Intrinsic::eh_typeid_for: { MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); if (MMI) { // Find the type id for the given typeinfo. GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); unsigned TypeID = MMI->getTypeIDFor(GV); setValue(&I, DAG.getConstant(TypeID, MVT::i32)); } else { // Return something different to eh_selector. setValue(&I, DAG.getConstant(1, MVT::i32)); } return 0; } case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { MMI->setCallsEHReturn(true); DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, MVT::Other, getControlRoot(), getValue(I.getOperand(1)), getValue(I.getOperand(2)))); } else { setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); } return 0; case Intrinsic::eh_unwind_init: if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { MMI->setCallsUnwindInit(true); } return 0; case Intrinsic::eh_dwarf_cfa: { EVT VT = getValue(I.getOperand(1)).getValueType(); SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl, TLI.getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, TLI.getPointerTy()), CfaArg); setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), DAG.getConstant(0, TLI.getPointerTy())), Offset)); return 0; } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: case Intrinsic::convertsif: case Intrinsic::convertuif: case Intrinsic::convertss: case Intrinsic::convertsu: case Intrinsic::convertus: case Intrinsic::convertuu: { ISD::CvtCode Code = ISD::CVT_INVALID; switch (Intrinsic) { case Intrinsic::convertff: Code = ISD::CVT_FF; break; case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; case Intrinsic::convertfui: Code = ISD::CVT_FU; break; case Intrinsic::convertsif: Code = ISD::CVT_SF; break; case Intrinsic::convertuif: Code = ISD::CVT_UF; break; case Intrinsic::convertss: Code = ISD::CVT_SS; break; case Intrinsic::convertsu: Code = ISD::CVT_SU; break; case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } EVT DestVT = TLI.getValueType(I.getType()); Value* Op1 = I.getOperand(1); setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), getValue(I.getOperand(2)), getValue(I.getOperand(3)), Code)); return 0; } case Intrinsic::sqrt: setValue(&I, DAG.getNode(ISD::FSQRT, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1)))); return 0; case Intrinsic::powi: setValue(&I, DAG.getNode(ISD::FPOWI, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1)), getValue(I.getOperand(2)))); return 0; case Intrinsic::sin: setValue(&I, DAG.getNode(ISD::FSIN, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1)))); return 0; case Intrinsic::cos: setValue(&I, DAG.getNode(ISD::FCOS, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1)))); return 0; case Intrinsic::log: visitLog(I); return 0; case Intrinsic::log2: visitLog2(I); return 0; case Intrinsic::log10: visitLog10(I); return 0; case Intrinsic::exp: visitExp(I); return 0; case Intrinsic::exp2: visitExp2(I); return 0; case Intrinsic::pow: visitPow(I); return 0; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getOperand(1)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); return 0; } case Intrinsic::readcyclecounter: { SDValue Op = getRoot(); SDValue Tmp = DAG.getNode(ISD::READCYCLECOUNTER, dl, DAG.getVTList(MVT::i64, MVT::Other), &Op, 1); setValue(&I, Tmp); DAG.setRoot(Tmp.getValue(1)); return 0; } case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1)))); return 0; case Intrinsic::cttz: { SDValue Arg = getValue(I.getOperand(1)); EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg); setValue(&I, result); return 0; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getOperand(1)); EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg); setValue(&I, result); return 0; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getOperand(1)); EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg); setValue(&I, result); return 0; } case Intrinsic::stacksave: { SDValue Op = getRoot(); SDValue Tmp = DAG.getNode(ISD::STACKSAVE, dl, DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); setValue(&I, Tmp); DAG.setRoot(Tmp.getValue(1)); return 0; } case Intrinsic::stackrestore: { SDValue Tmp = getValue(I.getOperand(1)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Tmp)); return 0; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(); SDValue Src = getValue(I.getOperand(1)); // The guard's value. AllocaInst *Slot = cast(I.getOperand(2)); int FI = FuncInfo.StaticAllocaMap[Slot]; MFI->setStackProtectorIndex(FI); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. SDValue Result = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, PseudoSourceValue::getFixedStack(FI), 0, true); setValue(&I, Result); DAG.setRoot(Result); return 0; } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. ConstantInt *CI = dyn_cast(I.getOperand(2)); assert(CI && "Non-constant type in __builtin_object_size?"); SDValue Arg = getValue(I.getOperand(0)); EVT Ty = Arg.getValueType(); if (CI->getZExtValue() < 2) setValue(&I, DAG.getConstant(-1ULL, Ty)); else setValue(&I, DAG.getConstant(0, Ty)); return 0; } case Intrinsic::var_annotation: // Discard annotate attributes return 0; case Intrinsic::init_trampoline: { const Function *F = cast(I.getOperand(2)->stripPointerCasts()); SDValue Ops[6]; Ops[0] = getRoot(); Ops[1] = getValue(I.getOperand(1)); Ops[2] = getValue(I.getOperand(2)); Ops[3] = getValue(I.getOperand(3)); Ops[4] = DAG.getSrcValue(I.getOperand(1)); Ops[5] = DAG.getSrcValue(F); SDValue Tmp = DAG.getNode(ISD::TRAMPOLINE, dl, DAG.getVTList(TLI.getPointerTy(), MVT::Other), Ops, 6); setValue(&I, Tmp); DAG.setRoot(Tmp.getValue(1)); return 0; } case Intrinsic::gcroot: if (GFI) { Value *Alloca = I.getOperand(1); Constant *TypeMap = cast(I.getOperand(2)); FrameIndexSDNode *FI = cast(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); } return 0; case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); return 0; case Intrinsic::flt_rounds: { setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; } case Intrinsic::trap: { DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); return 0; } case Intrinsic::uadd_with_overflow: return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: return implVisitAluOverflow(I, ISD::SADDO); case Intrinsic::usub_with_overflow: return implVisitAluOverflow(I, ISD::USUBO); case Intrinsic::ssub_with_overflow: return implVisitAluOverflow(I, ISD::SSUBO); case Intrinsic::umul_with_overflow: return implVisitAluOverflow(I, ISD::UMULO); case Intrinsic::smul_with_overflow: return implVisitAluOverflow(I, ISD::SMULO); case Intrinsic::prefetch: { SDValue Ops[4]; Ops[0] = getRoot(); Ops[1] = getValue(I.getOperand(1)); Ops[2] = getValue(I.getOperand(2)); Ops[3] = getValue(I.getOperand(3)); DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); return 0; } case Intrinsic::memory_barrier: { SDValue Ops[6]; Ops[0] = getRoot(); for (int x = 1; x < 6; ++x) Ops[x] = getValue(I.getOperand(x)); DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); return 0; } case Intrinsic::atomic_cmp_swap: { SDValue Root = getRoot(); SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), getValue(I.getOperand(2)).getValueType().getSimpleVT(), Root, getValue(I.getOperand(1)), getValue(I.getOperand(2)), getValue(I.getOperand(3)), I.getOperand(1)); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; } case Intrinsic::atomic_load_add: return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD); case Intrinsic::atomic_load_sub: return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB); case Intrinsic::atomic_load_or: return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR); case Intrinsic::atomic_load_xor: return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR); case Intrinsic::atomic_load_and: return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND); case Intrinsic::atomic_load_nand: return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND); case Intrinsic::atomic_load_max: return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX); case Intrinsic::atomic_load_min: return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN); case Intrinsic::atomic_load_umin: return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN); case Intrinsic::atomic_load_umax: return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX); case Intrinsic::atomic_swap: return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); case Intrinsic::invariant_start: case Intrinsic::lifetime_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); return 0; case Intrinsic::invariant_end: case Intrinsic::lifetime_end: // Discard region information. return 0; } } /// Test if the given instruction is in a position to be optimized /// with a tail-call. This roughly means that it's in a block with /// a return and there's nothing that needs to be scheduled /// between it and the return. /// /// This function only tests target-independent requirements. /// For target-dependent requirements, a target should override /// TargetLowering::IsEligibleForTailCallOptimization. /// static bool isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr, const TargetLowering &TLI) { const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); const ReturnInst *Ret = dyn_cast(Term); const Function *F = ExitBB->getParent(); // The block must end in a return statement or an unreachable. if (!Ret && !isa(Term)) return false; // If I will have a chain, make sure no other instruction that will have a // chain interposes between I and the return. if (I->mayHaveSideEffects() || I->mayReadFromMemory() || !I->isSafeToSpeculativelyExecute()) for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; --BBI) { if (&*BBI == I) break; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || !BBI->isSafeToSpeculativelyExecute()) return false; } // If the block ends with a void return or unreachable, it doesn't matter // what the call's return type is. if (!Ret || Ret->getNumOperands() == 0) return true; // If the return value is undef, it doesn't matter what the call's // return type is. if (isa(Ret->getOperand(0))) return true; // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) return false; // Otherwise, make sure the unmodified return value of I is the return value. for (const Instruction *U = dyn_cast(Ret->getOperand(0)); ; U = dyn_cast(U->getOperand(0))) { if (!U) return false; if (!U->hasOneUse()) return false; if (U == I) break; // Check for a truly no-op truncate. if (isa(U) && TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType())) continue; // Check for a truly no-op bitcast. if (isa(U) && (U->getOperand(0)->getType() == U->getType() || (isa(U->getOperand(0)->getType()) && isa(U->getType())))) continue; // Otherwise it's not a true no-op. return false; } return true; } void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { const PointerType *PT = cast(CS.getCalledValue()->getType()); const FunctionType *FTy = cast(PT->getElementType()); const Type *RetTy = FTy->getReturnType(); MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); unsigned BeginLabel = 0, EndLabel = 0; TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); // Check whether the function can return without sret-demotion. SmallVector OutVTs; SmallVector OutsFlags; SmallVector Offsets; getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), OutVTs, OutsFlags, TLI, &Offsets); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), FTy->isVarArg(), OutVTs, OutsFlags, DAG); SDValue DemoteStackSlot; if (!CanLowerReturn) { uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( FTy->getReturnType()); unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; Entry.isZExt = false; Entry.isInReg = false; Entry.isSRet = true; Entry.isNest = false; Entry.isByVal = false; Entry.Alignment = Align; Args.push_back(Entry); RetTy = Type::getVoidTy(FTy->getContext()); } for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { SDValue ArgNode = getValue(*i); Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); unsigned attrInd = i - CS.arg_begin() + 1; Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); Entry.Alignment = CS.getParamAlignment(attrInd); Args.push_back(Entry); } if (LandingPad && MMI) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI->NextLabelID(); // Both PendingLoads and PendingExports must be flushed here; // this call might not return. (void)getRoot(); DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(), getControlRoot(), BeginLabel)); } // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI.LowerCallTo. if (isTailCall && !isInTailCallPosition(CS.getInstruction(), CS.getAttributes().getRetAttributes(), TLI)) isTailCall = false; std::pair Result = TLI.LowerCallTo(getRoot(), RetTy, CS.paramHasAttr(0, Attribute::SExt), CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), CS.getCallingConv(), isTailCall, !CS.getInstruction()->use_empty(), Callee, Args, DAG, getCurDebugLoc()); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); else if (!CanLowerReturn && Result.second.getNode()) { // The instruction result is the result of loading from the // hidden sret parameter. SmallVector PVTs; const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); ComputeValueVTs(TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; unsigned NumValues = OutVTs.size(); SmallVector Values(NumValues); SmallVector Chains(NumValues); for (unsigned i = 0; i < NumValues; ++i) { SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)), NULL, Offsets[i], false, 1); Values[i] = L; Chains[i] = L.getValue(1); } SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues); PendingLoads.push_back(Chain); setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&OutVTs[0], NumValues), &Values[0], NumValues)); } // As a special case, a null chain means that a tail call has // been emitted and the DAG root is already updated. if (Result.second.getNode()) DAG.setRoot(Result.second); else HasTailCall = true; if (LandingPad && MMI) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. EndLabel = MMI->NextLabelID(); DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. MMI->addInvoke(LandingPad, BeginLabel, EndLabel); } } void SelectionDAGLowering::visitCall(CallInst &I) { const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo(); if (II) { if (unsigned IID = II->getIntrinsicID(F)) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; } } if (unsigned IID = F->getIntrinsicID()) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; } } // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. if (!F->hasLocalLinkage() && F->hasName()) { StringRef Name = F->getName(); if (Name == "copysign" || Name == "copysignf") { if (I.getNumOperands() == 3 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && I.getType() == I.getOperand(1)->getType() && I.getType() == I.getOperand(2)->getType()) { SDValue LHS = getValue(I.getOperand(1)); SDValue RHS = getValue(I.getOperand(2)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), LHS.getValueType(), LHS, RHS)); return; } } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { if (I.getNumOperands() == 2 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && I.getType() == I.getOperand(1)->getType()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { if (I.getNumOperands() == 2 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { if (I.getNumOperands() == 2 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { if (I.getNumOperands() == 2 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } } } else if (isa(I.getOperand(0))) { visitInlineAsm(&I); return; } SDValue Callee; if (!RenameFn) Callee = getValue(I.getOperand(0)); else Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); // Check if we can potentially perform a tail call. More detailed // checking is be done within LowerCallTo, after more information // about the call is known. bool isTailCall = PerformTailCallOpt && I.isTailCall(); LowerCallTo(&I, Callee, isTailCall); } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from /// this value and returns the result as a ValueVT value. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, SDValue &Chain, SDValue *Flag) const { // Assemble the legal parts into the final values. SmallVector Values(ValueVTs.size()); SmallVector Parts; for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT); EVT RegisterVT = RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue P; if (Flag == 0) P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); else { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); *Flag = P.getValue(2); } Chain = P.getValue(1); // If the source register was virtual and if we know something about it, // add an assert node. if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && RegisterVT.isInteger() && !RegisterVT.isVector()) { unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); if (FLI.LiveOutRegInfo.size() > SlotNo) { FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo]; unsigned RegSize = RegisterVT.getSizeInBits(); unsigned NumSignBits = LOI.NumSignBits; unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. bool isSExt = true; EVT FromVT(MVT::Other); if (NumSignBits == RegSize) isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 else if (NumZeroBits >= RegSize-1) isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 else if (NumSignBits > RegSize-8) isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 else if (NumZeroBits >= RegSize-8) isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 else if (NumSignBits > RegSize-16) isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 else if (NumZeroBits >= RegSize-16) isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 else if (NumSignBits > RegSize-32) isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 else if (NumZeroBits >= RegSize-32) isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 if (FromVT != MVT::Other) { P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, RegisterVT, P, DAG.getValueType(FromVT)); } } } Parts[i] = P; } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs, RegisterVT, ValueVT); Part += NumRegs; Parts.clear(); } return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(&ValueVTs[0], ValueVTs.size()), &Values[0], ValueVTs.size()); } /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, SDValue &Chain, SDValue *Flag) const { // Get the list of the values's legal parts. unsigned NumRegs = Regs.size(); SmallVector Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT); EVT RegisterVT = RegVTs[Value]; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT); Part += NumParts; } // Copy the parts into the registers. SmallVector Chains(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue Part; if (Flag == 0) Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); else { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); *Flag = Part.getValue(1); } Chains[i] = Part.getValue(0); } if (NumRegs == 1 || Flag) // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is // flagged to it. That is the CopyToReg nodes and the user are considered // a single scheduling unit. If we create a TokenFactor and return it as // chain, then the TokenFactor is both a predecessor (operand) of the // user as well as a successor (the TF operands are flagged to the user). // c1, f1 = CopyToReg // c2, f2 = CopyToReg // c3 = TokenFactor c1, c2 // ... // = op c3, ..., f2 Chain = Chains[NumRegs-1]; else Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); } /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker and includes the number of /// values added into it. void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,unsigned MatchingIdx, SelectionDAG &DAG, std::vector &Ops) const { EVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy(); assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); unsigned Flag = Code | (Regs.size() << 3); if (HasMatching) Flag |= 0x80000000 | (MatchingIdx << 16); Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy)); for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]); EVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); } } } /// isAllocatableRegister - If the specified register is safe to allocate, /// i.e. it isn't a stack pointer or some other special register, return the /// register class for the register. Otherwise, return null. static const TargetRegisterClass * isAllocatableRegister(unsigned Reg, MachineFunction &MF, const TargetLowering &TLI, const TargetRegisterInfo *TRI) { EVT FoundVT = MVT::Other; const TargetRegisterClass *FoundRC = 0; for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), E = TRI->regclass_end(); RCI != E; ++RCI) { EVT ThisVT = MVT::Other; const TargetRegisterClass *RC = *RCI; // If none of the the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); I != E; ++I) { if (TLI.isTypeLegal(*I)) { // If we have already found this register in a different register class, // choose the one with the largest VT specified. For example, on // PowerPC, we favor f64 register classes over f32. if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { ThisVT = *I; break; } } } if (ThisVT == MVT::Other) continue; // NOTE: This isn't ideal. In particular, this might allocate the // frame pointer in functions that need it (due to them not being taken // out of allocation, because a variable sized allocation hasn't been seen // yet). This is a slight code pessimization, but should still work. for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), E = RC->allocation_order_end(MF); I != E; ++I) if (*I == Reg) { // We found a matching register class. Keep looking at others in case // we find one with larger registers that this physreg is also in. FoundRC = RC; FoundVT = ThisVT; break; } } return FoundRC; } namespace llvm { /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. class VISIBILITY_HIDDEN SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { public: /// CallOperand - If this is the result output operand or a clobber /// this is null, otherwise it is the incoming operand to the CallInst. /// This gets modified as the asm is processed. SDValue CallOperand; /// AssignedRegs - If this is a register or register class operand, this /// contains the set of register corresponding to the operand. RegsForValue AssignedRegs; explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info) : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { } /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers /// busy in OutputRegs/InputRegs. void MarkAllocatedRegs(bool isOutReg, bool isInReg, std::set &OutputRegs, std::set &InputRegs, const TargetRegisterInfo &TRI) const { if (isOutReg) { for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI); } if (isInReg) { for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI); } } /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, const TargetData *TD) const { if (CallOperandVal == 0) return MVT::Other; if (isa(CallOperandVal)) return TLI.getPointerTy(); const llvm::Type *OpTy = CallOperandVal->getType(); // If this is an indirect operand, the operand is a pointer to the // accessed type. if (isIndirect) OpTy = cast(OpTy)->getElementType(); // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { unsigned BitSize = TD->getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: case 8: case 16: case 32: case 64: case 128: OpTy = IntegerType::get(Context, BitSize); break; } } return TLI.getValueType(OpTy, true); } private: /// MarkRegAndAliases - Mark the specified register and all aliases in the /// specified set. static void MarkRegAndAliases(unsigned Reg, std::set &Regs, const TargetRegisterInfo &TRI) { assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg"); Regs.insert(Reg); if (const unsigned *Aliases = TRI.getAliasSet(Reg)) for (; *Aliases; ++Aliases) Regs.insert(*Aliases); } }; } // end llvm namespace. /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator handle the assignment process. However, if the asm uses /// features that we can't model on machineinstrs, we have SDISel do the /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. /// Input and OutputRegs are the set of already allocated physical registers. /// void SelectionDAGLowering:: GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, std::set &OutputRegs, std::set &InputRegs) { LLVMContext &Context = FuncInfo.Fn->getContext(); // Compute whether this value requires an input register, an output register, // or both. bool isOutReg = false; bool isInReg = false; switch (OpInfo.Type) { case InlineAsm::isOutput: isOutReg = true; // If there is an input constraint that matches this, we need to reserve // the input register so no other inputs allocate to it. isInReg = OpInfo.hasMatchingInput(); break; case InlineAsm::isInput: isInReg = true; isOutReg = false; break; case InlineAsm::isClobber: isOutReg = true; isInReg = true; break; } MachineFunction &MF = DAG.getMachineFunction(); SmallVector Regs; // If this is a constraint for a single physreg, or a constraint for a // register class, find it. std::pair PhysReg = TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) { // If this is a FP input in an integer register (or visa versa) insert a bit // cast of the input value. More generally, handle any case where the input // value disagrees with the register class we plan to stick this in. if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). EVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { // If the input is a FP value and we want it in FP registers, do a // bitcast to the corresponding integer type. This turns an f64 value // into i64, which can be passed with two i32 values on a 32-bit // machine. RegVT = EVT::getIntegerVT(Context, OpInfo.ConstraintVT.getSizeInBits()); OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } } NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); } EVT RegVT; EVT ValueVT = OpInfo.ConstraintVT; // If this is a constraint for a specific physical register, like {r17}, // assign it now. if (unsigned AssignedReg = PhysReg.first) { const TargetRegisterClass *RC = PhysReg.second; if (OpInfo.ConstraintVT == MVT::Other) ValueVT = *RC->vt_begin(); // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to // remember that AX is actually i16 to get the right extension. RegVT = *RC->vt_begin(); // This is a explicit reference to a physical register. Regs.push_back(AssignedReg); // If this is an expanded reference, add the rest of the regs to Regs. if (NumRegs != 1) { TargetRegisterClass::iterator I = RC->begin(); for (; *I != AssignedReg; ++I) assert(I != RC->end() && "Didn't find reg!"); // Already added the first reg. --NumRegs; ++I; for (; NumRegs; --NumRegs, ++I) { assert(I != RC->end() && "Ran out of registers to allocate!"); Regs.push_back(*I); } } OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; } // Otherwise, if this was a reference to an LLVM register class, create vregs // for this reference. if (const TargetRegisterClass *RC = PhysReg.second) { RegVT = *RC->vt_begin(); if (OpInfo.ConstraintVT == MVT::Other) ValueVT = RegVT; // Create the appropriate number of virtual registers. MachineRegisterInfo &RegInfo = MF.getRegInfo(); for (; NumRegs; --NumRegs) Regs.push_back(RegInfo.createVirtualRegister(RC)); OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); return; } // This is a reference to a register class that doesn't directly correspond // to an LLVM register class. Allocate NumRegs consecutive, available, // registers from the class. std::vector RegClassRegs = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); unsigned NumAllocated = 0; for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { unsigned Reg = RegClassRegs[i]; // See if this register is available. if ((isOutReg && OutputRegs.count(Reg)) || // Already used. (isInReg && InputRegs.count(Reg))) { // Already used. // Make sure we find consecutive registers. NumAllocated = 0; continue; } // Check to see if this register is allocatable (i.e. don't give out the // stack pointer). const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI); if (!RC) { // Couldn't allocate this register. // Reset NumAllocated to make sure we return consecutive registers. NumAllocated = 0; continue; } // Okay, this register is good, we can use it. ++NumAllocated; // If we allocated enough consecutive registers, succeed. if (NumAllocated == NumRegs) { unsigned RegStart = (i-NumAllocated)+1; unsigned RegEnd = i+1; // Mark all of the allocated registers used. for (unsigned i = RegStart; i != RegEnd; ++i) Regs.push_back(RegClassRegs[i]); OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(), OpInfo.ConstraintVT); OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; } } // Otherwise, we couldn't allocate enough registers for this. } /// hasInlineAsmMemConstraint - Return true if the inline asm instruction being /// processed uses a memory 'm' constraint. static bool hasInlineAsmMemConstraint(std::vector &CInfos, const TargetLowering &TLI) { for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { InlineAsm::ConstraintInfo &CI = CInfos[i]; for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); if (CType == TargetLowering::C_Memory) return true; } // Indirect operand accesses access memory. if (CI.isIndirect) return true; } return false; } /// visitInlineAsm - Handle a call to an InlineAsm object. /// void SelectionDAGLowering::visitInlineAsm(CallSite CS) { InlineAsm *IA = cast(CS.getCalledValue()); /// ConstraintOperands - Information about all of the constraints. std::vector ConstraintOperands; std::set OutputRegs, InputRegs; // Do a prepass over the constraints, canonicalizing them, and building up the // ConstraintOperands list. std::vector ConstraintInfos = IA->ParseConstraints(); bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI); SDValue Chain, Flag; // We won't need to flush pending loads if this asm doesn't touch // memory and is nonvolatile. if (hasMemory || IA->hasSideEffects()) Chain = getRoot(); else Chain = DAG.getRoot(); unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); EVT OpVT = MVT::Other; // Compute the value type for each operand. switch (OpInfo.Type) { case InlineAsm::isOutput: // Indirect outputs just consume an argument. if (OpInfo.isIndirect) { OpInfo.CallOperandVal = CS.getArgument(ArgNo++); break; } // The return value of the call is this value. As such, there is no // corresponding argument. assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && "Bad inline asm!"); if (const StructType *STy = dyn_cast(CS.getType())) { OpVT = TLI.getValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); OpVT = TLI.getValueType(CS.getType()); } ++ResNo; break; case InlineAsm::isInput: OpInfo.CallOperandVal = CS.getArgument(ArgNo++); break; case InlineAsm::isClobber: // Nothing to do. break; } // If this is an input or an indirect output, process the call argument. // BasicBlocks are labels, currently appearing only in asm's. if (OpInfo.CallOperandVal) { // Strip bitcasts, if any. This mostly comes up for functions. OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); if (BasicBlock *BB = dyn_cast(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD); } OpInfo.ConstraintVT = OpVT; } // Second pass over the constraints: compute which constraint option to use // and assign registers to constraints that want a specific physreg. for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; // If this is an output operand with a matching input operand, look up the // matching input. If their types mismatch, e.g. one is an integer, the // other is floating point, or their sizes are different, flag it as an // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (OpInfo.ConstraintVT.getSizeInBits() != Input.ConstraintVT.getSizeInBits())) { llvm_report_error("Unsupported asm: input constraint" " with a matching output constraint of incompatible" " type!"); } Input.ConstraintVT = OpInfo.ConstraintVT; } } // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG); // If this is a memory input, and if the operand is not indirect, do what we // need to to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { assert(OpInfo.Type == InlineAsm::isInput && "Can only indirectify direct input operands!"); // Memory operands really want the address of the value. If we don't have // an indirect input, put it in the constpool if we can, otherwise spill // it to a stack slot. // If the operand is a float, integer, or vector constant, spill to a // constant pool entry to get its address. Value *OpVal = OpInfo.CallOperandVal; if (isa(OpVal) || isa(OpVal) || isa(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast(OpVal), TLI.getPointerTy()); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. const Type *Ty = OpVal->getType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurDebugLoc(), OpInfo.CallOperand, StackSlot, NULL, 0); OpInfo.CallOperand = StackSlot; } // There is no longer a Value* corresponding to this operand. OpInfo.CallOperandVal = 0; // It is now an indirect operand. OpInfo.isIndirect = true; } // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) GetRegistersForValue(OpInfo, OutputRegs, InputRegs); } ConstraintInfos.clear(); // Second pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) GetRegistersForValue(OpInfo, OutputRegs, InputRegs); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. std::vector AsmNodeOperands; AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back( DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other)); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. RegsForValue RetValRegs; // IndirectStoresToEmit - The set of stores to emit after the inline asm node. std::vector > IndirectStoresToEmit; for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; switch (OpInfo.Type) { case InlineAsm::isOutput: { if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && OpInfo.ConstraintType != TargetLowering::C_Register) { // Memory output, or 'other' output (e.g. 'X' constraint). assert(OpInfo.isIndirect && "Memory output must be indirect operand"); // Add information to the INLINEASM node to know about this output. unsigned ResOpType = 4/*MEM*/ | (1<<3); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, TLI.getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } // Otherwise, this is a register or register class output. // Copy the output from the appropriate register. Find a register that // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { llvm_report_error("Couldn't allocate output reg for" " constraint '" + OpInfo.ConstraintCode + "'!"); } // If this is an indirect operand, store through the pointer after the // asm. if (OpInfo.isIndirect) { IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, OpInfo.CallOperandVal)); } else { // This is the result value of the call. assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && "Bad inline asm!"); // Concatenate this output onto the outputs list. RetValRegs.append(OpInfo.AssignedRegs); } // Add information to the INLINEASM node to know that this register is // set. OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? 6 /* EARLYCLOBBER REGDEF */ : 2 /* REGDEF */ , false, 0, DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { SDValue InOperandVal = OpInfo.CallOperand; if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? // If this is required to match an output register we have already set, // just use its register. unsigned OperandNo = OpInfo.getMatchedOperand(); // Scan until we find the definition we already emitted of this operand. // When we find it, create a RegsForValue operand. unsigned CurOp = 2; // The first operand. for (; OperandNo; --OperandNo) { // Advance to the next operand. unsigned OpFlag = cast(AsmNodeOperands[CurOp])->getZExtValue(); assert(((OpFlag & 7) == 2 /*REGDEF*/ || (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ || (OpFlag & 7) == 4 /*MEM*/) && "Skipped past definitions?"); CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; } unsigned OpFlag = cast(AsmNodeOperands[CurOp])->getZExtValue(); if ((OpFlag & 7) == 2 /*REGDEF*/ || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. if (OpInfo.isIndirect) { llvm_report_error("Don't know how to handle tied indirect " "register inputs yet!"); } RegsForValue MatchedRegs; MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); MatchedRegs.RegVTs.push_back(RegVT); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); i != e; ++i) MatchedRegs.Regs. push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT))); // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, true, OpInfo.getMatchedOperand(), DAG, AsmNodeOperands); break; } else { assert(((OpFlag & 7) == 4) && "Unknown matching constraint!"); assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 && "Unexpected number of operands"); // Add information to the INLINEASM node to know about this input. // See InlineAsm.h isUseOperandTiedToDef. OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, TLI.getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } } if (OpInfo.ConstraintType == TargetLowering::C_Other) { assert(!OpInfo.isIndirect && "Don't know how to handle indirect other inputs yet!"); std::vector Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], hasMemory, Ops, DAG); if (Ops.empty()) { llvm_report_error("Invalid operand for inline asm" " constraint '" + OpInfo.ConstraintCode + "'!"); } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. unsigned ResOpType = 4/*MEM*/ | (1<<3); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, TLI.getPointerTy())); AsmNodeOperands.push_back(InOperandVal); break; } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); assert(!OpInfo.isIndirect && "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { llvm_report_error("Couldn't allocate input reg for" " constraint '"+ OpInfo.ConstraintCode +"'!"); } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0, DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: { // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */, false, 0, DAG,AsmNodeOperands); break; } } } // Finish up input operands. AsmNodeOperands[0] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), DAG.getVTList(MVT::Other, MVT::Flag), &AsmNodeOperands[0], AsmNodeOperands.size()); Flag = Chain.getValue(1); // If this asm returns a register value, copy the result from that register // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, &Flag); // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { EVT ResultType = TLI.getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can // contain multiple different value types. The preg or vreg allocated may // not have the same VT as was expected. Convert it to the right type // with bit_convert. if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), ResultType, Val); } else if (ResultType != Val.getValueType() && ResultType.isInteger() && Val.getValueType().isInteger()) { // If a result value was tied to an input value, the computed result may // have a wider width than the expected result. Extract the relevant // portion. Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val); } assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); } setValue(CS.getInstruction(), Val); // Don't need to use this as a chain in this case. if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) return; } std::vector > StoresToEmit; // Process indirect outputs, first output all of the flagged copies out of // physregs. for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, &Flag); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } // Emit the non-flagged stores from the physregs. SmallVector OutChains; for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) OutChains.push_back(DAG.getStore(Chain, getCurDebugLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), StoresToEmit[i].second, 0)); if (!OutChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &OutChains[0], OutChains.size()); DAG.setRoot(Chain); } void SelectionDAGLowering::visitVAStart(CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), DAG.getSrcValue(I.getOperand(1)))); } void SelectionDAGLowering::visitVAArg(VAArgInst &I) { SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0))); setValue(&I, V); DAG.setRoot(V.getValue(1)); } void SelectionDAGLowering::visitVAEnd(CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), DAG.getSrcValue(I.getOperand(1)))); } void SelectionDAGLowering::visitVACopy(CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), getValue(I.getOperand(2)), DAG.getSrcValue(I.getOperand(1)), DAG.getSrcValue(I.getOperand(2)))); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. /// FIXME: When all targets are /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, CallingConv::ID CallConv, bool isTailCall, bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { assert((!isTailCall || PerformTailCallOpt) && "isTailCall set when tail-call optimizations are disabled!"); // Handle all of the outgoing arguments. SmallVector Outs; for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = getTargetData()->getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); if (Args[i].isSExt) Flags.setSExt(); if (Args[i].isInReg) Flags.setInReg(); if (Args[i].isSRet) Flags.setSRet(); if (Args[i].isByVal) { Flags.setByVal(); const PointerType *Ty = cast(Args[i].Ty); const Type *ElementTy = Ty->getElementType(); unsigned FrameAlign = getByValTypeAlignment(ElementTy); unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. if (Args[i].Alignment) FrameAlign = Args[i].Alignment; Flags.setByValAlign(FrameAlign); Flags.setByValSize(FrameSize); } if (Args[i].isNest) Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); EVT PartVT = getRegisterType(RetTy->getContext(), VT); unsigned NumParts = getNumRegisters(RetTy->getContext(), VT); SmallVector Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; if (Args[i].isSExt) ExtendKind = ISD::SIGN_EXTEND; else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) MyFlags.Flags.setOrigAlign(1); Outs.push_back(MyFlags); } } } // Handle the incoming return values from the call. SmallVector Ins; SmallVector RetTys; ComputeValueVTs(*this, RetTy, RetTys); for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; MyFlags.Used = isReturnValueUsed; if (RetSExt) MyFlags.Flags.setSExt(); if (RetZExt) MyFlags.Flags.setZExt(); if (isInreg) MyFlags.Flags.setInReg(); Ins.push_back(MyFlags); } } // Check if target-dependent constraints permit a tail call here. // Target-independent constraints should be checked by the caller. if (isTailCall && !IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG)) isTailCall = false; SmallVector InVals; Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, Ins, dl, DAG, InVals); // Verify that the target's LowerCall behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && "LowerCall didn't return a valid chain!"); assert((!isTailCall || InVals.empty()) && "LowerCall emitted a return value for a tail call!"); assert((isTailCall || InVals.size() == Ins.size()) && "LowerCall didn't emit the correct number of values!"); DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerCall emitted a null value!"); assert(Ins[i].VT == InVals[i].getValueType() && "LowerCall emitted a value with the wrong type!"); }); // For a tail call, the return value is merely live-out and there aren't // any nodes in the DAG representing it. Return a special value to // indicate that a tail call has been emitted and no more Instructions // should be processed in the current block. if (isTailCall) { DAG.setRoot(Chain); return std::make_pair(SDValue(), SDValue()); } // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. ISD::NodeType AssertOp = ISD::DELETED_NODE; if (RetSExt) AssertOp = ISD::AssertSext; else if (RetZExt) AssertOp = ISD::AssertZext; SmallVector ReturnValues; unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); SDValue ReturnValue = getCopyFromParts(DAG, dl, &InVals[CurReg], NumRegs, RegisterVT, VT, AssertOp); ReturnValues.push_back(ReturnValue); CurReg += NumRegs; } // For a function returning void, there is no return value. We can't create // such a node, so we just return a null return value in that case. In // that case, nothing will actualy look at the value. if (ReturnValues.empty()) return std::make_pair(SDValue(), Chain); SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(&RetTys[0], RetTys.size()), &ReturnValues[0], ReturnValues.size()); return std::make_pair(Res, Chain); } void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) { SDValue Res = LowerOperation(SDValue(N, 0), DAG); if (Res.getNode()) Results.push_back(Res); } SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { llvm_unreachable("LowerOperation not implemented for this target!"); return SDValue(); } void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) { SDValue Op = getValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); PendingExports.push_back(Chain); } #include "llvm/CodeGen/SelectionDAGISel.h" void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. Function &F = *LLVMBB->getParent(); SelectionDAG &DAG = SDL->DAG; SDValue OldRoot = DAG.getRoot(); DebugLoc dl = SDL->getCurDebugLoc(); const TargetData *TD = TLI.getTargetData(); SmallVector Ins; // Check whether the function can return without sret-demotion. SmallVector OutVTs; SmallVector OutsFlags; getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), OutVTs, OutsFlags, TLI); FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(), OutVTs, OutsFlags, DAG); if (!FLI.CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. ISD::ArgFlagsTy Flags; Flags.setSRet(); EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]); ISD::InputArg RetArg(Flags, RegisterVT, true); Ins.push_back(RetArg); } // Set up the incoming argument description vector. unsigned Idx = 1; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector ValueVTs; ComputeValueVTs(TLI, I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = TD->getABITypeAlignment(ArgTy); if (F.paramHasAttr(Idx, Attribute::ZExt)) Flags.setZExt(); if (F.paramHasAttr(Idx, Attribute::SExt)) Flags.setSExt(); if (F.paramHasAttr(Idx, Attribute::InReg)) Flags.setInReg(); if (F.paramHasAttr(Idx, Attribute::StructRet)) Flags.setSRet(); if (F.paramHasAttr(Idx, Attribute::ByVal)) { Flags.setByVal(); const PointerType *Ty = cast(I->getType()); const Type *ElementTy = Ty->getElementType(); unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy); unsigned FrameSize = TD->getTypeAllocSize(ElementTy); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); Flags.setByValAlign(FrameAlign); Flags.setByValSize(FrameSize); } if (F.paramHasAttr(Idx, Attribute::Nest)) Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) MyFlags.Flags.setOrigAlign(1); Ins.push_back(MyFlags); } } } // Call the target to set up the argument values. SmallVector InVals; SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals); // Verify that the target's LowerFormalArguments behaved as expected. assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && "LowerFormalArguments didn't return a valid chain!"); assert(InVals.size() == Ins.size() && "LowerFormalArguments didn't emit the correct number of values!"); DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerFormalArguments emitted a null value!"); assert(Ins[i].VT == InVals[i].getValueType() && "LowerFormalArguments emitted a value with the wrong type!"); }); // Update the DAG with the new chain value resulting from argument lowering. DAG.setRoot(NewRoot); // Set up the argument values. unsigned i = 0; Idx = 1; if (!FLI.CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector ValueVTs; ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); EVT VT = ValueVTs[0]; EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, AssertOp); MachineFunction& MF = SDL->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); FLI.DemoteRegister = SRetReg; NewRoot = SDL->DAG.getCopyToReg(NewRoot, SDL->getCurDebugLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. // Idx indexes LLVM arguments. Don't touch it. ++i; } for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector ArgValues; SmallVector ValueVTs; ComputeValueVTs(TLI, I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); for (unsigned Value = 0; Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); if (!I->use_empty()) { ISD::NodeType AssertOp = ISD::DELETED_NODE; if (F.paramHasAttr(Idx, Attribute::SExt)) AssertOp = ISD::AssertSext; else if (F.paramHasAttr(Idx, Attribute::ZExt)) AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, AssertOp)); } i += NumParts; } if (!I->use_empty()) { SDL->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues, SDL->getCurDebugLoc())); // If this argument is live outside of the entry block, insert a copy from // whereever we got it to the vreg that other BB's will reference it as. SDL->CopyToExportRegsIfNeeded(I); } } assert(i == InVals.size() && "Argument register count mismatch!"); // Finally, if the target has anything special to do, allow it to do so. // FIXME: this should insert code into the DAG! EmitFunctionEntryCode(F, SDL->DAG.getMachineFunction()); } /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to /// ensure constants are generated when needed. Remember the virtual registers /// that need to be added to the Machine PHI nodes as input. We cannot just /// directly add them, because expansion might result in multiple MBB's for one /// BB. As such, the start of the BB might correspond to a different MBB than /// the end. /// void SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet SuccsHandled; // Check successor nodes' PHI nodes that expect a constant to be available // from this block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa(SuccBB->begin())) continue; MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. if (!SuccsHandled.insert(SuccMBB)) continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); PHINode *PN; // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. for (BasicBlock::iterator I = SuccBB->begin(); (PN = dyn_cast(I)); ++I) { // Ignore dead phi's. if (PN->use_empty()) continue; unsigned Reg; Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); if (Constant *C = dyn_cast(PHIOp)) { unsigned &RegOut = SDL->ConstantsOut[C]; if (RegOut == 0) { RegOut = FuncInfo->CreateRegForValue(C); SDL->CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; } else { Reg = FuncInfo->ValueMap[PHIOp]; if (Reg == 0) { assert(isa(PHIOp) && FuncInfo->StaticAllocaMap.count(cast(PHIOp)) && "Didn't codegen value into a register!??"); Reg = FuncInfo->CreateRegForValue(PHIOp); SDL->CopyValueToVirtualRegister(PHIOp, Reg); } } // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector ValueVTs; ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; } } } SDL->ConstantsOut.clear(); } /// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only /// supports legal types, and it emits MachineInstrs directly instead of /// creating SelectionDAG nodes. /// bool SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, FastISel *F) { TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet SuccsHandled; unsigned OrigNumPHINodesToUpdate = SDL->PHINodesToUpdate.size(); // Check successor nodes' PHI nodes that expect a constant to be available // from this block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa(SuccBB->begin())) continue; MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. if (!SuccsHandled.insert(SuccMBB)) continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); PHINode *PN; // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. for (BasicBlock::iterator I = SuccBB->begin(); (PN = dyn_cast(I)); ++I) { // Ignore dead phi's. if (PN->use_empty()) continue; // Only handle legal types. Two interesting things to note here. First, // by bailing out early, we may leave behind some dead instructions, // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its // own moves. Second, this check is necessary becuase FastISel doesn't // use CreateRegForValue to create registers, so it always creates // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Promote MVT::i1. if (VT == MVT::i1) VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT); else { SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } } Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); unsigned Reg = F->getRegForValue(PHIOp); if (Reg == 0) { SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); } } return true; }