diff --git a/lib/Target/PTX/AsmPrinter/PTXAsmPrinter.cpp b/lib/Target/PTX/AsmPrinter/PTXAsmPrinter.cpp index 1e76f28f659..90dc6b91485 100644 --- a/lib/Target/PTX/AsmPrinter/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/AsmPrinter/PTXAsmPrinter.cpp @@ -32,6 +32,8 @@ namespace { virtual void EmitInstruction(const MachineInstr *MI); + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); + // autogen'd. void printInstruction(const MachineInstr *MI, raw_ostream &OS); static const char *getRegisterName(unsigned RegNo); @@ -40,10 +42,27 @@ namespace { void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> str; - raw_svector_ostream os(str); - printInstruction(MI, os); - os << ';'; - OutStreamer.EmitRawText(os.str()); + raw_svector_ostream OS(str); + printInstruction(MI, OS); + OS << ';'; + OutStreamer.EmitRawText(OS.str()); +} + +void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &OS) { + const MachineOperand &MO = MI->getOperand(opNum); + + switch (MO.getType()) { + default: + llvm_unreachable(""); + break; + case MachineOperand::MO_Register: + OS << getRegisterName(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + OS << (int) MO.getImm(); + break; + } } #include "PTXGenAsmWriter.inc" diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index d38abf1a3c9..6e68c376018 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -11,9 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "PTX.h" #include "PTXISelLowering.h" #include "PTXRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" @@ -22,7 +25,8 @@ using namespace llvm; PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) : 
TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the register classes. - addRegisterClass(MVT::i1, PTX::PredsRegisterClass); + addRegisterClass(MVT::i1, PTX::PredsRegisterClass); + addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass); // Compute derived properties from the register classes computeRegisterProperties(); @@ -40,6 +44,57 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { // Calling Convention Implementation //===----------------------------------------------------------------------===// +static struct argmap_entry { + MVT::SimpleValueType VT; + TargetRegisterClass *RC; + TargetRegisterClass::iterator loc; + + argmap_entry(MVT::SimpleValueType _VT, TargetRegisterClass *_RC) + : VT(_VT), RC(_RC), loc(_RC->begin()) {} + + void reset(void) { loc = RC->begin(); } + bool operator==(MVT::SimpleValueType _VT) { return VT == _VT; } +} argmap[] = { + argmap_entry(MVT::i1, PTX::PredsRegisterClass), + argmap_entry(MVT::i32, PTX::RRegs32RegisterClass) +}; + +static SDValue lower_kernel_argument(int i, + SDValue Chain, + DebugLoc dl, + MVT::SimpleValueType VT, + argmap_entry *entry, + SelectionDAG &DAG, + unsigned *argreg) { + // TODO + llvm_unreachable("Not implemented yet"); +} + +static SDValue lower_device_argument(int i, + SDValue Chain, + DebugLoc dl, + MVT::SimpleValueType VT, + argmap_entry *entry, + SelectionDAG &DAG, + unsigned *argreg) { + MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); + + unsigned preg = *++(entry->loc); // allocate start from register 1 + unsigned vreg = RegInfo.createVirtualRegister(entry->RC); + RegInfo.addLiveIn(preg, vreg); + + *argreg = preg; + return DAG.getCopyFromReg(Chain, dl, vreg, VT); +} + +typedef SDValue (*lower_argument_func)(int i, + SDValue Chain, + DebugLoc dl, + MVT::SimpleValueType VT, + argmap_entry *entry, + SelectionDAG &DAG, + unsigned *argreg); + SDValue PTXTargetLowering:: LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, @@ -48,6 
+103,40 @@ SDValue PTXTargetLowering:: DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { + if (isVarArg) llvm_unreachable("PTX does not support varargs"); + + lower_argument_func lower_argument; + + switch (CallConv) { + default: + llvm_unreachable("Unsupported calling convention"); + break; + case CallingConv::PTX_Kernel: + lower_argument = lower_kernel_argument; + break; + case CallingConv::PTX_Device: + lower_argument = lower_device_argument; + break; + } + + // Reset argmap before allocation + for (struct argmap_entry *i = argmap, *e = argmap + array_lengthof(argmap); + i != e; ++ i) + i->reset(); + + for (int i = 0, e = Ins.size(); i != e; ++ i) { + MVT::SimpleValueType VT = Ins[i].VT.getSimpleVT().SimpleTy; + + struct argmap_entry *entry = std::find(argmap, + argmap + array_lengthof(argmap), VT); + if (entry == argmap + array_lengthof(argmap)) + llvm_unreachable("Type of argument is not supported"); + + unsigned reg; + SDValue arg = lower_argument(i, Chain, dl, VT, entry, DAG, &reg); + InVals.push_back(arg); + } + return Chain; } @@ -59,7 +148,7 @@ SDValue PTXTargetLowering:: const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { - assert(!isVarArg && "PTX does not support var args."); + if (isVarArg) llvm_unreachable("PTX does not support varargs"); switch (CallConv) { default: @@ -74,10 +163,26 @@ SDValue PTXTargetLowering:: // PTX_Device + // return void if (Outs.size() == 0) return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); - // TODO: allocate return register + assert(Outs[0].VT == MVT::i32 && "Can return only basic types"); + SDValue Flag; + unsigned reg = PTX::R0; + + // If this is the first return lowered for this function, add the regs to the + // liveout set for the function + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) + DAG.getMachineFunction().getRegInfo().addLiveOut(reg); + + // Copy the result values into the output registers + Chain = DAG.getCopyToReg(Chain, dl, reg, OutVals[0], Flag); + + // 
Guarantee that all emitted copies are stuck together, + // avoiding something bad + Flag = Chain.getValue(1); + return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); } diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index 59979dc8dfb..805759bcab1 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -11,7 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "PTX.h" #include "PTXInstrInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" using namespace llvm; @@ -20,3 +22,68 @@ using namespace llvm; PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM) : TargetInstrInfoImpl(PTXInsts, array_lengthof(PTXInsts)), RI(_TM, *this), TM(_TM) {} + +static const struct map_entry { + const TargetRegisterClass *cls; + const int opcode; +} map[] = { + { &PTX::RRegs32RegClass, PTX::MOVrr }, + { &PTX::PredsRegClass, PTX::MOVpp } +}; + +// Copy SrcReg into DstReg using the move opcode of the first map[] entry whose +// register class contains both registers; other pairings are unreachable. +void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, unsigned SrcReg, + bool KillSrc) const { + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) + if (map[i].cls->contains(DstReg, SrcReg)) { + BuildMI(MBB, I, DL, + get(map[i].opcode), DstReg).addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + llvm_unreachable("Impossible reg-to-reg copy"); +} + +bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, unsigned SrcReg, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { + if (DstRC != SrcRC) + return false; + + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) + if (DstRC == map[i].cls) { + MachineInstr *MI = BuildMI(MBB, I, DL, get(map[i].opcode), + DstReg).addReg(SrcReg); + if (MI->findFirstPredOperandIdx() == -1) { + MI->addOperand(MachineOperand::CreateReg(0, false)); + MI->addOperand(MachineOperand::CreateImm(/*IsInv=*/0)); + } + 
return true; + } + + return false; +} + +bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const { + switch (MI.getOpcode()) { + default: + return false; + case PTX::MOVpp: + case PTX::MOVrr: + assert(MI.getNumOperands() >= 2 && + MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && + "Invalid register-register move instruction"); + SrcSubIdx = DstSubIdx = 0; // No sub-registers + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } +} diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h index 3832567eb44..9d9ffe1d23a 100644 --- a/lib/Target/PTX/PTXInstrInfo.h +++ b/lib/Target/PTX/PTXInstrInfo.h @@ -29,6 +29,22 @@ class PTXInstrInfo : public TargetInstrInfoImpl { explicit PTXInstrInfo(PTXTargetMachine &_TM); virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; } + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, unsigned SrcReg, + bool KillSrc) const; + + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, unsigned SrcReg, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; + + virtual bool isMoveInstr(const MachineInstr& MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; }; // class PTXInstrInfo } // namespace llvm diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 3698c9dfc40..e5dd3341753 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -30,6 +30,27 @@ def PTXret // Instructions //===----------------------------------------------------------------------===// +///===- Data Movement and Conversion Instructions -------------------------===// + +let neverHasSideEffects = 1 in { + // rely on isMoveInstr to separate MOVpp, MOVrr, etc. 
+ def MOVpp + : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>; + def MOVrr + : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.s32\t$d, $a", []>; +} + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def MOVpi + : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a", + [(set Preds:$d, imm:$a)]>; + def MOVri + : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a", + [(set RRegs32:$d, imm:$a)]>; +} + +///===- Control Flow Instructions -----------------------------------------===// + let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h index 56d75a39ebb..a658c9258f0 100644 --- a/lib/Target/PTX/PTXRegisterInfo.h +++ b/lib/Target/PTX/PTXRegisterInfo.h @@ -40,10 +40,11 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo { virtual bool hasFP(const MachineFunction &MF) const { return false; } - // FIXME: Given that PTX does not support stack frame, what should we do here? 
virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, - RegScavenger *RS = NULL) const {} + RegScavenger *RS = NULL) const { + llvm_unreachable("PTX does not support general function call"); + } virtual void emitPrologue(MachineFunction &MF) const {} virtual void emitEpilogue(MachineFunction &MF, diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td index 9346b7193e9..22e2b343a0e 100644 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ b/lib/Target/PTX/PTXRegisterInfo.td @@ -52,6 +52,39 @@ def P29 : PTXReg<"p29">; def P30 : PTXReg<"p30">; def P31 : PTXReg<"p31">; +def R0 : PTXReg<"r0">; +def R1 : PTXReg<"r1">; +def R2 : PTXReg<"r2">; +def R3 : PTXReg<"r3">; +def R4 : PTXReg<"r4">; +def R5 : PTXReg<"r5">; +def R6 : PTXReg<"r6">; +def R7 : PTXReg<"r7">; +def R8 : PTXReg<"r8">; +def R9 : PTXReg<"r9">; +def R10 : PTXReg<"r10">; +def R11 : PTXReg<"r11">; +def R12 : PTXReg<"r12">; +def R13 : PTXReg<"r13">; +def R14 : PTXReg<"r14">; +def R15 : PTXReg<"r15">; +def R16 : PTXReg<"r16">; +def R17 : PTXReg<"r17">; +def R18 : PTXReg<"r18">; +def R19 : PTXReg<"r19">; +def R20 : PTXReg<"r20">; +def R21 : PTXReg<"r21">; +def R22 : PTXReg<"r22">; +def R23 : PTXReg<"r23">; +def R24 : PTXReg<"r24">; +def R25 : PTXReg<"r25">; +def R26 : PTXReg<"r26">; +def R27 : PTXReg<"r27">; +def R28 : PTXReg<"r28">; +def R29 : PTXReg<"r29">; +def R30 : PTXReg<"r30">; +def R31 : PTXReg<"r31">; + //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// @@ -61,3 +94,9 @@ def Preds : RegisterClass<"PTX", [i1], 8, P8, P9, P10, P11, P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31]>; + +def RRegs32 : RegisterClass<"PTX", [i32], 32, + [R0, R1, R2, R3, R4, R5, R6, R7, + R8, R9, R10, R11, R12, R13, R14, R15, + R16, R17, R18, R19, R20, R21, R22, R23, + R24, R25, R26, R27, R28, R29, R30, R31]>;