diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h index 657279106e0..49045cdc01e 100644 --- a/lib/Target/PTX/PTX.h +++ b/lib/Target/PTX/PTX.h @@ -31,7 +31,7 @@ namespace llvm { }; enum Predicate { - PRED_IGNORE = 0, + PRED_NORMAL = 0, PRED_NEGATE = 1 }; } // namespace PTX diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 8b5ed4501fb..087d4f98c48 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -410,7 +410,7 @@ printPredicateOperand(const MachineInstr *MI, raw_ostream &O) { DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n"); - if (reg && predOp != PTX::PRED_IGNORE) { + if (reg != PTX::NoRegister) { O << '@'; if (predOp == PTX::PRED_NEGATE) O << '!'; diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index 8c2145d26c7..0aebc820521 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -100,10 +100,7 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI, bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const { int i = MI->findFirstPredOperandIdx(); - if (i == -1) - llvm_unreachable("missing predicate operand"); - return MI->getOperand(i).getReg() || - MI->getOperand(i+1).getImm() != PTX::PRED_IGNORE; + return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister; } bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { @@ -143,7 +140,29 @@ DefinesPredicate(MachineInstr *MI, // If the specified instruction defines any predicate or condition code // register(s) used for predication, returns true as well as the definition // predicate(s) by reference. - return false; + + switch (MI->getOpcode()) { + default: + return false; + case PTX::SETPEQu32rr: + case PTX::SETPEQu32ri: + case PTX::SETPNEu32rr: + case PTX::SETPNEu32ri: + case PTX::SETPLTu32rr: + case PTX::SETPLTu32ri: + case PTX::SETPLEu32rr: + case PTX::SETPLEu32ri: + case PTX::SETPGTu32rr: + case PTX::SETPGTu32ri: + case PTX::SETPGEu32rr: + case PTX::SETPGEu32ri: { + const MachineOperand &MO = MI->getOperand(0); + assert(MO.isReg() && RI.getRegClass(MO.getReg()) == &PTX::PredsRegClass); + Pred.push_back(MO); + Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + return true; + } + } } // static helper routines @@ -151,8 +170,8 @@ DefinesPredicate(MachineInstr *MI, MachineSDNode *PTXInstrInfo:: GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { - SDValue predReg = DAG->getRegister(0, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTX::PRED_IGNORE, MVT::i1); + SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); SDValue ops[] = { Op1, predReg, predOp }; return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); } @@ -160,8 +179,8 @@ GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, MachineSDNode *PTXInstrInfo:: GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { - SDValue predReg = DAG->getRegister(0, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTX::PRED_IGNORE, MVT::i1); + SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); SDValue ops[] = { Op1, Op2, predReg, predOp }; return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); } @@ -169,6 +188,6 @@ GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { if (MI->findFirstPredOperandIdx() == -1) { MI->addOperand(MachineOperand::CreateReg(0, /*IsDef=*/false)); - MI->addOperand(MachineOperand::CreateImm(PTX::PRED_IGNORE)); + MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); } } diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 065799430c0..a5f82f1bf38 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -269,6 +269,18 @@ multiclass INT3ntnc { [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>; } +multiclass PTX_SETP { + def rr + : InstPTX<(outs Preds:$d), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$d, $a, $b"), + [(set Preds:$d, (setcc RC:$a, RC:$b, cmp))]>; + def ri + : InstPTX<(outs Preds:$d), (ins RC:$a, immcls:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$d, $a, $b"), + [(set Preds:$d, (setcc RC:$a, imm:$b, cmp))]>; +} + multiclass PTX_LD { def rr32 : InstPTX<(outs RC:$d), (ins MEMri32:$a), @@ -343,6 +355,11 @@ multiclass PTX_ST_ALL { // Instructions //===----------------------------------------------------------------------===// +///===- Integer Arithmetic Instructions -----------------------------------===// + +defm ADD : INT3<"add", add>; +defm SUB : INT3<"sub", sub>; + ///===- Floating-Point Arithmetic Instructions ----------------------------===// // Standard Binary Operations @@ -397,12 +414,14 @@ def FDIVri64SM10 : InstPTX<(outs RRegf64:$d), // TODO: Allow the rounding mode to be selectable through llc. defm FMAD : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>; +///===- Comparison and Selection Instructions -----------------------------===// - -///===- Integer Arithmetic Instructions -----------------------------------===// - -defm ADD : INT3<"add", add>; -defm SUB : INT3<"sub", sub>; +defm SETPEQu32 : PTX_SETP; +defm SETPNEu32 : PTX_SETP; +defm SETPLTu32 : PTX_SETP; +defm SETPLEu32 : PTX_SETP; +defm SETPGTu32 : PTX_SETP; +defm SETPGEu32 : PTX_SETP; ///===- Logic and Shift Instructions --------------------------------------===// @@ -475,6 +494,10 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>; // defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; // TODO: Do something with st.param if/when it is needed. +def CVT_u32_pred + : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a", + [(set RRegu32:$d, (zext Preds:$a))]>; + ///===- Control Flow Instructions -----------------------------------------===// let isReturn = 1, isTerminator = 1, isBarrier = 1 in { diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll new file mode 100644 index 00000000000..7f8b996fd0a --- /dev/null +++ b/test/CodeGen/PTX/setp.ll @@ -0,0 +1,109 @@ +; RUN: llc < %s -march=ptx | FileCheck %s + +define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.eq.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp eq i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.ne.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ne i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.lt.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ult i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.le.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ule i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.gt.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ugt i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.ge.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp uge i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) { +; CHECK: setp.eq.u32 p0, r1, 1; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp eq i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) { +; CHECK: setp.ne.u32 p0, r1, 1; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ne i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) { +; CHECK: setp.eq.u32 p0, r1, 0; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ult i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_le_u32_ri(i32 %x) { +; CHECK: setp.lt.u32 p0, r1, 2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ule i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) { +; CHECK: setp.gt.u32 p0, r1, 1; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ugt i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) { +; CHECK: setp.ne.u32 p0, r1, 0; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp uge i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +}