diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 75fec7bd1d4..ac324794816 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1827,11 +1827,10 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? ISD::UADDO : ISD::USUBO, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); - TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); - if (hasOVF) { EVT OvfVT = getSetCCResultType(NVT); SDVTList VTList = DAG.getVTList(NVT, OvfVT); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); int RevOpc; if (N->getOpcode() == ISD::ADD) { RevOpc = ISD::SUB; @@ -1864,13 +1863,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); - - if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) { - SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); - return; - } - SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, DAG.getConstant(1, dl, NVT), DAG.getConstant(0, dl, NVT)); @@ -1885,14 +1877,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - - SDValue Borrow; - if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) - Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT); - else - Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), - DAG.getConstant(0, dl, NVT)); - + SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, + DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 02102d6b22f..a6ec9fb2e59 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -364,18 +364,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); - if (!(Subtarget.hasDSP() && Subtarget.hasMips32r2())) { - setOperationAction(ISD::ADDC, MVT::i32, Expand); - setOperationAction(ISD::ADDE, MVT::i32, Expand); - } - - setOperationAction(ISD::ADDC, MVT::i64, Expand); - setOperationAction(ISD::ADDE, MVT::i64, Expand); - setOperationAction(ISD::SUBC, MVT::i32, Expand); - setOperationAction(ISD::SUBE, MVT::i32, Expand); - setOperationAction(ISD::SUBC, MVT::i64, Expand); - setOperationAction(ISD::SUBE, MVT::i64, Expand); - // Operations not directly supported by Mips. setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); @@ -481,7 +469,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::AssertZext); setTargetDAGCombine(ISD::SHL); @@ -936,130 +923,14 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, } } -static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG, - const MipsSubtarget &Subtarget) { - // ROOTNode must have a multiplication as an operand for the match to be - // successful. 
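As a point of reference, a minimal i64 addition illustrating the carry handling this patch restores: with the Expand actions above removed, ADDC/ADDE become legal again on MIPS32, and selectAddESubE (added further down in MipsSEISelDAGToDAG.cpp) turns the ADDE into an sltu-based carry. The function name is illustrative; the expected MIPS32 sequence is the one checked by the updated GP32 prefixes in test/CodeGen/Mips/llvm-ir/add.ll below (O32: $4/$5 hold the high/low words of %u, $6/$7 those of %v, $2/$3 the result):

define i64 @add64_example(i64 %u, i64 %v) {
entry:
  %sum = add i64 %u, %v
  ret i64 %sum
}
; Carry is materialized with sltu and folded into the high word:
;   addu $3, $5, $7                   ; low word of the sum
;   sltu $[[T0:[0-9]+]], $3, $7       ; carry = (low sum <u low word of %v)
;   addu $[[T1:[0-9]+]], $[[T0]], $6  ; carry + high word of %v
;   addu $2, $4, $[[T1]]              ; high word of the sum
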
- if (ROOTNode->getOperand(0).getOpcode() != ISD::MUL && - ROOTNode->getOperand(1).getOpcode() != ISD::MUL) - return SDValue(); - - // We don't handle vector types here. - if (ROOTNode->getValueType(0).isVector()) - return SDValue(); - - // For MIPS64, madd / msub instructions are inefficent to use with 64 bit - // arithmetic. E.g. - // (add (mul a b) c) => - // let res = (madd (mthi (drotr c 32))x(mtlo c) a b) in - // MIPS64: (or (dsll (mfhi res) 32) (dsrl (dsll (mflo res) 32) 32) - // or - // MIPS64R2: (dins (mflo res) (mfhi res) 32 32) - // - // The overhead of setting up the Hi/Lo registers and reassembling the - // result makes this a dubious optimzation for MIPS64. The core of the - // problem is that Hi/Lo contain the upper and lower 32 bits of the - // operand and result. - // - // It requires a chain of 4 add/mul for MIPS64R2 to get better code - // density than doing it naively, 5 for MIPS64. Additionally, using - // madd/msub on MIPS64 requires the operands actually be 32 bit sign - // extended operands, not true 64 bit values. - // - // FIXME: For the moment, disable this completely for MIPS64. - if (Subtarget.hasMips64()) - return SDValue(); - - SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::MUL - ? ROOTNode->getOperand(0) - : ROOTNode->getOperand(1); - - SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::MUL - ? ROOTNode->getOperand(1) - : ROOTNode->getOperand(0); - - // Transform this to a MADD only if the user of this node is the add. - // If there are other users of the mul, this function returns here. - if (!Mult.hasOneUse()) - return SDValue(); - - // maddu and madd are unusual instructions in that on MIPS64 bits 63..31 - // must be in canonical form, i.e. sign extended. For MIPS32, the operands - // of the multiply must have 32 or more sign bits, otherwise we cannot - // perform this optimization. We have to check this here as we're performing - // this optimization pre-legalization. - SDValue MultLHS = Mult->getOperand(0); - SDValue MultRHS = Mult->getOperand(1); - unsigned LHSSB = CurDAG.ComputeNumSignBits(MultLHS); - unsigned RHSSB = CurDAG.ComputeNumSignBits(MultRHS); - - if (LHSSB < 32 || RHSSB < 32) - return SDValue(); - - APInt HighMask = - APInt::getHighBitsSet(Mult->getValueType(0).getScalarSizeInBits(), 32); - bool IsUnsigned = CurDAG.MaskedValueIsZero(Mult->getOperand(0), HighMask) && - CurDAG.MaskedValueIsZero(Mult->getOperand(1), HighMask) && - CurDAG.MaskedValueIsZero(AddOperand, HighMask); - - // Initialize accumulator. - SDLoc DL(ROOTNode); - SDValue TopHalf; - SDValue BottomHalf; - BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, - CurDAG.getIntPtrConstant(0, DL)); - - TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, - CurDAG.getIntPtrConstant(1, DL)); - SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, - BottomHalf, - TopHalf); - - // Create MipsMAdd(u) / MipsMSub(u) node. - bool IsAdd = ROOTNode->getOpcode() == ISD::ADD; - unsigned Opcode = IsAdd ? (IsUnsigned ? MipsISD::MAddu : MipsISD::MAdd) - : (IsUnsigned ? 
MipsISD::MSubu : MipsISD::MSub); - SDValue MAddOps[3] = { - CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(0)), - CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(1)), ACCIn}; - EVT VTs[2] = {MVT::i32, MVT::i32}; - SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps); - - SDValue ResLo = CurDAG.getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); - SDValue ResHi = CurDAG.getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); - SDValue Combined = - CurDAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResLo, ResHi); - return Combined; -} - -static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget &Subtarget) { - // (sub v0 (mul v1, v2)) => (msub v1, v2, v0) - if (DCI.isBeforeLegalizeOps()) { - if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && - !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) - return performMADD_MSUBCombine(N, DAG, Subtarget); - - return SDValue(); - } - - return SDValue(); -} - static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { - // (add v0 (mul v1, v2)) => (madd v1, v2, v0) - if (DCI.isBeforeLegalizeOps()) { - if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && - !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) - return performMADD_MSUBCombine(N, DAG, Subtarget); - - return SDValue(); - } - // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) + + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + SDValue Add = N->getOperand(1); if (Add.getOpcode() != ISD::ADD) @@ -1187,8 +1058,6 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return performAssertZextCombine(N, DAG, DCI, Subtarget); case ISD::SHL: return performSHLCombine(N, DAG, DCI, Subtarget); - case ISD::SUB: - return performSUBCombine(N, DAG, DCI, Subtarget); } return SDValue(); diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 4be26dd25dc..49ae6dd4cd3 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -245,64 +245,46 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { } } -void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const { - SDValue InFlag = Node->getOperand(2); - unsigned Opc = InFlag.getOpcode(); +void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, + SDValue CmpLHS, const SDLoc &DL, + SDNode *Node) const { + unsigned Opc = InFlag.getOpcode(); (void)Opc; + + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu; + if (Subtarget->isGP64bit()) { + SLTuOp = Mips::SLTu64; + ADDuOp = Mips::DADDu; + } + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); - // In the base case, we can rely on the carry bit from the addsc - // instruction. - if (Opc == ISD::ADDC) { - SDValue Ops[3] = {LHS, RHS, InFlag}; - CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops); - return; + SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops); + + if (Subtarget->isGP64bit()) { + // On 64-bit targets, sltu produces an i64 but our backend currently says + // that SLTu64 produces an i32. 
We need to fix this in the long run but for + // now, just make the DAG type-correct by asserting the upper bits are zero. + Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT, + CurDAG->getTargetConstant(0, DL, VT), + SDValue(Carry, 0), + CurDAG->getTargetConstant(Mips::sub_32, DL, + VT)); } - assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!"); + // Generate a second addition only if we know that RHS is not a + // constant-zero node. + SDNode *AddCarry = Carry; + ConstantSDNode *C = dyn_cast(RHS); + if (!C || C->getZExtValue()) + AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS); - // The more complex case is when there is a chain of ISD::ADDE nodes like: - // (adde (adde (adde (addc a b) c) d) e). - // - // The addwc instruction does not write to the carry bit, instead it writes - // to bit 20 of the dsp control register. To match this series of nodes, each - // intermediate adde node must be expanded to write the carry bit before the - // addition. - - // Start by reading the overflow field for addsc and moving the value to the - // carry field. The usage of 1 here with MipsISD::RDDSP / Mips::WRDSP - // corresponds to reading/writing the entire control register to/from a GPR. - - SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32); - - SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32); - - SDNode *DSPCtrlField = - CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag); - - SDNode *Carry = CurDAG->getMachineNode( - Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne); - - SDValue Ops[4] = {SDValue(DSPCtrlField, 0), - CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne, - SDValue(Carry, 0)}; - SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops); - - // My reading of the the MIPS DSP 3.01 specification isn't as clear as I - // would like about whether bit 20 always gets overwritten by addwc. - // Hence take an extremely conservative view and presume it's sticky. We - // therefore need to clear it. - - SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32); - - SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)}; - SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps); - - SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue, - SDValue(DSPCtrlFinal, 0), CstOne); - - SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)}; - CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands); + CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0)); } /// Match frameindex @@ -783,8 +765,19 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { switch(Opcode) { default: break; + case ISD::SUBE: { + SDValue InFlag = Node->getOperand(2); + unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu; + selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node); + return true; + } + case ISD::ADDE: { - selectAddE(Node, DL); + if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC. + break; + SDValue InFlag = Node->getOperand(2); + unsigned Opc = Subtarget->isGP64bit() ? 
Mips::DADDu : Mips::ADDu; + selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node); return true; } diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index 6f38289c5a4..f89a350cab0 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -41,7 +41,8 @@ private: const SDLoc &dl, EVT Ty, bool HasLo, bool HasHi); - void selectAddE(SDNode *Node, const SDLoc &DL) const; + void selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, + const SDLoc &DL, SDNode *Node) const; bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index b57bceb3c83..06a97b9d123 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -179,6 +179,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); + setTargetDAGCombine(ISD::ADDE); + setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::MUL); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -419,6 +421,163 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, return MipsTargetLowering::LowerOperation(Op, DAG); } +// selectMADD - +// Transforms a subgraph in CurDAG if the following pattern is found: +// (addc multLo, Lo0), (adde multHi, Hi0), +// where, +// multHi/Lo: product of multiplication +// Lo0: initial value of Lo register +// Hi0: initial value of Hi register +// Return true if pattern matching was successful. +static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { + // ADDENode's second operand must be a flag output of an ADDC node in order + // for the matching to be successful. + SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); + + if (ADDCNode->getOpcode() != ISD::ADDC) + return false; + + SDValue MultHi = ADDENode->getOperand(0); + SDValue MultLo = ADDCNode->getOperand(0); + SDNode *MultNode = MultHi.getNode(); + unsigned MultOpc = MultHi.getOpcode(); + + // MultHi and MultLo must be generated by the same node, + if (MultLo.getNode() != MultNode) + return false; + + // and it must be a multiplication. + if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) + return false; + + // MultLo amd MultHi must be the first and second output of MultNode + // respectively. + if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) + return false; + + // Transform this to a MADD only if ADDENode and ADDCNode are the only users + // of the values of MultNode, in which case MultNode will be removed in later + // phases. + // If there exist users other than ADDENode or ADDCNode, this function returns + // here, which will result in MultNode being mapped to a single MULT + // instruction node rather than a pair of MULT and MADD instructions being + // produced. + if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) + return false; + + SDLoc DL(ADDENode); + + // Initialize accumulator. + SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, + ADDCNode->getOperand(1), + ADDENode->getOperand(1)); + + // create MipsMAdd(u) node + MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MAddu : MipsISD::MAdd; + + SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, + MultNode->getOperand(0),// Factor 0 + MultNode->getOperand(1),// Factor 1 + ACCIn); + + // replace uses of adde and addc here + if (!SDValue(ADDCNode, 0).use_empty()) { + SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); + } + if (!SDValue(ADDENode, 0).use_empty()) { + SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); + } + + return true; +} + +// selectMSUB - +// Transforms a subgraph in CurDAG if the following pattern is found: +// (addc Lo0, multLo), (sube Hi0, multHi), +// where, +// multHi/Lo: product of multiplication +// Lo0: initial value of Lo register +// Hi0: initial value of Hi register +// Return true if pattern matching was successful. +static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { + // SUBENode's second operand must be a flag output of an SUBC node in order + // for the matching to be successful. + SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); + + if (SUBCNode->getOpcode() != ISD::SUBC) + return false; + + SDValue MultHi = SUBENode->getOperand(1); + SDValue MultLo = SUBCNode->getOperand(1); + SDNode *MultNode = MultHi.getNode(); + unsigned MultOpc = MultHi.getOpcode(); + + // MultHi and MultLo must be generated by the same node, + if (MultLo.getNode() != MultNode) + return false; + + // and it must be a multiplication. + if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) + return false; + + // MultLo amd MultHi must be the first and second output of MultNode + // respectively. + if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) + return false; + + // Transform this to a MSUB only if SUBENode and SUBCNode are the only users + // of the values of MultNode, in which case MultNode will be removed in later + // phases. + // If there exist users other than SUBENode or SUBCNode, this function returns + // here, which will result in MultNode being mapped to a single MULT + // instruction node rather than a pair of MULT and MSUB instructions being + // produced. + if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) + return false; + + SDLoc DL(SUBENode); + + // Initialize accumulator. + SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, + SUBCNode->getOperand(0), + SUBENode->getOperand(0)); + + // create MipsSub(u) node + MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MSubu : MipsISD::MSub; + + SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, + MultNode->getOperand(0),// Factor 0 + MultNode->getOperand(1),// Factor 1 + ACCIn); + + // replace uses of sube and subc here + if (!SDValue(SUBCNode, 0).use_empty()) { + SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); + } + if (!SDValue(SUBENode, 0).use_empty()) { + SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); + } + + return true; +} + +static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) + return SDValue(N, 0); + + return SDValue(); +} + // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT // // Performs the following transformations: @@ -661,6 +820,19 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + + if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 && + selectMSUB(N, &DAG)) + return SDValue(N, 0); + + return SDValue(); +} + static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG) { // Clear the upper (64 - VT.sizeInBits) bits. @@ -938,12 +1110,16 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SDValue Val; switch (N->getOpcode()) { + case ISD::ADDE: + return performADDECombine(N, DAG, DCI, Subtarget); case ISD::AND: Val = performANDCombine(N, DAG, DCI, Subtarget); break; case ISD::OR: Val = performORCombine(N, DAG, DCI, Subtarget); break; + case ISD::SUBE: + return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::MUL: return performMULCombine(N, DAG, DCI, this); case ISD::SHL: diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll index 5e6092fc784..c61e1cdedea 100644 --- a/test/CodeGen/Mips/2008-06-05-Carry.ll +++ b/test/CodeGen/Mips/2008-06-05-Carry.ll @@ -2,21 +2,20 @@ define i64 @add64(i64 %u, i64 %v) nounwind { entry: -; CHECK-LABEL: add64: ; CHECK: addu -; CHECK-DAG: sltu -; CHECK-DAG: addu +; CHECK: sltu ; CHECK: addu - %tmp2 = add i64 %u, %v +; CHECK: addu + %tmp2 = add i64 %u, %v ret i64 %tmp2 } define i64 @sub64(i64 %u, i64 %v) nounwind { entry: -; CHECK-LABEL: sub64 -; CHECK-DAG: sltu -; CHECK-DAG: subu +; CHECK: sub64 ; CHECK: subu +; CHECK: sltu +; CHECK: addu ; CHECK: subu %tmp2 = sub i64 %u, %v ret i64 %tmp2 diff --git a/test/CodeGen/Mips/dsp-patterns.ll b/test/CodeGen/Mips/dsp-patterns.ll index 250d3eff37d..837c0d8bfc5 100644 --- a/test/CodeGen/Mips/dsp-patterns.ll +++ b/test/CodeGen/Mips/dsp-patterns.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=mips -mcpu=mips32r2 -mattr=dsp < %s | FileCheck %s -check-prefix=R1 -; RUN: llc -march=mips -mcpu=mips32r2 -mattr=dspr2 < %s | FileCheck %s -check-prefix=R2 +; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s -check-prefix=R1 +; RUN: llc -march=mips -mattr=dspr2 < %s | FileCheck %s -check-prefix=R2 ; R1-LABEL: test_lbux: ; R1: lbux ${{[0-9]+}} diff --git a/test/CodeGen/Mips/llcarry.ll b/test/CodeGen/Mips/llcarry.ll index b7cc6fc8ea7..fcf12942023 100644 
--- a/test/CodeGen/Mips/llcarry.ll +++ b/test/CodeGen/Mips/llcarry.ll @@ -14,9 +14,9 @@ entry: %add = add nsw i64 %1, %0 store i64 %add, i64* @k, align 8 ; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} -; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} ; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} -; 16: move ${{[0-9]+}}, $24 +; 16: move ${{[0-9]+}}, $t8 +; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} ; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} ret void } @@ -28,8 +28,8 @@ entry: %sub = sub nsw i64 %0, %1 ; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} ; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} -; 16: move ${{[0-9]+}}, $24 -; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} +; 16: move ${{[0-9]+}}, $t8 +; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} ; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} store i64 %sub, i64* @l, align 8 ret void @@ -41,7 +41,8 @@ entry: %add = add nsw i64 %0, 15 ; 16: addiu ${{[0-9]+}}, 15 ; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} -; 16: move ${{[0-9]+}}, $24 +; 16: move ${{[0-9]+}}, $t8 +; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} ; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} store i64 %add, i64* @m, align 8 ret void diff --git a/test/CodeGen/Mips/llvm-ir/add.ll b/test/CodeGen/Mips/llvm-ir/add.ll index 63884eb03b8..a5ecdda94ce 100644 --- a/test/CodeGen/Mips/llvm-ir/add.ll +++ b/test/CodeGen/Mips/llvm-ir/add.ll @@ -1,35 +1,35 @@ ; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ -; RUN: -check-prefixes=ALL,NOT-R2-R6,GP32,PRE4 +; RUN: -check-prefixes=ALL,NOT-R2-R6,GP32 ; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ -; RUN: -check-prefixes=ALL,NOT-R2-R6,GP32,GP32-CMOV +; RUN: -check-prefixes=ALL,NOT-R2-R6,GP32 ; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ -; RUN: -check-prefixes=ALL,R2-R6,GP32,GP32-CMOV +; RUN: -check-prefixes=ALL,R2-R6,GP32 ; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \ -; RUN: -check-prefixes=ALL,R2-R6,GP32,GP32-CMOV +; RUN: -check-prefixes=ALL,R2-R6,GP32 ; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \ -; RUN: -check-prefixes=ALL,R2-R6,GP32,GP32-CMOV +; RUN: -check-prefixes=ALL,R2-R6,GP32 ; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ ; RUN: -check-prefixes=ALL,R2-R6,GP32 ; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ -; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64,GP64-NOT-R2-R6 +; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64 ; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ -; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64,GP64-NOT-R2-R6 +; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64 ; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ -; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64,GP64-NOT-R2-R6 +; RUN: -check-prefixes=ALL,NOT-R2-R6,GP64 ; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ -; RUN: -check-prefixes=ALL,R2-R6,GP64,GP64-R2-R6 +; RUN: -check-prefixes=ALL,R2-R6,GP64 ; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \ -; RUN: -check-prefixes=ALL,R2-R6,GP64,GP64-R2-R6 +; RUN: -check-prefixes=ALL,R2-R6,GP64 ; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \ -; RUN: -check-prefixes=ALL,R2-R6,GP64,GP64-R2-R6 +; RUN: -check-prefixes=ALL,R2-R6,GP64 ; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ -; RUN: -check-prefixes=ALL,R2-R6,GP64,GP64-R2-R6 +; RUN: -check-prefixes=ALL,R2-R6,GP64 ; RUN: llc < %s -march=mips -mcpu=mips32r3 -mattr=+micromips -O2 -verify-machineinstrs | FileCheck %s \ -; RUN: -check-prefixes=ALL,MMR3,MM32 +; RUN: -check-prefixes=ALL,MMR6,MM32 ; RUN: llc < %s -march=mips -mcpu=mips32r6 -mattr=+micromips -O2 | 
FileCheck %s \ ; RUN: -check-prefixes=ALL,MMR6,MM32 ; RUN: llc < %s -march=mips -mcpu=mips64r6 -target-abi n64 -mattr=+micromips -O2 | FileCheck %s \ -; RUN: -check-prefixes=ALL,MM64 +; RUN: -check-prefixes=ALL,MMR6,MM64 ; FIXME: This code sequence is inefficient as it should be 'subu $[[T0]], $zero, $[[T0]'. @@ -110,17 +110,17 @@ define signext i64 @add_i64(i64 signext %a, i64 signext %b) { entry: ; ALL-LABEL: add_i64: - ; GP32-DAG: addu $[[T0:[0-9]+]], $4, $6 - ; GP32-DAG: addu $3, $5, $7 - ; GP32: sltu $[[T1:[0-9]+]], $3, $5 - ; GP32: addu $2, $[[T0]], $[[T1]] + ; GP32: addu $3, $5, $7 + ; GP32: sltu $[[T0:[0-9]+]], $3, $7 + ; GP32: addu $[[T1:[0-9]+]], $[[T0]], $6 + ; GP32: addu $2, $4, $[[T1]] ; GP64: daddu $2, $4, $5 - ; MM32-DAG: addu16 $3, $5, $7 - ; MM32-DAG: addu16 $[[T0:[0-9]+]], $4, $6 - ; MM32: sltu $[[T1:[0-9]+]], $3, $5 - ; MM32: addu16 $2, $[[T0]], $[[T1]] + ; MM32: addu16 $3, $5, $7 + ; MM32: sltu $[[T0:[0-9]+]], $3, $7 + ; MM32: addu $[[T1:[0-9]+]], $[[T0]], $6 + ; MM32: addu $2, $4, $[[T1]] ; MM64: daddu $2, $4, $5 @@ -132,108 +132,49 @@ define signext i128 @add_i128(i128 signext %a, i128 signext %b) { entry: ; ALL-LABEL: add_i128: - ; PRE4: move $[[R1:[0-9]+]], $5 - ; PRE4: move $[[R2:[0-9]+]], $4 - ; PRE4: lw $[[R3:[0-9]+]], 24($sp) - ; PRE4: addu $[[R4:[0-9]+]], $6, $[[R3]] - ; PRE4: lw $[[R5:[0-9]+]], 28($sp) - ; PRE4: addu $[[R6:[0-9]+]], $7, $[[R5]] - ; PRE4: sltu $[[R7:[0-9]+]], $[[R6]], $7 - ; PRE4: addu $[[R8:[0-9]+]], $[[R4]], $[[R7]] - ; PRE4: xor $[[R9:[0-9]+]], $[[R8]], $6 - ; PRE4: sltiu $[[R10:[0-9]+]], $[[R9]], 1 - ; PRE4: bnez $[[R10]], $BB5_2 - ; PRE4: sltu $[[R7]], $[[R8]], $6 - ; PRE4: lw $[[R12:[0-9]+]], 20($sp) - ; PRE4: addu $[[R13:[0-9]+]], $[[R1]], $[[R12]] - ; PRE4: lw $[[R14:[0-9]+]], 16($sp) - ; PRE4: addu $[[R15:[0-9]+]], $[[R13]], $[[R7]] - ; PRE4: addu $[[R16:[0-9]+]], $[[R2]], $[[R14]] - ; PRE4: sltu $[[R17:[0-9]+]], $[[R15]], $[[R13]] - ; PRE4: sltu $[[R18:[0-9]+]], $[[R13]], $[[R1]] - ; PRE4: addu $[[R19:[0-9]+]], $[[R16]], $[[R18]] - ; PRE4: addu $2, $[[R19]], $[[R17]] + ; GP32: lw $[[T0:[0-9]+]], 28($sp) + ; GP32: addu $[[T1:[0-9]+]], $7, $[[T0]] + ; GP32: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]] + ; GP32: lw $[[T3:[0-9]+]], 24($sp) + ; GP32: addu $[[T4:[0-9]+]], $[[T2]], $[[T3]] + ; GP32: addu $[[T5:[0-9]+]], $6, $[[T4]] + ; GP32: sltu $[[T6:[0-9]+]], $[[T5]], $[[T3]] + ; GP32: lw $[[T7:[0-9]+]], 20($sp) + ; GP32: addu $[[T8:[0-9]+]], $[[T6]], $[[T7]] + ; GP32: lw $[[T9:[0-9]+]], 16($sp) + ; GP32: addu $3, $5, $[[T8]] + ; GP32: sltu $[[T10:[0-9]+]], $3, $[[T7]] + ; GP32: addu $[[T11:[0-9]+]], $[[T10]], $[[T9]] + ; GP32: addu $2, $4, $[[T11]] + ; GP32: move $4, $[[T5]] + ; GP32: move $5, $[[T1]] - ; GP32-CMOV: lw $[[T0:[0-9]+]], 24($sp) - ; GP32-CMOV: addu $[[T1:[0-9]+]], $6, $[[T0]] - ; GP32-CMOV: lw $[[T2:[0-9]+]], 28($sp) - ; GP32-CMOV: addu $[[T3:[0-9]+]], $7, $[[T2]] - ; GP32-CMOV: sltu $[[T4:[0-9]+]], $[[T3]], $7 - ; GP32-CMOV: addu $[[T5:[0-9]+]], $[[T1]], $[[T4]] - ; GP32-CMOV: sltu $[[T6:[0-9]+]], $[[T5]], $6 - ; GP32-CMOV: xor $[[T7:[0-9]+]], $[[T5]], $6 - ; GP32-CMOV: movz $[[T8:[0-9]+]], $[[T4]], $[[T7]] - ; GP32-CMOV: lw $[[T9:[0-9]+]], 20($sp) - ; GP32-CMOV: addu $[[T10:[0-9]+]], $5, $[[T4]] - ; GP32-CMOV: addu $[[T11:[0-9]+]], $[[T10]], $[[T8]] - ; GP32-CMOV: lw $[[T12:[0-9]+]], 16($sp) - ; GP32-CMOV: sltu $[[T13:[0-9]+]], $[[T11]], $[[T10]] - ; GP32-CMOV: addu $[[T14:[0-9]+]], $4, $[[T12]] - ; GP32-CMOV: sltu $[[T15:[0-9]+]], $[[T10]], $5 - ; GP32-CMOV: addu $[[T16:[0-9]+]], $[[T14]], $[[T15]] - ; GP32-CMOV: addu 
$[[T17:[0-9]+]], $[[T16]], $[[T13]] - ; GP32-CMOV: move $4, $[[T5]] - ; GP32-CMOV: move $5, $[[T3]] + ; GP64: daddu $3, $5, $7 + ; GP64: sltu $[[T0:[0-9]+]], $3, $7 + ; GP64: daddu $[[T1:[0-9]+]], $[[T0]], $6 + ; GP64: daddu $2, $4, $[[T1]] - ; GP64: daddu $[[T0:[0-9]+]], $4, $6 - ; GP64: daddu $[[T1:[0-9]+]], $5, $7 - ; GP64: sltu $[[T2:[0-9]+]], $[[T1]], $5 - ; GP64-NOT-R2-R6: dsll $[[T3:[0-9]+]], $[[T2]], 32 - ; GP64-NOT-R2-R6: dsrl $[[T4:[0-9]+]], $[[T3]], 32 - ; GP64-R2-R6: dext $[[T4:[0-9]+]], $[[T2]], 0, 32 + ; MM32: lw $[[T0:[0-9]+]], 28($sp) + ; MM32: addu $[[T1:[0-9]+]], $7, $[[T0]] + ; MM32: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]] + ; MM32: lw $[[T3:[0-9]+]], 24($sp) + ; MM32: addu16 $[[T4:[0-9]+]], $[[T2]], $[[T3]] + ; MM32: addu16 $[[T5:[0-9]+]], $6, $[[T4]] + ; MM32: sltu $[[T6:[0-9]+]], $[[T5]], $[[T3]] + ; MM32: lw $[[T7:[0-9]+]], 20($sp) + ; MM32: addu16 $[[T8:[0-9]+]], $[[T6]], $[[T7]] + ; MM32: lw $[[T9:[0-9]+]], 16($sp) + ; MM32: addu16 $[[T10:[0-9]+]], $5, $[[T8]] + ; MM32: sltu $[[T11:[0-9]+]], $[[T10]], $[[T7]] + ; MM32: addu $[[T12:[0-9]+]], $[[T11]], $[[T9]] + ; MM32: addu16 $[[T13:[0-9]+]], $4, $[[T12]] + ; MM32: move $4, $[[T5]] + ; MM32: move $5, $[[T1]] - ; GP64: daddu $2, $[[T0]], $[[T4]] - - ; MMR3: move $[[T1:[0-9]+]], $5 - ; MMR3-DAG: lw $[[T2:[0-9]+]], 32($sp) - ; MMR3: addu16 $[[T3:[0-9]+]], $6, $[[T2]] - ; MMR3-DAG: lw $[[T4:[0-9]+]], 36($sp) - ; MMR3: addu16 $[[T5:[0-9]+]], $7, $[[T4]] - ; MMR3: sltu $[[T6:[0-9]+]], $[[T5]], $7 - ; MMR3: addu16 $[[T7:[0-9]+]], $[[T3]], $[[T6]] - ; MMR3: sltu $[[T8:[0-9]+]], $[[T7]], $6 - ; MMR3: xor $[[T9:[0-9]+]], $[[T7]], $6 - ; MMR3: movz $[[T8]], $[[T6]], $[[T9]] - ; MMR3: lw $[[T10:[0-9]+]], 28($sp) - ; MMR3: addu16 $[[T11:[0-9]+]], $[[T1]], $[[T10]] - ; MMR3: addu16 $[[T12:[0-9]+]], $[[T11]], $[[T8]] - ; MMR3: lw $[[T13:[0-9]+]], 24($sp) - ; MMR3: sltu $[[T14:[0-9]+]], $[[T12]], $[[T11]] - ; MMR3: addu16 $[[T15:[0-9]+]], $4, $[[T13]] - ; MMR3: sltu $[[T16:[0-9]+]], $[[T11]], $[[T1]] - ; MMR3: addu16 $[[T17:[0-9]+]], $[[T15]], $[[T16]] - ; MMR3: addu16 $2, $2, $[[T14]] - - ; MMR6: move $[[T1:[0-9]+]], $5 - ; MMR6: move $[[T2:[0-9]+]], $4 - ; MMR6: lw $[[T3:[0-9]+]], 32($sp) - ; MMR6: addu16 $[[T4:[0-9]+]], $6, $[[T3]] - ; MMR6: lw $[[T5:[0-9]+]], 36($sp) - ; MMR6: addu16 $[[T6:[0-9]+]], $7, $[[T5]] - ; MMR6: sltu $[[T7:[0-9]+]], $[[T6]], $7 - ; MMR6: addu16 $[[T8:[0-9]+]], $[[T4]], $7 - ; MMR6: sltu $[[T9:[0-9]+]], $[[T8]], $6 - ; MMR6: xor $[[T10:[0-9]+]], $[[T4]], $6 - ; MMR6: sltiu $[[T11:[0-9]+]], $[[T10]], 1 - ; MMR6: seleqz $[[T12:[0-9]+]], $[[T9]], $[[T11]] - ; MMR6: selnez $[[T13:[0-9]+]], $[[T7]], $[[T11]] - ; MMR6: lw $[[T14:[0-9]+]], 24($sp) - ; MMR6: or $[[T15:[0-9]+]], $[[T13]], $[[T12]] - ; MMR6: addu16 $[[T16:[0-9]+]], $[[T2]], $[[T14]] - ; MMR6: lw $[[T17:[0-9]+]], 28($sp) - ; MMR6: addu16 $[[T18:[0-9]+]], $[[T1]], $[[T17]] - ; MMR6: addu16 $[[T19:[0-9]+]], $[[T18]], $[[T15]] - ; MMR6: sltu $[[T20:[0-9]+]], $[[T18]], $[[T1]] - ; MMR6: sltu $[[T21:[0-9]+]], $[[T17]], $[[T18]] - ; MMR6: addu16 $2, $[[T16]], $[[T20]] - ; MMR6: addu16 $2, $[[T20]], $[[T21]] - - ; MM64: daddu $[[T0:[0-9]+]], $4, $6 ; MM64: daddu $3, $5, $7 - ; MM64: sltu $[[T1:[0-9]+]], $3, $5 - ; MM64: dsll $[[T2:[0-9]+]], $[[T1]], 32 - ; MM64: dsrl $[[T3:[0-9]+]], $[[T2]], 32 - ; MM64: daddu $2, $[[T0]], $[[T3]] + ; MM64: sltu $[[T0:[0-9]+]], $3, $7 + ; MM64: daddu $[[T1:[0-9]+]], $[[T0]], $6 + ; MM64: daddu $2, $4, $[[T1]] %r = add i128 %a, %b ret i128 %r @@ -308,16 +249,17 @@ define signext i32 @add_i32_4(i32 signext %a) { define 
signext i64 @add_i64_4(i64 signext %a) { ; ALL-LABEL: add_i64_4: - ; GP32: addiu $3, $5, 4 - ; GP32: sltu $[[T0:[0-9]+]], $3, $5 - ; GP32: addu $2, $4, $[[T0]] - - ; MM32: addiur2 $[[T1:[0-9]+]], $5, 4 - ; MM32: sltu $[[T2:[0-9]+]], $[[T1]], $5 - ; MM32: addu16 $2, $4, $[[T2]] + ; GP32: addiu $[[T0:[0-9]+]], $5, 4 + ; GP32: addiu $[[T1:[0-9]+]], $zero, 4 + ; GP32: sltu $[[T1]], $[[T0]], $[[T1]] + ; GP32: addu $2, $4, $[[T1]] ; GP64: daddiu $2, $4, 4 + ; MM32: addiu $[[T0:[0-9]+]], $5, 4 + ; MM32: li16 $[[T1:[0-9]+]], 4 + ; MM32: sltu $[[T2:[0-9]+]], $[[T0]], $[[T1]] + ; MM32: addu $2, $4, $[[T2]] ; MM64: daddiu $2, $4, 4 @@ -328,67 +270,38 @@ define signext i64 @add_i64_4(i64 signext %a) { define signext i128 @add_i128_4(i128 signext %a) { ; ALL-LABEL: add_i128_4: - ; PRE4: move $[[T0:[0-9]+]], $5 - ; PRE4: addiu $[[T1:[0-9]+]], $7, 4 - ; PRE4: sltu $[[T2:[0-9]+]], $[[T1]], $7 - ; PRE4: xori $[[T3:[0-9]+]], $[[T2]], 1 - ; PRE4: bnez $[[T3]], $BB[[BB0:[0-9_]+]] - ; PRE4: addu $[[T4:[0-9]+]], $6, $[[T2]] - ; PRE4: sltu $[[T5:[0-9]+]], $[[T4]], $6 - ; PRE4; $BB[[BB0:[0-9]+]]: - ; PRE4: addu $[[T6:[0-9]+]], $[[T0]], $[[T5]] - ; PRE4: sltu $[[T7:[0-9]+]], $[[T6]], $[[T0]] - ; PRE4: addu $[[T8:[0-9]+]], $4, $[[T7]] - ; PRE4: move $4, $[[T4]] + ; GP32: addiu $[[T0:[0-9]+]], $7, 4 + ; GP32: addiu $[[T1:[0-9]+]], $zero, 4 + ; GP32: sltu $[[T1]], $[[T0]], $[[T1]] + ; GP32: addu $[[T2:[0-9]+]], $6, $[[T1]] + ; GP32: sltu $[[T1]], $[[T2]], $zero + ; GP32: addu $[[T3:[0-9]+]], $5, $[[T1]] + ; GP32: sltu $[[T1]], $[[T3]], $zero + ; GP32: addu $[[T1]], $4, $[[T1]] + ; GP32: move $4, $[[T2]] + ; GP32: move $5, $[[T0]] - ; GP32-CMOV: addiu $[[T0:[0-9]+]], $7, 4 - ; GP32-CMOV: sltu $[[T1:[0-9]+]], $[[T0]], $7 - ; GP32-CMOV: addu $[[T2:[0-9]+]], $6, $[[T1]] - ; GP32-CMOV: sltu $[[T3:[0-9]+]], $[[T2]], $6 - ; GP32-CMOV: movz $[[T3]], $[[T1]], $[[T1]] - ; GP32-CMOV: addu $[[T4:[0-9]+]], $5, $[[T3]] - ; GP32-CMOV: sltu $[[T5:[0-9]+]], $[[T4]], $5 - ; GP32-CMOV: addu $[[T7:[0-9]+]], $4, $[[T5]] - ; GP32-CMOV: move $4, $[[T2]] - ; GP32-CMOV: move $5, $[[T0]] + ; GP64: daddiu $[[T0:[0-9]+]], $5, 4 + ; GP64: daddiu $[[T1:[0-9]+]], $zero, 4 + ; GP64: sltu $[[T1]], $[[T0]], $[[T1]] + ; GP64: daddu $2, $4, $[[T1]] - ; GP64: daddiu $[[T0:[0-9]+]], $5, 4 - ; GP64: sltu $[[T1:[0-9]+]], $[[T0]], $5 - ; GP64-NOT-R2-R6: dsll $[[T2:[0-9]+]], $[[T1]], 32 - ; GP64-NOT-R2-R6: dsrl $[[T3:[0-9]+]], $[[T2]], 32 - ; GP64-R2-R6: dext $[[T3:[0-9]+]], $[[T1]], 0, 32 - - ; GP64: daddu $2, $4, $[[T3]] - - ; MMR3: addiur2 $[[T0:[0-9]+]], $7, 4 - ; MMR3: sltu $[[T1:[0-9]+]], $[[T0]], $7 - ; MMR3: sltu $[[T2:[0-9]+]], $[[T0]], $7 - ; MMR3: addu16 $[[T3:[0-9]+]], $6, $[[T2]] - ; MMR3: sltu $[[T4:[0-9]+]], $[[T3]], $6 - ; MMR3: movz $[[T4]], $[[T2]], $[[T1]] - ; MMR3: addu16 $[[T6:[0-9]+]], $5, $[[T4]] - ; MMR3: sltu $[[T7:[0-9]+]], $[[T6]], $5 - ; MMR3: addu16 $2, $4, $[[T7]] - - ; MMR6: addiur2 $[[T1:[0-9]+]], $7, 4 - ; MMR6: sltu $[[T2:[0-9]+]], $[[T1]], $7 - ; MMR6: xori $[[T3:[0-9]+]], $[[T2]], 1 - ; MMR6: selnez $[[T4:[0-9]+]], $[[T2]], $[[T3]] - ; MMR6: addu16 $[[T5:[0-9]+]], $6, $[[T2]] - ; MMR6: sltu $[[T6:[0-9]+]], $[[T5]], $6 - ; MMR6: seleqz $[[T7:[0-9]+]], $[[T6]], $[[T3]] - ; MMR6: or $[[T8:[0-9]+]], $[[T4]], $[[T7]] - ; MMR6: addu16 $[[T9:[0-9]+]], $5, $[[T8]] - ; MMR6: sltu $[[T10:[0-9]+]], $[[T9]], $5 - ; MMR6: addu16 $[[T11:[0-9]+]], $4, $[[T10]] - ; MMR6: move $4, $7 - ; MMR6: move $5, $[[T1]] + ; MM32: addiu $[[T0:[0-9]+]], $7, 4 + ; MM32: li16 $[[T1:[0-9]+]], 4 + ; MM32: sltu $[[T1]], $[[T0]], $[[T1]] + ; MM32: addu16 
$[[T2:[0-9]+]], $6, $[[T1]] + ; MM32: li16 $[[T1]], 0 + ; MM32: sltu $[[T3:[0-9]+]], $[[T2]], $[[T1]] + ; MM32: addu16 $[[T3]], $5, $[[T3]] + ; MM32: sltu $[[T1]], $[[T3]], $[[T1]] + ; MM32: addu16 $[[T1]], $4, $[[T1]] + ; MM32: move $4, $[[T2]] + ; MM32: move $5, $[[T0]] ; MM64: daddiu $[[T0:[0-9]+]], $5, 4 - ; MM64: sltu $[[T1:[0-9]+]], $[[T0]], $5 - ; MM64: dsll $[[T2:[0-9]+]], $[[T1]], 32 - ; MM64: dsrl $[[T3:[0-9]+]], $[[T2]], 32 - ; MM64: daddu $2, $4, $[[T3]] + ; MM64: daddiu $[[T1:[0-9]+]], $zero, 4 + ; MM64: sltu $[[T1]], $[[T0]], $[[T1]] + ; MM64: daddu $2, $4, $[[T1]] %r = add i128 4, %a ret i128 %r @@ -467,15 +380,16 @@ define signext i64 @add_i64_3(i64 signext %a) { ; ALL-LABEL: add_i64_3: ; GP32: addiu $[[T0:[0-9]+]], $5, 3 - ; GP32: sltu $[[T1:[0-9]+]], $[[T0]], $5 + ; GP32: addiu $[[T1:[0-9]+]], $zero, 3 + ; GP32: sltu $[[T1]], $[[T0]], $[[T1]] ; GP32: addu $2, $4, $[[T1]] ; GP64: daddiu $2, $4, 3 - ; MM32: move $[[T1:[0-9]+]], $5 - ; MM32: addius5 $[[T1]], 3 - ; MM32: sltu $[[T2:[0-9]+]], $[[T1]], $5 - ; MM32: addu16 $2, $4, $[[T2]] + ; MM32: addiu $[[T0:[0-9]+]], $5, 3 + ; MM32: li16 $[[T1:[0-9]+]], 3 + ; MM32: sltu $[[T2:[0-9]+]], $[[T0]], $[[T1]] + ; MM32: addu $2, $4, $[[T2]] ; MM64: daddiu $2, $4, 3 @@ -486,70 +400,38 @@ define signext i64 @add_i64_3(i64 signext %a) { define signext i128 @add_i128_3(i128 signext %a) { ; ALL-LABEL: add_i128_3: - ; PRE4: move $[[T0:[0-9]+]], $5 - ; PRE4: addiu $[[T1:[0-9]+]], $7, 3 - ; PRE4: sltu $[[T2:[0-9]+]], $[[T1]], $7 - ; PRE4: xori $[[T3:[0-9]+]], $[[T2]], 1 - ; PRE4: bnez $[[T3]], $BB[[BB0:[0-9_]+]] - ; PRE4: addu $[[T4:[0-9]+]], $6, $[[T2]] - ; PRE4: sltu $[[T5:[0-9]+]], $[[T4]], $6 - ; PRE4; $BB[[BB0:[0-9]+]]: - ; PRE4: addu $[[T6:[0-9]+]], $[[T0]], $[[T5]] - ; PRE4: sltu $[[T7:[0-9]+]], $[[T6]], $[[T0]] - ; PRE4: addu $[[T8:[0-9]+]], $4, $[[T7]] - ; PRE4: move $4, $[[T4]] + ; GP32: addiu $[[T0:[0-9]+]], $7, 3 + ; GP32: addiu $[[T1:[0-9]+]], $zero, 3 + ; GP32: sltu $[[T1]], $[[T0]], $[[T1]] + ; GP32: addu $[[T2:[0-9]+]], $6, $[[T1]] + ; GP32: sltu $[[T3:[0-9]+]], $[[T2]], $zero + ; GP32: addu $[[T4:[0-9]+]], $5, $[[T3]] + ; GP32: sltu $[[T5:[0-9]+]], $[[T4]], $zero + ; GP32: addu $[[T5]], $4, $[[T5]] + ; GP32: move $4, $[[T2]] + ; GP32: move $5, $[[T0]] - ; GP32-CMOV: addiu $[[T0:[0-9]+]], $7, 3 - ; GP32-CMOV: sltu $[[T1:[0-9]+]], $[[T0]], $7 - ; GP32-CMOV: addu $[[T2:[0-9]+]], $6, $[[T1]] - ; GP32-CMOV: sltu $[[T3:[0-9]+]], $[[T2]], $6 - ; GP32-CMOV: movz $[[T3]], $[[T1]], $[[T1]] - ; GP32-CMOV: addu $[[T4:[0-9]+]], $5, $[[T3]] - ; GP32-CMOV: sltu $[[T5:[0-9]+]], $[[T4]], $5 - ; GP32-CMOV: addu $[[T7:[0-9]+]], $4, $[[T5]] - ; GP32-CMOV: move $4, $[[T2]] - ; GP32-CMOV: move $5, $[[T0]] + ; GP64: daddiu $[[T0:[0-9]+]], $5, 3 + ; GP64: daddiu $[[T1:[0-9]+]], $zero, 3 + ; GP64: sltu $[[T1]], $[[T0]], $[[T1]] + ; GP64: daddu $2, $4, $[[T1]] - ; GP64: daddiu $[[T0:[0-9]+]], $5, 3 - ; GP64: sltu $[[T1:[0-9]+]], $[[T0]], $5 - - ; GP64-NOT-R2-R6: dsll $[[T2:[0-9]+]], $[[T1]], 32 - ; GP64-NOT-R2-R6: dsrl $[[T3:[0-9]+]], $[[T2]], 32 - ; GP64-R2-R6: dext $[[T3:[0-9]+]], $[[T1]], 0, 32 - - ; GP64: daddu $2, $4, $[[T3]] - - ; MMR3: move $[[T1:[0-9]+]], $7 - ; MMR3: addius5 $[[T1]], 3 - ; MMR3: sltu $[[T2:[0-9]+]], $[[T1]], $7 - ; MMR3: sltu $[[T3:[0-9]+]], $[[T1]], $7 - ; MMR3: addu16 $[[T4:[0-9]+]], $6, $[[T3]] - ; MMR3: sltu $[[T5:[0-9]+]], $[[T4]], $6 - ; MMR3: movz $[[T5]], $[[T3]], $[[T2]] - ; MMR3: addu16 $[[T6:[0-9]+]], $5, $[[T5]] - ; MMR3: sltu $[[T7:[0-9]+]], $[[T6]], $5 - ; MMR3: addu16 $2, $4, $[[T7]] - - ; MMR6: move 
$[[T1:[0-9]+]], $7 - ; MMR6: addius5 $[[T1]], 3 - ; MMR6: sltu $[[T2:[0-9]+]], $[[T1]], $7 - ; MMR6: xori $[[T3:[0-9]+]], $[[T2]], 1 - ; MMR6: selnez $[[T4:[0-9]+]], $[[T2]], $[[T3]] - ; MMR6: addu16 $[[T5:[0-9]+]], $6, $[[T2]] - ; MMR6: sltu $[[T6:[0-9]+]], $[[T5]], $6 - ; MMR6: seleqz $[[T7:[0-9]+]], $[[T6]], $[[T3]] - ; MMR6: or $[[T8:[0-9]+]], $[[T4]], $[[T7]] - ; MMR6: addu16 $[[T9:[0-9]+]], $5, $[[T8]] - ; MMR6: sltu $[[T10:[0-9]+]], $[[T9]], $5 - ; MMR6: addu16 $[[T11:[0-9]+]], $4, $[[T10]] - ; MMR6: move $4, $[[T5]] - ; MMR6: move $5, $[[T1]] + ; MM32: addiu $[[T0:[0-9]+]], $7, 3 + ; MM32: li16 $[[T1:[0-9]+]], 3 + ; MM32: sltu $[[T1]], $[[T0]], $[[T1]] + ; MM32: addu16 $[[T2:[0-9]+]], $6, $[[T1]] + ; MM32: li16 $[[T3:[0-9]+]], 0 + ; MM32: sltu $[[T4:[0-9]+]], $[[T2]], $[[T3]] + ; MM32: addu16 $[[T4]], $5, $[[T4]] + ; MM32: sltu $[[T5:[0-9]+]], $[[T4]], $[[T3]] + ; MM32: addu16 $[[T5]], $4, $[[T5]] + ; MM32: move $4, $[[T2]] + ; MM32: move $5, $[[T0]] ; MM64: daddiu $[[T0:[0-9]+]], $5, 3 - ; MM64: sltu $[[T1:[0-9]+]], $[[T0]], $5 - ; MM64: dsll $[[T2:[0-9]+]], $[[T1]], 32 - ; MM64: dsrl $[[T3:[0-9]+]], $[[T2]], 32 - ; MM64: daddu $2, $4, $[[T3]] + ; MM64: daddiu $[[T1:[0-9]+]], $zero, 3 + ; MM64: sltu $[[T1]], $[[T0]], $[[T1]] + ; MM64: daddu $2, $4, $[[T1]] %r = add i128 3, %a ret i128 %r diff --git a/test/CodeGen/Mips/llvm-ir/sub.ll b/test/CodeGen/Mips/llvm-ir/sub.ll index 655addb10a6..a730063c552 100644 --- a/test/CodeGen/Mips/llvm-ir/sub.ll +++ b/test/CodeGen/Mips/llvm-ir/sub.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \ -; RUN: -check-prefixes=NOT-R2-R6,GP32,GP32-NOT-MM,NOT-MM,PRE4 +; RUN: -check-prefixes=NOT-R2-R6,GP32,GP32-NOT-MM,NOT-MM ; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \ ; RUN: -check-prefixes=NOT-R2-R6,GP32,GP32-NOT-MM,NOT-MM ; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \ @@ -11,25 +11,25 @@ ; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \ ; RUN: -check-prefixes=R2-R6,GP32,GP32-NOT-MM,NOT-MM ; RUN: llc < %s -march=mips -mcpu=mips32r3 -mattr=+micromips -verify-machineinstrs | FileCheck %s \ -; RUN: -check-prefixes=GP32-MM,GP32,MM32,MMR3 +; RUN: -check-prefixes=GP32-MM,GP32,MM ; RUN: llc < %s -march=mips -mcpu=mips32r6 -mattr=+micromips | FileCheck %s \ -; RUN: -check-prefixes=GP32-MM,GP32,MM32,MMR6 +; RUN: -check-prefixes=GP32-MM,GP32,MM ; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ -; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM,GP64-NOT-R2 +; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM ; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ -; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM,GP64-NOT-R2 +; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM ; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ -; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM,GP64-NOT-R2 +; RUN: -check-prefixes=NOT-R2-R6,GP64,NOT-MM ; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ -; RUN: -check-prefixes=R2-R6,GP64,NOT-MM,GP64-R2 +; RUN: -check-prefixes=R2-R6,GP64,NOT-MM ; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \ -; RUN: -check-prefixes=R2-R6,GP64,NOT-MM,GP64-R2 +; RUN: -check-prefixes=R2-R6,GP64,NOT-MM ; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \ -; RUN: -check-prefixes=R2-R6,GP64,NOT-MM,GP64-R2 +; RUN: -check-prefixes=R2-R6,GP64,NOT-MM ; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ -; RUN: -check-prefixes=R2-R6,GP64,NOT-MM,GP64-R2 +; RUN: -check-prefixes=R2-R6,GP64,NOT-MM ; RUN: llc < %s -march=mips64 -mcpu=mips64r6 -mattr=+micromips | FileCheck %s \ 
-; RUN: -check-prefixes=GP64,MM64 +; RUN: -check-prefixes=GP64,MM define signext i1 @sub_i1(i1 signext %a, i1 signext %b) { entry: @@ -100,15 +100,10 @@ define signext i64 @sub_i64(i64 signext %a, i64 signext %b) { entry: ; ALL-LABEL: sub_i64: - ; GP32-NOT-MM: sltu $[[T0:[0-9]+]], $5, $7 - ; GP32-NOT-MM: subu $2, $4, $6 - ; GP32-NOT-MM: subu $2, $2, $[[T0]] - ; GP32-NOT-MM: subu $3, $5, $7 - - ; MM32: sltu $[[T0:[0-9]+]], $5, $7 - ; MM32: subu16 $3, $4, $6 - ; MM32: subu16 $2, $3, $[[T0]] - ; MM32: subu16 $3, $5, $7 + ; GP32-NOT-MM subu $3, $5, $7 + ; GP32: sltu $[[T0:[0-9]+]], $5, $7 + ; GP32: addu $[[T1:[0-9]+]], $[[T0]], $6 + ; GP32: subu $2, $4, $[[T1]] ; GP64: dsubu $2, $4, $5 @@ -120,109 +115,42 @@ define signext i128 @sub_i128(i128 signext %a, i128 signext %b) { entry: ; ALL-LABEL: sub_i128: -; PRE4: lw $[[T0:[0-9]+]], 24($sp) -; PRE4: lw $[[T1:[0-9]+]], 28($sp) -; PRE4: sltu $[[T2:[0-9]+]], $7, $[[T1]] -; PRE4: xor $[[T3:[0-9]+]], $6, $[[T0]] -; PRE4: sltiu $[[T4:[0-9]+]], $[[T3]], 1 -; PRE4: bnez $[[T4]] -; PRE4: move $[[T5:[0-9]+]], $[[T2]] -; PRE4: sltu $[[T5]], $6, $[[T0]] + ; GP32-NOT-MM: lw $[[T0:[0-9]+]], 20($sp) + ; GP32-NOT-MM: sltu $[[T1:[0-9]+]], $5, $[[T0]] + ; GP32-NOT-MM: lw $[[T2:[0-9]+]], 16($sp) + ; GP32-NOT-MM: addu $[[T3:[0-9]+]], $[[T1]], $[[T2]] + ; GP32-NOT-MM: lw $[[T4:[0-9]+]], 24($sp) + ; GP32-NOT-MM: lw $[[T5:[0-9]+]], 28($sp) + ; GP32-NOT-MM: subu $[[T6:[0-9]+]], $7, $[[T5]] + ; GP32-NOT-MM: subu $2, $4, $[[T3]] + ; GP32-NOT-MM: sltu $[[T8:[0-9]+]], $6, $[[T4]] + ; GP32-NOT-MM: addu $[[T9:[0-9]+]], $[[T8]], $[[T0]] + ; GP32-NOT-MM: subu $3, $5, $[[T9]] + ; GP32-NOT-MM: sltu $[[T10:[0-9]+]], $7, $[[T5]] + ; GP32-NOT-MM: addu $[[T11:[0-9]+]], $[[T10]], $[[T4]] + ; GP32-NOT-MM: subu $4, $6, $[[T11]] + ; GP32-NOT-MM: move $5, $[[T6]] -; PRE4: lw $[[T6:[0-9]+]], 20($sp) -; PRE4: subu $[[T7:[0-9]+]], $5, $[[T6]] -; PRE4: subu $[[T8:[0-9]+]], $[[T7]], $[[T5]] -; PRE4: sltu $[[T9:[0-9]+]], $[[T7]], $[[T5]] -; PRE4: sltu $[[T10:[0-9]+]], $5, $[[T6]] -; PRE4: lw $[[T11:[0-9]+]], 16($sp) -; PRE4: subu $[[T12:[0-9]+]], $4, $[[T11]] -; PRE4: subu $[[T13:[0-9]+]], $[[T12]], $[[T10]] -; PRE4: subu $[[T14:[0-9]+]], $[[T13]], $[[T9]] -; PRE4: subu $[[T15:[0-9]+]], $6, $[[T0]] -; PRE4: subu $[[T16:[0-9]+]], $[[T15]], $[[T2]] -; PRE4: subu $5, $7, $[[T1]] + ; GP32-MM: lw $[[T0:[0-9]+]], 20($sp) + ; GP32-MM: sltu $[[T1:[0-9]+]], $[[T2:[0-9]+]], $[[T0]] + ; GP32-MM: lw $[[T3:[0-9]+]], 16($sp) + ; GP32-MM: addu $[[T3]], $[[T1]], $[[T3]] + ; GP32-MM: lw $[[T4:[0-9]+]], 24($sp) + ; GP32-MM: lw $[[T5:[0-9]+]], 28($sp) + ; GP32-MM: subu $[[T1]], $7, $[[T5]] + ; GP32-MM: subu16 $[[T3]], $[[T6:[0-9]+]], $[[T3]] + ; GP32-MM: sltu $[[T6]], $6, $[[T4]] + ; GP32-MM: addu16 $[[T0]], $[[T6]], $[[T0]] + ; GP32-MM: subu16 $[[T0]], $5, $[[T0]] + ; GP32-MM: sltu $[[T6]], $7, $[[T5]] + ; GP32-MM: addu $[[T6]], $[[T6]], $[[T4]] + ; GP32-MM: subu16 $[[T6]], $6, $[[T6]] + ; GP32-MM: move $[[T2]], $[[T1]] -; MMR3: lw $[[T1:[0-9]+]], 48($sp) -; MMR3: sltu $[[T2:[0-9]+]], $6, $[[T1]] -; MMR3: xor $[[T3:[0-9]+]], $6, $[[T1]] -; MMR3: lw $[[T4:[0-9]+]], 52($sp) -; MMR3: sltu $[[T5:[0-9]+]], $7, $[[T4]] -; MMR3: movz $[[T6:[0-9]+]], $[[T5]], $[[T3]] -; MMR3: lw $[[T7:[0-8]+]], 44($sp) -; MMR3: subu16 $[[T8:[0-9]+]], $5, $[[T7]] -; MMR3: subu16 $[[T9:[0-9]+]], $[[T8]], $[[T6]] -; MMR3: sltu $[[T10:[0-9]+]], $[[T8]], $[[T2]] -; MMR3: sltu $[[T11:[0-9]+]], $5, $[[T7]] -; MMR3: lw $[[T12:[0-9]+]], 40($sp) -; MMR3: lw $[[T13:[0-9]+]], 12($sp) -; MMR3: subu16 $[[T14:[0-9]+]], $[[T13]], $[[T12]] -; MMR3: 
subu16 $[[T15:[0-9]+]], $[[T14]], $[[T11]] -; MMR3: subu16 $[[T16:[0-9]+]], $[[T15]], $[[T10]] -; MMR3: subu16 $[[T17:[0-9]+]], $6, $[[T1]] -; MMR3: subu16 $[[T18:[0-9]+]], $[[T17]], $7 -; MMR3: lw $[[T19:[0-9]+]], 8($sp) -; MMR3: lw $[[T20:[0-9]+]], 0($sp) -; MMR3: subu16 $5, $[[T19]], $[[T20]] - -; MMR6: move $[[T0:[0-9]+]], $7 -; MMR6: sw $[[T0]], 8($sp) -; MMR6: move $[[T1:[0-9]+]], $5 -; MMR6: sw $4, 12($sp) -; MMR6: lw $[[T2:[0-9]+]], 48($sp) -; MMR6: sltu $[[T3:[0-9]+]], $6, $[[T2]] -; MMR6: xor $[[T4:[0-9]+]], $6, $[[T2]] -; MMR6: sltiu $[[T5:[0-9]+]], $[[T4]], 1 -; MMR6: seleqz $[[T6:[0-9]+]], $[[T3]], $[[T5]] -; MMR6: lw $[[T7:[0-9]+]], 52($sp) -; MMR6: sltu $[[T8:[0-9]+]], $[[T0]], $[[T7]] -; MMR6: selnez $[[T9:[0-9]+]], $[[T8]], $[[T5]] -; MMR6: or $[[T10:[0-9]+]], $[[T9]], $[[T6]] -; MMR6: lw $[[T11:[0-9]+]], 44($sp) -; MMR6: subu16 $[[T12:[0-9]+]], $[[T1]], $[[T11]] -; MMR6: subu16 $[[T13:[0-9]+]], $[[T12]], $[[T7]] -; MMR6: sltu $[[T16:[0-9]+]], $[[T12]], $[[T7]] -; MMR6: sltu $[[T17:[0-9]+]], $[[T1]], $[[T11]] -; MMR6: lw $[[T18:[0-9]+]], 40($sp) -; MMR6: lw $[[T19:[0-9]+]], 12($sp) -; MMR6: subu16 $[[T20:[0-9]+]], $[[T19]], $[[T18]] -; MMR6: subu16 $[[T21:[0-9]+]], $[[T20]], $[[T17]] -; MMR6: subu16 $[[T22:[0-9]+]], $[[T21]], $[[T16]] -; MMR6: subu16 $[[T23:[0-9]+]], $6, $[[T2]] -; MMR6: subu16 $4, $[[T23]], $5 -; MMR6: lw $[[T24:[0-9]+]], 8($sp) -; MMR6: lw $[[T25:[0-9]+]], 0($sp) -; MMR6: subu16 $5, $[[T24]], $[[T25]] -; MMR6: lw $3, 4($sp) - -; FIXME: The sltu, dsll, dsrl pattern here occurs when an i32 is zero -; extended to 64 bits. Fortunately slt(i)(u) actually gives an i1. -; These should be combined away. - -; GP64-NOT-R2: dsubu $1, $4, $6 -; GP64-NOT-R2: sltu $[[T0:[0-9]+]], $5, $7 -; GP64-NOT-R2: dsll $[[T1:[0-9]+]], $[[T0]], 32 -; GP64-NOT-R2: dsrl $[[T2:[0-9]+]], $[[T1]], 32 -; GP64-NOT-R2: dsubu $2, $1, $[[T2]] -; GP64-NOT-R2: dsubu $3, $5, $7 - -; FIXME: Likewise for the sltu, dext here. - -; GP64-R2: dsubu $1, $4, $6 -; GP64-R2: sltu $[[T0:[0-9]+]], $5, $7 -; GP64-R2: dext $[[T1:[0-9]+]], $[[T0]], 0, 32 -; GP64-R2: dsubu $2, $1, $[[T1]] -; GP64-R2: dsubu $3, $5, $7 - -; FIXME: Again, redundant sign extension. Also, microMIPSR6 has the -; dext instruction which should be used here. 
- -; MM64: dsubu $[[T0:[0-9]+]], $4, $6 -; MM64: sltu $[[T1:[0-9]+]], $5, $7 -; MM64: dsll $[[T2:[0-9]+]], $[[T1]], 32 -; MM64: dsrl $[[T3:[0-9]+]], $[[T2]], 32 -; MM64: dsubu $2, $[[T0]], $[[T3]] -; MM64: dsubu $3, $5, $7 -; MM64: jr $ra + ; GP64: dsubu $3, $5, $7 + ; GP64: sltu $[[T0:[0-9]+]], $5, $7 + ; GP64: daddu $[[T1:[0-9]+]], $[[T0]], $6 + ; GP64: dsubu $2, $4, $[[T1]] %r = sub i128 %a, %b ret i128 %r diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll index 3e1a2e8b970..7baba005a07 100644 --- a/test/CodeGen/Mips/madd-msub.ll +++ b/test/CodeGen/Mips/madd-msub.ll @@ -25,11 +25,11 @@ ; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}} ; 32R6-DAG: addu $[[T1:[0-9]+]], $[[T0]], $6 -; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]] -; 32R6-DAG: muh $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}} -; 32R6-DAG: sra $[[T4:[0-9]+]], $6, 31 -; 32R6-DAG: addu $[[T5:[0-9]+]], $[[T3]], $[[T4]] -; 32R6-DAG: addu $2, $[[T5]], $[[T2]] +; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $6 +; 32R6-DAG: sra $[[T3:[0-9]+]], $6, 31 +; 32R6-DAG: addu $[[T4:[0-9]+]], $[[T2]], $[[T3]] +; 32R6-DAG: muh $[[T5:[0-9]+]], ${{[45]}}, ${{[45]}} +; 32R6-DAG: addu $2, $[[T5]], $[[T4]] ; 64-DAG: sll $[[T0:[0-9]+]], $4, 0 ; 64-DAG: sll $[[T1:[0-9]+]], $5, 0 @@ -71,7 +71,7 @@ entry: ; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}} ; 32R6-DAG: addu $[[T1:[0-9]+]], $[[T0]], $6 -; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]] +; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $6 ; FIXME: There's a redundant move here. We should remove it ; 32R6-DAG: muhu $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}} ; 32R6-DAG: addu $2, $[[T3]], $[[T2]] @@ -109,10 +109,10 @@ entry: ; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}} ; 32R6-DAG: addu $[[T1:[0-9]+]], $[[T0]], $7 -; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $1 -; 32R6-DAG: muh $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}} -; 32R6-DAG: addu $[[T4:[0-9]+]], $[[T3]], $6 -; 32R6-DAG: addu $2, $[[T4]], $[[T2]] +; 32R6-DAG: sltu $[[T2:[0-9]+]], $[[T1]], $7 +; 32R6-DAG: addu $[[T4:[0-9]+]], $[[T2]], $6 +; 32R6-DAG: muh $[[T5:[0-9]+]], ${{[45]}}, ${{[45]}} +; 32R6-DAG: addu $2, $[[T5]], $[[T4]] ; 64-DAG: sll $[[T0:[0-9]+]], $4, 0 ; 64-DAG: sll $[[T1:[0-9]+]], $5, 0 @@ -134,17 +134,6 @@ entry: ret i64 %add } -; ALL-LABEL: madd4 -; ALL-NOT: madd ${{[0-9]+}}, ${{[0-9]+}} - -define i32 @madd4(i32 %a, i32 %b, i32 %c) { -entry: - %mul = mul nsw i32 %a, %b - %add = add nsw i32 %c, %mul - - ret i32 %add -} - ; ALL-LABEL: msub1: ; 32-DAG: sra $[[T0:[0-9]+]], $6, 31 @@ -159,13 +148,13 @@ entry: ; DSP-DAG: mfhi $2, $[[AC]] ; DSP-DAG: mflo $3, $[[AC]] -; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}} -; 32R6-DAG: sltu $[[T1:[0-9]+]], $6, $[[T0]] -; 32R6-DAG: muh $[[T2:[0-9]+]], ${{[45]}}, ${{[45]}} -; 32R6-DAG: sra $[[T3:[0-9]+]], $6, 31 -; 32R6-DAG: subu $[[T4:[0-9]+]], $[[T3]], $[[T2]] -; 32R6-DAG: subu $2, $[[T4]], $[[T1]] -; 32R6-DAG: subu $3, $6, $[[T0]] +; 32R6-DAG: muh $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}} +; 32R6-DAG: mul $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}} +; 32R6-DAG: sltu $[[T3:[0-9]+]], $6, $[[T1]] +; 32R6-DAG: addu $[[T4:[0-9]+]], $[[T3]], $[[T0]] +; 32R6-DAG: sra $[[T5:[0-9]+]], $6, 31 +; 32R6-DAG: subu $2, $[[T5]], $[[T4]] +; 32R6-DAG: subu $3, $6, $[[T1]] ; 64-DAG: sll $[[T0:[0-9]+]], $4, 0 ; 64-DAG: sll $[[T1:[0-9]+]], $5, 0 @@ -205,12 +194,13 @@ entry: ; DSP-DAG: mfhi $2, $[[AC]] ; DSP-DAG: mflo $3, $[[AC]] -; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}} -; 32R6-DAG: sltu $[[T1:[0-9]+]], $6, $[[T0]] -; 32R6-DAG: muhu $[[T2:[0-9]+]], ${{[45]}}, ${{[45]}} -; 32R6-DAG: negu 
$[[T3:[0-9]+]], $[[T2]] -; 32R6-DAG: subu $2, $[[T3]], $[[T1]] -; 32R6-DAG: subu $3, $6, $[[T0]] +; 32R6-DAG: muhu $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}} +; 32R6-DAG: mul $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}} + +; 32R6-DAG: sltu $[[T2:[0-9]+]], $6, $[[T1]] +; 32R6-DAG: addu $[[T3:[0-9]+]], $[[T2]], $[[T0]] +; 32R6-DAG: negu $2, $[[T3]] +; 32R6-DAG: subu $3, $6, $[[T1]] ; 64-DAG: d[[m:m]]ult $5, $4 ; 64-DAG: [[m]]flo $[[T0:[0-9]+]] @@ -244,12 +234,12 @@ entry: ; DSP-DAG: mfhi $2, $[[AC]] ; DSP-DAG: mflo $3, $[[AC]] -; 32R6-DAG: mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}} -; 32R6-DAG: sltu $[[T1:[0-9]+]], $7, $[[T0]] -; 32R6-DAG: muh $[[T2:[0-9]+]], ${{[45]}}, ${{[45]}} -; 32R6-DAG: subu $[[T3:[0-9]+]], $6, $[[T2]] -; 32R6-DAG: subu $2, $[[T3]], $[[T1]] -; 32R6-DAG: subu $3, $7, $[[T0]] +; 32R6-DAG: muh $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}} +; 32R6-DAG: mul $[[T1:[0-9]+]], ${{[45]}}, ${{[45]}} +; 32R6-DAG: sltu $[[T2:[0-9]+]], $7, $[[T1]] +; 32R6-DAG: addu $[[T3:[0-9]+]], $[[T2]], $[[T0]] +; 32R6-DAG: subu $2, $6, $[[T3]] +; 32R6-DAG: subu $3, $7, $[[T1]] ; 64-DAG: sll $[[T0:[0-9]+]], $4, 0 ; 64-DAG: sll $[[T1:[0-9]+]], $5, 0 @@ -270,14 +260,3 @@ entry: %sub = sub nsw i64 %c, %mul ret i64 %sub } - -; ALL-LABEL: msub4 -; ALL-NOT: msub ${{[0-9]+}}, ${{[0-9]+}} - -define i32 @msub4(i32 %a, i32 %b, i32 %c) { -entry: - %mul = mul nsw i32 %a, %b - %sub = sub nsw i32 %c, %mul - - ret i32 %sub -}
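
For completeness, a minimal sketch of the multiply-accumulate case that the post-legalization ADDE combine added in MipsSEISelLowering.cpp still matches. This mirrors madd1 from madd-msub.ll; the function name is illustrative and the sequence described in the comment is approximate:

define i64 @madd_example(i32 %a, i32 %b, i32 %c) {
entry:
  %a64 = sext i32 %a to i64
  %b64 = sext i32 %b to i64
  %mul = mul nsw i64 %a64, %b64
  %c64 = sext i32 %c to i64
  %add = add nsw i64 %c64, %mul
  ret i64 %add
}
; On MIPS32 (pre-R6), type legalization turns the i64 add into an ADDC/ADDE
; pair fed by SMUL_LOHI, and performADDECombine/selectMADD rewrite that into a
; single MipsISD::MAdd: roughly mtlo/mthi to seed the accumulator from %c, one
; madd for the multiply-accumulate, then mflo/mfhi to read the i64 result back.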