diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 258d807f701..608dde21444 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3294,6 +3294,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       LC = GetFPLibCall(VT, RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80,
                         RTLIB::POW_PPCF128);
       break;
+    case ISD::FDIV:
+      LC = GetFPLibCall(VT, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80,
+                        RTLIB::DIV_PPCF128);
+      break;
     default: break;
     }
     if (LC != RTLIB::UNKNOWN_LIBCALL) {
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
index 083867ff780..4783dd5d24e 100644
--- a/lib/Target/CellSPU/README.txt
+++ b/lib/Target/CellSPU/README.txt
@@ -8,7 +8,7 @@ Department in The Aerospace Corporation:
 - Mark Thomas (floating point instructions)
 - Michael AuYeung (intrinsics)
 - Chandler Carruth (LLVM expertise)
-- Nehal Desai (debugging, RoadRunner SPU expertise)
+- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
 
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
@@ -36,7 +36,7 @@ to add 'spu' to configure's --enable-targets option, e.g.:
 TODO:
 * Create a machine pass for performing dual-pipeline scheduling specifically
-  for CellSPU, handle inserting branch prediction instructions.
+  for CellSPU, and insert branch prediction instructions as needed.
 
 * i32 instructions:
@@ -48,20 +48,43 @@ TODO:
   * sign and zero extension: done
   * addition: done
   * subtraction: needed
-  * multiplication: work-in-progress
+  * multiplication: done
 
 * i128 support:
 
-  * zero extension: done
+  * zero extension, any extension: done
   * sign extension: needed
   * arithmetic operators (add, sub, mul, div): needed
+  * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed
 
-* Double floating point support
+  * or: done
 
-  This was started. "What's missing?" to be filled in.
+* f64 support
+
+  * Comparison operators:
+    SETOEQ unimplemented
+    SETOGT unimplemented
+    SETOGE unimplemented
+    SETOLT unimplemented
+    SETOLE unimplemented
+    SETONE unimplemented
+    SETO   done (lowered)
+    SETUO  done (lowered)
+    SETUEQ unimplemented
+    SETUGT unimplemented
+    SETUGE unimplemented
+    SETULT unimplemented
+    SETULE unimplemented
+    SETUNE unimplemented
+
+* LLVM vector support
+
+  * VSETCC needs to be implemented. It's pretty straightforward to code, but
+    still needs to be implemented.
 
 * Intrinsics
 
-  Lots of progress. "What's missing/incomplete?" to be filled in.
+  * spu.h intrinsics added but not tested. Need to have an operational
+    llvm-spu-gcc in order to write a unit test harness.
===-------------------------------------------------------------------------=== diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td index 6612901d411..3031fda5438 100644 --- a/lib/Target/CellSPU/SPU128InstrInfo.td +++ b/lib/Target/CellSPU/SPU128InstrInfo.td @@ -2,7 +2,6 @@ // // Cell SPU 128-bit operations // -// Primary author: Scott Michel (scottm@aero.org) //===----------------------------------------------------------------------===// // zext 32->128: Zero extend 32-bit to 128-bit @@ -20,3 +19,23 @@ def : Pat<(i128 (zext R16C:$rSrc)), // zext 8->128: Zero extend 8-bit to 128-bit def : Pat<(i128 (zext R8C:$rSrc)), (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>; + +// anyext 32->128: Zero extend 32-bit to 128-bit +def : Pat<(i128 (anyext R32C:$rSrc)), + (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>; + +// anyext 64->128: Zero extend 64-bit to 128-bit +def : Pat<(i128 (anyext R64C:$rSrc)), + (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>; + +// anyext 16->128: Zero extend 16-bit to 128-bit +def : Pat<(i128 (anyext R16C:$rSrc)), + (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>; + +// anyext 8->128: Zero extend 8-bit to 128-bit +def : Pat<(i128 (anyext R8C:$rSrc)), + (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>; + +// Shift left +def : Pat<(shl GPRC:$rA, R32C:$rB), + (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>; diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index cb8b48bc1fc..33298946c52 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -33,6 +33,13 @@ def SELBr64_cond: SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), [/* no pattern */]>; +// The generic i64 select pattern, which assumes that the comparison result +// is in a 32-bit register that contains a select mask pattern (i.e., gather +// bits result): + +def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue), + (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>; + // select the negative condition: class I64SELECTNegCond: Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse), @@ -43,13 +50,6 @@ class I64SETCCNegCond: Pat<(cond R64C:$rA, R64C:$rB), (XORIr32 compare.Fragment, -1)>; -// The generic i64 select pattern, which assumes that the comparison result -// is in a 32-bit register that contains a select mask pattern (i.e., gather -// bits result): - -def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue), - (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>; - //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ // The i64 seteq fragment that does the scalar->vector conversion and // comparison: @@ -331,8 +331,8 @@ class v2i64_highprod: (MPYHv4i32 v2i64_mul_bhi64.Fragment, v2i64_mul_ashlq4.Fragment), (Av4i32 - (MPYHv4i32 v2i64_mul_ashlq4.Fragment, - v2i64_mul_bhi64.Fragment), + (MPYHv4i32 v2i64_mul_ashlq4.Fragment, + v2i64_mul_bhi64.Fragment), (Av4i32 (MPYUv4i32 v2i64_mul_ashlq4.Fragment, v2i64_mul_bhi64.Fragment), @@ -381,3 +381,14 @@ def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), (v4i32 VECREG:$rCGmask)), v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB), (v4i32 VECREG:$rCGmask)>.Fragment>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// f64 comparisons +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +// selb instruction definition for i64. 
Note that the selection mask is +// a vector, produced by various forms of FSM: +def SELBf64_cond: + SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC), + [(set R64FP:$rT, + (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 816502d9f58..0fc7aec9906 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -685,26 +685,26 @@ SPUDAGToDAGISel::Select(SDValue Op) { break; case MVT::i32: shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32, - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x00010203, MVT::i32), - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x08090a0b, MVT::i32)); + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x00010203, MVT::i32), + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x08090a0b, MVT::i32)); break; case MVT::i16: shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32, - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x80800203, MVT::i32), - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x80800a0b, MVT::i32)); + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x80800203, MVT::i32), + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x80800a0b, MVT::i32)); break; case MVT::i8: shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32, - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x80808003, MVT::i32), - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x8080800b, MVT::i32)); + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x80808003, MVT::i32), + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x8080800b, MVT::i32)); break; } @@ -714,9 +714,9 @@ SPUDAGToDAGISel::Select(SDValue Op) { SDValue zextShuffle = CurDAG->getNode(SPUISD::SHUFB, OpVecVT, - SDValue(PromoteScalar, 0), - SDValue(PromoteScalar, 0), - SDValue(shufMaskLoad, 0)); + SDValue(PromoteScalar, 0), + SDValue(PromoteScalar, 0), + SDValue(shufMaskLoad, 0)); // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we // re-use it in the VEC2PREFSLOT selection without needing to explicitly @@ -742,6 +742,27 @@ SPUDAGToDAGISel::Select(SDValue Op) { SDNode *CGLoad = emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG)); + return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT, + Op.getOperand(0), Op.getOperand(1), + SDValue(CGLoad, 0))); + } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + SDNode *CGLoad = + emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG)); + + return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT, + Op.getOperand(0), Op.getOperand(1), + SDValue(CGLoad, 0))); + } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + SDNode *CGLoad = + emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG)); + + return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT, + Op.getOperand(0), Op.getOperand(1), + SDValue(CGLoad, 0))); + } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + SDNode *CGLoad = + emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG)); + return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT, Op.getOperand(0), Op.getOperand(1), SDValue(CGLoad, 0))); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 92bd92886c3..124f1a7536b 100644 --- 
a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -92,6 +92,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(true); + // Set RTLIB libcall names as used by SPU: + setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); + // Set up the SPU's register classes: addRegisterClass(MVT::i8, SPU::R8CRegisterClass); addRegisterClass(MVT::i16, SPU::R16CRegisterClass); @@ -183,6 +186,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + // Make sure that DAGCombine doesn't insert illegal 64-bit constants + setOperationAction(ISD::FABS, MVT::f64, Custom); + // SPU can do rotate right and left, so legalize it... but customize for i8 // because instructions don't exist. @@ -243,6 +249,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SETCC, MVT::i16, Legal); setOperationAction(ISD::SETCC, MVT::i32, Legal); setOperationAction(ISD::SETCC, MVT::i64, Legal); + setOperationAction(ISD::SETCC, MVT::f64, Custom); // Custom lower i128 -> i64 truncates setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); @@ -410,6 +417,9 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; + node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; + node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = + "SPUISD::ROTBYTES_LEFT_BITS"; node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER"; @@ -1552,12 +1562,9 @@ static bool isConstantSplat(const uint64_t Bits128[2], return false; // Can't be a splat if two pieces don't match. } -// If this is a case we can't handle, return null and let the default -// expansion code take care of it. If we CAN select this case, and if it -// selects to a single instruction, return Op. Otherwise, if we can codegen -// this case more efficiently than a constant pool load, lower it to the -// sequence of ops that should be used. -static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +//! Lower a BUILD_VECTOR instruction creatively: +SDValue +SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); // If this is a vector of constants or undefs, get the bits. A bit in // UndefBits is set if the corresponding element of the vector is an @@ -1575,6 +1582,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { switch (VT.getSimpleVT()) { default: + cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + << VT.getMVTString() + << "\n"; + abort(); + /*NOTREACHED*/ case MVT::v4f32: { uint32_t Value32 = SplatBits; assert(SplatSize == 4 @@ -2188,32 +2200,32 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, //! Generate the carry-generate shuffle mask. SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) { -SmallVector ShufBytes; + SmallVector ShufBytes; -// Create the shuffle mask for "rotating" the borrow up one register slot -// once the borrow is generated. 
-ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+  // Create the shuffle mask for "rotating" the borrow up one register slot
+  // once the borrow is generated.
+  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
 
-return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                   &ShufBytes[0], ShufBytes.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                     &ShufBytes[0], ShufBytes.size());
 }
 
 //! Generate the borrow-generate shuffle mask
 SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
-SmallVector ShufBytes;
+  SmallVector ShufBytes;
 
-// Create the shuffle mask for "rotating" the borrow up one register slot
-// once the borrow is generated.
-ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+  // Create the shuffle mask for "rotating" the borrow up one register slot
+  // once the borrow is generated.
+  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
 
-return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                   &ShufBytes[0], ShufBytes.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                     &ShufBytes[0], ShufBytes.size());
 }
 
 //! Lower byte immediate operations for v16i8 vectors:
@@ -2372,6 +2384,83 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
   return SDValue();
 }
 
+//! Lower ISD::FABS
+/*!
+  DAGCombine does the same basic reduction: convert the double to i64 and mask
+  off the sign bit. Unfortunately, DAGCombine inserts the i64 constant, which
+  CellSPU has to legalize. Hence, the custom lowering.
+ */
+
+static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
+  MVT OpVT = Op.getValueType();
+  MVT IntVT(MVT::i64);
+  SDValue Op0 = Op.getOperand(0);
+
+  assert(OpVT == MVT::f64 && "LowerFABS: expecting MVT::f64!\n");
+
+  SDValue iABS =
+    DAG.getNode(ISD::AND, IntVT,
+                DAG.getNode(ISD::BIT_CONVERT, IntVT, Op0),
+                DAG.getConstant(~IntVT.getIntegerVTSignBit(), IntVT));
+
+  return DAG.getNode(ISD::BIT_CONVERT, MVT::f64, iABS);
+}
+
+//! Lower ISD::SETCC
+/*!
+  This handles MVT::f64 (double floating point) condition lowering
+ */
+
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+                          const TargetLowering &TLI) {
+  SDValue lhs = Op.getOperand(0);
+  SDValue rhs = Op.getOperand(1);
+  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+  MVT lhsVT = lhs.getValueType();
+  SDValue posNaN = DAG.getConstant(0x7ff0000000000001ULL, MVT::i64);
+
+  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
+
+  switch (CC->get()) {
+  case ISD::SETOEQ:
+  case ISD::SETOGT:
+  case ISD::SETOGE:
+  case ISD::SETOLT:
+  case ISD::SETOLE:
+  case ISD::SETONE:
+    cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
+    abort();
+    break;
+  case ISD::SETO: {
+    SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
+    SDValue i64lhs =
+      DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
+
+    return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETLT);
+  }
+  case ISD::SETUO: {
+    SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
+    SDValue i64lhs =
+      DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
+
+    return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETGE);
+  }
+  case ISD::SETUEQ:
+  case ISD::SETUGT:
+  case ISD::SETUGE:
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETUNE:
+  default:
+    cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
+    abort();
+    break;
+  }
+
+  return SDValue();
+}
+
 //! Lower ISD::SELECT_CC
 /*!
   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
@@ -2501,9 +2590,12 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
     break;
   }
 
+  case ISD::FABS:
+    return LowerFABS(Op, DAG);
+
   // Vector-related lowering.
   case ISD::BUILD_VECTOR:
-    return LowerBUILD_VECTOR(Op, DAG);
+    return SPU::LowerBUILD_VECTOR(Op, DAG);
   case ISD::SCALAR_TO_VECTOR:
     return LowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
@@ -2530,6 +2622,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
   case ISD::SELECT_CC:
     return LowerSELECT_CC(Op, DAG, *this);
 
+  case ISD::SETCC:
+    return LowerSETCC(Op, DAG, *this);
+
   case ISD::TRUNCATE:
     return LowerTRUNCATE(Op, DAG);
   }
@@ -2656,8 +2751,8 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
     }
     case SPUISD::IndirectAddr: {
       if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
-        ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
-        if (CN->getZExtValue() == 0) {
+        ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+        if (CN != 0 && CN->getZExtValue() == 0) {
           // (SPUindirect (SPUaform <addr>, 0), 0) ->
           // (SPUaform <addr>, 0)
@@ -2736,7 +2831,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
       break;
     }
   }
-  
+
   // Otherwise, return unchanged.
#ifndef NDEBUG if (Result.getNode()) { @@ -2809,41 +2904,18 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, unsigned Depth ) const { #if 0 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8; -#endif switch (Op.getOpcode()) { default: // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); break; - -#if 0 case CALL: case SHUFB: case SHUFFLE_MASK: case CNTB: -#endif - - case SPUISD::PREFSLOT2VEC: { - SDValue Op0 = Op.getOperand(0); - MVT Op0VT = Op0.getValueType(); - unsigned Op0VTBits = Op0VT.getSizeInBits(); - uint64_t InMask = Op0VT.getIntegerVTBitMask(); - KnownZero |= APInt(Op0VTBits, ~InMask, false); - KnownOne |= APInt(Op0VTBits, InMask, false); - break; - } - + case SPUISD::PREFSLOT2VEC: case SPUISD::LDRESULT: - case SPUISD::VEC2PREFSLOT: { - MVT OpVT = Op.getValueType(); - unsigned OpVTBits = OpVT.getSizeInBits(); - uint64_t InMask = OpVT.getIntegerVTBitMask(); - KnownZero |= APInt(OpVTBits, ~InMask, false); - KnownOne |= APInt(OpVTBits, InMask, false); - break; - } - -#if 0 + case SPUISD::VEC2PREFSLOT: case SPUISD::SHLQUAD_L_BITS: case SPUISD::SHLQUAD_L_BYTES: case SPUISD::VEC_SHL: @@ -2854,8 +2926,8 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case SPUISD::ROTBYTES_LEFT: case SPUISD::SELECT_MASK: case SPUISD::SELB: -#endif } +#endif } unsigned diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index a98a8f6bbef..079f3ba69ef 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -61,7 +61,7 @@ namespace llvm { }; } - /// Predicates that are used for node matching: + //! Utility functions specific to CellSPU-only: namespace SPU { SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG, MVT ValueType); @@ -78,6 +78,7 @@ namespace llvm { SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM); + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); SDValue getBorrowGenerateShufMask(SelectionDAG &DAG); SDValue getCarryGenerateShufMask(SelectionDAG &DAG); diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 5802eb68fee..91d52facada 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -134,6 +134,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, case SPU::ORi64_v2i64: case SPU::ORf32_v4f32: case SPU::ORf64_v2f64: +/* case SPU::ORi128_r64: case SPU::ORi128_f64: case SPU::ORi128_r32: @@ -148,6 +149,8 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, case SPU::ORr16_i128: case SPU::ORr8_i128: case SPU::ORvec_i128: +*/ +/* case SPU::ORr16_r32: case SPU::ORr8_r32: case SPU::ORr32_r16: @@ -158,7 +161,11 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, case SPU::ORr64_r32: case SPU::ORr64_r16: case SPU::ORr64_r8: - { +*/ + case SPU::ORf32_r32: + case SPU::ORr32_f32: + case SPU::ORf64_r64: + case SPU::ORr64_f64: { assert(MI.getNumOperands() == 2 && MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index b639ec254a9..2834a1eb8d9 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1259,9 +1259,6 @@ multiclass BitwiseAnd def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), [/* Intentionally does not match a pattern */]>; - def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB), - [/* Intentionally does not match a pattern */]>; - // Could use v4i32, but won't for clarity def fabsvec: ANDInst<(outs 
VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* Intentionally does not match a pattern */]>; @@ -1408,12 +1405,12 @@ class ORRegInst: // These are effectively no-ops, but need to exist for proper type conversion // and type coercion. -class ORCvtForm +class ORCvtForm pattern = [/* no pattern */]> : SPUInstr { bits<7> RA; bits<7> RT; - let Pattern = [/* no pattern */]; + let Pattern = pattern; let Inst{0-10} = 0b10000010000; let Inst{11-17} = RA; @@ -1427,29 +1424,29 @@ class ORPromoteScalar: class ORExtractElt: ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>; -class ORCvtRegGPRC: - ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; +/* class ORCvtRegGPRC: + ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */ -class ORCvtVecGPRC: - ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; +/* class ORCvtVecGPRC: + ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; */ -class ORCvtGPRCReg: - ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; +/* class ORCvtGPRCReg: + ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */ -class ORCvtFormR32Reg: - ORCvtForm<(outs rclass:$rT), (ins R32C:$rA)>; +class ORCvtFormR32Reg pattern = [ ]>: + ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>; -class ORCvtFormRegR32: - ORCvtForm<(outs R32C:$rT), (ins rclass:$rA)>; +class ORCvtFormRegR32 pattern = [ ]>: + ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>; -class ORCvtFormR64Reg: - ORCvtForm<(outs rclass:$rT), (ins R64C:$rA)>; +class ORCvtFormR64Reg pattern = [ ]>: + ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>; -class ORCvtFormRegR64: - ORCvtForm<(outs R64C:$rT), (ins rclass:$rA)>; +class ORCvtFormRegR64 pattern = [ ]>: + ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>; -class ORCvtGPRCVec: - ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>; +/* class ORCvtGPRCVec: + ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>; */ multiclass BitwiseOr { @@ -1468,10 +1465,11 @@ multiclass BitwiseOr (v2f64 (bitconvert (or (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)))))]>; - def r64: ORRegInst; - def r32: ORRegInst; - def r16: ORRegInst; - def r8: ORRegInst; + def r128: ORRegInst; + def r64: ORRegInst; + def r32: ORRegInst; + def r16: ORRegInst; + def r8: ORRegInst; // OR instructions used to copy f32 and f64 registers. 
def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), @@ -1496,6 +1494,7 @@ multiclass BitwiseOr def f32_v4f32: ORExtractElt; def f64_v2f64: ORExtractElt; +/* // Conversion from GPRC to register def i128_r64: ORCvtRegGPRC; def i128_f64: ORCvtRegGPRC; @@ -1517,7 +1516,8 @@ multiclass BitwiseOr // Conversion from vector to GPRC def vec_i128: ORCvtGPRCVec; - +*/ +/* // Conversion from register to R32C: def r16_r32: ORCvtFormRegR32; def r8_r32: ORCvtFormRegR32; @@ -1535,6 +1535,18 @@ multiclass BitwiseOr def r64_r32: ORCvtFormRegR64; def r64_r16: ORCvtFormRegR64; def r64_r8: ORCvtFormRegR64; +*/ + + // bitconvert patterns: + def r32_f32: ORCvtFormR32Reg; + def f32_r32: ORCvtFormRegR32; + + def r64_f64: ORCvtFormR64Reg; + def f64_r64: ORCvtFormRegR64; } defm OR : BitwiseOr; @@ -1960,7 +1972,7 @@ multiclass SelectBits (v4f32 VECREG:$rB), (v4f32 VECREG:$rA)))]>; - // SELBr64_cond is defined further down, look for i64 comparisons + // SELBr64_cond is defined in SPU64InstrInfo.td def r32_cond: SELBRegCondInst; def f32_cond: SELBRegCondInst; def r16_cond: SELBRegCondInst; @@ -2146,14 +2158,6 @@ class SHLHVecInst: [(set (vectype VECREG:$rT), (SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>; -// $rB gets promoted to 32-bit register type when confronted with -// this llvm assembly code: -// -// define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) { -// %A = shl i16 %arg1, %arg2 -// ret i16 %A -// } - multiclass ShiftLeftHalfword { def v8i16: SHLHVecInst; @@ -2250,6 +2254,10 @@ class SHLQBIVecInst: [(set (vectype VECREG:$rT), (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>; +class SHLQBIRegInst: + SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [/* no pattern */]>; + multiclass ShiftLeftQuadByBits { def v16i8: SHLQBIVecInst; @@ -2258,6 +2266,8 @@ multiclass ShiftLeftQuadByBits def v4f32: SHLQBIVecInst; def v2i64: SHLQBIVecInst; def v2f64: SHLQBIVecInst; + + def r128: SHLQBIRegInst; } defm SHLQBI : ShiftLeftQuadByBits; @@ -2335,6 +2345,32 @@ multiclass ShiftLeftQuadBytesImm defm SHLQBYI : ShiftLeftQuadBytesImm; +class SHLQBYBIInst pattern>: + RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB", + RotateShift, pattern>; + +class SHLQBYBIVecInst: + SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* no pattern */]>; + +class SHLQBYBIRegInst: + SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [/* no pattern */]>; + +multiclass ShiftLeftQuadBytesBitCount +{ + def v16i8: SHLQBYBIVecInst; + def v8i16: SHLQBYBIVecInst; + def v4i32: SHLQBYBIVecInst; + def v4f32: SHLQBYBIVecInst; + def v2i64: SHLQBYBIVecInst; + def v2f64: SHLQBYBIVecInst; + + def r128: SHLQBYBIRegInst; +} + +defm SHLQBYBI : ShiftLeftQuadBytesBitCount; + //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ // Rotate halfword: //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ @@ -4285,13 +4321,6 @@ def : Pat<(fabs (v4f32 VECREG:$rA)), (ANDfabsvec (v4f32 VECREG:$rA), (v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>; -def : Pat<(fabs R64FP:$rA), - (ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>; - -def : Pat<(fabs (v2f64 VECREG:$rA)), - (ANDfabsvec (v2f64 VECREG:$rA), - (v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>; - //===----------------------------------------------------------------------===// // Hint for branch instructions: //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll index 045bb052989..70220a563d9 100644 --- 
a/test/CodeGen/CellSPU/fneg-fabs.ll
+++ b/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -1,9 +1,9 @@
 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep fsmbi %t1.s | count 3
+; RUN: grep fsmbi %t1.s | count 2
 ; RUN: grep 32768 %t1.s | count 2
 ; RUN: grep xor %t1.s | count 4
-; RUN: grep and %t1.s | count 5
-; RUN: grep andbi %t1.s | count 3
+; RUN: grep and %t1.s | count 4
+; RUN: grep andbi %t1.s | count 2
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
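
The f64 FABS and SETCC lowerings in SPUISelLowering.cpp above both reduce a double-precision
operation to integer arithmetic on the raw IEEE-754 bits: FABS clears the sign bit, and the
SETO/SETUO cases compare the bits of fabs(lhs) against 0x7ff0000000000001 to detect NaN. The
following standalone C++ sketch (not part of the patch; the helper names bitsOf, fabsViaMask,
and isOrdered are illustrative only) shows the same reductions in plain code.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <limits>

// Reinterpret a double as its raw IEEE-754 bit pattern (well defined via memcpy).
static uint64_t bitsOf(double d) {
  uint64_t b;
  std::memcpy(&b, &d, sizeof(b));
  return b;
}

// fabs as an integer AND: clear bit 63 (the sign bit), the same reduction
// LowerFABS performs with ISD::BIT_CONVERT followed by ISD::AND.
static double fabsViaMask(double d) {
  uint64_t b = bitsOf(d) & ~(UINT64_C(1) << 63);
  double r;
  std::memcpy(&r, &b, sizeof(r));
  return r;
}

// "Ordered" test (SETO): a double is not NaN iff the bits of |x| are strictly
// below 0x7FF0000000000001, since NaN has an all-ones exponent and a nonzero mantissa.
static bool isOrdered(double d) {
  return bitsOf(fabsViaMask(d)) < UINT64_C(0x7FF0000000000001);
}

int main() {
  assert(fabsViaMask(-2.5) == 2.5);
  assert(fabsViaMask(-0.0) == 0.0);
  assert(isOrdered(3.14));
  assert(isOrdered(std::numeric_limits<double>::infinity()));
  assert(!isOrdered(std::numeric_limits<double>::quiet_NaN()));
  return 0;
}

The 0x7FF0000000000001 threshold works because every non-NaN double, including infinity,
has absolute-value bits of at most 0x7FF0000000000000.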
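
The new ADD64_MARKER/SUB64_MARKER selection paths in SPUISelDAGToDAG.cpp, together with the
carry- and borrow-generate shuffle masks, implement 64-bit add and subtract word-wise in the
SPU's 32-bit vector slots. As a rough illustration of that scheme only (ordinary C++, not the
SPU instruction sequence; add64ViaWords is a hypothetical name), the sketch below adds two
64-bit values from 32-bit halves with an explicit carry.

#include <cassert>
#include <cstdint>

// Add two 64-bit integers using only 32-bit word additions plus an explicit carry,
// roughly the word-wise scheme the SPU code expresses with cg (carry generate),
// a shuffle that moves the carry into the high-word slot, and addx (add extended).
static uint64_t add64ViaWords(uint64_t a, uint64_t b) {
  uint32_t aLo = static_cast<uint32_t>(a), aHi = static_cast<uint32_t>(a >> 32);
  uint32_t bLo = static_cast<uint32_t>(b), bHi = static_cast<uint32_t>(b >> 32);

  uint32_t lo = aLo + bLo;
  uint32_t carry = (lo < aLo) ? 1 : 0;   // "carry generate" for the low word
  uint32_t hi = aHi + bHi + carry;       // "add extended" in the high word

  return (static_cast<uint64_t>(hi) << 32) | lo;
}

int main() {
  assert(add64ViaWords(0xFFFFFFFFULL, 1) == 0x100000000ULL);
  assert(add64ViaWords(0x123456789ABCDEF0ULL, 0x0FEDCBA987654321ULL) ==
         0x123456789ABCDEF0ULL + 0x0FEDCBA987654321ULL);
  return 0;
}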