From 54f7f6d67fff9c4a6f866c6457594f15105c1416 Mon Sep 17 00:00:00 2001 From: Scott Michel Date: Tue, 6 Jan 2009 23:10:38 +0000 Subject: [PATCH] CellSPU: - Add preliminary support for v2i32; load/store generates the right code but there's a lot of work to be done to make this vector type operational. llvm-svn: 61829 --- lib/Target/CellSPU/SPUCallingConv.td | 13 ++++---- lib/Target/CellSPU/SPUISelLowering.cpp | 42 ++++++-------------------- lib/Target/CellSPU/SPUISelLowering.h | 2 -- lib/Target/CellSPU/SPUInstrInfo.td | 26 +++++++++++----- lib/Target/CellSPU/SPUNodes.td | 10 ------ lib/Target/CellSPU/SPURegisterInfo.td | 4 ++- lib/Target/CellSPU/SPUSubtarget.h | 2 +- 7 files changed, 38 insertions(+), 61 deletions(-) diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td index 5213e424523..10dc837d90b 100644 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ b/lib/Target/CellSPU/SPUCallingConv.td @@ -21,13 +21,14 @@ class CCIfSubtarget // Return-value convention for Cell SPU: Everything can be passed back via $3: def RetCC_SPU : CallingConv<[ - CCIfType<[i8], CCAssignToReg<[R3]>>, - CCIfType<[i16], CCAssignToReg<[R3]>>, - CCIfType<[i32], CCAssignToReg<[R3]>>, - CCIfType<[i64], CCAssignToReg<[R3]>>, - CCIfType<[i128], CCAssignToReg<[R3]>>, + CCIfType<[i8], CCAssignToReg<[R3]>>, + CCIfType<[i16], CCAssignToReg<[R3]>>, + CCIfType<[i32], CCAssignToReg<[R3]>>, + CCIfType<[i64], CCAssignToReg<[R3]>>, + CCIfType<[i128], CCAssignToReg<[R3]>>, CCIfType<[f32, f64], CCAssignToReg<[R3]>>, - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>> + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>, + CCIfType<[v2i32], CCAssignToReg<[R3]>> ]>; diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 7e63a872883..9dd98558509 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -327,6 +327,9 @@ 
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); + // "Odd size" vector classes that we're willing to support: + addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass); + for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; @@ -417,7 +420,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE"; node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED"; node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE"; - node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; } std::map::iterator i = node_names.find(Opcode); @@ -1029,8 +1031,7 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode(); } -static -SDValue +static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { CallSDNode *TheCall = cast(Op.getNode()); SDValue Chain = TheCall->getChain(); @@ -1618,6 +1619,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue T = DAG.getConstant(Value, VT.getVectorElementType()); return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T); } + case MVT::v2i32: { + unsigned int Value = SplatBits; + SDValue T = DAG.getConstant(Value, VT.getVectorElementType()); + return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T); + } case MVT::v2i64: { uint64_t val = SplatBits; uint32_t upper = uint32_t(val >> 32); @@ -2454,32 +2460,6 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { return SDValue(); } -//! Lower ISD::SETCC -/*! - Lower i64 condition code handling. 
- */ - -static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - SDValue lhs = Op.getOperand(0); - SDValue rhs = Op.getOperand(1); - SDValue condition = Op.getOperand(2); - - if (VT == MVT::i32 && lhs.getValueType() == MVT::i64) { - // Expand the i64 comparisons to what Cell can actually support, - // which is eq, ugt and sgt: -#if 0 - CondCodeSDNode *ccvalue = dyn_cast(condition); - - switch (ccvalue->get()) { - case - } -#endif - } - - return SDValue(); -} - //! Lower ISD::SELECT_CC /*! ISD::SELECT_CC can (generally) be implemented directly on the SPU using the @@ -2647,9 +2627,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); - - case ISD::SETCC: - return LowerSETCC(Op, DAG); } return SDValue(); @@ -2971,7 +2948,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case SPUISD::ROTBYTES_LEFT: case SPUISD::SELECT_MASK: case SPUISD::SELB: - case SPUISD::SEXT32TO64: #endif } } diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index feee6fd55c1..24b8f82ecbe 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -52,12 +52,10 @@ namespace llvm { ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI) SELB, ///< Select bits -> (b & mask) | (a & ~mask) - GATHER_BITS, ///< Gather bits from bytes/words/halfwords ADD_EXTENDED, ///< Add extended, with carry CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED SUB_EXTENDED, ///< Subtract extended, with borrow BORROW_GENERATE, ///< Borrow generate for SUB_EXTENDED - SEXT32TO64, ///< Sign-extended 32-bit const -> 64-bits LAST_SPUISD ///< Last user-defined instruction }; } diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 6a0fde398b8..b9956402d95 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ 
b/lib/Target/CellSPU/SPUInstrInfo.td @@ -71,6 +71,8 @@ let canFoldAsLoad = 1 in { def v4f32: LoadDFormVec; def v2f64: LoadDFormVec; + def v2i32: LoadDFormVec; + def r128: LoadDForm; def r64: LoadDForm; def r32: LoadDForm; @@ -103,6 +105,8 @@ let canFoldAsLoad = 1 in { def v4f32: LoadAFormVec; def v2f64: LoadAFormVec; + def v2i32: LoadAFormVec; + def r128: LoadAForm; def r64: LoadAForm; def r32: LoadAForm; @@ -135,6 +139,8 @@ let canFoldAsLoad = 1 in { def v4f32: LoadXFormVec; def v2f64: LoadXFormVec; + def v2i32: LoadXFormVec; + def r128: LoadXForm; def r64: LoadXForm; def r32: LoadXForm; @@ -183,6 +189,8 @@ multiclass StoreDForms def v4f32: StoreDFormVec; def v2f64: StoreDFormVec; + def v2i32: StoreDFormVec; + def r128: StoreDForm; def r64: StoreDForm; def r32: StoreDForm; @@ -213,6 +221,8 @@ multiclass StoreAForms def v4f32: StoreAFormVec; def v2f64: StoreAFormVec; + def v2i32: StoreAFormVec; + def r128: StoreAForm; def r64: StoreAForm; def r32: StoreAForm; @@ -245,6 +255,8 @@ multiclass StoreXForms def v4f32: StoreXFormVec; def v2f64: StoreXFormVec; + def v2i32: StoreXFormVec; + def r128: StoreXForm; def r64: StoreXForm; def r32: StoreXForm; @@ -1044,11 +1056,11 @@ class GBBInst pattern>: class GBBRegInst: GBBInst<(outs rclass:$rT), (ins VECREG:$rA), - [(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>; + [/* no pattern */]>; class GBBVecInst: GBBInst<(outs VECREG:$rT), (ins VECREG:$rA), - [(set (vectype VECREG:$rT), (SPUgatherbits (vectype VECREG:$rA)))]>; + [/* no pattern */]>; multiclass GatherBitsFromBytes { def v16i8_r32: GBBRegInst; @@ -1070,12 +1082,11 @@ class GBHInst pattern>: class GBHRegInst: GBHInst<(outs rclass:$rT), (ins VECREG:$rA), - [(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>; + [/* no pattern */]>; class GBHVecInst: GBHInst<(outs VECREG:$rT), (ins VECREG:$rA), - [(set (vectype VECREG:$rT), - (SPUgatherbits (vectype VECREG:$rA)))]>; + [/* no pattern */]>; multiclass GatherBitsHalfword { def v8i16_r32: GBHRegInst; @@ -1097,12 
+1108,11 @@ class GBInst pattern>: class GBRegInst: GBInst<(outs rclass:$rT), (ins VECREG:$rA), - [(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>; + [/* no pattern */]>; class GBVecInst: GBInst<(outs VECREG:$rT), (ins VECREG:$rA), - [(set (vectype VECREG:$rT), - (SPUgatherbits (vectype VECREG:$rA)))]>; + [/* no pattern */]>; multiclass GatherBitsWord { def v4i32_r32: GBRegInst; diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 0725d72e0f3..cae6023cd45 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -61,13 +61,6 @@ def SPUselb_type: SDTypeProfile<1, 3, [ def SPUvecshift_type: SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisInt<2>]>; -// SPU gather bits: -// This instruction looks at each vector (word|halfword|byte) slot's low bit -// and forms a mask in the low order bits of the first word's preferred slot. -def SPUgatherbits_type: SDTypeProfile<1, 1, [ - /* no type constraints defined */ -]>; - //===----------------------------------------------------------------------===// // Synthetic/pseudo-instructions //===----------------------------------------------------------------------===// @@ -115,9 +108,6 @@ def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>; // SPU select bits instruction def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>; -// SPU gather bits instruction: -def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>; - def SDTprefslot2vec: SDTypeProfile<1, 1, []>; def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td index 8ce6bc84e7a..bb88f2bf9a2 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.td +++ b/lib/Target/CellSPU/SPURegisterInfo.td @@ -393,7 +393,9 @@ def R8C : RegisterClass<"SPU", [i8], 128, } // The SPU's registers as vector registers: -def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128, +def 
VECREG : RegisterClass<"SPU", + [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64], + 128, [ /* volatile register */ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h index 51dd44c5f9d..695ec94fee2 100644 --- a/lib/Target/CellSPU/SPUSubtarget.h +++ b/lib/Target/CellSPU/SPUSubtarget.h @@ -85,7 +85,7 @@ namespace llvm { /// properties of this subtarget. const char *getTargetDataString() const { return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128" - "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v128:128:128" + "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128" "-s:128:128"; } };