
Add necessary 64-bit support so that the gcc frontend compiles (mostly). The
current issue is operand promotion for setcc/select, but it looks like the
fundamental 64-bit support is implemented for CellSPU.
llvm-svn: 51884
Scott Michel 2008-06-02 22:18:03 +00:00
parent 5a9c2a3434
commit 5323d58281
7 changed files with 402 additions and 141 deletions


@ -4441,11 +4441,20 @@ SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) {
AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
break;
}
case ISD::SELECT:
case ISD::SELECT: {
Tmp2 = PromoteOp(Node->getOperand(1)); // Legalize the op0
Tmp3 = PromoteOp(Node->getOperand(2)); // Legalize the op1
unsigned VT2 = Tmp2.getValueType();
assert(VT2 == Tmp3.getValueType()
&& "PromoteOp: Operands 2 and 3 ValueTypes don't match");
// Ensure that NVT is the same as the operands' value types, because we
// cannot assume that TLI.getSetCCValueType() is constant.
if (NVT != VT2)
NVT = VT2;
Result = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), Tmp2, Tmp3);
break;
}
case ISD::SELECT_CC:
Tmp2 = PromoteOp(Node->getOperand(2)); // True
Tmp3 = PromoteOp(Node->getOperand(3)); // False
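
A toy sketch (hypothetical names, not LLVM code) of the invariant the hunk above enforces: after promotion, a select's result type must match the common type of its two promoted value operands, whatever type (NVT) the legalizer started from.

#include <cassert>

// Toy illustration: the result type of a select follows its promoted
// value operands, which must themselves agree.
enum SimpleVT { i8, i16, i32, i64 };

struct Value {
  SimpleVT type;
  long long bits;
};

static Value selectOp(bool cond, Value trueVal, Value falseVal) {
  assert(trueVal.type == falseVal.type &&
         "select operands must share one promoted type");
  return cond ? trueVal : falseVal;  // the result type follows the operands
}

int main() {
  Value a{i64, 1}, b{i64, 2};
  assert(selectOp(true, a, b).type == i64);
  return 0;
}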


@ -219,8 +219,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Custom lower i32 multiplications
setOperationAction(ISD::MUL, MVT::i32, Custom);
// Need to custom handle (some) common i8 math ops
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SUB, MVT::i8, Custom);
setOperationAction(ISD::SUB, MVT::i64, Custom);
setOperationAction(ISD::MUL, MVT::i8, Custom);
// SPU does not have BSWAP, but it does support CTLZ for i32.
@ -238,7 +240,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::CTLZ , MVT::i32, Legal);
// SPU has a version of select that implements (a&~c)|(b|c), just like
// SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
setOperationAction(ISD::SELECT, MVT::i1, Promote);
setOperationAction(ISD::SELECT, MVT::i8, Legal);
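
The corrected comment above describes SPU's select-bits operation as (a & ~c) | (b & c). A minimal standalone sketch of that bitwise select; the function name is illustrative, not an LLVM or SPU API.

#include <cstdint>
#include <cassert>

// Bitwise select as described above: for each bit position, take the bit
// from 'b' where the mask bit is 1, otherwise take it from 'a'.
static uint32_t selb(uint32_t a, uint32_t b, uint32_t mask) {
  return (a & ~mask) | (b & mask);
}

int main() {
  // With mask 0xFFFF0000 the upper halfword comes from b, the lower from a.
  assert(selb(0x11112222u, 0xAAAABBBBu, 0xFFFF0000u) == 0xAAAA2222u);
  return 0;
}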
@ -427,8 +429,14 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
"SPUISD::ROTBYTES_LEFT_CHAINED";
node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
"SPUISD::ROTBYTES_LEFT_BITS";
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
@ -1706,33 +1714,33 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
}
for (int i = 0; i < 4; ++i) {
uint64_t val = 0;
for (int j = 0; j < 4; ++j) {
SDOperand V;
bool process_upper, process_lower;
uint64_t val = 0;
val <<= 8;
process_upper = (upper_special && (i & 1) == 0);
process_lower = (lower_special && (i & 1) == 1);
if (process_upper || process_lower) {
if ((process_upper && upper == 0)
|| (process_lower && lower == 0))
val = 0x80;
val |= 0x80;
else if ((process_upper && upper == 0xffffffff)
|| (process_lower && lower == 0xffffffff))
val = 0xc0;
val |= 0xc0;
else if ((process_upper && upper == 0x80000000)
|| (process_lower && lower == 0x80000000))
val = (j == 0 ? 0xe0 : 0x80);
val |= (j == 0 ? 0xe0 : 0x80);
} else
val = i * 4 + j + ((i & 1) * 16);
ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
val |= i * 4 + j + ((i & 1) * 16);
}
ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
}
return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size()));
}
}
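
The rewritten loop accumulates four shuffle-control bytes into each 32-bit constant, which is why the constants switch from MVT::i8 to MVT::i32 and the mask vector from v16i8 to v4i32. A small sketch of that packing, with a hypothetical helper name:

#include <cstdint>
#include <cassert>

// Pack four shuffle-control bytes into one 32-bit word, most significant
// byte first, mirroring the "val <<= 8; val |= ..." accumulation above.
static uint32_t packControlWord(const uint8_t bytes[4]) {
  uint32_t val = 0;
  for (int j = 0; j < 4; ++j) {
    val <<= 8;
    val |= bytes[j];
  }
  return val;
}

int main() {
  const uint8_t identityBytes4to7[4] = {0x04, 0x05, 0x06, 0x07};
  // The identity pattern for bytes 4..7 of the source becomes 0x04050607,
  // one of the constants used in the i64 ADD/SUB lowering below.
  assert(packControlWord(identityBytes4to7) == 0x04050607u);
  return 0;
}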
@ -1904,7 +1912,7 @@ static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
// b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
// c) Use SELB to select upper and lower halves from the intermediate results
//
// NOTE: We really want to move the FSMBI to earlier to actually get the
// NOTE: We really want to move the SELECT_MASK to earlier to actually get the
// dual-issue. This code does manage to do this, even if it's a little on
// the wacky side
case MVT::v8i16: {
@ -1918,7 +1926,7 @@ static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
SDOperand FSMBOp =
DAG.getCopyToReg(Chain, FSMBIreg,
DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
DAG.getConstant(0xcccc, MVT::i16)));
SDOperand HHProd =
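
The 0xcccc immediate fed to the select-mask node expands bit-by-bit into a byte mask, so the upper halfword of every word is taken from one operand and the lower halfword from the other. A hedged sketch of that expansion; the byte ordering chosen here is an assumption of the sketch:

#include <cstdint>
#include <cassert>

// Expand a 16-bit mask into 16 mask bytes the way a "form select mask for
// bytes" operation does: each bit becomes 0xFF or 0x00 for one byte lane.
// (Sketch only: byte 0 here corresponds to the most significant mask bit.)
static void formSelectMaskBytes(uint16_t mask, uint8_t out[16]) {
  for (int i = 0; i < 16; ++i)
    out[i] = (mask & (0x8000u >> i)) ? 0xFF : 0x00;
}

int main() {
  uint8_t m[16];
  formSelectMaskBytes(0xCCCC, m);
  // 0xCCCC = 1100 1100 ...: the first two bytes of every word are selected.
  assert(m[0] == 0xFF && m[1] == 0xFF && m[2] == 0x00 && m[3] == 0x00);
  return 0;
}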
@ -1962,7 +1970,7 @@ static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
SDOperand FSMBmask = DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
SDOperand FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
DAG.getConstant(0x2222, MVT::i16));
SDOperand LoProdParts =
@ -2293,6 +2301,64 @@ static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
DAG.getConstant(4, MVT::i32))));
}
case ISD::ADD: {
// Turn operands into vectors to satisfy type checking (shufb works on
// vectors)
SDOperand Op0 =
DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
SDOperand Op1 =
DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
SmallVector<SDOperand, 16> ShufBytes;
// Create the shuffle mask for "rotating" the carry up one register slot
// once the carry is generated.
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
SDOperand CarryGen =
DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
SDOperand ShiftedCarry =
DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
CarryGen, CarryGen,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size()));
return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
Op0, Op1, ShiftedCarry));
}
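
A scalar sketch (not the patch's code) of the carry-generate / add-extended split used for i64 ADD above: the carry out of the low 32-bit add feeds the high 32-bit add. The helper names are hypothetical, and the vector shuffle that moves the carry up one register slot is elided.

#include <cstdint>
#include <cassert>

// Carry out of a 32-bit add.
static uint32_t carryGenerate(uint32_t a, uint32_t b) {
  return (a + b < a) ? 1u : 0u;
}
// Add with carry in.
static uint32_t addExtended(uint32_t a, uint32_t b, uint32_t carryIn) {
  return a + b + (carryIn & 1u);
}
static uint64_t add64(uint64_t x, uint64_t y) {
  uint32_t xl = (uint32_t)x, xh = (uint32_t)(x >> 32);
  uint32_t yl = (uint32_t)y, yh = (uint32_t)(y >> 32);
  uint32_t carry = carryGenerate(xl, yl);   // CARRY_GENERATE on the low words
  uint32_t lo = xl + yl;                    // plain add for the low words
  uint32_t hi = addExtended(xh, yh, carry); // ADD_EXTENDED on the high words
  return ((uint64_t)hi << 32) | lo;
}

int main() {
  assert(add64(0x00000001FFFFFFFFull, 1) == 0x0000000200000000ull);
  return 0;
}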
case ISD::SUB: {
// Turn operands into vectors to satisfy type checking (shufb works on
// vectors)
SDOperand Op0 =
DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
SDOperand Op1 =
DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
SmallVector<SDOperand, 16> ShufBytes;
// Create the shuffle mask for "rotating" the borrow up one register slot
// once the borrow is generated.
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
SDOperand BorrowGen =
DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
SDOperand ShiftedBorrow =
DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
BorrowGen, BorrowGen,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size()));
return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
Op0, Op1, ShiftedBorrow));
}
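
The same structure for i64 SUB, with a borrow in place of a carry. Again a plain scalar sketch with hypothetical names; the exact borrow-bit convention of the SPU BG/SFX instructions is not reproduced here.

#include <cstdint>
#include <cassert>

// Borrow out of (a - b).
static uint32_t borrowGenerate(uint32_t a, uint32_t b) {
  return (a < b) ? 1u : 0u;
}
// Subtract with borrow in.
static uint32_t subExtended(uint32_t a, uint32_t b, uint32_t borrowIn) {
  return a - b - (borrowIn & 1u);
}
static uint64_t sub64(uint64_t x, uint64_t y) {
  uint32_t xl = (uint32_t)x, xh = (uint32_t)(x >> 32);
  uint32_t yl = (uint32_t)y, yh = (uint32_t)(y >> 32);
  uint32_t borrow = borrowGenerate(xl, yl);    // BORROW_GENERATE on the low words
  uint32_t lo = xl - yl;
  uint32_t hi = subExtended(xh, yh, borrow);   // SUB_EXTENDED on the high words
  return ((uint64_t)hi << 32) | lo;
}

int main() {
  assert(sub64(0x0000000200000000ull, 1) == 0x00000001FFFFFFFFull);
  return 0;
}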
case ISD::SHL: {
SDOperand ShiftAmt = Op.getOperand(1);
unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
@ -2301,7 +2367,7 @@ static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
DAG.getNode(SPUISD::SELB, VecVT,
Op0Vec,
DAG.getConstant(0, VecVT),
DAG.getNode(SPUISD::FSMBI, VecVT,
DAG.getNode(SPUISD::SELECT_MASK, VecVT,
DAG.getConstant(0xff00ULL, MVT::i16)));
SDOperand ShiftAmtBytes =
DAG.getNode(ISD::SRL, ShiftAmtVT,
@ -2337,6 +2403,43 @@ static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
Op0, ShiftAmtBytes),
ShiftAmtBits);
}
case ISD::SRA: {
// Promote Op0 to vector
SDOperand Op0 =
DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
SDOperand ShiftAmt = Op.getOperand(1);
unsigned ShiftVT = ShiftAmt.getValueType();
// Negate variable shift amounts
if (!isa<ConstantSDNode>(ShiftAmt)) {
ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
DAG.getConstant(0, ShiftVT), ShiftAmt);
}
SDOperand UpperHalfSign =
DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
Op0, DAG.getConstant(31, MVT::i32))));
SDOperand UpperHalfSignMask =
DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
SDOperand UpperLowerMask =
DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
DAG.getConstant(0xff00, MVT::i16));
SDOperand UpperLowerSelect =
DAG.getNode(SPUISD::SELB, MVT::v2i64,
UpperHalfSignMask, Op0, UpperLowerMask);
SDOperand RotateLeftBytes =
DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
UpperLowerSelect, ShiftAmt);
SDOperand RotateLeftBits =
DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
RotateLeftBytes, ShiftAmt);
return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
RotateLeftBits);
}
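
The SRA path above replicates the sign of the upper word to produce the fill bits, then rotates the selected value into place. A scalar sketch of the underlying identity, assuming a shift amount strictly between 0 and 64:

#include <cstdint>
#include <cassert>

// Arithmetic right shift built from a logical shift plus sign fill,
// the same idea the vector lowering implements (0 < amt < 64 assumed).
static uint64_t sra64(uint64_t x, unsigned amt) {
  // Replicate the sign bit of the doubleword across a full-width mask...
  uint64_t signFill = (x & 0x8000000000000000ull) ? ~0ull : 0ull;
  // ...then combine a logical shift of x with the shifted-in sign fill.
  return (x >> amt) | (signFill << (64 - amt));
}

int main() {
  assert(sra64(0xFFFFFFFF00000000ull, 32) == 0xFFFFFFFFFFFFFFFFull);
  assert(sra64(0x7FFFFFFF00000000ull, 32) == 0x000000007FFFFFFFull);
  return 0;
}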
}
return SDOperand();
@ -2567,17 +2670,19 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND:
case ISD::ADD:
case ISD::SUB:
case ISD::ROTR:
case ISD::ROTL:
case ISD::SRL:
case ISD::SHL:
case ISD::SRA:
case ISD::SRA: {
if (VT == MVT::i8)
return LowerI8Math(Op, DAG, Opc);
else if (VT == MVT::i64)
return LowerI64Math(Op, DAG, Opc);
break;
}
// Vector-related lowering.
case ISD::BUILD_VECTOR:
@ -2641,9 +2746,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
case ISD::ADD: {
SDOperand Op1 = N->getOperand(1);
if ((Op1.getOpcode() == ISD::Constant
|| Op1.getOpcode() == ISD::TargetConstant)
&& Op0.getOpcode() == SPUISD::IndirectAddr) {
if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
SDOperand Op01 = Op0.getOperand(1);
if (Op01.getOpcode() == ISD::Constant
|| Op01.getOpcode() == ISD::TargetConstant) {
@ -2662,8 +2765,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
Op0.getOperand(0), combinedConst);
}
} else if ((Op0.getOpcode() == ISD::Constant
|| Op0.getOpcode() == ISD::TargetConstant)
} else if (isa<ConstantSDNode>(Op0)
&& Op1.getOpcode() == SPUISD::IndirectAddr) {
SDOperand Op11 = Op1.getOperand(1);
if (Op11.getOpcode() == ISD::Constant
@ -2899,11 +3001,11 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
case SPUISD::ROTBYTES_RIGHT_S:
case SPUISD::ROTBYTES_LEFT:
case SPUISD::ROTBYTES_LEFT_CHAINED:
case FSMBI:
case SELB:
case FPInterp:
case FPRecipEst:
case SEXT32TO64:
case SPUISD::SELECT_MASK:
case SPUISD::SELB:
case SPUISD::FPInterp:
case SPUISD::FPRecipEst:
case SPUISD::SEXT32TO64:
#endif
}
}


@ -62,8 +62,13 @@ namespace llvm {
ROTBYTES_RIGHT_S, ///< Vector rotate right, by bytes, sign fill
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
ROTBYTES_LEFT_CHAINED, ///< Rotate bytes (loads -> ROTQBYI), with chain
FSMBI, ///< Form Select Mask for Bytes, Immediate
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
SELB, ///< Select bits -> (b & mask) | (a & ~mask)
ADD_EXTENDED, ///< Add extended, with carry
CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED
SUB_EXTENDED, ///< Subtract extended, with borrow
BORROW_GENERATE, ///< Borrow generate for SUB_EXTENDED
FPInterp, ///< Floating point interpolate
FPRecipEst, ///< Floating point reciprocal estimate
SEXT32TO64, ///< Sign-extended 32-bit const -> 64-bits


@ -469,7 +469,7 @@ class FSMBIVec<ValueType vectype>:
RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
"fsmbi\t$rT, $val",
SelectOp,
[(set (vectype VECREG:$rT), (SPUfsmbi (i16 immU16:$val)))]>;
[(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
multiclass FormSelectMaskBytesImm
{
@ -485,21 +485,37 @@ defm FSMBI : FormSelectMaskBytesImm;
def FSMB:
RRForm_1<0b01101101100, (outs VECREG:$rT), (ins R16C:$rA),
"fsmb\t$rT, $rA", SelectOp,
[(set (v16i8 VECREG:$rT), (SPUfsmbi R16C:$rA))]>;
[(set (v16i8 VECREG:$rT), (SPUselmask R16C:$rA))]>;
// fsmh: Form select mask for halfwords. N.B.: the input operand, $rA, is
// only 8 bits wide (even though it is passed in as 16 bits here)
def FSMH:
RRForm_1<0b10101101100, (outs VECREG:$rT), (ins R16C:$rA),
"fsmh\t$rT, $rA", SelectOp,
[(set (v8i16 VECREG:$rT), (SPUfsmbi R16C:$rA))]>;
[(set (v8i16 VECREG:$rT), (SPUselmask R16C:$rA))]>;
// fsm: Form select mask for words. Like the other fsm* instructions,
// only the lower 4 bits of $rA are significant.
def FSM:
RRForm_1<0b00101101100, (outs VECREG:$rT), (ins R16C:$rA),
"fsm\t$rT, $rA", SelectOp,
[(set (v4i32 VECREG:$rT), (SPUfsmbi R16C:$rA))]>;
class FSMInst<ValueType vectype, RegisterClass rclass>:
RRForm_1<0b00101101100, (outs VECREG:$rT), (ins rclass:$rA),
"fsm\t$rT, $rA",
SelectOp,
[(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
multiclass FormSelectMaskWord {
def r32 : FSMInst<v4i32, R32C>;
def r16 : FSMInst<v4i32, R16C>;
}
defm FSM : FormSelectMaskWord;
// Special case when used for i64 math operations
multiclass FormSelectMaskWord64 {
def r32 : FSMInst<v2i64, R32C>;
def r16 : FSMInst<v2i64, R16C>;
}
defm FSM64 : FormSelectMaskWord64;
//===----------------------------------------------------------------------===//
// Integer and Logical Operations:
@ -545,7 +561,7 @@ def Ar32:
def Ar8:
RRForm<0b00000011000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
"a\t$rT, $rA, $rB", IntegerOp,
[(set R8C:$rT, (add R8C:$rA, R8C:$rB))]>;
[/* no pattern */]>;
def AIvec:
RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
@ -600,42 +616,125 @@ def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
[(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
// ADDX: only available in vector form, doesn't match a pattern.
def ADDXvec:
RRForm<0b00000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
VECREG:$rCarry),
"addx\t$rT, $rA, $rB", IntegerOp,
[]>,
class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00000010110, OOL, IOL,
"addx\t$rT, $rA, $rB",
IntegerOp, pattern>;
class ADDXVecInst<ValueType vectype>:
ADDXInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
[(set (vectype VECREG:$rT),
(SPUaddx (vectype VECREG:$rA), (vectype VECREG:$rB),
(vectype VECREG:$rCarry)))]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
// CG: only available in vector form, doesn't match a pattern.
def CGvec:
RRForm<0b01000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
VECREG:$rCarry),
"cg\t$rT, $rA, $rB", IntegerOp,
[]>,
class ADDXRegInst<RegisterClass rclass>:
ADDXInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB, rclass:$rCarry),
[(set rclass:$rT,
(SPUaddx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
// SFX: only available in vector form, doesn't match a pattern
def SFXvec:
RRForm<0b10000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
VECREG:$rCarry),
"sfx\t$rT, $rA, $rB", IntegerOp,
[]>,
multiclass AddExtended {
def v2i64 : ADDXVecInst<v2i64>;
def v4i32 : ADDXVecInst<v4i32>;
def r64 : ADDXRegInst<R64C>;
def r32 : ADDXRegInst<R32C>;
}
defm ADDX : AddExtended;
// CG: Generate carry for add
class CGInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b01000011000, OOL, IOL,
"cg\t$rT, $rA, $rB",
IntegerOp, pattern>;
class CGVecInst<ValueType vectype>:
CGInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB),
[(set (vectype VECREG:$rT),
(SPUcarry_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
class CGRegInst<RegisterClass rclass>:
CGInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB),
[(set rclass:$rT,
(SPUcarry_gen rclass:$rA, rclass:$rB))]>;
multiclass CarryGenerate {
def v2i64 : CGVecInst<v2i64>;
def v4i32 : CGVecInst<v4i32>;
def r64 : CGRegInst<R64C>;
def r32 : CGRegInst<R32C>;
}
defm CG : CarryGenerate;
// SFX: Subtract from, extended. This is used in conjunction with BG to subtract
// with carry (borrow, in this case)
class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10000010110, OOL, IOL,
"sfx\t$rT, $rA, $rB",
IntegerOp, pattern>;
class SFXVecInst<ValueType vectype>:
SFXInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
[(set (vectype VECREG:$rT),
(SPUsubx (vectype VECREG:$rA), (vectype VECREG:$rB),
(vectype VECREG:$rCarry)))]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
class SFXRegInst<RegisterClass rclass>:
SFXInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB, rclass:$rCarry),
[(set rclass:$rT,
(SPUsubx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
multiclass SubtractExtended {
def v2i64 : SFXVecInst<v2i64>;
def v4i32 : SFXVecInst<v4i32>;
def r64 : SFXRegInst<R64C>;
def r32 : SFXRegInst<R32C>;
}
defm SFX : SubtractExtended;
// BG: only available in vector form, doesn't match a pattern.
def BGvec:
RRForm<0b01000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
VECREG:$rCarry),
"bg\t$rT, $rA, $rB", IntegerOp,
[]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
class BGInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b01000010000, OOL, IOL,
"bg\t$rT, $rA, $rB",
IntegerOp, pattern>;
// BGX: only available in vector form, doesn't match a pattern.
class BGVecInst<ValueType vectype>:
BGInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB),
[(set (vectype VECREG:$rT),
(SPUborrow_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
class BGRegInst<RegisterClass rclass>:
BGInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB),
[(set rclass:$rT,
(SPUborrow_gen rclass:$rA, rclass:$rB))]>;
multiclass BorrowGenerate {
def v4i32 : BGVecInst<v4i32>;
def v2i64 : BGVecInst<v2i64>;
def r64 : BGRegInst<R64C>;
def r32 : BGRegInst<R32C>;
}
defm BG : BorrowGenerate;
// BGX: Borrow generate, extended.
def BGXvec:
RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
VECREG:$rCarry),
@ -817,17 +916,17 @@ def CLZr32:
def CNTBv16i8:
RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
"cntb\t$rT, $rA", IntegerOp,
[(set (v16i8 VECREG:$rT), (SPUcntb_v16i8 (v16i8 VECREG:$rA)))]>;
[(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
def CNTBv8i16 :
RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
"cntb\t$rT, $rA", IntegerOp,
[(set (v8i16 VECREG:$rT), (SPUcntb_v8i16 (v8i16 VECREG:$rA)))]>;
[(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
def CNTBv4i32 :
RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
"cntb\t$rT, $rA", IntegerOp,
[(set (v4i32 VECREG:$rT), (SPUcntb_v4i32 (v4i32 VECREG:$rA)))]>;
[(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
// gbb: Gather all low order bits from each byte in $rA into a single 16-bit
// quantity stored into $rT
@ -869,31 +968,38 @@ def SUMB:
[]>;
// Sign extension operations:
def XSBHvec:
RRForm_1<0b01101101010, (outs VECREG:$rDst), (ins VECREG:$rSrc),
"xsbh\t$rDst, $rSrc", IntegerOp,
[(set (v8i16 VECREG:$rDst), (sext (v16i8 VECREG:$rSrc)))]>;
class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm_1<0b01101101010, OOL, IOL,
"xsbh\t$rDst, $rSrc",
IntegerOp, pattern>;
// Ordinary form for XSBH
def XSBHr16:
RRForm_1<0b01101101010, (outs R16C:$rDst), (ins R16C:$rSrc),
"xsbh\t$rDst, $rSrc", IntegerOp,
[(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
class XSBHVecInst<ValueType vectype>:
XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
[(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>;
class XSBHRegInst<RegisterClass rclass>:
XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
[(set rclass:$rDst, (sext_inreg rclass:$rSrc, i8))]>;
multiclass ExtendByteHalfword {
def v16i8: XSBHVecInst<v8i16>;
def r16: XSBHRegInst<R16C>;
// 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
// quantities to 32-bit quantities via a 32-bit register (see the sext 8->32
// pattern below). Intentionally doesn't match a pattern because we want the
// sext 8->32 pattern to do the work for us, namely because we need the extra
// XSHWr32.
def r32: XSBHRegInst<R32C>;
}
defm XSBH : ExtendByteHalfword;
// Sign-extend, but take an 8-bit register to a 16-bit register (not done as
// sext_inreg)
def XSBHr8:
RRForm_1<0b01101101010, (outs R16C:$rDst), (ins R8C:$rSrc),
"xsbh\t$rDst, $rSrc", IntegerOp,
[(set R16C:$rDst, (sext R8C:$rSrc))]>;
// 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
// quantities to 32-bit quantities via a 32-bit register (see the sext 8->32
// pattern below). Intentionally doesn't match a pattern because we want the
// sext 8->32 pattern to do the work for us, namely because we need the extra
// XSHWr32.
def XSBHr32:
RRForm_1<0b01101101010, (outs R32C:$rDst), (ins R32C:$rSrc),
"xsbh\t$rDst, $rSrc", IntegerOp,
[(set R32C:$rDst, (sext_inreg R32C:$rSrc, i8))]>;
XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
[(set R16C:$rDst, (sext R8C:$rSrc))]>;
// Sign extend halfwords to words:
def XSHWvec:
@ -1658,9 +1764,9 @@ class SHUFBVecInst<ValueType vectype>:
// It's this pattern that's probably the most useful, since SPUISelLowering
// methods create a v16i8 vector for $rC:
class SHUFBVecPat1<ValueType vectype, SPUInstr inst>:
class SHUFBVecPat1<ValueType vectype, ValueType masktype, SPUInstr inst>:
Pat<(SPUshuffle (vectype VECREG:$rA), (vectype VECREG:$rB),
(v16i8 VECREG:$rC)),
(masktype VECREG:$rC)),
(inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
multiclass ShuffleBytes
@ -1676,11 +1782,19 @@ multiclass ShuffleBytes
defm SHUFB : ShuffleBytes;
def : SHUFBVecPat1<v8i16, SHUFBv16i8>;
def : SHUFBVecPat1<v4i32, SHUFBv16i8>;
def : SHUFBVecPat1<v2i64, SHUFBv16i8>;
def : SHUFBVecPat1<v4f32, SHUFBv16i8>;
def : SHUFBVecPat1<v2f64, SHUFBv16i8>;
// Shuffle mask is a v16i8 vector
def : SHUFBVecPat1<v8i16, v16i8, SHUFBv16i8>;
def : SHUFBVecPat1<v4i32, v16i8, SHUFBv16i8>;
def : SHUFBVecPat1<v2i64, v16i8, SHUFBv16i8>;
def : SHUFBVecPat1<v4f32, v16i8, SHUFBv16i8>;
def : SHUFBVecPat1<v2f64, v16i8, SHUFBv16i8>;
// Shuffle mask is a v4i32 vector:
def : SHUFBVecPat1<v8i16, v4i32, SHUFBv4i32>;
def : SHUFBVecPat1<v4i32, v4i32, SHUFBv4i32>;
def : SHUFBVecPat1<v2i64, v4i32, SHUFBv4i32>;
def : SHUFBVecPat1<v4f32, v4i32, SHUFBv4i32>;
def : SHUFBVecPat1<v2f64, v4i32, SHUFBv4i32>;
//===----------------------------------------------------------------------===//
// Shift and rotate group:
@ -2079,10 +2193,24 @@ def : Pat<(SPUrotbytes_left_chained (v2i64 VECREG:$rA), (i16 uimm7:$val)),
(ROTQBYIv2i64 VECREG:$rA, uimm7:$val)>;
// See ROTQBY note above.
def ROTQBYBIvec:
RI7Form<0b00110011100, (outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
"rotqbybi\t$rT, $rA, $val", RotateShift,
[/* intrinsic */]>;
class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00110011100, OOL, IOL,
"rotqbybi\t$rT, $rA, $shift",
RotateShift, pattern>;
class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
[(set (vectype VECREG:$rT),
(SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
multiclass RotateQuadByBytesByBitshift {
def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
}
defm ROTQBYBI : RotateQuadByBytesByBitshift;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// See ROTQBY note above.
@ -2358,7 +2486,6 @@ multiclass RotateQuadBytesImm
defm ROTQMBYI : RotateQuadBytesImm;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Rotate right and mask by bit count
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@ -2545,25 +2672,28 @@ def : Pat<(sra R32C:$rA, R8C:$rB),
(ROTMAr32 R32C:$rA,
(SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
def ROTMAIv4i32:
RRForm<0b01011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
"rotmai\t$rT, $rA, $val", RotateShift,
[(set (v4i32 VECREG:$rT),
(SPUvec_sra VECREG:$rA, (i32 uimm7:$val)))]>;
class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b01011110000, OOL, IOL,
"rotmai\t$rT, $rA, $val",
RotateShift, pattern>;
def : Pat<(SPUvec_sra VECREG:$rA, (i16 uimm7:$val)),
(ROTMAIv4i32 VECREG:$rA, uimm7:$val)>;
class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
[(set (vectype VECREG:$rT),
(SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
def ROTMAIr32:
RRForm<0b01011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
"rotmai\t$rT, $rA, $val", RotateShift,
[(set R32C:$rT, (sra R32C:$rA, (i32 uimm7:$val)))]>;
class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>:
ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
[(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
def : Pat<(sra R32C:$rA, (i16 uimm7:$val)),
(ROTMAIr32 R32C:$rA, uimm7:$val)>;
multiclass RotateMaskAlgebraicImm {
def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>;
def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>;
def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>;
def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>;
}
def : Pat<(sra R32C:$rA, (i8 uimm7:$val)),
(ROTMAIr32 R32C:$rA, uimm7:$val)>;
defm ROTMAI : RotateMaskAlgebraicImm;
//===----------------------------------------------------------------------===//
// Branch and conditionals:


@ -36,29 +36,25 @@ def SDT_SPUshuffle : SDTypeProfile<1, 3, [
]>;
// Unary, binary v16i8 operator type constraints:
def SPUv16i8_unop: SDTypeProfile<1, 1, [
SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>]>;
def SPUv16i8_binop: SDTypeProfile<1, 2, [
SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
// Binary v8i16 operator type constraints:
def SPUv8i16_unop: SDTypeProfile<1, 1, [
SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>]>;
def SPUv8i16_binop: SDTypeProfile<1, 2, [
SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
// Binary v4i32 operator type constraints:
def SPUv4i32_unop: SDTypeProfile<1, 1, [
SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>]>;
def SPUv4i32_binop: SDTypeProfile<1, 2, [
SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
// FSMBI type constraints: There are several variations for the various
// Trinary operators, e.g., addx, carry generate
def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
]>;
// SELECT_MASK type constraints: There are several variations for the various
// vector types (this avoids having to bit_convert all over the place.)
def SPUfsmbi_type: SDTypeProfile<1, 1, [
def SPUselmask_type: SDTypeProfile<1, 1, [
SDTCisInt<1>
]>;
@ -74,10 +70,16 @@ def SPUvecshift_type: SDTypeProfile<1, 2, [
// Synthetic/pseudo-instructions
//===----------------------------------------------------------------------===//
/// Add extended, carry generate:
def SPUaddx : SDNode<"SPUISD::ADD_EXTENDED", SPUIntTrinaryOp, []>;
def SPUcarry_gen : SDNode<"SPUISD::CARRY_GENERATE", SDTIntBinOp, []>;
// Subtract extended, borrow generate
def SPUsubx : SDNode<"SPUISD::SUB_EXTENDED", SPUIntTrinaryOp, []>;
def SPUborrow_gen : SDNode<"SPUISD::BORROW_GENERATE", SDTIntBinOp, []>;
// SPU CNTB:
def SPUcntb_v16i8: SDNode<"SPUISD::CNTB", SPUv16i8_unop, []>;
def SPUcntb_v8i16: SDNode<"SPUISD::CNTB", SPUv8i16_unop, []>;
def SPUcntb_v4i32: SDNode<"SPUISD::CNTB", SPUv4i32_unop, []>;
def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
// SPUISelLowering.h):
@ -122,14 +124,23 @@ def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS",
def SPUrotbytes_right_sfill: SDNode<"SPUISD::ROTBYTES_RIGHT_S",
SPUvecshift_type, []>;
// Vector rotate left, bits shifted out of the left are rotated in on the right
def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
SPUvecshift_type, []>;
// Same as above, but the node also has a chain associated (used in loads and
// stores)
def SPUrotbytes_left_chained : SDNode<"SPUISD::ROTBYTES_LEFT_CHAINED",
SPUvecshift_type, [SDNPHasChain]>;
// Vector rotate left by bytes, but the count is given in bits and the SPU
// internally converts it to bytes (saves an instruction to mask off lower
// three bits)
def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
SPUvecshift_type>;
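
Per the comment above, the hardware derives the byte rotate count from a bit count, so the lowering never has to mask off the low three bits itself. A sketch of that derivation; wrapping at 16 bytes (one quadword) is an assumption of the sketch, not something stated here:

#include <cassert>

// Derive the byte rotate amount from a bit count: drop the low three bits
// (the within-byte part). Wrapping at 16 bytes is an assumption here.
static unsigned rotateBytesFromBitCount(unsigned bitCount) {
  return (bitCount >> 3) & 0xF;
}

int main() {
  assert(rotateBytesFromBitCount(24) == 3); // 24 bits -> 3 whole bytes
  assert(rotateBytesFromBitCount(7) == 0);  // sub-byte amounts are ignored
  return 0;
}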
// SPU form select mask for bytes, immediate
def SPUfsmbi: SDNode<"SPUISD::FSMBI", SPUfsmbi_type, []>;
def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
// SPU select bits instruction
def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;


@ -559,6 +559,10 @@ def rotNeg7imm_i16 : Operand<i16> {
let PrintMethod = "printROTNeg7Imm";
}
def rotNeg7imm_i8 : Operand<i8> {
let PrintMethod = "printROTNeg7Imm";
}
def target : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
}


@ -1,16 +1,16 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep lqa %t1.s | count 13
; RUN: grep il %t1.s | count 22
; RUN: grep shufb %t1.s | count 13
; RUN: grep 65520 %t1.s | count 1
; RUN: grep 43981 %t1.s | count 1
; RUN: grep 13702 %t1.s | count 1
; RUN: grep 81 %t1.s | count 2
; RUN: grep 28225 %t1.s | count 1
; RUN: grep 30720 %t1.s | count 1
; RUN: grep 192 %t1.s | count 32
; RUN: grep 128 %t1.s | count 30
; RUN: grep 224 %t1.s | count 2
; RUN: grep lqa %t1.s | count 13
; RUN: grep il %t1.s | count 22
; RUN: grep shufb %t1.s | count 13
; RUN: grep 65520 %t1.s | count 1
; RUN: grep 43981 %t1.s | count 1
; RUN: grep 13702 %t1.s | count 1
; RUN: grep 28225 %t1.s | count 1
; RUN: grep 30720 %t1.s | count 1
; RUN: grep 3233857728 %t1.s | count 8
; RUN: grep 2155905152 %t1.s | count 6
; RUN: grep 66051 %t1.s | count 7
; RUN: grep 471670303 %t1.s | count 11
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"