mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
Forward progress: crtbegin.c now compiles successfully!
Fixed CellSPU's A-form (local store) address mode, so that all globals, externals, constant pool and jump table symbols are now wrapped within a SPUISD::AFormAddr pseudo-instruction. This now identifies all local store memory addresses, although it requires a bit of legerdemain during instruction selection to properly select loads to and stores from local store, properly generating "LQA" instructions. Also added mul_ops.ll test harness for exercising integer multiplication. llvm-svn: 46142
This commit is contained in:
parent
41717f6989
commit
506e61bad1
@ -159,16 +159,38 @@ namespace {
|
||||
int prefslot_byte; /// Byte offset of the "preferred" slot
|
||||
unsigned brcc_eq_ins; /// br_cc equal instruction
|
||||
unsigned brcc_neq_ins; /// br_cc not equal instruction
|
||||
unsigned load_aform; /// A-form load instruction for this VT
|
||||
unsigned store_aform; /// A-form store instruction for this VT
|
||||
};
|
||||
|
||||
const valtype_map_s valtype_map[] = {
|
||||
{ MVT::i1, 0, 3, 0, 0 },
|
||||
{ MVT::i8, 0, 3, 0, 0 },
|
||||
{ MVT::i16, SPU::ORHIr16, 2, SPU::BRHZ, SPU::BRHNZ },
|
||||
{ MVT::i32, SPU::ORIr32, 0, SPU::BRZ, SPU::BRNZ },
|
||||
{ MVT::i64, SPU::ORIr64, 0, 0, 0 },
|
||||
{ MVT::f32, 0, 0, 0, 0 },
|
||||
{ MVT::f64, 0, 0, 0, 0 }
|
||||
{ MVT::i1, 0, 3, 0, 0, 0,
|
||||
0 },
|
||||
{ MVT::i8, SPU::ORBIr8, 3, 0, 0, SPU::LQAr8,
|
||||
SPU::STQAr8 },
|
||||
{ MVT::i16, SPU::ORHIr16, 2, SPU::BRHZ, SPU::BRHNZ, SPU::LQAr16,
|
||||
SPU::STQAr16 },
|
||||
{ MVT::i32, SPU::ORIr32, 0, SPU::BRZ, SPU::BRNZ, SPU::LQAr32,
|
||||
SPU::STQAr32 },
|
||||
{ MVT::i64, SPU::ORIr64, 0, 0, 0, SPU::LQAr64,
|
||||
SPU::STQAr64 },
|
||||
{ MVT::f32, 0, 0, 0, 0, SPU::LQAf32,
|
||||
SPU::STQAf32 },
|
||||
{ MVT::f64, 0, 0, 0, 0, SPU::LQAf64,
|
||||
SPU::STQAf64 },
|
||||
// vector types... (sigh!)
|
||||
{ MVT::v16i8, 0, 0, 0, 0, SPU::LQAv16i8,
|
||||
SPU::STQAv16i8 },
|
||||
{ MVT::v8i16, 0, 0, 0, 0, SPU::LQAv8i16,
|
||||
SPU::STQAv8i16 },
|
||||
{ MVT::v4i32, 0, 0, 0, 0, SPU::LQAv4i32,
|
||||
SPU::STQAv4i32 },
|
||||
{ MVT::v2i64, 0, 0, 0, 0, SPU::LQAv2i64,
|
||||
SPU::STQAv2i64 },
|
||||
{ MVT::v4f32, 0, 0, 0, 0, SPU::LQAv4f32,
|
||||
SPU::STQAv4f32 },
|
||||
{ MVT::v2f64, 0, 0, 0, 0, SPU::LQAv2f64,
|
||||
SPU::STQAv2f64 },
|
||||
};
|
||||
|
||||
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
|
||||
@ -465,14 +487,6 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base,
|
||||
int32_t offset = (int32_t) CN->getSignExtended();
|
||||
unsigned Opc0 = Op0.getOpcode();
|
||||
|
||||
if ((offset & 0xf) != 0) {
|
||||
// Unaligned offset: punt and let X-form address handle it.
|
||||
// NOTE: This really doesn't have to be strictly 16-byte aligned,
|
||||
// since the load/store quadword instructions will implicitly
|
||||
// zero the lower 4 bits of the resulting address.
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Opc0 == ISD::FrameIndex) {
|
||||
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0);
|
||||
DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
|
||||
@ -506,7 +520,8 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base,
|
||||
const SDOperand Op0 = N.getOperand(0); // Frame index/base
|
||||
const SDOperand Op1 = N.getOperand(1); // Offset within base
|
||||
|
||||
if (Op0.getOpcode() != SPUISD::XFormAddr) {
|
||||
if (Op0.getOpcode() == ISD::Constant
|
||||
|| Op0.getOpcode() == ISD::TargetConstant) {
|
||||
ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
|
||||
assert(CN != 0 && "SelectDFormAddr/SPUISD::DFormAddr expecting constant");
|
||||
Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy);
|
||||
@ -523,6 +538,11 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base,
|
||||
Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
|
||||
return true;
|
||||
}
|
||||
} else if (Opc == SPUISD::LDRESULT) {
|
||||
// It's a load result dereference
|
||||
Base = CurDAG->getTargetConstant(0, PtrTy);
|
||||
Index = N.getOperand(0);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
@ -550,24 +570,9 @@ SPUDAGToDAGISel::SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base,
|
||||
if (Opc == ISD::ADD) {
|
||||
SDOperand N1 = N.getOperand(0);
|
||||
SDOperand N2 = N.getOperand(1);
|
||||
unsigned N1Opc = N1.getOpcode();
|
||||
unsigned N2Opc = N2.getOpcode();
|
||||
|
||||
if ((N1Opc == SPUISD::Hi && N2Opc == SPUISD::Lo)
|
||||
|| (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi)
|
||||
|| (N1Opc == SPUISD::XFormAddr)) {
|
||||
Base = N.getOperand(0);
|
||||
Index = N.getOperand(1);
|
||||
return true;
|
||||
} else {
|
||||
cerr << "SelectXFormAddr: Unhandled ADD operands:\n";
|
||||
N1.Val->dump();
|
||||
cerr << "\n";
|
||||
N2.Val->dump();
|
||||
cerr << "\n";
|
||||
abort();
|
||||
/*UNREACHED*/
|
||||
}
|
||||
Base = N.getOperand(0);
|
||||
Index = N.getOperand(1);
|
||||
return true;
|
||||
} else if (Opc == SPUISD::XFormAddr) {
|
||||
Base = N;
|
||||
Index = N.getOperand(1);
|
||||
@ -608,6 +613,62 @@ SPUDAGToDAGISel::SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base,
|
||||
return false;
|
||||
}
|
||||
|
||||
//! Emit load for A-form addresses
|
||||
/*
|
||||
*/
|
||||
SDNode *
|
||||
Emit_LOAD_AFormAddr(SDOperand Op, SelectionDAG &CurDAG, SPUDAGToDAGISel &ISel)
|
||||
{
|
||||
SDNode *Result;
|
||||
MVT::ValueType OpVT = Op.getValueType();
|
||||
SDOperand Chain = Op.getOperand(0);
|
||||
SDOperand Ptr = Op.getOperand(1);
|
||||
SDOperand PtrArg = Ptr.getOperand(0);
|
||||
SDOperand PtrOffs = Ptr.getOperand(1);
|
||||
const valtype_map_s *vtm = getValueTypeMapEntry(OpVT);
|
||||
|
||||
if (PtrOffs.getOpcode() == ISD::Constant) {
|
||||
ConstantSDNode *CN = cast<ConstantSDNode>(PtrOffs);
|
||||
MVT::ValueType PVT = PtrOffs.getValueType();
|
||||
PtrOffs = CurDAG.getTargetConstant(CN->getValue(), PVT);
|
||||
}
|
||||
ISel.AddToISelQueue(PtrArg);
|
||||
ISel.AddToISelQueue(PtrOffs);
|
||||
ISel.AddToISelQueue(Chain);
|
||||
Result = CurDAG.getTargetNode(vtm->load_aform, OpVT, MVT::Other, PtrArg, PtrOffs, Chain);
|
||||
Chain = SDOperand(Result, 1);
|
||||
return Result;
|
||||
}
|
||||
|
||||
//! Emit store for A-form addresses
|
||||
/*
|
||||
*/
|
||||
SDNode *
|
||||
Emit_STORE_AFormAddr(SDOperand Op, SelectionDAG &CurDAG, SPUDAGToDAGISel &ISel)
|
||||
{
|
||||
SDNode *Result;
|
||||
SDOperand Chain = Op.getOperand(0);
|
||||
SDOperand Val = Op.getOperand(1);
|
||||
SDOperand Ptr = Op.getOperand(2);
|
||||
SDOperand PtrArg = Ptr.getOperand(0);
|
||||
SDOperand PtrOffs = Ptr.getOperand(1);
|
||||
const valtype_map_s *vtm = getValueTypeMapEntry(Val.getValueType());
|
||||
|
||||
if (PtrOffs.getOpcode() == ISD::Constant) {
|
||||
ConstantSDNode *CN = cast<ConstantSDNode>(PtrOffs);
|
||||
MVT::ValueType PVT = PtrOffs.getValueType();
|
||||
PtrOffs = CurDAG.getTargetConstant(CN->getValue(), PVT);
|
||||
}
|
||||
ISel.AddToISelQueue(Val);
|
||||
ISel.AddToISelQueue(PtrArg);
|
||||
ISel.AddToISelQueue(PtrOffs);
|
||||
ISel.AddToISelQueue(Chain);
|
||||
SDOperand Ops[4] = { Val, PtrArg, PtrOffs, Chain };
|
||||
Result = CurDAG.getTargetNode(vtm->store_aform, MVT::Other, Ops, 4);
|
||||
Chain = SDOperand(Result, 1);
|
||||
return Result;
|
||||
}
|
||||
|
||||
//! Convert the operand from a target-independent to a target-specific node
|
||||
/*!
|
||||
*/
|
||||
@ -615,6 +676,10 @@ SDNode *
|
||||
SPUDAGToDAGISel::Select(SDOperand Op) {
|
||||
SDNode *N = Op.Val;
|
||||
unsigned Opc = N->getOpcode();
|
||||
int n_ops = -1;
|
||||
unsigned NewOpc;
|
||||
MVT::ValueType OpVT = Op.getValueType();
|
||||
SDOperand Ops[8];
|
||||
|
||||
if (Opc >= ISD::BUILTIN_OP_END && Opc < SPUISD::FIRST_NUMBER) {
|
||||
return NULL; // Already selected.
|
||||
@ -626,9 +691,32 @@ SPUDAGToDAGISel::Select(SDOperand Op) {
|
||||
SDOperand TFI = CurDAG->getTargetFrameIndex(FI, PtrVT);
|
||||
|
||||
DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AI32 <FI>, 0\n");
|
||||
if (N->hasOneUse())
|
||||
return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI, Zero);
|
||||
CurDAG->getTargetNode(SPU::AIr32, Op.getValueType(), TFI, Zero);
|
||||
NewOpc = SPU::AIr32;
|
||||
Ops[0] = TFI;
|
||||
Ops[1] = Zero;
|
||||
n_ops = 2;
|
||||
} else if (Opc == ISD::LOAD
|
||||
&& Op.getOperand(1).getOpcode() == SPUISD::AFormAddr) {
|
||||
return Emit_LOAD_AFormAddr(Op, *CurDAG, *this);
|
||||
} else if (Opc == ISD::STORE
|
||||
&& Op.getOperand(2).getOpcode() == SPUISD::AFormAddr) {
|
||||
return Emit_STORE_AFormAddr(Op, *CurDAG, *this);
|
||||
} else if (Opc == ISD::ZERO_EXTEND) {
|
||||
// (zero_extend:i16 (and:i8 <arg>, <const>))
|
||||
const SDOperand &Op1 = N->getOperand(0);
|
||||
|
||||
if (Op.getValueType() == MVT::i16 && Op1.getValueType() == MVT::i8) {
|
||||
if (Op1.getOpcode() == ISD::AND) {
|
||||
// Fold this into a single ANDHI. This is often seen in expansions of i1
|
||||
// to i8, then i8 to i16 in logical/branching operations.
|
||||
DEBUG(cerr << "CellSPU: Coalescing (zero_extend:i16 (and:i8 "
|
||||
"<arg>, <const>))\n");
|
||||
NewOpc = SPU::ANDHI1To2;
|
||||
Ops[0] = Op1.getOperand(0);
|
||||
Ops[1] = Op1.getOperand(1);
|
||||
n_ops = 2;
|
||||
}
|
||||
}
|
||||
} else if (Opc == SPUISD::LDRESULT) {
|
||||
// Custom select instructions for LDRESULT
|
||||
unsigned VT = N->getValueType(0);
|
||||
@ -650,20 +738,54 @@ SPUDAGToDAGISel::Select(SDOperand Op) {
|
||||
Opc = vtm->ldresult_ins;
|
||||
|
||||
AddToISelQueue(Zero);
|
||||
Result = CurDAG->SelectNodeTo(N, Opc, VT, MVT::Other, Arg, Zero, Chain);
|
||||
Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Zero, Chain);
|
||||
} else {
|
||||
Result =
|
||||
CurDAG->SelectNodeTo(N, (VT == MVT::f32 ? SPU::ORf32 : SPU::ORf64),
|
||||
MVT::Other, Arg, Arg, Chain);
|
||||
Opc = (VT == MVT::f32 ? SPU::ORf32 : SPU::ORf64);
|
||||
Result = CurDAG->getTargetNode(Opc, MVT::Other, Arg, Arg, Chain);
|
||||
}
|
||||
|
||||
Chain = SDOperand(Result, 1);
|
||||
AddToISelQueue(Chain);
|
||||
|
||||
return Result;
|
||||
} else if (Opc == SPUISD::XFormAddr) {
|
||||
SDOperand Op0 = Op.getOperand(0);
|
||||
if (Op0.getOpcode() == SPUISD::LDRESULT
|
||||
|| Op0.getOpcode() == SPUISD::AFormAddr) {
|
||||
// (XFormAddr (LDRESULT|AFormAddr, imm))
|
||||
SDOperand Op1 = Op.getOperand(1);
|
||||
MVT::ValueType VT = Op.getValueType();
|
||||
|
||||
DEBUG(cerr << "CellSPU: XFormAddr("
|
||||
<< (Op0.getOpcode() == SPUISD::LDRESULT
|
||||
? "LDRESULT"
|
||||
: "AFormAddr")
|
||||
<< ", imm):\nOp0 = ");
|
||||
DEBUG(Op.getOperand(0).Val->dump(CurDAG));
|
||||
DEBUG(cerr << "\nOp1 = ");
|
||||
DEBUG(Op.getOperand(1).Val->dump(CurDAG));
|
||||
DEBUG(cerr << "\n");
|
||||
|
||||
if (Op1.getOpcode() == ISD::Constant) {
|
||||
ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
|
||||
Op1 = CurDAG->getTargetConstant(CN->getValue(), VT);
|
||||
}
|
||||
AddToISelQueue(Op0);
|
||||
AddToISelQueue(Op1);
|
||||
NewOpc = SPU::AIr32;
|
||||
Ops[0] = Op0;
|
||||
Ops[1] = Op1;
|
||||
n_ops = 2;
|
||||
}
|
||||
}
|
||||
|
||||
return SelectCode(Op);
|
||||
if (n_ops > 0) {
|
||||
if (N->hasOneUse())
|
||||
return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
|
||||
else
|
||||
return CurDAG->getTargetNode(NewOpc, OpVT, Ops, n_ops);
|
||||
} else
|
||||
return SelectCode(Op);
|
||||
}
|
||||
|
||||
/// createPPCISelDag - This pass converts a legalized DAG into a
|
||||
|
@ -100,6 +100,14 @@ namespace {
|
||||
|| Opc == ISD::TargetExternalSymbol
|
||||
|| Opc == SPUISD::AFormAddr);
|
||||
}
|
||||
|
||||
//! Predicate that returns true if the operand is an indirect target
|
||||
bool isIndirectOperand(const SDOperand &Op)
|
||||
{
|
||||
const unsigned Opc = Op.getOpcode();
|
||||
return (Opc == ISD::Register
|
||||
|| Opc == SPUISD::LDRESULT);
|
||||
}
|
||||
}
|
||||
|
||||
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
@ -126,7 +134,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
|
||||
|
||||
// SPU has no sign or zero extended loads for i1, i8, i16:
|
||||
setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
|
||||
setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
|
||||
setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
|
||||
setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
|
||||
setTruncStoreAction(MVT::i8, MVT::i1, Custom);
|
||||
@ -160,10 +168,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||
setOperationAction(ISD::STORE, sctype, Custom);
|
||||
}
|
||||
|
||||
// SPU supports BRCOND, although DAGCombine will convert BRCONDs
|
||||
// into BR_CCs. BR_CC instructions are custom selected in
|
||||
// SPUDAGToDAGISel.
|
||||
setOperationAction(ISD::BRCOND, MVT::Other, Legal);
|
||||
// Custom lower BRCOND for i1, i8 to "promote" the result to
|
||||
// i32 and i16, respectively.
|
||||
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
|
||||
|
||||
// Expand the jumptable branches
|
||||
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
|
||||
@ -472,7 +479,7 @@ AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
|
||||
SDOperand Op1 = basePtr.Val->getOperand(1);
|
||||
|
||||
if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
|
||||
const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.Val->getOperand(1));
|
||||
const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
|
||||
|
||||
alignOffs = (int) CN->getValue();
|
||||
prefSlotOffs = (int) (alignOffs & 0xf);
|
||||
@ -482,15 +489,13 @@ AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
|
||||
prefSlotOffs -= vtm->prefslot_byte;
|
||||
basePtr = basePtr.getOperand(0);
|
||||
|
||||
// Modify alignment, since the ADD is likely from getElementPtr:
|
||||
switch (basePtr.getOpcode()) {
|
||||
case ISD::GlobalAddress:
|
||||
case ISD::TargetGlobalAddress: {
|
||||
GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val);
|
||||
const GlobalValue *GV = GN->getGlobal();
|
||||
alignment = GV->getAlignment();
|
||||
break;
|
||||
}
|
||||
// Loading from memory, can we adjust alignment?
|
||||
if (basePtr.getOpcode() == SPUISD::AFormAddr) {
|
||||
SDOperand APtr = basePtr.getOperand(0);
|
||||
if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
|
||||
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
|
||||
alignment = GSDN->getGlobal()->getAlignment();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
alignOffs = 0;
|
||||
@ -504,15 +509,9 @@ AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
|
||||
if (alignment == 16) {
|
||||
// Realign the base pointer as a D-Form address:
|
||||
if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
|
||||
if (isMemoryOperand(basePtr)) {
|
||||
SDOperand Zero = DAG.getConstant(0, PtrVT);
|
||||
unsigned Opc = (!ST->usingLargeMem()
|
||||
? SPUISD::AFormAddr
|
||||
: SPUISD::XFormAddr);
|
||||
basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero);
|
||||
}
|
||||
basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
|
||||
basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT));
|
||||
basePtr = DAG.getNode(ISD::ADD, PtrVT,
|
||||
basePtr,
|
||||
DAG.getConstant((alignOffs & ~0xf), PtrVT));
|
||||
}
|
||||
|
||||
// Emit the vector load:
|
||||
@ -524,7 +523,7 @@ AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
|
||||
|
||||
// Unaligned load or we're using the "large memory" model, which means that
|
||||
// we have to be very pessimistic:
|
||||
if (isMemoryOperand(basePtr)) {
|
||||
if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
|
||||
basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
|
||||
}
|
||||
|
||||
@ -551,13 +550,6 @@ LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||
unsigned alignment = LN->getAlignment();
|
||||
SDOperand Ops[8];
|
||||
|
||||
// For an extending load of an i1 variable, just call it i8 (or whatever we
|
||||
// were passed) and make it zero-extended:
|
||||
if (VT == MVT::i1) {
|
||||
VT = OpVT;
|
||||
ExtType = ISD::ZEXTLOAD;
|
||||
}
|
||||
|
||||
switch (LN->getAddressingMode()) {
|
||||
case ISD::UNINDEXED: {
|
||||
int offset, rotamt;
|
||||
@ -575,15 +567,13 @@ LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||
if (rotamt != 0 || !was16aligned) {
|
||||
SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
|
||||
|
||||
Ops[0] = the_chain;
|
||||
Ops[1] = result;
|
||||
if (was16aligned) {
|
||||
Ops[0] = the_chain;
|
||||
Ops[1] = result;
|
||||
Ops[2] = DAG.getConstant(rotamt, MVT::i16);
|
||||
} else {
|
||||
MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
||||
LoadSDNode *LN1 = cast<LoadSDNode>(result);
|
||||
Ops[0] = the_chain;
|
||||
Ops[1] = result;
|
||||
Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
|
||||
DAG.getConstant(rotamt, PtrVT));
|
||||
}
|
||||
@ -628,9 +618,14 @@ LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||
}
|
||||
|
||||
SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
|
||||
SDOperand retops[2] = { result, the_chain };
|
||||
SDOperand retops[3] = {
|
||||
result,
|
||||
the_chain,
|
||||
DAG.getConstant(alignment, MVT::i32)
|
||||
};
|
||||
|
||||
result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
|
||||
result = DAG.getNode(SPUISD::LDRESULT, retvts,
|
||||
retops, sizeof(retops) / sizeof(retops[0]));
|
||||
return result;
|
||||
}
|
||||
case ISD::PRE_INC:
|
||||
@ -712,6 +707,7 @@ LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||
DEBUG(cerr << "\n");
|
||||
|
||||
if (basePtr.getOpcode() == SPUISD::DFormAddr) {
|
||||
// Hmmmm... do we ever actually hit this code?
|
||||
insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
|
||||
basePtr.getOperand(0),
|
||||
insertEltOffs);
|
||||
@ -720,6 +716,8 @@ LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||
&& basePtr.getOperand(0).getOpcode() == SPUISD::XFormAddr)) {
|
||||
insertEltPtr = basePtr;
|
||||
} else {
|
||||
// $sp is always aligned, so use it instead of potentially loading an
|
||||
// address into a new register:
|
||||
insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
|
||||
DAG.getRegister(SPU::R1, PtrVT),
|
||||
insertEltOffs);
|
||||
@ -766,10 +764,9 @@ LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||
if (TM.getRelocationModel() == Reloc::Static) {
|
||||
if (!ST->usingLargeMem()) {
|
||||
// Just return the SDOperand with the constant pool address in it.
|
||||
return CPI;
|
||||
return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
|
||||
} else {
|
||||
#if 1
|
||||
// Generate hi/lo address pair
|
||||
SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
|
||||
SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
|
||||
|
||||
@ -795,7 +792,7 @@ LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||
|
||||
if (TM.getRelocationModel() == Reloc::Static) {
|
||||
return (!ST->usingLargeMem()
|
||||
? JTI
|
||||
? DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero)
|
||||
: DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
|
||||
}
|
||||
|
||||
@ -815,7 +812,7 @@ LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||
|
||||
if (TM.getRelocationModel() == Reloc::Static) {
|
||||
return (!ST->usingLargeMem()
|
||||
? GA
|
||||
? DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero)
|
||||
: DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
|
||||
} else {
|
||||
cerr << "LowerGlobalAddress: Relocation model other than static not "
|
||||
@ -880,6 +877,24 @@ LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
|
||||
return SDOperand();
|
||||
}
|
||||
|
||||
//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
|
||||
static SDOperand
|
||||
LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
|
||||
{
|
||||
SDOperand Cond = Op.getOperand(1);
|
||||
MVT::ValueType CondVT = Cond.getValueType();
|
||||
MVT::ValueType CondNVT;
|
||||
|
||||
if (CondVT == MVT::i1 || CondVT == MVT::i8) {
|
||||
CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
|
||||
return DAG.getNode(ISD::BRCOND, Op.getValueType(),
|
||||
Op.getOperand(0),
|
||||
DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
|
||||
Op.getOperand(2));
|
||||
} else
|
||||
return SDOperand(); // Unchanged
|
||||
}
|
||||
|
||||
static SDOperand
|
||||
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
|
||||
{
|
||||
@ -2458,8 +2473,10 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
|
||||
return LowerConstant(Op, DAG);
|
||||
case ISD::ConstantFP:
|
||||
return LowerConstantFP(Op, DAG);
|
||||
case ISD::BRCOND:
|
||||
return LowerBRCOND(Op, DAG);
|
||||
case ISD::FORMAL_ARGUMENTS:
|
||||
return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
|
||||
return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
|
||||
case ISD::CALL:
|
||||
return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
|
||||
case ISD::RET:
|
||||
@ -2537,48 +2554,16 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
|
||||
#if 0
|
||||
TargetMachine &TM = getTargetMachine();
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
#endif
|
||||
SDOperand N0 = N->getOperand(0); // everything has at least one operand
|
||||
|
||||
switch (N->getOpcode()) {
|
||||
default: break;
|
||||
|
||||
// Look for obvious optimizations for shift left:
|
||||
// a) Replace 0 << V with 0
|
||||
// b) Replace V << 0 with V
|
||||
//
|
||||
// N.B: llvm will generate an undef node if the shift amount is greater than
|
||||
// 15 (e.g.: V << 16), which will naturally trigger an assert.
|
||||
case SPU::SHLIr32:
|
||||
case SPU::SHLHIr16:
|
||||
case SPU::SHLQBIIvec:
|
||||
case SPU::ROTHIr16:
|
||||
case SPU::ROTHIr16_i32:
|
||||
case SPU::ROTIr32:
|
||||
case SPU::ROTIr32_i16:
|
||||
case SPU::ROTQBYIvec:
|
||||
case SPU::ROTQBYBIvec:
|
||||
case SPU::ROTQBIIvec:
|
||||
case SPU::ROTHMIr16:
|
||||
case SPU::ROTMIr32:
|
||||
case SPU::ROTQMBYIvec: {
|
||||
if (N0.getOpcode() == ISD::Constant) {
|
||||
if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
|
||||
if (C->getValue() == 0) // 0 << V -> 0.
|
||||
return N0;
|
||||
}
|
||||
}
|
||||
SDOperand N1 = N->getOperand(1);
|
||||
if (N1.getOpcode() == ISD::Constant) {
|
||||
if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
|
||||
if (C->getValue() == 0) // V << 0 -> V
|
||||
return N1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Do something creative here for ISD nodes that can be coalesced in unique
|
||||
// ways.
|
||||
}
|
||||
#endif
|
||||
|
||||
// Otherwise, return unchanged.
|
||||
return SDOperand();
|
||||
}
|
||||
|
||||
|
@ -1359,6 +1359,9 @@ def ORi8_v16i8:
|
||||
def : Pat<(SPUextract_elt0 (v16i8 VECREG:$rA)),
|
||||
(ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
|
||||
|
||||
def : Pat<(SPUextract_elt0_chained (v16i8 VECREG:$rA)),
|
||||
(ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
|
||||
|
||||
def ORi16_v8i16:
|
||||
RRForm<0b10000010000, (outs R16C:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||
"or\t$rT, $rA, $rB", IntegerOp,
|
||||
@ -2868,6 +2871,9 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
|
||||
*/
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// brcond predicates:
|
||||
//===----------------------------------------------------------------------===//
|
||||
def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
|
||||
(BRHZ R16C:$rA, bb:$dest)>;
|
||||
def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
|
||||
@ -2876,7 +2882,7 @@ def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
|
||||
def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
|
||||
(BRZ R32C:$rA, bb:$dest)>;
|
||||
def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
|
||||
(BRZ R32C:$rA, bb:$dest)>;
|
||||
(BRNZ R32C:$rA, bb:$dest)>;
|
||||
|
||||
let isTerminator = 1, isBarrier = 1 in {
|
||||
let isReturn = 1 in {
|
||||
@ -2885,23 +2891,6 @@ let isTerminator = 1, isBarrier = 1 in {
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Various brcond predicates:
|
||||
//===----------------------------------------------------------------------===//
|
||||
/*
|
||||
def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
|
||||
(BRZ R32C:$rA, bb:$dest)>;
|
||||
|
||||
def : Pat<(brcond (i32 (seteq R32C:$rA, R32C:$rB)), bb:$dest),
|
||||
(BRNZ (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
|
||||
|
||||
def : Pat<(brcond (i16 (seteq R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
|
||||
(BRHNZ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
|
||||
|
||||
def : Pat<(brcond (i16 (seteq R16C:$rA, R16C:$rB)), bb:$dest),
|
||||
(BRHNZ (CEQHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
|
||||
*/
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Single precision floating point instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -3475,21 +3464,20 @@ def : Pat<(i32 (anyext R16C:$rSrc)),
|
||||
// low parts in order to load them into a register.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>;
|
||||
def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>;
|
||||
def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
|
||||
def : Pat<(SPUxform tglobaladdr:$in, 0),
|
||||
(IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
|
||||
def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>;
|
||||
def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>;
|
||||
|
||||
def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
|
||||
def : Pat<(SPUxform tjumptable:$in, 0),
|
||||
(IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
|
||||
def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>;
|
||||
def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>;
|
||||
def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
|
||||
/* def : Pat<(SPUxform tconstpool:$in, 0),
|
||||
(IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; */
|
||||
|
||||
def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>;
|
||||
def : Pat<(SPUlo tconstpool:$in , 0), (ILAlsa tconstpool:$in)>;
|
||||
def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
|
||||
// tblgen bug prevents this from working.
|
||||
// def : Pat<(SPUxform tconstpool:$in, 0),
|
||||
// (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
|
||||
|
||||
// Intrinsics:
|
||||
include "CellSDKIntrinsics.td"
|
||||
|
@ -195,6 +195,10 @@ def SPUdform : SDNode<"SPUISD::DFormAddr", SDTIntBinOp, []>;
|
||||
// X-Form "$reg($reg)" addresses
|
||||
def SPUxform : SDNode<"SPUISD::XFormAddr", SDTIntBinOp, []>;
|
||||
|
||||
// Load result node
|
||||
def SPUload_result : SDTypeProfile<1, 3, []>;
|
||||
def SPUldresult : SDNode<"SPUISD::LDRESULT", SPUload_result, [SDNPHasChain]>;
|
||||
|
||||
// SPU 32-bit sign-extension to 64-bits
|
||||
def SPUsext32_to_64: SDNode<"SPUISD::SEXT32TO64", SDTIntExtendOp, []>;
|
||||
|
||||
|
@ -1,18 +1,18 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
|
||||
; RUN: grep bisl %t1.s | count 6 &&
|
||||
; RUN: grep bisl %t1.s | count 7 &&
|
||||
; RUN: grep ila %t1.s | count 1 &&
|
||||
; RUN: grep rotqbyi %t1.s | count 4 &&
|
||||
; RUN: grep lqa %t1.s | count 4 &&
|
||||
; RUN: grep lqa %t1.s | count 5 &&
|
||||
; RUN: grep lqd %t1.s | count 6 &&
|
||||
; RUN: grep dispatch_tab %t1.s | count 10
|
||||
; RUN: grep bisl %t2.s | count 6 &&
|
||||
; RUN: grep ilhu %t2.s | count 1 &&
|
||||
; RUN: grep iohl %t2.s | count 1 &&
|
||||
; RUN: grep rotqby %t2.s | count 5 &&
|
||||
; RUN: grep bisl %t2.s | count 7 &&
|
||||
; RUN: grep ilhu %t2.s | count 2 &&
|
||||
; RUN: grep iohl %t2.s | count 2 &&
|
||||
; RUN: grep rotqby %t2.s | count 6 &&
|
||||
; RUN: grep lqd %t2.s | count 12 &&
|
||||
; RUN: grep lqx %t2.s | count 6 &&
|
||||
; RUN: grep il %t2.s | count 7 &&
|
||||
; RUN: grep lqx %t2.s | count 8 &&
|
||||
; RUN: grep il %t2.s | count 9 &&
|
||||
; RUN: grep ai %t2.s | count 5 &&
|
||||
; RUN: grep dispatch_tab %t2.s | count 7
|
||||
|
||||
@ -38,3 +38,13 @@ entry:
|
||||
tail call void %tmp2.5( i32 %i_arg, float %f_arg )
|
||||
ret void
|
||||
}
|
||||
|
||||
@ptr_list = internal global [1 x void ()*] [ void ()* inttoptr (i64 4294967295 to void ()*) ], align 4
|
||||
@ptr.a = internal global void ()** getelementptr ([1 x void ()*]* @ptr_list, i32 0, i32 1), align 16
|
||||
|
||||
define void @double_indirect_call() {
|
||||
%a = load void ()*** @ptr.a, align 16
|
||||
%b = load void ()** %a, align 4
|
||||
tail call void %b()
|
||||
ret void
|
||||
}
|
||||
|
90
test/CodeGen/CellSPU/mul_ops.ll
Normal file
90
test/CodeGen/CellSPU/mul_ops.ll
Normal file
@ -0,0 +1,90 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: grep mpy %t1.s | count 44 &&
|
||||
; RUN: grep mpyu %t1.s | count 4 &&
|
||||
; RUN: grep mpyh %t1.s | count 10 &&
|
||||
; RUN: grep mpyhh %t1.s | count 2 &&
|
||||
; RUN: grep rotma %t1.s | count 12 &&
|
||||
; RUN: grep rotmahi %t1.s | count 4 &&
|
||||
; RUN: grep and %t1.s | count 2 &&
|
||||
; RUN: grep selb %t1.s | count 6 &&
|
||||
; RUN: grep fsmbi %t1.s | count 4 &&
|
||||
; RUN: grep shli %t1.s | count 4 &&
|
||||
; RUN: grep shlhi %t1.s | count 4 &&
|
||||
; RUN: grep ila %t1.s | count 2 &&
|
||||
; RUN: grep xsbh %t1.s | count 8 &&
|
||||
; RUN: grep xshw %t1.s | count 4
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
; 32-bit multiply instruction generation:
|
||||
define <4 x i32> @mpy_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
|
||||
entry:
|
||||
%A = mul <4 x i32> %arg1, %arg2
|
||||
ret <4 x i32> %A
|
||||
}
|
||||
|
||||
define <4 x i32> @mpy_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
|
||||
entry:
|
||||
%A = mul <4 x i32> %arg2, %arg1
|
||||
ret <4 x i32> %A
|
||||
}
|
||||
|
||||
define <8 x i16> @mpy_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
|
||||
entry:
|
||||
%A = mul <8 x i16> %arg1, %arg2
|
||||
ret <8 x i16> %A
|
||||
}
|
||||
|
||||
define <8 x i16> @mpy_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
|
||||
entry:
|
||||
%A = mul <8 x i16> %arg2, %arg1
|
||||
ret <8 x i16> %A
|
||||
}
|
||||
|
||||
define <16 x i8> @mul_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
|
||||
entry:
|
||||
%A = mul <16 x i8> %arg2, %arg1
|
||||
ret <16 x i8> %A
|
||||
}
|
||||
|
||||
define <16 x i8> @mul_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
|
||||
entry:
|
||||
%A = mul <16 x i8> %arg1, %arg2
|
||||
ret <16 x i8> %A
|
||||
}
|
||||
|
||||
define i32 @mul_i32_1(i32 %arg1, i32 %arg2) {
|
||||
entry:
|
||||
%A = mul i32 %arg2, %arg1
|
||||
ret i32 %A
|
||||
}
|
||||
|
||||
define i32 @mul_i32_2(i32 %arg1, i32 %arg2) {
|
||||
entry:
|
||||
%A = mul i32 %arg1, %arg2
|
||||
ret i32 %A
|
||||
}
|
||||
|
||||
define i16 @mul_i16_1(i16 %arg1, i16 %arg2) {
|
||||
entry:
|
||||
%A = mul i16 %arg2, %arg1
|
||||
ret i16 %A
|
||||
}
|
||||
|
||||
define i16 @mul_i16_2(i16 %arg1, i16 %arg2) {
|
||||
entry:
|
||||
%A = mul i16 %arg1, %arg2
|
||||
ret i16 %A
|
||||
}
|
||||
|
||||
define i8 @mul_i8_1(i8 %arg1, i8 %arg2) {
|
||||
entry:
|
||||
%A = mul i8 %arg2, %arg1
|
||||
ret i8 %A
|
||||
}
|
||||
|
||||
define i8 @mul_i8_2(i8 %arg1, i8 %arg2) {
|
||||
entry:
|
||||
%A = mul i8 %arg1, %arg2
|
||||
ret i8 %A
|
||||
}
|
@ -16,9 +16,10 @@
|
||||
; RUN: grep rotqbyi %t2.s | count 5 &&
|
||||
; RUN: grep xshw %t2.s | count 1 &&
|
||||
; RUN: grep andi %t2.s | count 4 &&
|
||||
; RUN: grep cbd %t2.s | count 3 &&
|
||||
; RUN: grep chd %t2.s | count 1 &&
|
||||
; RUN: grep cwd %t2.s | count 3 &&
|
||||
; RUN: grep cbx %t2.s | count 3 &&
|
||||
; RUN: grep chx %t2.s | count 1 &&
|
||||
; RUN: grep cwx %t2.s | count 1 &&
|
||||
; RUN: grep cwd %t2.s | count 2 &&
|
||||
; RUN: grep shufb %t2.s | count 7 &&
|
||||
; RUN: grep stqx %t2.s | count 7
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user