CellSPU:
- Fix v2[if]64 vector insertion code before IBM files a bug report.
- Ensure that zero (0) offsets relative to $sp don't trip an assert
  (add $sp, 0 gets legalized to $sp alone, tripping an assert)
- Shuffle masks passed to SPUISD::SHUFB are now v16i8 or v4i32

llvm-svn: 60358
Commit cf677b5a67 (parent ddae8937e6)
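
Below, a minimal sketch (not taken from this commit's tests; the function name is made up) of the kind of IR that exercises the v2[if]64 insertion path named in the first bullet; the assumption is that llc -march=cellspu lowers it through LowerINSERT_VECTOR_ELT into a shuffle-mask generate feeding a shufb.

; Hypothetical example, for illustration only:
define <2 x double> @insert_f64_elt1(<2 x double> %v, double %d) nounwind {
entry:
  %r = insertelement <2 x double> %v, double %d, i32 1
  ret <2 x double> %r
}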
@@ -593,8 +593,8 @@ SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
       && !SelectDFormAddr(Op, N, Base, Index)) {
     // If the address is neither A-form or D-form, punt and use an X-form
     // address:
-    Base = N.getOperand(0);
-    Index = N.getOperand(1);
+    Base = N.getOperand(1);
+    Index = N.getOperand(0);
     return true;
   }

@@ -759,12 +759,13 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
       }

       SDValue insertEltOp =
-        DAG.getNode(SPUISD::SHUFFLE_MASK, stVecVT, insertEltPtr);
+        DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
       SDValue vectorizeOp =
         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);

-      result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
-                           DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
+      result = DAG.getNode(SPUISD::SHUFB, vecVT,
+                           vectorizeOp, alignLoadVec,
+                           DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));

       result = DAG.getStore(the_chain, result, basePtr,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
@@ -885,10 +886,10 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 static SDValue
 LowerConstant(SDValue Op, SelectionDAG &DAG) {
   MVT VT = Op.getValueType();
-  ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());

   if (VT == MVT::i64) {
+    ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
-    SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
+    SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
   } else {
@@ -906,15 +907,18 @@ LowerConstant(SDValue Op, SelectionDAG &DAG) {
 static SDValue
 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
   MVT VT = Op.getValueType();
-  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
-
-  assert((FP != 0) &&
-         "LowerConstantFP: Node is not ConstantFPSDNode");

   if (VT == MVT::f64) {
+    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
+
+    assert((FP != 0) &&
+           "LowerConstantFP: Node is not ConstantFPSDNode");
+
     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
-    return DAG.getNode(ISD::BIT_CONVERT, VT,
-                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
+    SDValue T = DAG.getConstant(dbits, MVT::i64);
+    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
   }

   return SDValue();
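
A short sketch (assumed, not part of this commit; the function is hypothetical) of IR that should reach LowerConstantFP above: the f64 constant's bit pattern is splatted into a v2i64, bit-converted to v2f64, and the preferred slot is extracted.

; Hypothetical example:
define double @return_f64_const() nounwind {
entry:
  ret double 3.500000e+00
}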
@@ -1793,7 +1797,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
     // Copy register's contents as index in SHUFFLE_MASK:
     SDValue ShufMaskOp =
-      DAG.getNode(SPUISD::SHUFFLE_MASK, V1.getValueType(),
+      DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
                   DAG.getTargetConstant(V2Elt, MVT::i32),
                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
     // Use shuffle mask in SHUFB synthetic instruction:
@@ -1818,7 +1822,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   }

   SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
-                                  &ResultMask[0], ResultMask.size());
+                                    &ResultMask[0], ResultMask.size());
   return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
   }
 }
@@ -2165,7 +2169,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
     if (scaleShift > 0) {
       // Scale the shift factor:
       Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
-                          DAG.getConstant(scaleShift, MVT::i32));
+                        DAG.getConstant(scaleShift, MVT::i32));
     }

     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
@@ -2209,7 +2213,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
     }

     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
-                         DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate));
+                         DAG.getNode(SPUISD::SHUFB, VecVT,
+                                     vecShift, vecShift, replicate));
   }

   return retval;
@@ -2225,18 +2230,17 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  // Use $2 because it's always 16-byte aligned and it's available:
-  SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
+  // Use $sp ($1) because it's always 16-byte aligned and it's available:
+  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                                DAG.getRegister(SPU::R1, PtrVT),
+                                DAG.getConstant(CN->getSExtValue(), PtrVT));
+  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);

   SDValue result =
     DAG.getNode(SPUISD::SHUFB, VT,
                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
-                VecOp,
-                DAG.getNode(SPUISD::SHUFFLE_MASK, VT,
-                            DAG.getNode(ISD::ADD, PtrVT,
-                                        PtrBase,
-                                        DAG.getConstant(CN->getZExtValue(),
-                                                        PtrVT))));
+                VecOp,
+                DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));

   return result;
 }
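
For illustration (an inference from the commit message, not from the diff itself; the function is hypothetical): inserting at element 0 makes the constant offset handed to SPUISD::IndirectAddr above zero, which appears to be the "$sp + 0" address that previously tripped an assert once (add $sp, 0) was legalized to plain $sp.

; Hypothetical example:
define <2 x i64> @insert_i64_elt0(<2 x i64> %v, i64 %x) nounwind {
entry:
  %r = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %r
}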
@@ -2901,8 +2905,10 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
 #endif
   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
   SelectionDAG &DAG = DCI.DAG;
-  SDValue Op0 = N->getOperand(0);   // everything has at least one operand
-  SDValue Result;                   // Initially, NULL result
+  SDValue Op0 = N->getOperand(0);   // everything has at least one operand
+  MVT NodeVT = N->getValueType(0);  // The node's value type
+  MVT Op0VT = Op0.getValueType();   // The first operand's result
+  SDValue Result;                   // Initially, empty result

   switch (N->getOpcode()) {
   default: break;
@@ -2918,14 +2924,13 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
       ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
       ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
       SDValue combinedConst =
-        DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
-                        Op0.getValueType());
+        DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);

       DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                  << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
       DEBUG(cerr << "With: (SPUindirect <arg>, "
                  << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
-      return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
+      return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                          Op0.getOperand(0), combinedConst);
     }
   } else if (isa<ConstantSDNode>(Op0)
@@ -2938,8 +2943,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
       ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
       ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
       SDValue combinedConst =
-        DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
-                        Op0.getValueType());
+        DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);

       DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                  << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
@@ -2955,8 +2959,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND: {
-    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT &&
-        N->getValueType(0) == Op0.getValueType()) {
+    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
       // (any_extend (SPUextract_elt0 <arg>)) ->
       // (SPUextract_elt0 <arg>)
       // Types must match, however...
@@ -3000,7 +3003,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
     if (isa<ConstantSDNode>(Op1)) {
       // Kill degenerate vector shifts:
       ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
-
       if (CN->getZExtValue() == 0) {
         Result = Op0;
       }
@@ -3014,20 +3016,20 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
     case ISD::ANY_EXTEND:
     case ISD::ZERO_EXTEND:
     case ISD::SIGN_EXTEND: {
-      // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
+      // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
       // <arg>
       // but only if the SPUpromote_scalar and <arg> types match.
       SDValue Op00 = Op0.getOperand(0);
       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
         SDValue Op000 = Op00.getOperand(0);
-        if (Op000.getValueType() == N->getValueType(0)) {
+        if (Op000.getValueType() == NodeVT) {
           Result = Op000;
         }
       }
       break;
     }
     case SPUISD::VEC2PREFSLOT: {
-      // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
+      // (SPUpromote_scalar (SPUvec2prefslot <arg>)) ->
       // <arg>
       Result = Op0.getOperand(0);
       break;
@@ -3037,7 +3039,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
   }
   }
   // Otherwise, return unchanged.
-#ifdef NDEBUG
+#ifndef NDEBUG
   if (Result.getNode()) {
     DEBUG(cerr << "\nReplace.SPU: ");
     DEBUG(N->dump(&DAG));

@@ -269,52 +269,51 @@ def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
 // Generate Controls for Insertion:
 //===----------------------------------------------------------------------===//

-def CBD :
-    RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
-        "cbd\t$rT, $src", ShuffleOp,
-        [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
+    "cbd\t$rT, $src", ShuffleOp,
+    [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

-def CBX : RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
     "cbx\t$rT, $src", ShuffleOp,
     [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

-def CHD : RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
     "chd\t$rT, $src", ShuffleOp,
     [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

-def CHX : RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
     "chx\t$rT, $src", ShuffleOp,
     [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

-def CWD : RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
     "cwd\t$rT, $src", ShuffleOp,
     [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

-def CWDf32 : RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
-    "cwd\t$rT, $src", ShuffleOp,
-    [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CWX : RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
     "cwx\t$rT, $src", ShuffleOp,
     [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

-def CWXf32 : RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
+    "cwd\t$rT, $src", ShuffleOp,
+    [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
     "cwx\t$rT, $src", ShuffleOp,
     [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

-def CDD : RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
     "cdd\t$rT, $src", ShuffleOp,
     [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;

-def CDDf64 : RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
-    "cdd\t$rT, $src", ShuffleOp,
-    [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CDX : RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
     "cdx\t$rT, $src", ShuffleOp,
     [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

-def CDXf64 : RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
+    "cdd\t$rT, $src", ShuffleOp,
+    [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
     "cdx\t$rT, $src", ShuffleOp,
     [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;

@@ -1786,46 +1785,33 @@ class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
     RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
             IntegerOp, pattern>;

-class SHUFBVecInst<ValueType vectype>:
+class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
     SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
-              [(set (vectype VECREG:$rT), (SPUshuffle (vectype VECREG:$rA),
-                                                      (vectype VECREG:$rB),
-                                                      (vectype VECREG:$rC)))]>;
-
-// It's this pattern that's probably the most useful, since SPUISelLowering
-// methods create a v16i8 vector for $rC:
-class SHUFBVecPat1<ValueType vectype, ValueType masktype, SPUInstr inst>:
-    Pat<(SPUshuffle (vectype VECREG:$rA), (vectype VECREG:$rB),
-                    (masktype VECREG:$rC)),
-        (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
+              [(set (resultvec VECREG:$rT),
+                    (SPUshuffle (resultvec VECREG:$rA),
+                                (resultvec VECREG:$rB),
+                                (maskvec VECREG:$rC)))]>;

 multiclass ShuffleBytes
 {
-  def v16i8 : SHUFBVecInst<v16i8>;
-  def v8i16 : SHUFBVecInst<v8i16>;
-  def v4i32 : SHUFBVecInst<v4i32>;
-  def v2i64 : SHUFBVecInst<v2i64>;
+  def v16i8 : SHUFBVecInst<v16i8, v16i8>;
+  def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>;
+  def v8i16 : SHUFBVecInst<v8i16, v16i8>;
+  def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
+  def v4i32 : SHUFBVecInst<v4i32, v16i8>;
+  def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
+  def v2i64 : SHUFBVecInst<v2i64, v16i8>;
+  def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;

-  def v4f32 : SHUFBVecInst<v4f32>;
-  def v2f64 : SHUFBVecInst<v2f64>;
+  def v4f32 : SHUFBVecInst<v4f32, v16i8>;
+  def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>;
+
+  def v2f64 : SHUFBVecInst<v2f64, v16i8>;
+  def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
 }

 defm SHUFB : ShuffleBytes;

-// Shuffle mask is a v16i8 vector
-def : SHUFBVecPat1<v8i16, v16i8, SHUFBv16i8>;
-def : SHUFBVecPat1<v4i32, v16i8, SHUFBv16i8>;
-def : SHUFBVecPat1<v2i64, v16i8, SHUFBv16i8>;
-def : SHUFBVecPat1<v4f32, v16i8, SHUFBv16i8>;
-def : SHUFBVecPat1<v2f64, v16i8, SHUFBv16i8>;
-
-// Shuffle mask is a v4i32 vector:
-def : SHUFBVecPat1<v16i8, v4i32, SHUFBv4i32>;
-def : SHUFBVecPat1<v8i16, v4i32, SHUFBv4i32>;
-def : SHUFBVecPat1<v2i64, v4i32, SHUFBv4i32>;
-def : SHUFBVecPat1<v4f32, v4i32, SHUFBv4i32>;
-def : SHUFBVecPat1<v2f64, v4i32, SHUFBv4i32>;
-
 //===----------------------------------------------------------------------===//
 // Shift and rotate group:
 //===----------------------------------------------------------------------===//

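As a rough illustration (not from this commit's tests; the function is made up), a general vector shuffle goes through LowerVECTOR_SHUFFLE, which builds a BUILD_VECTOR of v16i8 bytes for the control operand, so it is expected to match the v16i8-mask SHUFB variants defined above.

; Hypothetical example:
define <4 x i32> @interleave_lo(<4 x i32> %a, <4 x i32> %b) nounwind {
entry:
  %r = shufflevector <4 x i32> %a, <4 x i32> %b,
                     <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i32> %r
}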
@@ -1,12 +1,12 @@
 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep cbd %t1.s | count 3
-; RUN: grep chd %t1.s | count 3
-; RUN: grep cwd %t1.s | count 6
-; RUN: grep il %t1.s | count 4
-; RUN: grep ilh %t1.s | count 3
+; RUN: grep cbd %t1.s | count 5
+; RUN: grep chd %t1.s | count 5
+; RUN: grep cwd %t1.s | count 10
+; RUN: grep il %t1.s | count 15
+; RUN: grep ilh %t1.s | count 10
 ; RUN: grep iohl %t1.s | count 1
-; RUN: grep ilhu %t1.s | count 1
-; RUN: grep shufb %t1.s | count 12
+; RUN: grep ilhu %t1.s | count 4
+; RUN: grep shufb %t1.s | count 26
 ; RUN: grep 17219 %t1.s | count 1
 ; RUN: grep 22598 %t1.s | count 1
 ; RUN: grep -- -39 %t1.s | count 1
@@ -51,3 +51,70 @@ entry:
   %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3
   ret <4 x i32> %tmp1.2
 }
+
+define void @variable_v16i8_1(<16 x i8>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <16 x i8>* %a, i32 %i
+  %tmp2 = load <16 x i8>* %arrayidx
+  %tmp3 = insertelement <16 x i8> %tmp2, i8 1, i32 1
+  %tmp8 = insertelement <16 x i8> %tmp3, i8 2, i32 11
+  store <16 x i8> %tmp8, <16 x i8>* %arrayidx
+  ret void
+}
+
+define void @variable_v8i16_1(<8 x i16>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <8 x i16>* %a, i32 %i
+  %tmp2 = load <8 x i16>* %arrayidx
+  %tmp3 = insertelement <8 x i16> %tmp2, i16 1, i32 1
+  %tmp8 = insertelement <8 x i16> %tmp3, i16 2, i32 6
+  store <8 x i16> %tmp8, <8 x i16>* %arrayidx
+  ret void
+}
+
+define void @variable_v4i32_1(<4 x i32>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <4 x i32>* %a, i32 %i
+  %tmp2 = load <4 x i32>* %arrayidx
+  %tmp3 = insertelement <4 x i32> %tmp2, i32 1, i32 1
+  %tmp8 = insertelement <4 x i32> %tmp3, i32 2, i32 2
+  store <4 x i32> %tmp8, <4 x i32>* %arrayidx
+  ret void
+}
+
+define void @variable_v4f32_1(<4 x float>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <4 x float>* %a, i32 %i
+  %tmp2 = load <4 x float>* %arrayidx
+  %tmp3 = insertelement <4 x float> %tmp2, float 1.000000e+00, i32 1
+  %tmp8 = insertelement <4 x float> %tmp3, float 2.000000e+00, i32 2
+  store <4 x float> %tmp8, <4 x float>* %arrayidx
+  ret void
+}
+
+define void @variable_v2i64_1(<2 x i64>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <2 x i64>* %a, i32 %i
+  %tmp2 = load <2 x i64>* %arrayidx
+  %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 0
+  store <2 x i64> %tmp3, <2 x i64>* %arrayidx
+  ret void
+}
+
+define void @variable_v2i64_2(<2 x i64>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <2 x i64>* %a, i32 %i
+  %tmp2 = load <2 x i64>* %arrayidx
+  %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 1
+  store <2 x i64> %tmp3, <2 x i64>* %arrayidx
+  ret void
+}
+
+define void @variable_v2f64_1(<2 x double>* %a, i32 %i) nounwind {
+entry:
+  %arrayidx = getelementptr <2 x double>* %a, i32 %i
+  %tmp2 = load <2 x double>* %arrayidx
+  %tmp3 = insertelement <2 x double> %tmp2, double 1.000000e+00, i32 1
+  store <2 x double> %tmp3, <2 x double>* %arrayidx
+  ret void
+}