mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
CellSPU:
- Fix fabs, fneg for f32 and f64. - Use BuildVectorSDNode.isConstantSplat, now that the functionality exists - Continue to improve i64 constant lowering. Lower certain special constants to the constant pool when they correspond to SPU's shufb instruction's special mask values. This avoids the overhead of performing a shuffle on a zero-filled vector just to get the special constant when the memory load suffices. llvm-svn: 67067
This commit is contained in:
parent
b04be1838d
commit
2c4ac99ef8
@ -200,11 +200,38 @@ namespace {
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
//! Generate the carry-generate shuffle mask.
//!
//! Builds a v4i32 BUILD_VECTOR node from four i32 constants; the result is
//! used in this file as the third operand of the ADD64_MARKER and
//! MUL64_MARKER nodes.
//!
//! \param DAG  DAG in which the constants and the BUILD_VECTOR are created.
//! \param dl   Debug location attached to the BUILD_VECTOR node.
//! \return     The v4i32 BUILD_VECTOR holding the mask.
|
||||
SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
|
||||
SmallVector<SDValue, 16 > ShufBytes;
|
||||
|
||||
// Create the shuffle mask for "rotating" the carry up one register slot
|
||||
// once the carry is generated.
// Read as byte selectors, words 0 and 2 name source bytes 4..7 and 12..15,
// i.e. the 32-bit slots one position further along.
// NOTE(review): the 0x80 selector bytes in words 1 and 3 are presumably the
// shufb special value that produces 0x00 bytes -- confirm against the SPU ISA.
|
||||
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
||||
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
&ShufBytes[0], ShufBytes.size());
|
||||
}
|
||||
|
||||
namespace {
|
||||
//! Generate the borrow-generate shuffle mask
//!
//! Builds a v4i32 BUILD_VECTOR node from four i32 constants; the result is
//! used in this file as the third operand of the SUB64_MARKER node.
//!
//! \param DAG  DAG in which the constants and the BUILD_VECTOR are created.
//! \param dl   Debug location attached to the BUILD_VECTOR node.
//! \return     The v4i32 BUILD_VECTOR holding the mask.
|
||||
SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
|
||||
SmallVector<SDValue, 16 > ShufBytes;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Create the shuffle mask for "rotating" the borrow up one register slot
|
||||
// once the borrow is generated.
// Read as byte selectors, words 0 and 2 name source bytes 4..7 and 12..15,
// i.e. the 32-bit slots one position further along.
// NOTE(review): the 0xc0 selector bytes in words 1 and 3 are presumably the
// shufb special value that produces 0xFF bytes -- confirm against the SPU ISA.
|
||||
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
||||
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
&ShufBytes[0], ShufBytes.size());
|
||||
}
|
||||
|
||||
//===------------------------------------------------------------------===//
|
||||
/// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
|
||||
/// instructions for SelectionDAG operations.
|
||||
///
|
||||
@ -248,6 +275,7 @@ public:
|
||||
|
||||
SDNode *emitBuildVector(SDValue build_vec) {
|
||||
MVT vecVT = build_vec.getValueType();
|
||||
MVT eltVT = vecVT.getVectorElementType();
|
||||
SDNode *bvNode = build_vec.getNode();
|
||||
DebugLoc dl = bvNode->getDebugLoc();
|
||||
|
||||
@ -300,7 +328,10 @@ public:
|
||||
SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
|
||||
|
||||
//! Emit the necessary sequence for loading i64 constants:
|
||||
SDNode *SelectI64Constant(SDValue &Op, MVT OpVT);
|
||||
SDNode *SelectI64Constant(SDValue &Op, MVT OpVT, DebugLoc dl);
|
||||
|
||||
//! Alternate instruction emit sequence for loading i64 constants
|
||||
SDNode *SelectI64Constant(uint64_t i64const, MVT OpVT, DebugLoc dl);
|
||||
|
||||
//! Returns true if the address N is an A-form (local store) address
|
||||
bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
|
||||
@ -375,7 +406,6 @@ public:
|
||||
// Include the pieces autogenerated from the target description.
|
||||
#include "SPUGenDAGISel.inc"
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
/// InstructionSelect - This callback is invoked by
|
||||
@ -689,7 +719,7 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
||||
// Catch the i64 constants that end up here. Note: The backend doesn't
|
||||
// attempt to legalize the constant (it's useless because DAGCombiner
|
||||
// will insert 64-bit constants and we can't stop it).
|
||||
return SelectI64Constant(Op, OpVT);
|
||||
return SelectI64Constant(Op, OpVT, Op.getDebugLoc());
|
||||
} else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
|
||||
&& OpVT == MVT::i64) {
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
@ -747,21 +777,21 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
||||
zextShuffle));
|
||||
} else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||
SDNode *CGLoad =
|
||||
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG, dl));
|
||||
emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
|
||||
|
||||
return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
SDValue(CGLoad, 0)));
|
||||
} else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||
SDNode *CGLoad =
|
||||
emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG, dl));
|
||||
emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl));
|
||||
|
||||
return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
SDValue(CGLoad, 0)));
|
||||
} else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||
SDNode *CGLoad =
|
||||
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG, dl));
|
||||
emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
|
||||
|
||||
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
@ -813,6 +843,54 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
||||
if (OpVT == MVT::i64) {
|
||||
return SelectSRAi64(Op, OpVT);
|
||||
}
|
||||
} else if (Opc == ISD::FNEG
|
||||
&& (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
// Check if the pattern is a special form of DFNMS:
|
||||
// (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
if (Op0.getOpcode() == ISD::FSUB) {
|
||||
SDValue Op00 = Op0.getOperand(0);
|
||||
if (Op00.getOpcode() == ISD::FMUL) {
|
||||
unsigned Opc = SPU::DFNMSf64;
|
||||
if (OpVT == MVT::v2f64)
|
||||
Opc = SPU::DFNMSv2f64;
|
||||
|
||||
return CurDAG->getTargetNode(Opc, dl, OpVT,
|
||||
Op00.getOperand(0),
|
||||
Op00.getOperand(1),
|
||||
Op0.getOperand(1));
|
||||
}
|
||||
}
|
||||
|
||||
SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
|
||||
SDNode *signMask = 0;
|
||||
unsigned Opc = SPU::ORfneg64;
|
||||
|
||||
if (OpVT == MVT::f64) {
|
||||
signMask = SelectI64Constant(negConst, MVT::i64, dl);
|
||||
} else if (OpVT == MVT::v2f64) {
|
||||
Opc = SPU::ORfnegvec;
|
||||
signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
|
||||
MVT::v2i64,
|
||||
negConst, negConst));
|
||||
}
|
||||
|
||||
return CurDAG->getTargetNode(Opc, dl, OpVT,
|
||||
Op.getOperand(0), SDValue(signMask, 0));
|
||||
} else if (Opc == ISD::FABS) {
|
||||
if (OpVT == MVT::f64) {
|
||||
SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
|
||||
return CurDAG->getTargetNode(SPU::ANDfabs64, dl, OpVT,
|
||||
Op.getOperand(0), SDValue(signMask, 0));
|
||||
} else if (OpVT == MVT::v2f64) {
|
||||
SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
|
||||
SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
|
||||
absConst, absConst);
|
||||
SDNode *signMask = emitBuildVector(absVec);
|
||||
return CurDAG->getTargetNode(SPU::ANDfabsvec, dl, OpVT,
|
||||
Op.getOperand(0), SDValue(signMask, 0));
|
||||
}
|
||||
} else if (Opc == SPUISD::LDRESULT) {
|
||||
// Custom select instructions for LDRESULT
|
||||
MVT VT = N->getValueType(0);
|
||||
@ -1087,13 +1165,17 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
|
||||
/*!
|
||||
Do the magic necessary to load an i64 constant
|
||||
*/
|
||||
SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) {
|
||||
SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT,
|
||||
DebugLoc dl) {
|
||||
ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
|
||||
// Currently there's no DL on the input, but won't hurt to pretend.
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
|
||||
}
|
||||
|
||||
SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT,
|
||||
DebugLoc dl) {
|
||||
MVT OpVecVT = MVT::getVectorVT(OpVT, 2);
|
||||
SDValue i64vec =
|
||||
SPU::LowerSplat_v2i64(OpVecVT, *CurDAG, CN->getZExtValue(), dl);
|
||||
SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
|
||||
|
||||
// Here's where it gets interesting, because we have to parse out the
|
||||
// subtree handed back in i64vec:
|
||||
@ -1145,6 +1227,9 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) {
|
||||
|
||||
return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
|
||||
SDValue(shufNode, 0));
|
||||
} else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
|
||||
return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
|
||||
SDValue(emitBuildVector(i64vec), 0));
|
||||
} else {
|
||||
cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n";
|
||||
abort();
|
||||
|
@ -1,5 +1,5 @@
|
||||
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
|
||||
//
|
||||
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
@ -1353,7 +1353,7 @@ getVecImm(SDNode *N) {
|
||||
}
|
||||
}
|
||||
|
||||
return 0; // All UNDEF: use implicit def.; not Constant node
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// get_vec_i18imm - Test if this vector is a vector filled with the same value
|
||||
@ -1480,131 +1480,30 @@ SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// If this is a vector of constants or undefs, get the bits. A bit in
|
||||
// UndefBits is set if the corresponding element of the vector is an
|
||||
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
|
||||
// zero. Return true if this is not an array of constants, false if it is.
|
||||
//
// NOTE: the return convention is inverted relative to what the name suggests:
// "true" signals failure (a non-constant operand was found). In that case the
// output arrays hold only what was accumulated before the offending operand.
//
// \param BV          Node whose operands are scanned (the visible callers
//                    pass BUILD_VECTOR nodes).
// \param VectorBits  Out: constant element bits, packed into two 64-bit words.
// \param UndefBits   Out: mask of the bits that belong to undef elements,
//                    packed the same way as VectorBits.
|
||||
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
|
||||
uint64_t UndefBits[2]) {
|
||||
// Start with zero'd results.
|
||||
VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
|
||||
|
||||
// Element width is taken from operand 0 only; this assumes every operand has
// the same value type.
unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
|
||||
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
|
||||
SDValue OpVal = BV->getOperand(i);
|
||||
|
||||
// The first e/2 operands go to word 0, the remaining ones to word 1. Within
// a word, the operand with the lowest index occupies the highest-order slot.
// The (e/2-1) mask assumes e/2 is a power of two.
unsigned PartNo = i >= e/2; // Second half of the operands -> word 1.
|
||||
unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
|
||||
|
||||
uint64_t EltBits = 0;
|
||||
if (OpVal.getOpcode() == ISD::UNDEF) {
|
||||
// Mark all EltBitSize bits of this slot as undef; VectorBits stays zero.
uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
|
||||
UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
|
||||
continue;
|
||||
} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
|
||||
// Keep only the low EltBitSize bits of the integer constant.
EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
|
||||
} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
|
||||
// FP constants contribute their raw bit pattern; anything that is not f32
// is converted through double.
const APFloat &apf = CN->getValueAPF();
|
||||
EltBits = (CN->getValueType(0) == MVT::f32
|
||||
? FloatToBits(apf.convertToFloat())
|
||||
: DoubleToBits(apf.convertToDouble()));
|
||||
} else {
|
||||
// Nonconstant element.
|
||||
return true;
|
||||
}
|
||||
|
||||
VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
|
||||
}
|
||||
|
||||
//printf("%llx %llx %llx %llx\n",
|
||||
// VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
|
||||
return false;
|
||||
}
|
||||
|
||||
/// If this is a splat (repetition) of a value across the whole vector, report
|
||||
/// the splat value and its size. For example, "0x01010101010101..." is a
|
||||
/// splat of 0x01, 0x0101, and 0x01010101; with MinSplatBits < 16 we return
|
||||
/// SplatBits = 0x01 and SplatSize = 1 byte.
///
/// The reported size is selected by MinSplatBits, not searched for:
///   MinSplatBits >= 64       -> SplatSize 8
///   32 <= MinSplatBits < 64  -> SplatSize 4
///   16 <= MinSplatBits < 32  -> SplatSize 2
///   MinSplatBits < 16        -> SplatSize 1
/// and true is returned only if the two halves match (ignoring undefs) at
/// every level from 64 bits down to the selected size.
///
/// NOTE(review): because the equality tests have no else branches, a mismatch
/// at a level finer than 64 bits falls through to "return false" instead of
/// reporting the next coarser splat. E.g. with MinSplatBits == 16, a vector
/// that only repeats every 32 bits is rejected rather than reported as a
/// 4-byte splat.
///
/// \param Bits128       Constant bits as two 64-bit words (assumed to be zero
///                      at undef positions).
/// \param Undef128      Undef mask, same layout as Bits128.
/// \param MinSplatBits  Selects the splat size to test for, see table above.
/// \param SplatBits     Out: the splat value, written only when returning true.
/// \param SplatUndef    Out: undef mask of the splat value, written only when
///                      returning true.
/// \param SplatSize     Out: splat size in bytes, written only when returning
///                      true.
|
||||
static bool isConstantSplat(const uint64_t Bits128[2],
|
||||
const uint64_t Undef128[2],
|
||||
int MinSplatBits,
|
||||
uint64_t &SplatBits, uint64_t &SplatUndef,
|
||||
int &SplatSize) {
|
||||
// Don't let undefs prevent splats from matching. See if the top 64-bits are
|
||||
// the same as the lower 64-bits, ignoring undefs.
// Each narrower value is the OR of the two halves of the wider one; the
// matching undef mask is the AND, so a bit stays undef only if it is undef
// in both halves.
|
||||
uint64_t Bits64 = Bits128[0] | Bits128[1];
|
||||
uint64_t Undef64 = Undef128[0] & Undef128[1];
|
||||
uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
|
||||
uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
|
||||
uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
|
||||
uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
|
||||
|
||||
if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
|
||||
if (MinSplatBits < 64) {
|
||||
|
||||
// Check that the top 32-bits are the same as the lower 32-bits, ignoring
|
||||
// undefs.
|
||||
if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
|
||||
if (MinSplatBits < 32) {
|
||||
|
||||
// Check that the top 16-bits are the same as the lower 16-bits,
|
||||
// ignoring undefs. (On mismatch we fall through to "return false".)
|
||||
if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
|
||||
if (MinSplatBits < 16) {
|
||||
// Check that the top 8-bits are the same as the lower 8-bits,
|
||||
// ignoring undefs. (On mismatch we fall through to "return false".)
|
||||
if ((Bits16 & (uint16_t(~Undef16) >> 8))
|
||||
== ((Bits16 >> 8) & ~Undef16)) {
|
||||
// The bytes match: we have an 8-bit splat.
|
||||
SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
|
||||
SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
|
||||
SplatSize = 1;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
// 16 <= MinSplatBits < 32: report the 16-bit splat.
SplatBits = Bits16;
|
||||
SplatUndef = Undef16;
|
||||
SplatSize = 2;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// 32 <= MinSplatBits < 64: report the 32-bit splat.
SplatBits = Bits32;
|
||||
SplatUndef = Undef32;
|
||||
SplatSize = 4;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// MinSplatBits >= 64: report the 64-bit splat, taken from word 0 as-is.
SplatBits = Bits128[0];
|
||||
SplatUndef = Undef128[0];
|
||||
SplatSize = 8;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false; // Can't be a splat if two pieces don't match.
|
||||
}
|
||||
|
||||
//! Lower a BUILD_VECTOR instruction creatively:
|
||||
SDValue
|
||||
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
MVT VT = Op.getValueType();
|
||||
MVT EltVT = VT.getVectorElementType();
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
// If this is a vector of constants or undefs, get the bits. A bit in
|
||||
// UndefBits is set if the corresponding element of the vector is an
|
||||
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
|
||||
// zero.
|
||||
uint64_t VectorBits[2];
|
||||
uint64_t UndefBits[2];
|
||||
uint64_t SplatBits, SplatUndef;
|
||||
int SplatSize;
|
||||
if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
|
||||
|| !isConstantSplat(VectorBits, UndefBits,
|
||||
VT.getVectorElementType().getSizeInBits(),
|
||||
SplatBits, SplatUndef, SplatSize))
|
||||
return SDValue(); // Not a constant vector, not a splat.
|
||||
BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
|
||||
assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
|
||||
unsigned minSplatBits = EltVT.getSizeInBits();
|
||||
|
||||
if (minSplatBits < 16)
|
||||
minSplatBits = 16;
|
||||
|
||||
APInt APSplatBits, APSplatUndef;
|
||||
unsigned SplatBitSize;
|
||||
bool HasAnyUndefs;
|
||||
|
||||
if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
|
||||
HasAnyUndefs, minSplatBits)
|
||||
|| minSplatBits < SplatBitSize)
|
||||
return SDValue(); // Wasn't a constant vector or splat exceeded min
|
||||
|
||||
uint64_t SplatBits = APSplatBits.getZExtValue();
|
||||
unsigned SplatSize = SplatBitSize / 8;
|
||||
|
||||
switch (VT.getSimpleVT()) {
|
||||
default:
|
||||
@ -1620,8 +1519,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
|
||||
SDValue T = DAG.getConstant(Value32, MVT::i32);
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl,
|
||||
MVT::v4i32, T, T, T, T));
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T, T, T, T));
|
||||
break;
|
||||
}
|
||||
case MVT::v2f64: {
|
||||
@ -1636,44 +1534,41 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
}
|
||||
case MVT::v16i8: {
|
||||
// 8-bit constants have to be expanded to 16-bits
|
||||
unsigned short Value16 = SplatBits | (SplatBits << 8);
|
||||
SDValue Ops[8];
|
||||
for (int i = 0; i < 8; ++i)
|
||||
Ops[i] = DAG.getConstant(Value16, MVT::i16);
|
||||
unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
|
||||
Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, Ops, 8));
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
|
||||
}
|
||||
case MVT::v8i16: {
|
||||
unsigned short Value16;
|
||||
if (SplatSize == 2)
|
||||
Value16 = (unsigned short) (SplatBits & 0xffff);
|
||||
else
|
||||
Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
|
||||
SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
|
||||
SDValue Ops[8];
|
||||
for (int i = 0; i < 8; ++i) Ops[i] = T;
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops, 8);
|
||||
unsigned short Value16 = SplatBits;
|
||||
SDValue T = DAG.getConstant(Value16, EltVT);
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
|
||||
Ops.assign(8, T);
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
|
||||
}
|
||||
case MVT::v4i32: {
|
||||
unsigned int Value = SplatBits;
|
||||
SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
|
||||
SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
|
||||
}
|
||||
case MVT::v2i32: {
|
||||
unsigned int Value = SplatBits;
|
||||
SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
|
||||
SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
|
||||
}
|
||||
case MVT::v2i64: {
|
||||
return SPU::LowerSplat_v2i64(VT, DAG, SplatBits, dl);
|
||||
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/*!
|
||||
*/
|
||||
SDValue
|
||||
SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
|
||||
SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
|
||||
DebugLoc dl) {
|
||||
uint32_t upper = uint32_t(SplatVal >> 32);
|
||||
uint32_t lower = uint32_t(SplatVal);
|
||||
@ -1685,10 +1580,6 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
Val, Val, Val, Val));
|
||||
} else {
|
||||
SDValue LO32;
|
||||
SDValue HI32;
|
||||
SmallVector<SDValue, 16> ShufBytes;
|
||||
SDValue Result;
|
||||
bool upper_special, lower_special;
|
||||
|
||||
// NOTE: This code creates common-case shuffle masks that can be easily
|
||||
@ -1699,6 +1590,18 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
|
||||
upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
|
||||
lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
|
||||
|
||||
// Both upper and lower are special, lower to a constant pool load:
|
||||
if (lower_special && upper_special) {
|
||||
SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
|
||||
SplatValCN, SplatValCN);
|
||||
}
|
||||
|
||||
SDValue LO32;
|
||||
SDValue HI32;
|
||||
SmallVector<SDValue, 16> ShufBytes;
|
||||
SDValue Result;
|
||||
|
||||
// Create lower vector if not a special pattern
|
||||
if (!lower_special) {
|
||||
SDValue LO32C = DAG.getConstant(lower, MVT::i32);
|
||||
@ -1721,13 +1624,6 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
|
||||
LO32 = HI32;
|
||||
if (upper_special)
|
||||
HI32 = LO32;
|
||||
if (lower_special && upper_special) {
|
||||
// Unhappy situation... both upper and lower are special, so punt with
|
||||
// a target constant:
|
||||
SDValue Zero = DAG.getConstant(0, MVT::i32);
|
||||
HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Zero, Zero,
|
||||
Zero, Zero);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
uint64_t val = 0;
|
||||
@ -2022,9 +1918,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
|
||||
ShufMask[i] = DAG.getConstant(bits, MVT::i32);
|
||||
}
|
||||
|
||||
SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
&ShufMask[0],
|
||||
sizeof(ShufMask) / sizeof(ShufMask[0]));
|
||||
SDValue ShufMaskVec =
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
&ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
|
||||
|
||||
retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
|
||||
DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
|
||||
@ -2067,21 +1963,21 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
|
||||
/*NOTREACHED*/
|
||||
case MVT::i8: {
|
||||
SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
|
||||
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
|
||||
factor, factor);
|
||||
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
factor, factor, factor, factor);
|
||||
break;
|
||||
}
|
||||
case MVT::i16: {
|
||||
SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
|
||||
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
|
||||
factor, factor);
|
||||
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
factor, factor, factor, factor);
|
||||
break;
|
||||
}
|
||||
case MVT::i32:
|
||||
case MVT::f32: {
|
||||
SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
|
||||
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
|
||||
factor, factor);
|
||||
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
factor, factor, factor, factor);
|
||||
break;
|
||||
}
|
||||
case MVT::i64:
|
||||
@ -2164,71 +2060,65 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
|
||||
case ISD::ROTR:
|
||||
case ISD::ROTL: {
|
||||
SDValue N1 = Op.getOperand(1);
|
||||
unsigned N1Opc;
|
||||
N0 = (N0.getOpcode() != ISD::Constant
|
||||
? DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0)
|
||||
: DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
|
||||
MVT::i16));
|
||||
N1Opc = N1.getValueType().bitsLT(ShiftVT)
|
||||
MVT N1VT = N1.getValueType();
|
||||
|
||||
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
|
||||
if (!N1VT.bitsEq(ShiftVT)) {
|
||||
unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
|
||||
? ISD::ZERO_EXTEND
|
||||
: ISD::TRUNCATE;
|
||||
N1 = (N1.getOpcode() != ISD::Constant
|
||||
? DAG.getNode(N1Opc, dl, ShiftVT, N1)
|
||||
: DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
|
||||
TLI.getShiftAmountTy()));
|
||||
N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
|
||||
}
|
||||
|
||||
// Replicate lower 8-bits into upper 8:
|
||||
SDValue ExpandArg =
|
||||
DAG.getNode(ISD::OR, dl, MVT::i16, N0,
|
||||
DAG.getNode(ISD::SHL, dl, MVT::i16,
|
||||
N0, DAG.getConstant(8, MVT::i32)));
|
||||
|
||||
// Truncate back down to i8
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
|
||||
DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
|
||||
}
|
||||
case ISD::SRL:
|
||||
case ISD::SHL: {
|
||||
SDValue N1 = Op.getOperand(1);
|
||||
unsigned N1Opc;
|
||||
N0 = (N0.getOpcode() != ISD::Constant
|
||||
? DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0)
|
||||
: DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
|
||||
MVT::i32));
|
||||
N1Opc = N1.getValueType().bitsLT(ShiftVT)
|
||||
? ISD::ZERO_EXTEND
|
||||
: ISD::TRUNCATE;
|
||||
N1 = (N1.getOpcode() != ISD::Constant
|
||||
? DAG.getNode(N1Opc, dl, ShiftVT, N1)
|
||||
: DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
|
||||
MVT N1VT = N1.getValueType();
|
||||
|
||||
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
|
||||
if (!N1VT.bitsEq(ShiftVT)) {
|
||||
unsigned N1Opc = ISD::ZERO_EXTEND;
|
||||
|
||||
if (N1.getValueType().bitsGT(ShiftVT))
|
||||
N1Opc = ISD::TRUNCATE;
|
||||
|
||||
N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
|
||||
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
|
||||
}
|
||||
case ISD::SRA: {
|
||||
SDValue N1 = Op.getOperand(1);
|
||||
unsigned N1Opc;
|
||||
N0 = (N0.getOpcode() != ISD::Constant
|
||||
? DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0)
|
||||
: DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
|
||||
MVT::i16));
|
||||
N1Opc = N1.getValueType().bitsLT(ShiftVT)
|
||||
? ISD::SIGN_EXTEND
|
||||
: ISD::TRUNCATE;
|
||||
N1 = (N1.getOpcode() != ISD::Constant
|
||||
? DAG.getNode(N1Opc, dl, ShiftVT, N1)
|
||||
: DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
|
||||
ShiftVT));
|
||||
MVT N1VT = N1.getValueType();
|
||||
|
||||
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
|
||||
if (!N1VT.bitsEq(ShiftVT)) {
|
||||
unsigned N1Opc = ISD::SIGN_EXTEND;
|
||||
|
||||
if (N1VT.bitsGT(ShiftVT))
|
||||
N1Opc = ISD::TRUNCATE;
|
||||
N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
|
||||
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
|
||||
}
|
||||
case ISD::MUL: {
|
||||
SDValue N1 = Op.getOperand(1);
|
||||
unsigned N1Opc;
|
||||
N0 = (N0.getOpcode() != ISD::Constant
|
||||
? DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0)
|
||||
: DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
|
||||
MVT::i16));
|
||||
N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
|
||||
N1 = (N1.getOpcode() != ISD::Constant
|
||||
? DAG.getNode(N1Opc, dl, MVT::i16, N1)
|
||||
: DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
|
||||
MVT::i16));
|
||||
|
||||
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
|
||||
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
|
||||
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
|
||||
break;
|
||||
@ -2238,36 +2128,6 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
//! Generate the carry-generate shuffle mask.
//!
//! Builds a v4i32 BUILD_VECTOR node from four i32 constants.
//!
//! \param DAG  DAG in which the constants and the BUILD_VECTOR are created.
//! \param dl   Debug location attached to the BUILD_VECTOR node.
//! \return     The v4i32 BUILD_VECTOR holding the mask.
|
||||
SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
|
||||
SmallVector<SDValue, 16 > ShufBytes;
|
||||
|
||||
// Create the shuffle mask for "rotating" the carry up one register slot
|
||||
// once the carry is generated.
// Read as byte selectors, words 0 and 2 name source bytes 4..7 and 12..15,
// i.e. the 32-bit slots one position further along.
// NOTE(review): the 0x80 selector bytes in words 1 and 3 are presumably the
// shufb special value that produces 0x00 bytes -- confirm against the SPU ISA.
|
||||
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
||||
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
&ShufBytes[0], ShufBytes.size());
|
||||
}
|
||||
|
||||
//! Generate the borrow-generate shuffle mask
//!
//! Builds a v4i32 BUILD_VECTOR node from four i32 constants.
//!
//! \param DAG  DAG in which the constants and the BUILD_VECTOR are created.
//! \param dl   Debug location attached to the BUILD_VECTOR node.
//! \return     The v4i32 BUILD_VECTOR holding the mask.
|
||||
SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
|
||||
SmallVector<SDValue, 16 > ShufBytes;
|
||||
|
||||
// Create the shuffle mask for "rotating" the borrow up one register slot
|
||||
// once the borrow is generated.
// Read as byte selectors, words 0 and 2 name source bytes 4..7 and 12..15,
// i.e. the 32-bit slots one position further along.
// NOTE(review): the 0xc0 selector bytes in words 1 and 3 are presumably the
// shufb special value that produces 0xFF bytes -- confirm against the SPU ISA.
|
||||
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
||||
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
&ShufBytes[0], ShufBytes.size());
|
||||
}
|
||||
|
||||
//! Lower byte immediate operations for v16i8 vectors:
|
||||
static SDValue
|
||||
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
|
||||
@ -2291,26 +2151,24 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
|
||||
}
|
||||
|
||||
if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
|
||||
uint64_t VectorBits[2];
|
||||
uint64_t UndefBits[2];
|
||||
uint64_t SplatBits, SplatUndef;
|
||||
int SplatSize;
|
||||
BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
|
||||
assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
|
||||
|
||||
if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
|
||||
&& isConstantSplat(VectorBits, UndefBits,
|
||||
VT.getVectorElementType().getSizeInBits(),
|
||||
SplatBits, SplatUndef, SplatSize)) {
|
||||
SDValue tcVec[16];
|
||||
APInt APSplatBits, APSplatUndef;
|
||||
unsigned SplatBitSize;
|
||||
bool HasAnyUndefs;
|
||||
unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
|
||||
|
||||
if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
|
||||
HasAnyUndefs, minSplatBits)
|
||||
&& minSplatBits <= SplatBitSize) {
|
||||
uint64_t SplatBits = APSplatBits.getZExtValue();
|
||||
SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
|
||||
const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
|
||||
|
||||
// Turn the BUILD_VECTOR into a set of target constants:
|
||||
for (size_t i = 0; i < tcVecSize; ++i)
|
||||
tcVec[i] = tc;
|
||||
|
||||
SmallVector<SDValue, 16> tcVec;
|
||||
tcVec.assign(16, tc);
|
||||
return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
|
||||
tcVec, tcVecSize));
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -2452,7 +2310,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
|
||||
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
|
||||
}
|
||||
|
||||
return Op; // return unmolested, legalized op
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
|
||||
@ -2478,7 +2336,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
|
||||
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
|
||||
}
|
||||
|
||||
return Op; // return unmolested, legalized
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
//! Lower ISD::SETCC
|
||||
|
@ -78,11 +78,9 @@ namespace llvm {
|
||||
|
||||
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
|
||||
const SPUTargetMachine &TM);
|
||||
SDValue LowerSplat_v2i64(MVT OpVT, SelectionDAG &DAG, uint64_t splat,
|
||||
//! Simplify a MVT::v2i64 constant splat to CellSPU-ready form
|
||||
SDValue LowerV2I64Splat(MVT OpVT, SelectionDAG &DAG, uint64_t splat,
|
||||
DebugLoc dl);
|
||||
|
||||
SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl);
|
||||
SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl);
|
||||
}
|
||||
|
||||
class SPUTargetMachine; // forward dec'l.
|
||||
|
@ -60,9 +60,6 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
|
||||
unsigned& SrcSR, unsigned& DstSR) const {
|
||||
SrcSR = DstSR = 0; // No sub-registers.
|
||||
|
||||
// Primarily, ORI and OR are generated by copyRegToReg. But, there are other
|
||||
// cases where we can safely say that what's being done is really a move
|
||||
// (see how PowerPC does this -- it's the model for this code too.)
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
break;
|
||||
@ -167,7 +164,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
|
||||
MI.getOperand(1).isReg() &&
|
||||
"invalid SPU OR<type>_<vec> or LR instruction!");
|
||||
if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
|
||||
sourceReg = MI.getOperand(0).getReg();
|
||||
sourceReg = MI.getOperand(1).getReg();
|
||||
destReg = MI.getOperand(0).getReg();
|
||||
return true;
|
||||
}
|
||||
|
@ -1258,10 +1258,9 @@ multiclass BitwiseAnd
|
||||
def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
|
||||
[/* Intentionally does not match a pattern */]>;
|
||||
|
||||
def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
|
||||
def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
|
||||
[/* Intentionally does not match a pattern */]>;
|
||||
|
||||
// Could use v4i32, but won't for clarity
|
||||
def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||
[/* Intentionally does not match a pattern */]>;
|
||||
|
||||
@ -1288,10 +1287,11 @@ class ANDCInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
|
||||
IntegerOp, pattern>;
|
||||
|
||||
class ANDCVecInst<ValueType vectype>:
|
||||
class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
|
||||
ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||
[(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
|
||||
(vnot (vectype VECREG:$rB))))]>;
|
||||
[(set (vectype VECREG:$rT),
|
||||
(and (vectype VECREG:$rA),
|
||||
(vnot_frag (vectype VECREG:$rB))))]>;
|
||||
|
||||
class ANDCRegInst<RegisterClass rclass>:
|
||||
ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
|
||||
@ -1309,6 +1309,9 @@ multiclass AndComplement
|
||||
def r32: ANDCRegInst<R32C>;
|
||||
def r16: ANDCRegInst<R16C>;
|
||||
def r8: ANDCRegInst<R8C>;
|
||||
|
||||
// Sometimes, the xor pattern has a bitcast constant:
|
||||
def v16i8_conv: ANDCVecInst<v16i8, vnot_conv>;
|
||||
}
|
||||
|
||||
defm ANDC : AndComplement;
|
||||
@ -1480,6 +1483,17 @@ multiclass BitwiseOr
|
||||
def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
|
||||
[/* no pattern */]>;
|
||||
|
||||
// OR instructions used to negate f32 and f64 quantities.
|
||||
|
||||
def fneg32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
|
||||
[/* no pattern */]>;
|
||||
|
||||
def fneg64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
|
||||
[/* no pattern */]>;
|
||||
|
||||
def fnegvec: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||
[/* no pattern, see fneg{32,64} */]>;
|
||||
|
||||
// scalar->vector promotion, prefslot2vec:
|
||||
def v16i8_i8: ORPromoteScalar<R8C>;
|
||||
def v8i16_i16: ORPromoteScalar<R16C>;
|
||||
@ -1783,18 +1797,6 @@ multiclass BitwiseExclusiveOr
|
||||
def r32: XORRegInst<R32C>;
|
||||
def r16: XORRegInst<R16C>;
|
||||
def r8: XORRegInst<R8C>;
|
||||
|
||||
// Special forms for floating point instructions.
|
||||
// fneg and fabs require bitwise logical ops to manipulate the sign bit.
|
||||
|
||||
def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
|
||||
[/* no pattern */]>;
|
||||
|
||||
def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
|
||||
[/* no pattern */]>;
|
||||
|
||||
def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||
[/* no pattern, see fneg{32,64} */]>;
|
||||
}
|
||||
|
||||
defm XOR : BitwiseExclusiveOr;
|
||||
@ -4239,33 +4241,36 @@ def FMSv2f64 :
|
||||
(fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
|
||||
(v2f64 VECREG:$rC)))]>;
|
||||
|
||||
// FNMS: - (a * b - c)
|
||||
// DFNMS: - (a * b - c)
|
||||
// - (a * b) + c => c - (a * b)
|
||||
def FNMSf64 :
|
||||
RRForm<0b01111010110, (outs R64FP:$rT),
|
||||
(ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
|
||||
"dfnms\t$rT, $rA, $rB", DPrecFP,
|
||||
[(set R64FP:$rT, (fsub R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
|
||||
|
||||
class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
|
||||
DPrecFP, pattern>,
|
||||
RegConstraint<"$rC = $rT">,
|
||||
NoEncode<"$rC">;
|
||||
|
||||
def : Pat<(fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)),
|
||||
(FNMSf64 R64FP:$rA, R64FP:$rB, R64FP:$rC)>;
|
||||
class DFNMSVecInst<list<dag> pattern>:
|
||||
DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
|
||||
pattern>;
|
||||
|
||||
def FNMSv2f64 :
|
||||
RRForm<0b01111010110, (outs VECREG:$rT),
|
||||
(ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
|
||||
"dfnms\t$rT, $rA, $rB", DPrecFP,
|
||||
[(set (v2f64 VECREG:$rT),
|
||||
class DFNMSRegInst<list<dag> pattern>:
|
||||
DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
|
||||
pattern>;
|
||||
|
||||
multiclass DFMultiplySubtract
|
||||
{
|
||||
def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
|
||||
(fsub (v2f64 VECREG:$rC),
|
||||
(fmul (v2f64 VECREG:$rA),
|
||||
(v2f64 VECREG:$rB))))]>,
|
||||
RegConstraint<"$rC = $rT">,
|
||||
NoEncode<"$rC">;
|
||||
(v2f64 VECREG:$rB))))]>;
|
||||
|
||||
def : Pat<(fneg (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
|
||||
(v2f64 VECREG:$rC))),
|
||||
(FNMSv2f64 VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
|
||||
def f64 : DFNMSRegInst<[(set R64FP:$rT,
|
||||
(fsub R64FP:$rC,
|
||||
(fmul R64FP:$rA, R64FP:$rB)))]>;
|
||||
}
|
||||
|
||||
defm DFNMS : DFMultiplySubtract;
|
||||
|
||||
// - (a * b + c)
|
||||
// - (a * b) - c
|
||||
@ -4293,35 +4298,21 @@ def FNMAv2f64 :
|
||||
//===----------------------------------------------------------------------==//
|
||||
|
||||
def : Pat<(fneg (v4f32 VECREG:$rA)),
|
||||
(XORfnegvec (v4f32 VECREG:$rA),
|
||||
(ORfnegvec (v4f32 VECREG:$rA),
|
||||
(v4f32 (ILHUv4i32 0x8000)))>;
|
||||
|
||||
def : Pat<(fneg R32FP:$rA),
|
||||
(XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
|
||||
|
||||
def : Pat<(fneg (v2f64 VECREG:$rA)),
|
||||
(XORfnegvec (v2f64 VECREG:$rA),
|
||||
(v2f64 (ANDBIv16i8 (FSMBIv16i8 0x8080), 0x80)))>;
|
||||
|
||||
def : Pat<(fneg R64FP:$rA),
|
||||
(XORfneg64 R64FP:$rA,
|
||||
(ANDBIv16i8 (FSMBIv16i8 0x8080), 0x80))>;
|
||||
(ORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
|
||||
|
||||
// Floating point absolute value
|
||||
// Note: f64 fabs is custom-selected.
|
||||
|
||||
def : Pat<(fabs R32FP:$rA),
|
||||
(ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
|
||||
|
||||
def : Pat<(fabs (v4f32 VECREG:$rA)),
|
||||
(ANDfabsvec (v4f32 VECREG:$rA),
|
||||
(v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
|
||||
|
||||
def : Pat<(fabs R64FP:$rA),
|
||||
(ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>;
|
||||
|
||||
def : Pat<(fabs (v2f64 VECREG:$rA)),
|
||||
(ANDfabsvec (v2f64 VECREG:$rA),
|
||||
(v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
|
||||
(IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Hint for branch instructions:
|
||||
|
@ -8,11 +8,11 @@ target triple = "spu"
|
||||
|
||||
define double @__floatunsidf(i32 %arg_a) nounwind {
|
||||
entry:
|
||||
%in = alloca %struct.fp_number_type, align 8 ; <%struct.fp_number_type*> [#uses=5]
|
||||
%0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1 ; <i32*> [#uses=1]
|
||||
%in = alloca %struct.fp_number_type, align 16
|
||||
%0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1
|
||||
store i32 0, i32* %0, align 4
|
||||
%1 = icmp eq i32 %arg_a, 0 ; <i1> [#uses=1]
|
||||
%2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0 ; <i32*> [#uses=2]
|
||||
%1 = icmp eq i32 %arg_a, 0
|
||||
%2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0
|
||||
br i1 %1, label %bb, label %bb1
|
||||
|
||||
bb: ; preds = %entry
|
||||
@ -26,6 +26,6 @@ bb7: ; preds = %bb5, %bb1, %bb
|
||||
ret double 1.0
|
||||
}
|
||||
|
||||
declare i32 @llvm.ctlz.i32(i32) nounwind readnone
|
||||
; declare i32 @llvm.ctlz.i32(i32) nounwind readnone
|
||||
|
||||
declare double @__pack_d(%struct.fp_number_type*)
|
||||
|
@ -1,9 +1,7 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: grep fsmbi %t1.s | count 3
|
||||
; RUN: grep 32768 %t1.s | count 2
|
||||
; RUN: grep xor %t1.s | count 4
|
||||
; RUN: grep and %t1.s | count 5
|
||||
; RUN: grep andbi %t1.s | count 3
|
||||
; RUN: grep or %t1.s | count 4
|
||||
; RUN: grep and %t1.s | count 2
|
||||
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
Loading…
Reference in New Issue
Block a user