Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2025-02-01 05:01:59 +01:00
[X86][SSE] Lower 128-bit vectors to SIGN/ZERO_EXTEND_VECTOR_IN_REG ops
As described on PR31712, we miss a variety of legalization combines because we lower these extensions to X86ISD::VSEXT/VZEXT even though they have the same functionality as the generic SIGN/ZERO_EXTEND_VECTOR_IN_REG nodes.

This patch makes the 128-bit (SSE41) SIGN/ZERO_EXTEND_VECTOR_IN_REG ops legal, adds the necessary tablegen plumbing, and uses a helper 'getExtendInVec' to decide when to use SIGN/ZERO_EXTEND_VECTOR_IN_REG or VSEXT/VZEXT. A couple of shuffle combines are still missing and will be added in a future patch for review.

Later patches can then support the AVX2 cases as a mixture of SIGN/ZERO_EXTEND and SIGN/ZERO_EXTEND_VECTOR_IN_REG, and finally deal with the AVX512 cases.

Differential Revision: https://reviews.llvm.org/D30549

llvm-svn: 296985
This commit is contained in:
parent 8d9fcfbbee
commit 84ae30b32d
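The core of the change is the new getExtendInVec helper in the X86ISelLowering.cpp hunks below: for 128-bit results it now emits the generic ISD::SIGN/ZERO_EXTEND_VECTOR_INREG nodes (which SSE4.1 can select directly), and only keeps the X86-specific VSEXT/VZEXT nodes for wider results. The following standalone C++ sketch is not part of the patch; the opcode names and bit-width parameters are illustrative stand-ins for the LLVM opcode and type classes used in the real code.

#include <cassert>
#include <cstdio>

// Hypothetical stand-ins for the DAG opcodes involved.
enum Opcode {
  X86_VSEXT, X86_VZEXT,                         // X86-specific extend nodes
  ISD_SEXT_VECTOR_INREG, ISD_ZEXT_VECTOR_INREG  // generic in-reg extends
};

// Models getExtendInVec's choice: 128-bit result and input use the
// generic *_EXTEND_VECTOR_INREG nodes (now legal with SSE4.1); wider
// results keep using the X86-specific nodes for now.
Opcode chooseExtendOpcode(Opcode Requested, unsigned ResultBits,
                          unsigned InputBits) {
  assert(Requested == X86_VSEXT || Requested == X86_VZEXT);
  if (ResultBits == 128 && InputBits == 128)
    return Requested == X86_VSEXT ? ISD_SEXT_VECTOR_INREG
                                  : ISD_ZEXT_VECTOR_INREG;
  return Requested;
}

int main() {
  // v8i16 <- v16i8 low half on SSE4.1: generic node, selected as pmovzxbw.
  std::printf("%d\n", chooseExtendOpcode(X86_VZEXT, 128, 128));
  // v8i32 <- v8i16 on AVX2: still the X86-specific node in this patch.
  std::printf("%d\n", chooseExtendOpcode(X86_VZEXT, 256, 128));
}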
@@ -160,6 +160,10 @@ def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg
  SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>,
  SDTCisVTSmallerThanOp<2, 1>
]>;
def SDTExtInvec : SDTypeProfile<1, 1, [ // sext_invec
  SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>, SDTCisVec<1>,
  SDTCisOpSmallerThanOp<1, 0>, SDTCisSameSizeAs<0,1>
]>;

def SDTSetCC : SDTypeProfile<1, 3, [ // setcc
  SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
@@ -406,6 +410,9 @@ def umax : SDNode<"ISD::UMAX" , SDTIntBinOp,
                                  [SDNPCommutative, SDNPAssociative]>;

def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>;

def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>;
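The new SDTExtInvec profile encodes the type contract of the in-register extends: result and operand are both integer vectors of the same total bit width, with the operand's elements strictly narrower than the result's, so only the low operand elements are actually extended. Below is a rough standalone check of that contract; it is not part of the patch and uses a hypothetical VecType struct in place of the real MVT/EVT classes.

#include <cstdio>

// Hypothetical, simplified vector type: NumElts elements of EltBits each.
struct VecType {
  unsigned NumElts;
  unsigned EltBits;
  unsigned sizeInBits() const { return NumElts * EltBits; }
};

// Mirrors the SDTExtInvec constraints: operand elements smaller than
// result elements, and equal total vector size.
bool isValidExtendInVec(VecType Result, VecType Operand) {
  return Operand.EltBits < Result.EltBits &&
         Operand.sizeInBits() == Result.sizeInBits();
}

int main() {
  VecType V8i16{8, 16}, V16i8{16, 8}, V4i32{4, 32};
  std::printf("v8i16 <- v16i8: %d\n", isValidExtendInVec(V8i16, V16i8)); // 1
  std::printf("v8i16 <- v4i32: %d\n", isValidExtendInVec(V8i16, V4i32)); // 0
}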
@@ -2419,6 +2419,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
    }
    break;
  }
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    EVT InVT = Op.getOperand(0).getValueType();
    unsigned InBits = InVT.getScalarSizeInBits();
    APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
    KnownZero = KnownZero.trunc(InBits);
    KnownOne = KnownOne.trunc(InBits);
    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne,
                     DemandedElts.zext(InVT.getVectorNumElements()),
                     Depth + 1);
    KnownZero = KnownZero.zext(BitWidth);
    KnownOne = KnownOne.zext(BitWidth);
    KnownZero |= NewBits;
    break;
  }
  case ISD::ZERO_EXTEND: {
    EVT InVT = Op.getOperand(0).getValueType();
    unsigned InBits = InVT.getScalarSizeInBits();
@@ -2432,6 +2446,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
    KnownZero |= NewBits;
    break;
  }
  // TODO ISD::SIGN_EXTEND_VECTOR_INREG
  case ISD::SIGN_EXTEND: {
    EVT InVT = Op.getOperand(0).getValueType();
    unsigned InBits = InVT.getScalarSizeInBits();
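The new ZERO_EXTEND_VECTOR_INREG case applies the same known-bits rule as scalar zero extension: whatever is known about the narrow source still holds in the low InBits, and every bit above that becomes known zero. A standalone sketch of that rule with plain 64-bit masks (the widths and values are assumptions for illustration, not the APInt code above):

#include <cstdint>
#include <cstdio>

struct KnownBits {
  uint64_t Zero; // bits known to be 0
  uint64_t One;  // bits known to be 1
};

// Zero-extend known bits from InBits to BitWidth: the source's known
// bits carry over unchanged, and the new high bits are all known zero.
KnownBits zeroExtendKnownBits(KnownBits Src, unsigned InBits,
                              unsigned BitWidth) {
  uint64_t LowMask = (InBits >= 64) ? ~0ull : ((1ull << InBits) - 1);
  uint64_t FullMask = (BitWidth >= 64) ? ~0ull : ((1ull << BitWidth) - 1);
  uint64_t HighBits = ~LowMask & FullMask;
  return {(Src.Zero & LowMask) | HighBits, Src.One & LowMask};
}

int main() {
  // A 32-bit lane with its top 24 bits known zero and bits 4-7 known one,
  // zero-extended into a 64-bit lane.
  KnownBits Src{0xFFFFFF00u, 0x000000F0u};
  KnownBits Ext = zeroExtendKnownBits(Src, 32, 64);
  std::printf("Zero=%#llx One=%#llx\n",
              (unsigned long long)Ext.Zero, (unsigned long long)Ext.One);
}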
@@ -2859,6 +2874,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
    }

  case ISD::SIGN_EXTEND:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
    return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;

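SIGN_EXTEND_VECTOR_INREG can share the SIGN_EXTEND rule for sign-bit counting: extending a lane from InBits to VTBits adds exactly VTBits - InBits copies of the sign bit on top of whatever the source already had. A tiny standalone version of that arithmetic (lane widths assumed for illustration):

#include <cstdio>

// Sign-extending a value with SrcSignBits known sign bits from InBits
// to VTBits yields (VTBits - InBits) additional copies of the sign bit.
unsigned numSignBitsAfterSext(unsigned SrcSignBits, unsigned InBits,
                              unsigned VTBits) {
  return SrcSignBits + (VTBits - InBits);
}

int main() {
  // pmovsxwd-style widening: i16 lanes (at least 1 sign bit) to i32 lanes.
  std::printf("%u\n", numSignBitsAfterSext(1, 16, 32));  // 17
  // If the i16 lanes were themselves sign-extended from i8 (9 sign bits):
  std::printf("%u\n", numSignBitsAfterSext(9, 16, 32));  // 25
}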
@@ -923,6 +923,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

    // SSE41 brings specific instructions for doing vector sign extend even in
    // cases where we don't have SRA.
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Legal);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Legal);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Legal);

    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v2i64, Legal);
    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v4i32, Legal);
    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v8i16, Legal);

    for (MVT VT : MVT::integer_vector_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
@@ -5137,6 +5145,26 @@ static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget,
  return DAG.getBitcast(VT, Vec);
}

static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
                              SelectionDAG &DAG) {
  EVT InVT = In.getValueType();
  assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode");

  if (VT.is128BitVector() && InVT.is128BitVector())
    return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
                                : DAG.getZeroExtendVectorInReg(In, DL, VT);

  // For 256-bit vectors, we only need the lower (128-bit) input half.
  // For 512-bit vectors, we only need the lower input half or quarter.
  if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
    int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
    In = extractSubVector(In, 0, DAG, DL,
                          std::max(128, (int)VT.getSizeInBits() / Scale));
  }

  return DAG.getNode(Opc, DL, VT, In);
}

/// Generate unpacklo/unpackhi shuffle mask.
static void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                                    bool Unary) {
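For results wider than 128 bits, getExtendInVec first shrinks the input to the smallest subvector that still covers every extended element: the result size divided by the per-element scale, clamped to a 128-bit minimum. A standalone illustration of that computation follows; the example vector widths are chosen here for illustration, not taken from the patch.

#include <algorithm>
#include <cstdio>

// Bits of input actually consumed when extending to a ResultBits-wide
// vector, where each element grows by a factor of Scale
// (Scale = result element bits / input element bits).
int requiredInputBits(int ResultBits, int Scale) {
  return std::max(128, ResultBits / Scale);
}

int main() {
  // v16i16 <- v16i8 (256-bit result, 2x scale): low 128 bits of input.
  std::printf("%d\n", requiredInputBits(256, 2));   // 128
  // v8i64 <- v8i16 (512-bit result, 4x scale): low 128 bits again.
  std::printf("%d\n", requiredInputBits(512, 4));   // 128
  // v16i32 <- v16i16 (512-bit result, 2x scale): low 256 bits (half).
  std::printf("%d\n", requiredInputBits(512, 2));   // 256
}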
@@ -5853,6 +5881,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
    }
    return true;
  }
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case X86ISD::VZEXT: {
    // TODO - add support for VPMOVZX with smaller input vector types.
    SDValue Src = N.getOperand(0);
@@ -9215,14 +9244,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
    MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
                                 NumElements / Scale);
    InputV = ShuffleOffset(InputV);

    // For 256-bit vectors, we only need the lower (128-bit) input half.
    // For 512-bit vectors, we only need the lower input half or quarter.
    if (VT.getSizeInBits() > 128)
      InputV = extractSubVector(InputV, 0, DAG, DL,
                                std::max(128, (int)VT.getSizeInBits() / Scale));

    InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV);
    InputV = getExtendInVec(X86ISD::VZEXT, DL, ExtVT, InputV, DAG);
    return DAG.getBitcast(VT, InputV);
  }

@@ -15647,7 +15669,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
    // word to byte only under BWI
    if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) // v16i16 -> v16i8
      return DAG.getNode(X86ISD::VTRUNC, DL, VT,
                         DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
                         getExtendInVec(X86ISD::VSEXT, DL, MVT::v16i32, In, DAG));
    return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
  }

@@ -17625,8 +17647,8 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
  if (VT.is512BitVector() && InVTElt != MVT::i1 &&
      (NumElts == 8 || NumElts == 16 || Subtarget.hasBWI())) {
    if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
      return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
    return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
      return getExtendInVec(In.getOpcode(), dl, VT, In.getOperand(0), DAG);
    return getExtendInVec(X86ISD::VSEXT, dl, VT, In, DAG);
  }

  if (InVTElt != MVT::i1)
@@ -17638,7 +17660,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,

  SDValue V;
  if (Subtarget.hasDQI()) {
    V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In);
    V = getExtendInVec(X86ISD::VSEXT, dl, ExtVT, In, DAG);
    assert(!VT.is512BitVector() && "Unexpected vector type");
  } else {
    SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);
@@ -17690,11 +17712,15 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
  assert((Op.getOpcode() != ISD::ZERO_EXTEND_VECTOR_INREG ||
          InVT == MVT::v64i8) && "Zero extend only for v64i8 input!");

  // SSE41 targets can use the pmovsx* instructions directly.
  unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
                      X86ISD::VSEXT : X86ISD::VZEXT;
  if (Subtarget.hasSSE41())
  // SSE41 targets can use the pmovsx* instructions directly for 128-bit results,
  // so are legal and shouldn't occur here. AVX2/AVX512 pmovsx* instructions still
  // need to be handled here for 256/512-bit results.
  if (Subtarget.hasInt256()) {
    assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
    unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
                        X86ISD::VSEXT : X86ISD::VZEXT;
    return DAG.getNode(ExtOpc, dl, VT, In);
  }

  // We should only get here for sign extend.
  assert(Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
@@ -17779,8 +17805,8 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
  MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
                                VT.getVectorNumElements() / 2);

  OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo);
  OpHi = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpHi);
  OpLo = DAG.getSignExtendVectorInReg(OpLo, dl, HalfVT);
  OpHi = DAG.getSignExtendVectorInReg(OpHi, dl, HalfVT);

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
@@ -18095,7 +18121,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget,
  if (Ext == ISD::SEXTLOAD) {
    // If we have SSE4.1, we can directly emit a VSEXT node.
    if (Subtarget.hasSSE41()) {
      SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
      SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, RegVT, SlicedVec, DAG);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
      return Sext;
    }
@@ -18766,11 +18792,11 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
      ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
    ShAmt = ShAmt.getOperand(0);
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt);
    ShAmt = DAG.getNode(X86ISD::VZEXT, SDLoc(ShAmt), MVT::v2i64, ShAmt);
    ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
  } else if (Subtarget.hasSSE41() &&
             ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
    ShAmt = DAG.getNode(X86ISD::VZEXT, SDLoc(ShAmt), MVT::v2i64, ShAmt);
    ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
  } else {
    SmallVector<SDValue, 4> ShOps = {ShAmt, DAG.getConstant(0, dl, SVT),
                                     DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
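The shift-amount path above builds the count that x86 vector shifts expect: the scalar amount is placed in lane 0 and zero-extended so the whole low 64-bit lane holds it, which is what SCALAR_TO_VECTOR followed by the in-reg zero extend to v2i64 achieves. A rough standalone model of the resulting low lane (not part of the patch; a 16-bit source amount is assumed, lane 1 is simply left zero here, and only lane 0 is read by the shift):

#include <cstdint>
#include <cstdio>

// Models SCALAR_TO_VECTOR + an in-reg zero extend to v2i64 for a shift
// amount: lane 0 is the zero-extended scalar, lane 1 is zero in this model.
struct V2i64 { uint64_t Lane[2]; };

V2i64 buildShiftAmount(uint16_t Amt) {
  return {{static_cast<uint64_t>(Amt), 0}};
}

int main() {
  V2i64 ShAmt = buildShiftAmount(5);
  std::printf("lane0=%llu lane1=%llu\n",
              (unsigned long long)ShAmt.Lane[0],
              (unsigned long long)ShAmt.Lane[1]);
}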
@@ -21061,8 +21087,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
    // Extract the lo parts and sign extend to i16
    SDValue ALo, BLo;
    if (Subtarget.hasSSE41()) {
      ALo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, A);
      BLo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, B);
      ALo = DAG.getSignExtendVectorInReg(A, dl, ExVT);
      BLo = DAG.getSignExtendVectorInReg(B, dl, ExVT);
    } else {
      const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
                              -1, 4, -1, 5, -1, 6, -1, 7};
@@ -21081,8 +21107,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
                              -1, -1, -1, -1, -1, -1, -1, -1};
      AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
      BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
      AHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, AHi);
      BHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, BHi);
      AHi = DAG.getSignExtendVectorInReg(AHi, dl, ExVT);
      BHi = DAG.getSignExtendVectorInReg(BHi, dl, ExVT);
    } else {
      const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
                              -1, 12, -1, 13, -1, 14, -1, 15};
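LowerMUL splits a v16i8 multiply into two v8i16 multiplies because there is no byte-wide multiply instruction: each half of the byte operands is sign-extended to 16 bits (pmovsxbw on SSE4.1, now expressed as a generic sign-extend-in-reg), multiplied, and the low bytes of the products are packed back together. A standalone scalar model of that widening step, with one eight-element half reduced to a plain array for illustration (not the DAG code itself):

#include <cstdint>
#include <cstdio>

// Multiply two groups of 8 signed bytes by widening to 16 bits first,
// then truncating the products back to bytes - the per-half scheme
// LowerMUL uses for v16i8 (sign-extend, multiply, repack low bytes).
void mulBytesViaWiden(const int8_t A[8], const int8_t B[8], int8_t Out[8]) {
  for (int i = 0; i < 8; ++i) {
    int16_t Wide = static_cast<int16_t>(A[i]) * static_cast<int16_t>(B[i]);
    Out[i] = static_cast<int8_t>(Wide); // low byte of the 16-bit product
  }
}

int main() {
  int8_t A[8] = {1, -2, 3, -4, 5, -6, 7, -8};
  int8_t B[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  int8_t Out[8];
  mulBytesViaWiden(A, B, Out);
  for (int i = 0; i < 8; ++i)
    std::printf("%d ", Out[i]);
  std::printf("\n");
}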
@@ -21243,8 +21269,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
                       DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask));
  }

  SDValue ExA = DAG.getNode(ExSSE41, dl, MVT::v16i16, A);
  SDValue ExB = DAG.getNode(ExSSE41, dl, MVT::v16i16, B);
  SDValue ExA = getExtendInVec(ExSSE41, dl, MVT::v16i16, A, DAG);
  SDValue ExB = getExtendInVec(ExSSE41, dl, MVT::v16i16, B, DAG);
  SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB);
  SDValue MulH = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul,
                             DAG.getConstant(8, dl, MVT::v16i16));
@@ -21260,8 +21286,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
  // Extract the lo parts and zero/sign extend to i16.
  SDValue ALo, BLo;
  if (Subtarget.hasSSE41()) {
    ALo = DAG.getNode(ExSSE41, dl, ExVT, A);
    BLo = DAG.getNode(ExSSE41, dl, ExVT, B);
    ALo = getExtendInVec(ExSSE41, dl, ExVT, A, DAG);
    BLo = getExtendInVec(ExSSE41, dl, ExVT, B, DAG);
  } else {
    const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
                            -1, 4, -1, 5, -1, 6, -1, 7};
@@ -21280,8 +21306,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
                            -1, -1, -1, -1, -1, -1, -1, -1};
    AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
    BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
    AHi = DAG.getNode(ExSSE41, dl, ExVT, AHi);
    BHi = DAG.getNode(ExSSE41, dl, ExVT, BHi);
    AHi = getExtendInVec(ExSSE41, dl, ExVT, AHi, DAG);
    BHi = getExtendInVec(ExSSE41, dl, ExVT, BHi, DAG);
  } else {
    const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
                            -1, 12, -1, 13, -1, 14, -1, 15};
@@ -26458,7 +26484,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
  unsigned NumMaskElts = Mask.size();
  unsigned MaskEltSize = MaskVT.getScalarSizeInBits();

  // Match against a VZEXT instruction.
  // Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
  // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
  if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
                         (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
@@ -26477,7 +26503,8 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
        V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
      DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
      DstVT = MVT::getVectorVT(DstVT, NumDstElts);
      Shuffle = X86ISD::VZEXT;
      Shuffle = (SrcVT != MaskVT ? X86ISD::VZEXT
                                 : ISD::ZERO_EXTEND_VECTOR_INREG);
      return true;
    }
  }
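matchUnaryVectorShuffle can now report either the generic ZERO_EXTEND_VECTOR_INREG or X86ISD::VZEXT when a shuffle mask behaves like a zero extension: every scaled position reads consecutive source elements and all the lanes in between must be zero. A standalone checker for that mask shape follows; it is a simplification, assuming a convention where non-negative entries index the source and -1 marks a lane that must be zero (the real code also handles undef lanes and demanded-element subtleties).

#include <cstdio>
#include <vector>

// Returns true if Mask describes a zero extension with the given Scale:
// every Scale-th lane reads consecutive source elements and all other
// lanes must be zero (marked -1 in this simplified convention).
bool isZeroExtendMask(const std::vector<int> &Mask, int Scale) {
  for (int j = 0, e = static_cast<int>(Mask.size()); j != e; ++j) {
    if (j % Scale == 0) {
      if (Mask[j] != j / Scale)
        return false;
    } else if (Mask[j] != -1) {
      return false;
    }
  }
  return true;
}

int main() {
  // v16i8 -> v8i16 zero extension expressed as a byte shuffle.
  std::vector<int> ZExt = {0, -1, 1, -1, 2, -1, 3, -1,
                           4, -1, 5, -1, 6, -1, 7, -1};
  std::vector<int> NotZExt = {0, -1, 2, -1, 4, -1, 6, -1,
                              8, -1, 10, -1, 12, -1, 14, -1};
  std::printf("%d %d\n", isZeroExtendMask(ZExt, 2),
              isZeroExtendMask(NotZExt, 2)); // 1 0
}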
@@ -32169,7 +32196,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
                                  Mld->getBasePtr(), NewMask, WideSrc0,
                                  Mld->getMemoryVT(), Mld->getMemOperand(),
                                  ISD::NON_EXTLOAD);
    SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
    SDValue NewVec = getExtendInVec(X86ISD::VSEXT, dl, VT, WideLd, DAG);
    return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
  }

@ -7481,11 +7481,11 @@ multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
|
||||
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode,
|
||||
SDPatternOperator OpNode, SDPatternOperator InVecNode,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
|
||||
let Predicates = [HasVLX, HasBWI] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v8i16x_info,
|
||||
v16i8x_info, i64mem, LdFrag, OpNode>,
|
||||
v16i8x_info, i64mem, LdFrag, InVecNode>,
|
||||
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v16i16x_info,
|
||||
@ -7500,11 +7500,11 @@ multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
|
||||
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode,
|
||||
SDPatternOperator OpNode, SDPatternOperator InVecNode,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
|
||||
v16i8x_info, i32mem, LdFrag, OpNode>,
|
||||
v16i8x_info, i32mem, LdFrag, InVecNode>,
|
||||
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
|
||||
@ -7519,11 +7519,11 @@ multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
|
||||
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode,
|
||||
SDPatternOperator OpNode, SDPatternOperator InVecNode,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
|
||||
v16i8x_info, i16mem, LdFrag, OpNode>,
|
||||
v16i8x_info, i16mem, LdFrag, InVecNode>,
|
||||
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
|
||||
@ -7538,11 +7538,11 @@ multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
|
||||
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode,
|
||||
SDPatternOperator OpNode, SDPatternOperator InVecNode,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
|
||||
v8i16x_info, i64mem, LdFrag, OpNode>,
|
||||
v8i16x_info, i64mem, LdFrag, InVecNode>,
|
||||
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
|
||||
@ -7557,11 +7557,11 @@ multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
|
||||
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode,
|
||||
SDPatternOperator OpNode, SDPatternOperator InVecNode,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
|
||||
v8i16x_info, i32mem, LdFrag, OpNode>,
|
||||
v8i16x_info, i32mem, LdFrag, InVecNode>,
|
||||
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
|
||||
@ -7576,12 +7576,12 @@ multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
|
||||
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode,
|
||||
SDPatternOperator OpNode, SDPatternOperator InVecNode,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
|
||||
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
|
||||
v4i32x_info, i64mem, LdFrag, OpNode>,
|
||||
v4i32x_info, i64mem, LdFrag, InVecNode>,
|
||||
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
|
||||
@ -7595,19 +7595,19 @@ multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
}
|
||||
|
||||
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">;
|
||||
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">;
|
||||
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">;
|
||||
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">;
|
||||
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">;
|
||||
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">;
|
||||
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z">;
|
||||
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z">;
|
||||
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z">;
|
||||
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z">;
|
||||
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z">;
|
||||
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z">;
|
||||
|
||||
defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">;
|
||||
defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">;
|
||||
defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">;
|
||||
defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">;
|
||||
defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">;
|
||||
defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
|
||||
defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s">;
|
||||
defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s">;
|
||||
defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s">;
|
||||
defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s">;
|
||||
defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s">;
|
||||
defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s">;
|
||||
|
||||
// EXTLOAD patterns, implemented using vpmovz
|
||||
multiclass avx512_ext_lowering<string InstrStr, X86VectorVTInfo To,
|
||||
@ -7650,69 +7650,69 @@ let Predicates = [HasAVX512] in {
|
||||
defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>;
|
||||
}
|
||||
|
||||
multiclass AVX512_pmovx_patterns<string OpcPrefix,
|
||||
SDNode ExtOp, PatFrag ExtLoad16> {
|
||||
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
|
||||
SDNode InVecOp, PatFrag ExtLoad16> {
|
||||
// 128-bit patterns
|
||||
let Predicates = [HasVLX, HasBWI] in {
|
||||
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
|
||||
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
|
||||
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
|
||||
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
|
||||
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
|
||||
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
|
||||
def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
|
||||
def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
|
||||
def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
|
||||
def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
|
||||
def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
|
||||
def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
|
||||
}
|
||||
let Predicates = [HasVLX] in {
|
||||
def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
|
||||
def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
|
||||
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
|
||||
def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
|
||||
def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
|
||||
def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
|
||||
def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
|
||||
def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
|
||||
def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
|
||||
|
||||
def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
|
||||
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
|
||||
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
|
||||
def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
|
||||
def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
|
||||
def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
|
||||
def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
|
||||
def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
|
||||
def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
|
||||
|
||||
def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
|
||||
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
|
||||
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
|
||||
def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
|
||||
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
|
||||
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
|
||||
def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
|
||||
def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
|
||||
def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
|
||||
def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
|
||||
def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
|
||||
def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
|
||||
|
||||
def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
|
||||
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
|
||||
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
|
||||
def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
|
||||
def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
|
||||
def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
|
||||
def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
|
||||
def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
|
||||
def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
|
||||
|
||||
def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
|
||||
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
|
||||
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
|
||||
def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
|
||||
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
|
||||
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
|
||||
def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
|
||||
def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
|
||||
def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
|
||||
def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
|
||||
def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
|
||||
def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
|
||||
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
|
||||
}
|
||||
// 256-bit patterns
|
||||
@ -7791,8 +7791,8 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix,
|
||||
}
|
||||
}
|
||||
|
||||
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, extloadi32i16>;
|
||||
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, loadi16_anyext>;
|
||||
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
|
||||
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GATHER - SCATTER Operations
|
||||
|
@@ -5963,12 +5963,12 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
  }
}

defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
defm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec, extloadi32i16>;
defm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec, loadi16_anyext>;

let Predicates = [UseSSE41] in {
  defm : SS41I_pmovx_patterns<"PMOVSX", "s", X86vsext, extloadi32i16>;
  defm : SS41I_pmovx_patterns<"PMOVZX", "z", X86vzext, loadi16_anyext>;
  defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec, extloadi32i16>;
  defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec, loadi16_anyext>;
}

//===----------------------------------------------------------------------===//
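The SSE4.1 patterns now key the 128-bit pmovsx/pmovzx forms off the generic sext_invec/zext_invec nodes instead of X86vsext/X86vzext. Functionally the instruction is unchanged: pmovsxbw, for example, reads eight bytes from the low half of its source (or from memory) and sign-extends each into a 16-bit lane. A standalone scalar model of that behaviour, with a plain array standing in for the xmm register (not part of the patch):

#include <cstdint>
#include <cstdio>

// Models the 128-bit pmovsxbw form: take 8 signed bytes and sign-extend
// each one into a 16-bit result lane.
void pmovsxbwModel(const int8_t Src[8], int16_t Dst[8]) {
  for (int i = 0; i < 8; ++i)
    Dst[i] = Src[i]; // implicit sign extension i8 -> i16
}

int main() {
  int8_t Src[8] = {0, 127, -128, -1, 5, -5, 64, -64};
  int16_t Dst[8];
  pmovsxbwModel(Src, Dst);
  for (int i = 0; i < 8; ++i)
    std::printf("%d ", Dst[i]);
  std::printf("\n");
}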
@ -27,8 +27,6 @@ define void @complex_inreg_work(<2 x float> %a, <2 x float> %b) {
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: movaps %xmm0, %xmm2
|
||||
; X32-NEXT: cmpordps %xmm0, %xmm0
|
||||
; X32-NEXT: pmovsxdq %xmm0, %xmm0
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X32-NEXT: pslld $31, %xmm0
|
||||
; X32-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; X32-NEXT: extractps $1, %xmm1, (%eax)
|
||||
@ -39,8 +37,6 @@ define void @complex_inreg_work(<2 x float> %a, <2 x float> %b) {
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movaps %xmm0, %xmm2
|
||||
; X64-NEXT: cmpordps %xmm0, %xmm0
|
||||
; X64-NEXT: pmovsxdq %xmm0, %xmm0
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X64-NEXT: pslld $31, %xmm0
|
||||
; X64-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; X64-NEXT: movlps %xmm1, (%rax)
|
||||
@ -82,8 +78,6 @@ define void @full_test() {
|
||||
; X32-NEXT: cvtdq2ps %xmm0, %xmm1
|
||||
; X32-NEXT: xorps %xmm0, %xmm0
|
||||
; X32-NEXT: cmpltps %xmm2, %xmm0
|
||||
; X32-NEXT: pmovsxdq %xmm0, %xmm0
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X32-NEXT: pslld $31, %xmm0
|
||||
; X32-NEXT: movaps {{.*#+}} xmm3 = <1,1,u,u>
|
||||
; X32-NEXT: addps %xmm1, %xmm3
|
||||
|
@ -9,8 +9,6 @@ define void @cmp_2_floats(<2 x float> %a, <2 x float> %b) {
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: movaps %xmm0, %xmm2
|
||||
; CHECK-NEXT: cmpordps %xmm0, %xmm0
|
||||
; CHECK-NEXT: pmovsxdq %xmm0, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; CHECK-NEXT: pslld $31, %xmm0
|
||||
; CHECK-NEXT: blendvps %xmm0, %xmm2, %xmm1
|
||||
; CHECK-NEXT: movlps %xmm1, (%rax)
|
||||
|
@ -243,11 +243,11 @@ define <8 x i32> @combine_vec_shl_ext_shl1(<8 x i16> %x) {
|
||||
define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
|
||||
; SSE-LABEL: combine_vec_shl_zext_lshr0:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_vec_shl_zext_lshr0:
|
||||
@ -270,15 +270,15 @@ define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
|
||||
; SSE-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE-NEXT: psrlw $4, %xmm0
|
||||
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6],xmm1[7]
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE-NEXT: psrlw $2, %xmm2
|
||||
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2],xmm0[3,4],xmm2[5,6],xmm0[7]
|
||||
; SSE-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSE-NEXT: psrlw $1, %xmm1
|
||||
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
|
||||
; SSE-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE-NEXT: psrlw $2, %xmm1
|
||||
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3,4],xmm1[5,6],xmm0[7]
|
||||
; SSE-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE-NEXT: psrlw $1, %xmm0
|
||||
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pmulld {{.*}}(%rip), %xmm1
|
||||
; SSE-NEXT: retq
|
||||
|
@ -83,15 +83,15 @@ define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
|
||||
; X32-LABEL: knownbits_mask_shuffle_sext:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
|
||||
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: knownbits_mask_shuffle_sext:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X64-NEXT: retq
|
||||
%1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
|
||||
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
@ -103,15 +103,15 @@ define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
|
||||
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
|
||||
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X64-NEXT: retq
|
||||
%1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
|
||||
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
|
@ -1161,15 +1161,15 @@ define <4 x i32> @mul_v4i64_zero_upper(<4 x i32> %val1, <4 x i32> %val2) {
|
||||
;
|
||||
; SSE41-LABEL: mul_v4i64_zero_upper:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: pxor %xmm3, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
|
||||
; SSE41-NEXT: pmuludq %xmm0, %xmm1
|
||||
; SSE41-NEXT: pmuludq %xmm4, %xmm2
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: pmuludq %xmm3, %xmm0
|
||||
; SSE41-NEXT: pmuludq %xmm2, %xmm4
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm4[1,3]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: mul_v4i64_zero_upper:
|
||||
@ -1214,21 +1214,21 @@ define <4 x i32> @mul_v4i64_zero_upper_left(<4 x i32> %val1, <4 x i64> %val2) {
|
||||
;
|
||||
; SSE41-LABEL: mul_v4i64_zero_upper_left:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: pxor %xmm3, %xmm3
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: pmuludq %xmm2, %xmm3
|
||||
; SSE41-NEXT: psrlq $32, %xmm2
|
||||
; SSE41-NEXT: pmuludq %xmm0, %xmm2
|
||||
; SSE41-NEXT: psllq $32, %xmm2
|
||||
; SSE41-NEXT: paddq %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm4, %xmm0
|
||||
; SSE41-NEXT: pmuludq %xmm1, %xmm0
|
||||
; SSE41-NEXT: psrlq $32, %xmm1
|
||||
; SSE41-NEXT: pmuludq %xmm4, %xmm1
|
||||
; SSE41-NEXT: psllq $32, %xmm1
|
||||
; SSE41-NEXT: paddq %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm3, %xmm1
|
||||
; SSE41-NEXT: pmuludq %xmm2, %xmm1
|
||||
; SSE41-NEXT: psrlq $32, %xmm2
|
||||
; SSE41-NEXT: pmuludq %xmm3, %xmm2
|
||||
; SSE41-NEXT: psllq $32, %xmm2
|
||||
; SSE41-NEXT: paddq %xmm1, %xmm2
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -1271,17 +1271,16 @@ define <4 x i32> @mul_v4i64_zero_lower(<4 x i32> %val1, <4 x i64> %val2) {
|
||||
;
|
||||
; SSE41-LABEL: mul_v4i64_zero_lower:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: pxor %xmm4, %xmm4
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
|
||||
; SSE41-NEXT: psrlq $32, %xmm2
|
||||
; SSE41-NEXT: pmuludq %xmm0, %xmm2
|
||||
; SSE41-NEXT: psllq $32, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: psrlq $32, %xmm1
|
||||
; SSE41-NEXT: pmuludq %xmm1, %xmm3
|
||||
; SSE41-NEXT: psllq $32, %xmm3
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm2[1,3]
|
||||
; SSE41-NEXT: movaps %xmm3, %xmm0
|
||||
; SSE41-NEXT: pmuludq %xmm1, %xmm0
|
||||
; SSE41-NEXT: psllq $32, %xmm0
|
||||
; SSE41-NEXT: psrlq $32, %xmm2
|
||||
; SSE41-NEXT: pmuludq %xmm3, %xmm2
|
||||
; SSE41-NEXT: psllq $32, %xmm2
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: mul_v4i64_zero_lower:
|
||||
@ -1331,23 +1330,24 @@ define <8 x i32> @mul_v8i64_zero_upper(<8 x i32> %val1, <8 x i32> %val2) {
|
||||
;
|
||||
; SSE41-LABEL: mul_v8i64_zero_upper:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: pxor %xmm6, %xmm6
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm8 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm6[2],xmm0[3],xmm6[3]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm8 = xmm4[0],zero,xmm4[1],zero
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm7 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero
|
||||
; SSE41-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm6[2],xmm2[3],xmm6[3]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm5 = xmm3[0],zero,xmm3[1],zero
|
||||
; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm6[2],xmm3[3],xmm6[3]
|
||||
; SSE41-NEXT: pmuludq %xmm1, %xmm3
|
||||
; SSE41-NEXT: pmuludq %xmm0, %xmm2
|
||||
; SSE41-NEXT: pmuludq %xmm7, %xmm5
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
|
||||
; SSE41-NEXT: pmuludq %xmm7, %xmm1
|
||||
; SSE41-NEXT: pmuludq %xmm6, %xmm2
|
||||
; SSE41-NEXT: pmuludq %xmm5, %xmm0
|
||||
; SSE41-NEXT: pmuludq %xmm8, %xmm4
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm2[1,3]
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,3],xmm3[1,3]
|
||||
; SSE41-NEXT: movaps %xmm4, %xmm0
|
||||
; SSE41-NEXT: movaps %xmm5, %xmm1
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm4[1,3]
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: mul_v8i64_zero_upper:
|
||||
|
@ -48,10 +48,10 @@ define <8 x float> @foo2_8(<8 x i8> %src) {
|
||||
; CHECK-LABEL: foo2_8:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpand LCPI2_0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
; CHECK-NEXT: retl
|
||||
;
|
||||
|
@ -2054,10 +2054,10 @@ define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
|
||||
;
|
||||
; AVX1-LABEL: uitofp_8i16_to_4f32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; AVX1-NEXT: vzeroupper
|
||||
@ -2484,10 +2484,10 @@ define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) {
|
||||
;
|
||||
; AVX1-LABEL: uitofp_8i16_to_8f32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
@ -3053,8 +3053,8 @@ define <2 x double> @uitofp_load_2i16_to_2f64(<2 x i16> *%a) {
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7]
|
||||
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
@ -3069,8 +3069,8 @@ define <2 x double> @uitofp_load_2i16_to_2f64(<2 x i16> *%a) {
|
||||
; AVX512VLDQ: # BB#0:
|
||||
; AVX512VLDQ-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
|
||||
; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512VLDQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7]
|
||||
; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
%ld = load <2 x i16>, <2 x i16> *%a
|
||||
@ -3108,7 +3108,8 @@ define <2 x double> @uitofp_load_2i8_to_2f64(<2 x i8> *%a) {
|
||||
; AVX512VL-LABEL: uitofp_load_2i8_to_2f64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[u],zero,zero,zero,xmm0[u],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
@ -3123,7 +3124,8 @@ define <2 x double> @uitofp_load_2i8_to_2f64(<2 x i8> *%a) {
|
||||
; AVX512VLDQ-LABEL: uitofp_load_2i8_to_2f64:
|
||||
; AVX512VLDQ: # BB#0:
|
||||
; AVX512VLDQ-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[u],zero,zero,zero,xmm0[u],zero,zero,zero
|
||||
; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512VLDQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
%ld = load <2 x i8>, <2 x i8> *%a
|
||||
|
@ -55,18 +55,18 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
|
||||
;
|
||||
; SSE41-LABEL: zext_16i8_to_16i16:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: zext_16i8_to_16i16:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: zext_16i8_to_16i16:
|
||||
@ -110,25 +110,27 @@ define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
|
||||
;
|
||||
; SSE41-LABEL: zext_32i8_to_32i16:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: pxor %xmm4, %xmm4
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: movdqa %xmm5, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm4, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: zext_32i8_to_32i16:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vmovaps %ymm2, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
@ -515,18 +517,18 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
|
||||
;
|
||||
; SSE41-LABEL: zext_8i16_to_8i32:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: zext_8i16_to_8i32:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: zext_8i16_to_8i32:
|
||||
@ -570,25 +572,27 @@ define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone
|
||||
;
|
||||
; SSE41-LABEL: zext_16i16_to_16i32:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: pxor %xmm4, %xmm4
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
|
||||
; SSE41-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm5, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm4, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: zext_16i16_to_16i32:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vmovaps %ymm2, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
@ -812,18 +816,18 @@ define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp
|
||||
;
|
||||
; SSE41-LABEL: zext_4i32_to_4i64:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: zext_4i32_to_4i64:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: zext_4i32_to_4i64:
|
||||
@ -867,25 +871,27 @@ define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp
|
||||
;
|
||||
; SSE41-LABEL: zext_8i32_to_8i64:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: pxor %xmm4, %xmm4
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero
|
||||
; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: movdqa %xmm5, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm4, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: zext_8i32_to_8i64:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vmovaps %ymm2, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
@ -1523,20 +1529,20 @@ define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
|
||||
;
|
||||
; SSE41-LABEL: zext_8i8_to_8i32:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: zext_8i8_to_8i32:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: zext_8i8_to_8i32:
|
||||
|