mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[X86][SSE] Reapplied: Improve vector ZERO_EXTEND by combining to ZERO_EXTEND_VECTOR_INREG
Generalise the existing SIGN_EXTEND to SIGN_EXTEND_VECTOR_INREG combine to support zero extension as well and get rid of a lot of unnecessary ANY_EXTEND + mask patterns. Reapplied with a fix for PR26870 (avoid premature use of TargetConstant in ZERO_EXTEND_VECTOR_INREG expansion). Differential Revision: http://reviews.llvm.org/D17691 llvm-svn: 263159
This commit is contained in:
parent
7bad97e2f6
commit
74609b7c8b
@ -273,6 +273,7 @@ namespace {
|
||||
SDValue visitANY_EXTEND(SDNode *N);
|
||||
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
|
||||
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
|
||||
SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
|
||||
SDValue visitTRUNCATE(SDNode *N);
|
||||
SDValue visitBITCAST(SDNode *N);
|
||||
SDValue visitBUILD_PAIR(SDNode *N);
|
||||
@ -1396,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
|
||||
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
|
||||
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
|
||||
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
|
||||
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
|
||||
case ISD::TRUNCATE: return visitTRUNCATE(N);
|
||||
case ISD::BITCAST: return visitBITCAST(N);
|
||||
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
|
||||
@ -5722,7 +5724,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
|
||||
Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
|
||||
Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
|
||||
Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
|
||||
&& "Expected EXTEND dag node in input!");
|
||||
|
||||
// fold (sext c1) -> c1
|
||||
@ -7000,6 +7003,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Combine a ZERO_EXTEND_VECTOR_INREG node: fold away an undef source and
/// constant-fold extensions of build-vector constants.
SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // zext_vector_inreg(undef) -> undef.
  if (Src.getOpcode() == ISD::UNDEF)
    return DAG.getUNDEF(VT);

  // Attempt to fold an extension of a constant (mirrors the handling in
  // visitSIGN_EXTEND_VECTOR_INREG).
  SDNode *Folded =
      tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, LegalOperations);
  if (Folded)
    return SDValue(Folded, 0);

  return SDValue();
}
|
||||
|
||||
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
@ -863,7 +863,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
|
||||
int NumSrcElements = SrcVT.getVectorNumElements();
|
||||
|
||||
// Build up a zero vector to blend into this one.
|
||||
SDValue Zero = DAG.getTargetConstant(0, DL, SrcVT);
|
||||
SDValue Zero = DAG.getConstant(0, DL, SrcVT);
|
||||
|
||||
// Shuffle the incoming lanes into the correct position, and pull all other
|
||||
// lanes from the zero vector.
|
||||
|
@ -28481,13 +28481,15 @@ static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) {
|
||||
return R.getValue(1);
|
||||
}
|
||||
|
||||
/// Convert a SEXT of a vector to a SIGN_EXTEND_VECTOR_INREG, this requires
|
||||
/// the splitting (or concatenating with UNDEFs) of the input to vectors of the
|
||||
/// same size as the target type which then extends the lowest elements.
|
||||
/// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or
|
||||
/// ZERO_EXTEND_VECTOR_INREG, this requires the splitting (or concatenating
|
||||
/// with UNDEFs) of the input to vectors of the same size as the target type
|
||||
/// which then extends the lowest elements.
|
||||
static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget &Subtarget) {
|
||||
if (N->getOpcode() != ISD::SIGN_EXTEND)
|
||||
unsigned Opcode = N->getOpcode();
|
||||
if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND)
|
||||
return SDValue();
|
||||
if (!DCI.isBeforeLegalizeOps())
|
||||
return SDValue();
|
||||
@ -28508,6 +28510,12 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
|
||||
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
|
||||
return SDValue();
|
||||
|
||||
// On AVX2+ targets, if the input/output types are both legal then we will be
|
||||
// able to use SIGN_EXTEND/ZERO_EXTEND directly.
|
||||
if (Subtarget.hasInt256() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
|
||||
DAG.getTargetLoweringInfo().isTypeLegal(InVT))
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(N);
|
||||
|
||||
auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) {
|
||||
@ -28527,20 +28535,22 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
|
||||
EVT ExVT =
|
||||
EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
|
||||
SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
|
||||
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, ExVT, Ex);
|
||||
SDValue SExt = DAG.getNode(Opcode, DL, ExVT, Ex);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
|
||||
DAG.getIntPtrConstant(0, DL));
|
||||
}
|
||||
|
||||
// If target-size is 128-bits (or 256-bits on AVX2 target), then convert to
|
||||
// ISD::SIGN_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::VSEXT.
|
||||
// ISD::*_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::V*EXT.
|
||||
if (VT.is128BitVector() || (VT.is256BitVector() && Subtarget.hasInt256())) {
|
||||
SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
|
||||
return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
|
||||
return Opcode == ISD::SIGN_EXTEND
|
||||
? DAG.getSignExtendVectorInReg(ExOp, DL, VT)
|
||||
: DAG.getZeroExtendVectorInReg(ExOp, DL, VT);
|
||||
}
|
||||
|
||||
// On pre-AVX2 targets, split into 128-bit nodes of
|
||||
// ISD::SIGN_EXTEND_VECTOR_INREG.
|
||||
// ISD::*_EXTEND_VECTOR_INREG.
|
||||
if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128)) {
|
||||
unsigned NumVecs = VT.getSizeInBits() / 128;
|
||||
unsigned NumSubElts = 128 / SVT.getSizeInBits();
|
||||
@ -28552,7 +28562,9 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
|
||||
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
|
||||
DAG.getIntPtrConstant(Offset, DL));
|
||||
SrcVec = ExtendVecSize(DL, SrcVec, 128);
|
||||
SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT);
|
||||
SrcVec = Opcode == ISD::SIGN_EXTEND
|
||||
? DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT)
|
||||
: DAG.getZeroExtendVectorInReg(SrcVec, DL, SubVT);
|
||||
Opnds.push_back(SrcVec);
|
||||
}
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
|
||||
@ -28671,6 +28683,9 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
|
||||
}
|
||||
}
|
||||
|
||||
if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
|
||||
return V;
|
||||
|
||||
if (VT.is256BitVector())
|
||||
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
|
||||
return R;
|
||||
|
37
test/CodeGen/X86/pr26870.ll
Normal file
37
test/CodeGen/X86/pr26870.ll
Normal file
@ -0,0 +1,37 @@
|
||||
; Regression test for PR26870: expanding ZERO_EXTEND_VECTOR_INREG must not
; create a premature TargetConstant zero vector. Compiling this function
; previously crashed llc; the test only checks that compilation succeeds.
; RUN: llc < %s -mtriple=i686-pc-windows-msvc18.0.0 -mcpu=pentium4

define x86_thiscallcc i32* @fn4(i32* %this, i8* dereferenceable(1) %p1) {
entry:
  %DL = getelementptr inbounds i32, i32* %this, i32 0
  %call.i = tail call x86_thiscallcc i64 @fn1(i32* %DL)
  %getTypeAllocSize___trans_tmp_2.i = getelementptr inbounds i32, i32* %this, i32 0
  %0 = load i32, i32* %getTypeAllocSize___trans_tmp_2.i, align 4
  %call.i8 = tail call x86_thiscallcc i64 @fn1(i32* %DL)
  %1 = insertelement <2 x i64> undef, i64 %call.i, i32 0
  %2 = insertelement <2 x i64> %1, i64 %call.i8, i32 1
  %3 = add nsw <2 x i64> %2, <i64 7, i64 7>
  %4 = sdiv <2 x i64> %3, <i64 8, i64 8>
  %5 = add nsw <2 x i64> %4, <i64 1, i64 1>
  %6 = load i32, i32* %getTypeAllocSize___trans_tmp_2.i, align 4
  %7 = insertelement <2 x i32> undef, i32 %0, i32 0
  %8 = insertelement <2 x i32> %7, i32 %6, i32 1
  ; The <2 x i32> -> <2 x i64> zext below is what reaches
  ; ExpandZERO_EXTEND_VECTOR_INREG during vector legalization.
  %9 = zext <2 x i32> %8 to <2 x i64>
  %10 = srem <2 x i64> %5, %9
  %11 = sub <2 x i64> %5, %10
  %12 = trunc <2 x i64> %11 to <2 x i32>
  %13 = extractelement <2 x i32> %12, i32 0
  %14 = extractelement <2 x i32> %12, i32 1
  %cmp = icmp eq i32 %13, %14
  br i1 %cmp, label %if.then, label %cleanup

if.then:
  %call4 = tail call x86_thiscallcc i32* @fn3(i8* nonnull %p1)
  br label %cleanup

cleanup:
  %retval.0 = phi i32* [ %call4, %if.then ], [ undef, %entry ]
  ret i32* %retval.0
}

declare x86_thiscallcc i32* @fn3(i8*)
declare x86_thiscallcc i64 @fn1(i32*)
|
@ -1428,11 +1428,10 @@ define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
|
||||
;
|
||||
; AVX1-LABEL: uitofp_16i8_to_4f32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
@ -1752,18 +1751,16 @@ define <8 x float> @uitofp_8i8_to_8f32(<16 x i8> %a) {
|
||||
;
|
||||
; AVX1-LABEL: uitofp_8i8_to_8f32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: uitofp_8i8_to_8f32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
@ -1786,11 +1783,10 @@ define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) {
|
||||
;
|
||||
; AVX1-LABEL: uitofp_16i8_to_8f32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
|
@ -143,23 +143,20 @@ define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp
|
||||
;
|
||||
; AVX1-LABEL: zext_16i8_to_8i32:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: zext_16i8_to_8i32:
|
||||
; AVX2: # BB#0: # %entry
|
||||
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: zext_16i8_to_8i32:
|
||||
; AVX512: # BB#0: # %entry
|
||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
entry:
|
||||
%B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
@ -225,23 +222,20 @@ define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp
|
||||
;
|
||||
; AVX1-LABEL: zext_16i8_to_4i64:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: zext_16i8_to_4i64:
|
||||
; AVX2: # BB#0: # %entry
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: zext_16i8_to_4i64:
|
||||
; AVX512: # BB#0: # %entry
|
||||
; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
entry:
|
||||
%B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
@ -385,25 +379,20 @@ define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp
|
||||
;
|
||||
; AVX1-LABEL: zext_8i16_to_4i64:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: zext_8i16_to_4i64:
|
||||
; AVX2: # BB#0: # %entry
|
||||
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: zext_8i16_to_4i64:
|
||||
; AVX512: # BB#0: # %entry
|
||||
; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX512-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
|
||||
; AVX512-NEXT: retq
|
||||
entry:
|
||||
%B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
Loading…
Reference in New Issue
Block a user