mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[X86] Combine vector anyext + and into a vector zext
Vector zext tends to get legalized into a vector anyext, represented as a vector shuffle with an undef vector + a bitcast, that gets ANDed with a mask that zeroes the undef elements. Combine this into an explicit shuffle with a zero vector instead. This allows shuffle lowering to match it as a zext, instead of matching it as an anyext and emitting an explicit AND. This combine only covers a subset of the cases, but it's a start. Differential Revision: http://reviews.llvm.org/D7666 llvm-svn: 229480
This commit is contained in:
parent
5a941203c6
commit
812c46b9de
@ -24903,24 +24903,118 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
|
||||
}
|
||||
}
|
||||
|
||||
static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget *Subtarget) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
SDLoc DL(N);
|
||||
|
||||
// A vector zext_in_reg may be represented as a shuffle,
|
||||
// feeding into a bitcast (this represents anyext) feeding into
|
||||
// an and with a mask.
|
||||
// We'd like to try to combine that into a shuffle with zero
|
||||
// plus a bitcast, removing the and.
|
||||
if (N0.getOpcode() != ISD::BITCAST ||
|
||||
N0.getOperand(0).getOpcode() != ISD::VECTOR_SHUFFLE)
|
||||
return SDValue();
|
||||
|
||||
// The other side of the AND should be a splat of 2^C, where C
|
||||
// is the number of bits in the source type.
|
||||
if (N1.getOpcode() == ISD::BITCAST)
|
||||
N1 = N1.getOperand(0);
|
||||
if (N1.getOpcode() != ISD::BUILD_VECTOR)
|
||||
return SDValue();
|
||||
BuildVectorSDNode *Vector = cast<BuildVectorSDNode>(N1);
|
||||
|
||||
ShuffleVectorSDNode *Shuffle = cast<ShuffleVectorSDNode>(N0.getOperand(0));
|
||||
EVT SrcType = Shuffle->getValueType(0);
|
||||
|
||||
// We expect a single-source shuffle
|
||||
if (Shuffle->getOperand(1)->getOpcode() != ISD::UNDEF)
|
||||
return SDValue();
|
||||
|
||||
unsigned SrcSize = SrcType.getScalarSizeInBits();
|
||||
|
||||
APInt SplatValue, SplatUndef;
|
||||
unsigned SplatBitSize;
|
||||
bool HasAnyUndefs;
|
||||
if (!Vector->isConstantSplat(SplatValue, SplatUndef,
|
||||
SplatBitSize, HasAnyUndefs))
|
||||
return SDValue();
|
||||
|
||||
unsigned ResSize = N1.getValueType().getScalarSizeInBits();
|
||||
// Make sure the splat matches the mask we expect
|
||||
if (SplatBitSize > ResSize ||
|
||||
(SplatValue + 1).exactLogBase2() != SrcSize)
|
||||
return SDValue();
|
||||
|
||||
// Make sure the input and output size make sense
|
||||
if (SrcSize >= ResSize || ResSize % SrcSize)
|
||||
return SDValue();
|
||||
|
||||
// We expect a shuffle of the form <0, u, u, u, 1, u, u, u...>
|
||||
// The number of u's between each two values depends on the ratio between
|
||||
// the source and dest type.
|
||||
unsigned ZextRatio = ResSize / SrcSize;
|
||||
bool IsZext = true;
|
||||
for (unsigned i = 0; i < SrcType.getVectorNumElements(); ++i) {
|
||||
if (i % ZextRatio) {
|
||||
if (Shuffle->getMaskElt(i) > 0) {
|
||||
// Expected undef
|
||||
IsZext = false;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (Shuffle->getMaskElt(i) != (i / ZextRatio)) {
|
||||
// Expected element number
|
||||
IsZext = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!IsZext)
|
||||
return SDValue();
|
||||
|
||||
// Ok, perform the transformation - replace the shuffle with
|
||||
// a shuffle of the form <0, k, k, k, 1, k, k, k> with zero
|
||||
// (instead of undef) where the k elements come from the zero vector.
|
||||
SmallVector<int, 8> Mask;
|
||||
unsigned NumElems = SrcType.getVectorNumElements();
|
||||
for (unsigned i = 0; i < NumElems; ++i)
|
||||
if (i % ZextRatio)
|
||||
Mask.push_back(NumElems);
|
||||
else
|
||||
Mask.push_back(i / ZextRatio);
|
||||
|
||||
SDValue NewShuffle = DAG.getVectorShuffle(Shuffle->getValueType(0), DL,
|
||||
Shuffle->getOperand(0), DAG.getConstant(0, SrcType), Mask);
|
||||
return DAG.getNode(ISD::BITCAST, DL, N0.getValueType(), NewShuffle);
|
||||
}
|
||||
|
||||
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget *Subtarget) {
|
||||
EVT VT = N->getValueType(0);
|
||||
if (DCI.isBeforeLegalizeOps())
|
||||
return SDValue();
|
||||
|
||||
SDValue Zext = VectorZextCombine(N, DAG, DCI, Subtarget);
|
||||
if (Zext.getNode())
|
||||
return Zext;
|
||||
|
||||
SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
|
||||
if (R.getNode())
|
||||
return R;
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
SDLoc DL(N);
|
||||
|
||||
// Create BEXTR instructions
|
||||
// BEXTR is ((X >> imm) & (2**size-1))
|
||||
if (VT == MVT::i32 || VT == MVT::i64) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
SDLoc DL(N);
|
||||
|
||||
// Check for BEXTR.
|
||||
if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
|
||||
(N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
|
||||
@ -24948,10 +25042,6 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
|
||||
if (VT != MVT::v2i64 && VT != MVT::v4i64)
|
||||
return SDValue();
|
||||
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
SDLoc DL(N);
|
||||
|
||||
// Check LHS for vnot
|
||||
if (N0.getOpcode() == ISD::XOR &&
|
||||
//ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
|
||||
|
@ -7,34 +7,30 @@
|
||||
define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
|
||||
; SSE2-LABEL: zext_8i16_to_8i32:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
|
||||
; SSE2-NEXT: pand %xmm1, %xmm2
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE2-NEXT: # kill
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pand .LCPI0_0(%rip), %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: zext_8i16_to_8i32:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
|
||||
; SSSE3-NEXT: pand %xmm1, %xmm2
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: pand %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSSE3-NEXT: pxor %xmm2, %xmm2
|
||||
; SSSE3-NEXT: # kill
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: pand .LCPI0_0(%rip), %xmm1
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: zext_8i16_to_8i32:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
|
||||
; SSE41-NEXT: pand %xmm1, %xmm2
|
||||
; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
|
||||
; SSE41-NEXT: pand %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSE41-NEXT: pand .LCPI0_0(%rip), %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: zext_8i16_to_8i32:
|
||||
@ -158,34 +154,30 @@ entry:
|
||||
define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
|
||||
; SSE2-LABEL: zext_16i8_to_16i16:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
|
||||
; SSE2-NEXT: pand %xmm1, %xmm2
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE2-NEXT: pand %xmm0, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE2-NEXT: # kill
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE2-NEXT: pand .LCPI3_0(%rip), %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: zext_16i8_to_16i16:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
|
||||
; SSSE3-NEXT: pand %xmm1, %xmm2
|
||||
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSSE3-NEXT: pand %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSSE3-NEXT: pxor %xmm2, %xmm2
|
||||
; SSSE3-NEXT: # kill
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
|
||||
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSSE3-NEXT: pand .LCPI3_0(%rip), %xmm1
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: zext_16i8_to_16i16:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
|
||||
; SSE41-NEXT: pand %xmm1, %xmm2
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE41-NEXT: pand %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: pmovzxbw %xmm1, %xmm0 {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE41-NEXT: pand .LCPI3_0(%rip), %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: zext_16i8_to_16i16:
|
||||
@ -207,26 +199,24 @@ entry:
|
||||
|
||||
define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
|
||||
; SSE2-LABEL: load_zext_16i8_to_16i16:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movdqa (%rdi), %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE2-NEXT: pand %xmm2, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movdqa (%rdi), %xmm1
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE2-NEXT: pand .LCPI4_0(%rip), %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: load_zext_16i8_to_16i16:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movdqa (%rdi), %xmm1
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm0
|
||||
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm1
|
||||
; SSSE3-NEXT: retq
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movdqa (%rdi), %xmm1
|
||||
; SSSE3-NEXT: pxor %xmm2, %xmm2
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
|
||||
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSSE3-NEXT: pand .LCPI4_0(%rip), %xmm1
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: load_zext_16i8_to_16i16:
|
||||
; SSE41: # BB#0: # %entry
|
||||
@ -253,26 +243,24 @@ entry:
|
||||
|
||||
define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
|
||||
; SSE2-LABEL: load_zext_8i16_to_8i32:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movdqa (%rdi), %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pand %xmm2, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movdqa (%rdi), %xmm1
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pand .LCPI5_0(%rip), %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: load_zext_8i16_to_8i32:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movdqa (%rdi), %xmm1
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm0
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm1
|
||||
; SSSE3-NEXT: retq
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movdqa (%rdi), %xmm1
|
||||
; SSSE3-NEXT: pxor %xmm2, %xmm2
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: pand .LCPI5_0(%rip), %xmm1
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: load_zext_8i16_to_8i32:
|
||||
; SSE41: # BB#0: # %entry
|
||||
|
Loading…
Reference in New Issue
Block a user