1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[X86][SSE] pslldq/psrldq shuffle mask decodes

Patch to provide shuffle decodes and asm comments for the sse pslldq/psrldq SSE2/AVX2 byte shift instructions.

Differential Revision: http://reviews.llvm.org/D5598

llvm-svn: 219738
This commit is contained in:
Simon Pilgrim 2014-10-14 22:31:34 +00:00
parent dcd5a0a888
commit 6a516dfd91
7 changed files with 217 additions and 146 deletions

View File

@ -199,6 +199,44 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
break;
case X86::PSLLDQri:
case X86::VPSLLDQri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
if(MI->getOperand(MI->getNumOperands()-1).isImm())
DecodePSLLDQMask(MVT::v16i8,
MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
case X86::VPSLLDQYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
if(MI->getOperand(MI->getNumOperands()-1).isImm())
DecodePSLLDQMask(MVT::v32i8,
MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
case X86::PSRLDQri:
case X86::VPSRLDQri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
if(MI->getOperand(MI->getNumOperands()-1).isImm())
DecodePSRLDQMask(MVT::v16i8,
MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
case X86::VPSRLDQYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
if(MI->getOperand(MI->getNumOperands()-1).isImm())
DecodePSRLDQMask(MVT::v32i8,
MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
case X86::PALIGNR128rr:
case X86::VPALIGNR128rr:
Src1Name = getRegName(MI->getOperand(2).getReg());

View File

@ -79,6 +79,35 @@ void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
}
}
void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned VectorSizeInBits = VT.getSizeInBits();
unsigned NumElts = VectorSizeInBits / 8;
unsigned NumLanes = VectorSizeInBits / 128;
unsigned NumLaneElts = NumElts / NumLanes;
for (unsigned l = 0; l < NumElts; l += NumLaneElts)
for (unsigned i = 0; i < NumLaneElts; ++i) {
int M = SM_SentinelZero;
if (i >= Imm) M = i - Imm + l;
ShuffleMask.push_back(M);
}
}
void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned VectorSizeInBits = VT.getSizeInBits();
unsigned NumElts = VectorSizeInBits / 8;
unsigned NumLanes = VectorSizeInBits / 128;
unsigned NumLaneElts = NumElts / NumLanes;
for (unsigned l = 0; l < NumElts; l += NumLaneElts)
for (unsigned i = 0; i < NumLaneElts; ++i) {
unsigned Base = i + Imm;
int M = Base + l;
if (Base >= NumLaneElts) M = SM_SentinelZero;
ShuffleMask.push_back(M);
}
}
void DecodePALIGNRMask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();

View File

@ -40,6 +40,10 @@ void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);

View File

@ -455,21 +455,21 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: vpslldq
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: vpslldq
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
@ -551,21 +551,21 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: vpsrldq
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: vpsrldq
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone

View File

@ -158,21 +158,21 @@ define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; CHECK: vpslldq
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
; CHECK: vpslldq
%res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
%res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
@ -254,21 +254,21 @@ define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; CHECK: vpsrldq
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
; CHECK: vpsrldq
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone

View File

@ -408,21 +408,21 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: pslldq
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: pslldq
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
@ -504,21 +504,21 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: psrldq
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: psrldq
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone

View File

@ -1397,77 +1397,77 @@ define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq $2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq $2, %xmm0, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq $10, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq $10, %xmm0, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq $14, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq $14, %xmm0, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq $4, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq $4, %xmm0, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 3
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 3
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %shuffle
}