X86: try the zero-extend lowering for v4i32 vector shuffles.

lowerV4I32VectorShuffle now calls lowerVectorShuffleAsZeroExtend when the
subtarget has SSE4.1, before it tries the dedicated unpack patterns. A stray
blank line is removed from lowerV8I16VectorShuffle, and two tests
(shuffle_v4i32_0u1u, shuffle_v4i32_0z1z) are added.

NOTE(review): this patch was reconstructed from a copy whose line breaks and
indentation had been collapsed and whose shufflevector mask operands were
missing. The 0z1z mask is derived from its SSE2 CHECK lines; the 0u1u and 3456
masks are derived from the function names. Leading whitespace on context lines
is a best guess -- confirm against the upstream tree (or apply with whitespace
tolerance) before relying on it.

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ccbe0af1547..91ca1d6cc6e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7841,6 +7841,13 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                        getV4X86ShuffleImm8ForMask(Mask, DAG));
   }
 
+  // Whenever we can lower this as a zext, that instruction is strictly faster
+  // than any alternative.
+  if (Subtarget->hasSSE41())
+    if (SDValue ZExt =
+            lowerVectorShuffleAsZeroExtend(DL, MVT::v4i32, V1, V2, Mask, DAG))
+      return ZExt;
+
   // Use dedicated unpack instructions for masks that match their pattern.
   if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
     return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
@@ -8517,7 +8524,6 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                                     OrigMask, DAG))
     return ZExt;
 
-
   auto isV1 = [](int M) { return M >= 0 && M < 8; };
   auto isV2 = [](int M) { return M >= 8; };
 
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 077780416dc..21d74928ebf 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -765,3 +765,47 @@ define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
   ret <4 x i32> %shuffle
 }
+
+define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
+; ALL-LABEL: @shuffle_v4i32_0u1u
+; ALL:       # BB#0:
+; ALL-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,0,1,1]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
+; SSE2-LABEL: @shuffle_v4i32_0z1z
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %[[X:xmm[0-9]+]], %[[X]]
+; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
+; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: @shuffle_v4i32_0z1z
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %[[X:xmm[0-9]+]], %[[X]]
+; SSE3-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
+; SSE3-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: @shuffle_v4i32_0z1z
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %[[X:xmm[0-9]+]], %[[X]]
+; SSSE3-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
+; SSSE3-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: @shuffle_v4i32_0z1z
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxdq %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: @shuffle_v4i32_0z1z
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpmovzxdq %xmm0, %xmm0
+; AVX1-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+  ret <4 x i32> %shuffle
+}