diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6eb190fa10a..9c02303ed11 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11765,6 +11765,10 @@ static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef Mask, return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v16f32, V1); if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3})) return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v16f32, V1); + + if (V2.isUndef()) + return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v16f32, V1, + getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); } if (SDValue Unpck = diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll index 5f01909eb31..1f242fef117 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -107,6 +107,33 @@ define <16 x float> @shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15 ret <16 x float> %shuffle } +define <16 x float> @shuffle_v16f32_00_01_00_01_06_07_06_07_08_09_10_11_12_13_12_13(<16 x float> %a, <16 x float> %b) { +; ALL-LABEL: shuffle_v16f32_00_01_00_01_06_07_06_07_08_09_10_11_12_13_12_13: +; ALL: # BB#0: +; ALL-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,3,3,4,5,6,6] +; ALL-NEXT: retq + %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> + ret <16 x float> %shuffle +} + +define <16 x float> @shuffle_v16f32_00_00_02_00_04_04_06_04_08_08_10_08_12_12_14_12(<16 x float> %a, <16 x float> %b) { +; ALL-LABEL: shuffle_v16f32_00_00_02_00_04_04_06_04_08_08_10_08_12_12_14_12: +; ALL: # BB#0: +; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4,8,8,10,8,12,12,14,12] +; ALL-NEXT: retq + %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> + ret <16 x float> %shuffle +} + +define <16 x float> @shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu_12(<16 x float> %a, <16 x float> %b) { +; ALL-LABEL: shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu_12: +; ALL: # BB#0: +; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,0,3,0,7,4,7,4,11,8,11,8,15,12,15,12] +; ALL-NEXT: retq + %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> + ret <16 x float> %shuffle +} + define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i32> %a, <16 x i32> %b) { ; ALL-LABEL: shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; ALL: # BB#0: