From 50151618a645b2110d32a71c8259de0b61ca093e Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 6 Jun 2016 05:39:10 +0000
Subject: [PATCH] [AVX512] Add PALIGNR shuffle lowering for v32i16 and v16i32.

llvm-svn: 271870
---
 lib/Target/X86/X86ISelLowering.cpp         | 13 +++++++++++++
 test/CodeGen/X86/sad.ll                    | 13 +++----------
 test/CodeGen/X86/vector-shuffle-512-v64.ll |  4 +---
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f861a0c8db0..b29237ae9a5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7679,6 +7679,8 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
 
   // SSSE3 targets can use the palignr instruction.
   if (Subtarget.hasSSSE3()) {
+    assert((!VT.is512BitVector() || Subtarget.hasBWI()) &&
+           "512-bit PALIGNR requires BWI instructions");
     // Cast the inputs to i8 vector of correct length to match PALIGNR.
     MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes);
     Lo = DAG.getBitcast(AlignVT, Lo);
@@ -11760,6 +11762,12 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
           lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
     return Unpck;
 
+  // Try to use byte rotation instructions.
+  if (Subtarget.hasBWI())
+    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+            DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
+      return Rotate;
+
   return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
 }
 
@@ -11775,6 +11783,11 @@ static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
   assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
 
+  // Try to use byte rotation instructions.
+  if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+          DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
+    return Rotate;
+
   return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
 }
 
diff --git a/test/CodeGen/X86/sad.ll b/test/CodeGen/X86/sad.ll
index 47712f0508c..89c3b527ae6 100644
--- a/test/CodeGen/X86/sad.ll
+++ b/test/CodeGen/X86/sad.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
@@ -106,9 +105,7 @@ define i32 @sad_16i8() nounwind {
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    movl $1, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm1
-; AVX512BW-NEXT:    vpermd %zmm0, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpalignr $4, %zmm0, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovd %xmm0, %eax
 ; AVX512BW-NEXT:    retq
@@ -359,9 +356,7 @@ define i32 @sad_32i8() nounwind {
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    movl $1, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm1
-; AVX512BW-NEXT:    vpermd %zmm0, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpalignr $4, %zmm0, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovd %xmm0, %eax
 ; AVX512BW-NEXT:    retq
@@ -846,9 +841,7 @@ define i32 @sad_avx64i8() nounwind {
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    movl $1, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm1
-; AVX512BW-NEXT:    vpermd %zmm0, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpalignr $4, %zmm0, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovd %xmm0, %eax
 ; AVX512BW-NEXT:    retq
diff --git a/test/CodeGen/X86/vector-shuffle-512-v64.ll b/test/CodeGen/X86/vector-shuffle-512-v64.ll
index 36020e3d537..90892df18b3 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v64.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -13,9 +13,7 @@ define <64 x i8> @shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_
 ;
 ; AVX512BW-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    movl $1, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm1
-; AVX512BW-NEXT:    vpermw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpalignr $2, %zmm0, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
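
The sad.ll checks change because the permute that extracts element 1 of a v16i32 (byte offset 4 within the low 128-bit lane) is now matched as a byte rotation: a single vpalignr $4 replaces materializing a vpermd index in a register. As an illustrative sketch of the new path (a hypothetical test in the style of the files above, not part of this commit; the function name and expected asm below are assumptions based on the patch's own check updates), a v32i16 shuffle whose mask rotates every 128-bit lane down by one element should now compile with -mattr=+avx512bw to a single instruction:

; Hypothetical test: each 8 x i16 lane is rotated by one element,
; i.e. a per-128-bit-lane byte rotation by 2 bytes. Expected with
; llc -mtriple=x86_64-unknown-unknown -mattr=+avx512bw:
;   vpalignr $2, %zmm0, %zmm0, %zmm0
define <32 x i16> @rotate_lanes_v32i16(<32 x i16> %a) {
  %r = shufflevector <32 x i16> %a, <32 x i16> undef,
    <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0,
                i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8,
                i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16,
                i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 24>
  ret <32 x i16> %r
}

Because PALIGNR only moves bytes within each 128-bit lane, the rotation must be the same in every lane (hence the lane-uniform mask above); cross-lane shuffles still fall through to the VPERMV lowering, as the code above shows.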