
[X86][SSE] Accept any shuffle mask that is all zeroes

Until we have a better way to extract constants through bitcasted build vectors (and to handle undefs of partial lanes etc.), at least accept build vectors that are all zeroes.

llvm-svn: 274833
Simon Pilgrim 2016-07-08 10:39:12 +00:00
parent 640cb98b90
commit 228451dbc1
3 changed files with 9 additions and 4 deletions

@@ -4788,6 +4788,13 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
   if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
     return false;
 
+  // We can always decode if the buildvector is all zero constants,
+  // but can't use isBuildVectorAllZeros as it might contain UNDEFs.
+  if (llvm::all_of(MaskNode->ops(), X86::isZeroNode)) {
+    RawMask.append(VT.getSizeInBits() / MaskEltSizeInBits, 0);
+    return true;
+  }
+
   // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
   if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
     return false;
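
A note on the check itself: predicates in the isBuildVectorAllZeros family treat UNDEF lanes as zero, which is fine for matching but not for materialising a concrete mask, so the patch insists that every operand is a literal zero constant. Below is a minimal standalone C++ sketch of that distinction, with a hypothetical Lane enum standing in for the SDNode operands (illustration only, not LLVM's actual API):

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for a build-vector operand: a mask lane is either a
// known zero constant, an UNDEF, or some other constant.
enum class Lane { Zero, Undef, Other };

// Mirrors the strict check in the patch: every lane must be a literal zero.
// An isBuildVectorAllZeros-style predicate would also accept Undef lanes,
// which cannot safely be decoded into concrete shuffle mask indices.
static bool allLanesAreZero(const std::vector<Lane> &Ops) {
  return std::all_of(Ops.begin(), Ops.end(),
                     [](Lane L) { return L == Lane::Zero; });
}

// Once the mask is known to be all zeros, decoding is trivial: the raw mask
// is simply NumElts zeros, regardless of the mask element type.
static std::vector<int> decodeAllZeroMask(std::size_t NumElts) {
  return std::vector<int>(NumElts, 0);
}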

@@ -139,8 +139,7 @@ define <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
 define <2 x double> @combine_vpermilvar_2f64_movddup(<2 x double> %a0) {
 ; ALL-LABEL: combine_vpermilvar_2f64_movddup:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
+; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; ALL-NEXT:    retq
   %1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 0, i64 0>)
   ret <2 x double> %1
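
This test shows the new decode in action: the mask operand <i64 0, i64 0> is an all-zero build vector, so the shuffle combiner can now see through it. With a zero control vector, vpermilvar.pd selects the low double for both output lanes, which is exactly the movddup pattern, so the xor plus variable-permute pair collapses into one instruction. A simplified scalar model of the vpermilvar.pd lane selection (vpermilvarPD is a hypothetical helper written for this note, assuming the documented bit-1 selector behaviour):

#include <array>
#include <cstdint>

// Simplified model of vpermilvar.pd on <2 x double>: bit 1 of each 64-bit
// mask element selects which of the two source doubles feeds that lane.
static std::array<double, 2> vpermilvarPD(std::array<double, 2> Src,
                                          std::array<std::int64_t, 2> Mask) {
  std::array<double, 2> Out;
  for (int I = 0; I != 2; ++I)
    Out[I] = Src[(Mask[I] >> 1) & 1];
  return Out;
}

// With Mask = {0, 0} both lanes read Src[0], so Out = {Src[0], Src[0]},
// which is what vmovddup xmm0 = xmm0[0,0] produces directly.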

@@ -393,8 +393,7 @@ define <16 x i32> @combine_permvar_as_vpbroadcastd512(<16 x i32> %x0) {
 define <8 x i64> @combine_permvar_as_vpbroadcastq512(<8 x i64> %x0) {
 ; CHECK-LABEL: combine_permvar_as_vpbroadcastq512:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1
-; CHECK-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    vpbroadcastq %xmm0, %zmm0
 ; CHECK-NEXT:    retq
   %1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1)
   ret <8 x i64> %1
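
The AVX-512 case follows the same reasoning: a zeroinitializer index vector makes the variable permute read element 0 into every output lane, which is a broadcast of the low qword, so vpbroadcastq replaces the vpxord plus vpermq pair. A scalar model of the permvar.di.512 lane selection (permvarQ is a hypothetical helper for this note; the real intrinsic also carries a pass-through operand and a write mask, omitted here):

#include <array>
#include <cstdint>

// Simplified model of vpermq with a full vector of indices: each output
// element is the source element addressed by the low 3 bits of its index.
static std::array<std::uint64_t, 8> permvarQ(std::array<std::uint64_t, 8> Src,
                                             std::array<std::uint64_t, 8> Idx) {
  std::array<std::uint64_t, 8> Out;
  for (int I = 0; I != 8; ++I)
    Out[I] = Src[Idx[I] & 7];
  return Out;
}

// With Idx = all zeros, every Out[I] is Src[0]: a broadcast of the low
// qword, matching vpbroadcastq %xmm0, %zmm0.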