[X86] Don't use Lower512IntUnary to split bitcasts with v32i16/v64i8 types on targets without AVX512BW.

LowerIntUnary as its name says has an assert for integer types. But for the bitcast case one side might be an FP type. Rather than making sure the function really works for fp types and renaming it. Just do really basic splitting directly. The LowerIntUnary has the advantage that it can peek through BUILD_VECTOR because every other call is during Lowering. But these calls are during legalization and will be followed by a DAG combine round. Revert some change to LowerVectorIntUnary that were originally made just to make these two calls work even in pure integer cases. This was found purely by compiling the avx512f-builtins.c test from clang so I've copied over the offending function from that. llvm-svn: 329616
2024-11-25 04:02:41 +01:00 · 2018-04-09 20:37:14 +00:00 · 2018-04-09 20:37:14 +00:00 · 97482cfcad
commit 97482cfcad
parent 325264aa91
2 changed files with 44 additions and 7 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -22034,15 +22034,16 @@ static SDValue LowerVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned SizeInBits = VT.getSizeInBits();
+  MVT EltVT = VT.getVectorElementType();
+  SDValue Src = Op.getOperand(0);
+  assert(EltVT == Src.getSimpleValueType().getVectorElementType() &&
+         "Src and Op should have the same element type!");

  // Extract the Lo/Hi vectors
  SDLoc dl(Op);
-  SDValue Src = Op.getOperand(0);
-  unsigned SrcNumElems = Src.getSimpleValueType().getVectorNumElements();
  SDValue Lo = extractSubVector(Src, 0, DAG, dl, SizeInBits / 2);
-  SDValue Hi = extractSubVector(Src, SrcNumElems / 2, DAG, dl, SizeInBits / 2);
+  SDValue Hi = extractSubVector(Src, NumElems / 2, DAG, dl, SizeInBits / 2);

-  MVT EltVT = VT.getVectorElementType();
  MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
                     DAG.getNode(Op.getOpcode(), dl, NewVT, Lo),
@ -23996,8 +23997,17 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
  }

  // Custom splitting for BWI types when AVX512F is available but BWI isn't.
-  if ((SrcVT == MVT::v32i16 || SrcVT == MVT::v64i8) && DstVT.isVector())
-    return Lower512IntUnary(Op, DAG);
+  if ((SrcVT == MVT::v32i16 || SrcVT == MVT::v64i8) && DstVT.isVector() &&
+    DAG.getTargetLoweringInfo().isTypeLegal(DstVT)) {
+    SDLoc dl(Op);
+    SDValue Lo, Hi;
+    std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl);
+    EVT CastVT = MVT::getVectorVT(DstVT.getVectorElementType(),
+                                  DstVT.getVectorNumElements() / 2);
+    Lo = DAG.getBitcast(CastVT, Lo);
+    Hi = DAG.getBitcast(CastVT, Hi);
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi);
+  }

  // Use MOVMSK for vector to scalar conversion to prevent scalarization.
  if ((SrcVT == MVT::v16i1 || SrcVT == MVT::v32i1) && DstVT.isScalarInteger()) {
@ -25391,7 +25401,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
    // Custom splitting for BWI types when AVX512F is available but BWI isn't.
    if ((DstVT == MVT::v32i16 || DstVT == MVT::v64i8) &&
        SrcVT.isVector() && isTypeLegal(SrcVT)) {
-      SDValue Res = Lower512IntUnary(SDValue(N, 0), DAG);
+      SDValue Lo, Hi;
+      std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+      MVT CastVT = (DstVT == MVT::v32i16) ? MVT::v16i16 : MVT::v32i8;
+      Lo = DAG.getBitcast(CastVT, Lo);
+      Hi = DAG.getBitcast(CastVT, Hi);
+      SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi);
      Results.push_back(Res);
      return;
    }
--- a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@ -1960,5 +1960,27 @@ define <8 x i64> @test_mm512_mask_mul_epu32(i16 zeroext %__k, <8 x i64> %__A, <8
  ret <8 x i64> %tmp4
 }

+define <8 x double> @test_mm512_set1_epi8(i8 signext %d) nounwind {
+; X32-LABEL: test_mm512_set1_epi8:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    vmovd %eax, %xmm0
+; X32-NEXT:    vpbroadcastb %xmm0, %ymm0
+; X32-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_set1_epi8:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vmovd %edi, %xmm0
+; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
+; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-NEXT:    retq
+entry:
+  %vecinit.i = insertelement <64 x i8> undef, i8 %d, i32 0
+  %vecinit63.i = shufflevector <64 x i8> %vecinit.i, <64 x i8> undef, <64 x i32> zeroinitializer
+  %0 = bitcast <64 x i8> %vecinit63.i to <8 x double>
+  ret <8 x double> %0
+}
+
 !0 = !{i32 1}