
[X86] Simplify the code at the end of lowerShuffleAsBroadcast.

The original code could create a bitcast from f64 to i64 and back
on 32-bit targets. This only worked because getBitcast was able to
fold the casts away and avoid leaving the illegal i64 type.

Now we handle the scalar case directly by broadcasting with the
scalar type as the element type and then bitcasting to the final
VT. This works because we ensure the scalar type is the same size
as the final VT's element type, so no casts to i64 are created.
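
As an illustration (not part of the commit), here is a minimal
standalone C++ sketch of the type math on the new scalar path.
SimpleVT and getBroadcastVT are made-up stand-ins for LLVM's MVT
and MVT::getVectorVT, not the actual API:

#include <cassert>
#include <cstdio>

// Toy model of a SelectionDAG value type: an element type name, its
// width in bits, and an element count (0 denotes a scalar).
struct SimpleVT {
  const char *EltName;
  unsigned EltBits;
  unsigned NumElts;
};

// Mirrors the shape of the new code's
//   MVT::getVectorVT(V.getSimpleValueType(), VT.getVectorNumElements()):
// broadcast in the scalar's own type, with VT's element count.
SimpleVT getBroadcastVT(SimpleVT Scalar, SimpleVT VT) {
  assert(Scalar.NumElts == 0 && "expected a scalar");
  assert(Scalar.EltBits == VT.EltBits && "scalar must match VT element size");
  return {Scalar.EltName, Scalar.EltBits, VT.NumElts};
}

int main() {
  SimpleVT F64 = {"f64", 64, 0};   // scalar being broadcast
  SimpleVT V4I64 = {"i64", 64, 4}; // final shuffle type VT
  SimpleVT BroadcastVT = getBroadcastVT(F64, V4I64);
  // The broadcast happens as v4f64 and a single vector bitcast yields
  // v4i64, so no scalar f64 -> i64 bitcast is ever created.
  printf("broadcast as %ux%s, then bitcast to %ux%s\n", BroadcastVT.NumElts,
         BroadcastVT.EltName, V4I64.NumElts, V4I64.EltName);
  return 0;
}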

For the vector case, we cast V to VT or to a subvector type of VT
and then do the broadcast.
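
A similar sketch for the vector-path type math; the concrete types
(a 128-bit source feeding a v8f32 broadcast) are a hypothetical
example, not taken from the commit:

#include <cstdio>

int main() {
  unsigned VBits = 128;     // V.getValueSizeInBits(): 128-bit source vector
  unsigned NumEltBits = 32; // VT's element size, e.g. f32
  unsigned VTNumElts = 8;   // VT is v8f32, i.e. 256 bits

  // Mirrors the new code: CastVT keeps VT's element type but may have
  // fewer elements than VT when the source is narrower than VT.
  unsigned NumSrcElts = VBits / NumEltBits; // 4, so CastVT is v4f32
  printf("bitcast V to v%uf32, then broadcast to v%uf32\n", NumSrcElts,
         VTNumElts);
  return 0;
}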

I think this all matches what we generated before, just in a more
readable way.
Author: Craig Topper
Date:   2020-03-04 20:38:44 -08:00
Parent: 7d0a2e9be8
Commit: cac4f78ec3


@@ -12995,8 +12995,6 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
             DL, VT, V, BroadcastIdx, Subtarget, DAG))
       return TruncBroadcast;
 
-  MVT BroadcastVT = VT;
-
   // Also check the simpler case, where we can directly reuse the scalar.
   if (!BitCastSrc &&
       ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
@@ -13015,7 +13013,7 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
     // Reduce the vector load and shuffle to a broadcasted scalar load.
     LoadSDNode *Ld = cast<LoadSDNode>(V);
     SDValue BaseAddr = Ld->getOperand(1);
-    EVT SVT = BroadcastVT.getScalarType();
+    EVT SVT = VT.getScalarType();
     unsigned Offset = BroadcastIdx * SVT.getStoreSize();
     assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
     SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
@@ -13024,7 +13022,7 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
     // than MOVDDUP.
     // FIXME: Should we add VBROADCAST_LOAD isel patterns for pre-AVX?
     if (Opcode == X86ISD::VBROADCAST) {
-      SDVTList Tys = DAG.getVTList(BroadcastVT, MVT::Other);
+      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
       SDValue Ops[] = {Ld->getChain(), NewAddr};
       V = DAG.getMemIntrinsicNode(
           X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SVT,
@@ -13068,38 +13066,26 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
     V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
                     DAG.getBitcast(MVT::f64, V));
 
-  // Bitcast back to the same scalar type as BroadcastVT.
-  if (V.getValueType().getScalarType() != BroadcastVT.getScalarType()) {
-    assert(NumEltBits == BroadcastVT.getScalarSizeInBits() &&
-           "Unexpected vector element size");
-    MVT ExtVT;
-    if (V.getValueType().isVector()) {
-      unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
-      ExtVT = MVT::getVectorVT(BroadcastVT.getScalarType(), NumSrcElts);
-    } else {
-      ExtVT = BroadcastVT.getScalarType();
-    }
-    V = DAG.getBitcast(ExtVT, V);
-  }
-
-  // 32-bit targets need to load i64 as a f64 and then bitcast the result.
-  if (!Subtarget.is64Bit() && V.getValueType() == MVT::i64) {
-    V = DAG.getBitcast(MVT::f64, V);
-    unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
-    BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
+  // If this is a scalar, do the broadcast on this type and bitcast.
+  if (!V.getValueType().isVector()) {
+    assert(V.getScalarValueSizeInBits() == NumEltBits &&
+           "Unexpected scalar size");
+    MVT BroadcastVT = MVT::getVectorVT(V.getSimpleValueType(),
+                                       VT.getVectorNumElements());
+    return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
   }
 
   // We only support broadcasting from 128-bit vectors to minimize the
   // number of patterns we need to deal with in isel. So extract down to
   // 128-bits, removing as many bitcasts as possible.
-  if (V.getValueSizeInBits() > 128) {
-    MVT ExtVT = V.getSimpleValueType().getScalarType();
-    ExtVT = MVT::getVectorVT(ExtVT, 128 / ExtVT.getScalarSizeInBits());
+  if (V.getValueSizeInBits() > 128)
     V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
-    V = DAG.getBitcast(ExtVT, V);
-  }
 
-  return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
+  // Otherwise cast V to a vector with the same element type as VT, but
+  // possibly narrower than VT. Then perform the broadcast.
+  unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
+  MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), NumSrcElts);
+  return DAG.getNode(Opcode, DL, VT, DAG.getBitcast(CastVT, V));
 }
 
 // Check for whether we can use INSERTPS to perform the shuffle. We only use