[x86] Inline an SSE4 helper function for INSERT_VECTOR_ELT lowering, no functionality changed.

Separating this into two functions wasn't helping. There was a decent amount of boilerplate duplicated, and some subsequent refactorings here will pull even more common code out.

llvm-svn: 216644
2024-10-24 13:33:37 +02:00 · 2014-08-28 03:52:45 +00:00 · 2014-08-28 03:52:45 +00:00 · fd8c86913c
commit fd8c86913c
parent c3b1f18af4
1 changed files with 45 additions and 58 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -10653,59 +10653,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
  return SDValue();
 }
 /// Lower an element insertion into a 128-bit vector to an X86-specific node
 /// when the insertion index (operand 2) is a compile-time constant.
 ///
 /// Operands of \p Op as read here: operand 0 is passed through as the first
 /// input of the produced node, operand 1 is the value being inserted, and
 /// operand 2 is the index. (Presumably the standard INSERT_VECTOR_ELT
 /// (vector, value, index) layout -- confirm against the caller.)
 ///
 /// \returns
 ///   - an X86ISD::PINSRB / X86ISD::PINSRW node for 8-/16-bit elements,
 ///   - an X86ISD::INSERTPS node for f32 elements,
 ///   - \p Op itself, unchanged, for i32 / i64 elements,
 ///   - an empty SDValue when the vector is not 128 bits wide, the index is not
 ///     a constant, or the element type is none of the above.
 ///     NOTE(review): the empty value presumably tells the caller to try another
 ///     lowering strategy -- verify at the call site.
 static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  SDLoc dl(Op);
  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  SDValue N2 = Op.getOperand(2);
  // Only 128-bit vectors are handled by this helper.
  if (!VT.is128BitVector())
    return SDValue();
  // 8- and 16-bit elements with a constant index: emit PINSRB / PINSRW.
  if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
      isa<ConstantSDNode>(N2)) {
    unsigned Opc;
    if (VT == MVT::v8i16) {
      Opc = X86ISD::PINSRW;
    } else {
      // The only other type expected on this path is v16i8.
      assert(VT == MVT::v16i8);
      Opc = X86ISD::PINSRB;
    }
    // Transform it so it matches pinsr{b,w} which expects a GR32 as its second
    // argument.
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
    // Rebuild the index as a pointer-sized constant when it is not already i32.
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
    return DAG.getNode(Opc, dl, VT, N0, N1, N2);
  }
  // f32 elements with a constant index: emit INSERTPS.
  if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
    // Bits [7:6] of the constant are the source select.  This will always be
    //  zero here.  The DAG Combiner may combine an extract_elt index into these
    //  bits.  For example (insert (extract, 3), 2) could be matched by putting
    //  the '3' into bits [7:6] of X86ISD::INSERTPS.
    // Bits [5:4] of the constant are the destination select.  This is the
    //  value of the incoming immediate.
    // Bits [3:0] of the constant are the zero mask.  The DAG Combiner may
    //   combine either bitwise AND or insert of float 0.0 to set these bits.
    // Shifting the index left by 4 places it in the destination-select field.
    N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
    // Create this as a scalar to vector.
    N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
    return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
  }
  // i32 / i64 elements with a constant index: return the node as-is.
  if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) {
    // PINSR* works with constant index.
    return Op;
  }
  // No SSE4.1-specific lowering applies.
  return SDValue();
 }
 /// Insert one bit to mask vector, like v16i1 or v8i1.
 /// AVX-512 feature.
 SDValue 
@ -10744,7 +10691,7 @@ SDValue
 X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
-  
+
  if (EltVT == MVT::i1)
    return InsertBitToMaskVector(Op, DAG);
@ -10753,8 +10700,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
  SDValue N1 = Op.getOperand(1);
  SDValue N2 = Op.getOperand(2);
-  // If this is a 256-bit vector result, first extract the 128-bit vector,
+  // If the vector is wider than 128 bits, extract the 128-bit subvector, insert
-  // insert the element into the extracted half and then place it back.
+  // into that, and then insert the subvector back into the result.
  if (VT.is256BitVector() || VT.is512BitVector()) {
    if (!isa<ConstantSDNode>(N2))
      return SDValue();
@ -10774,8 +10721,48 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
    return Insert128BitVector(N0, V, IdxVal, DAG, dl);
  }
-  if (Subtarget->hasSSE41())
+  if (Subtarget->hasSSE41()) {
-    return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+    if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
        isa<ConstantSDNode>(N2)) {
      unsigned Opc;
      if (VT == MVT::v8i16) {
        Opc = X86ISD::PINSRW;
      } else {
        assert(VT == MVT::v16i8);
        Opc = X86ISD::PINSRB;
      }
      // Transform it so it match pinsr{b,w} which expects a GR32 as its second
      // argument.
      if (N1.getValueType() != MVT::i32)
        N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
      if (N2.getValueType() != MVT::i32)
        N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
      return DAG.getNode(Opc, dl, VT, N0, N1, N2);
    }
    if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
      // Bits [7:6] of the constant are the source select.  This will always be
      //  zero here.  The DAG Combiner may combine an extract_elt index into
      //  these
      //  bits.  For example (insert (extract, 3), 2) could be matched by
      //  putting
      //  the '3' into bits [7:6] of X86ISD::INSERTPS.
      // Bits [5:4] of the constant are the destination select.  This is the
      //  value of the incoming immediate.
      // Bits [3:0] of the constant are the zero mask.  The DAG Combiner may
      //   combine either bitwise AND or insert of float 0.0 to set these bits.
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
      // Create this as a scalar to vector..
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
      return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
    }
    if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) {
      // PINSR* works with constant index.
      return Op;
    }
  }
  if (EltVT == MVT::i8)
    return SDValue();