Rip out the X86-specific vector SDIV lowering and make the corresponding DAGCombiner transform work on vectors.

llvm-svn: 207316
2024-11-23 11:13:28 +01:00 · 2014-04-26 13:00:53 +00:00 · 2014-04-26 13:00:53 +00:00 · 89fb3dd5a4
commit 89fb3dd5a4
parent 163df6bc62
2 changed files with 24 additions and 77 deletions
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -1985,27 +1985,39 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
      return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
                         N0, N1);
  }
+
+  const APInt *Divisor = nullptr;
+  if (N1C) {
+    Divisor = &N1C->getAPIntValue();
+  } else if (N1.getValueType().isVector() &&
+             N1->getOpcode() == ISD::BUILD_VECTOR) {
+    BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N->getOperand(1));
+    if (ConstantSDNode *C = BV->getConstantSplatValue())
+      Divisor = &C->getAPIntValue();
+  }
+
  // fold (sdiv X, pow2) -> simple ops after legalize
-  if (N1C && !N1C->isNullValue() &&
-      (N1C->getAPIntValue().isPowerOf2() ||
-       (-N1C->getAPIntValue()).isPowerOf2())) {
+  if (Divisor && !!*Divisor &&
+      (Divisor->isPowerOf2() || (-*Divisor).isPowerOf2())) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2DivCheap())
      return SDValue();

-    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
+    unsigned lg2 = Divisor->countTrailingZeros();

    // Splat the sign bit into the register
-    SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
-                              DAG.getConstant(VT.getSizeInBits()-1,
-                                       getShiftAmountTy(N0.getValueType())));
+    SDValue SGN =
+        DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
+                    DAG.getConstant(VT.getScalarSizeInBits() - 1,
+                                    getShiftAmountTy(N0.getValueType())));
    AddToWorkList(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
-    SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
-                              DAG.getConstant(VT.getSizeInBits() - lg2,
-                                       getShiftAmountTy(SGN.getValueType())));
+    SDValue SRL =
+        DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
+                    DAG.getConstant(VT.getScalarSizeInBits() - lg2,
+                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
    AddToWorkList(SRL.getNode());
    AddToWorkList(ADD.getNode());    // Divide by pow2
@ -2014,12 +2026,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
-    if (N1C->getAPIntValue().isNonNegative())
+    if (Divisor->isNonNegative())
      return SRA;

    AddToWorkList(SRA.getNode());
-    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
-                       DAG.getConstant(0, VT), SRA);
+    return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
  }

  // if integer divide is expensive and we satisfy the requirements, emit an
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -1108,9 +1108,6 @@ void X86TargetLowering::resetOperationActions() {
    setOperationAction(ISD::SHL,               MVT::v4i32, Custom);

    setOperationAction(ISD::SRA,               MVT::v4i32, Custom);
-
-    setOperationAction(ISD::SDIV,              MVT::v8i16, Custom);
-    setOperationAction(ISD::SDIV,              MVT::v4i32, Custom);
  }

  if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
@ -1175,8 +1172,6 @@ void X86TargetLowering::resetOperationActions() {
    setOperationAction(ISD::SRA,               MVT::v16i16, Custom);
    setOperationAction(ISD::SRA,               MVT::v32i8, Custom);

-    setOperationAction(ISD::SDIV,              MVT::v16i16, Custom);
-
    setOperationAction(ISD::SETCC,             MVT::v32i8, Custom);
    setOperationAction(ISD::SETCC,             MVT::v16i16, Custom);
    setOperationAction(ISD::SETCC,             MVT::v8i32, Custom);
@ -1232,8 +1227,6 @@ void X86TargetLowering::resetOperationActions() {
      setOperationAction(ISD::UMUL_LOHI,       MVT::v8i32, Custom);

      setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
-
-      setOperationAction(ISD::SDIV,            MVT::v8i32, Custom);
    } else {
      setOperationAction(ISD::ADD,             MVT::v4i64, Custom);
      setOperationAction(ISD::ADD,             MVT::v8i32, Custom);
@ -1342,7 +1335,6 @@ void X86TargetLowering::resetOperationActions() {
    setOperationAction(ISD::FNEG,               MVT::v8f64, Custom);
    setOperationAction(ISD::FMA,                MVT::v8f64, Legal);
    setOperationAction(ISD::FMA,                MVT::v16f32, Legal);
-    setOperationAction(ISD::SDIV,               MVT::v16i32, Custom);

    setOperationAction(ISD::FP_TO_SINT,         MVT::i32, Legal);
    setOperationAction(ISD::FP_TO_UINT,         MVT::i32, Legal);
@ -13193,61 +13185,6 @@ static SDValue LowerUMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Highs, Lows);
 }

-static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getSimpleValueType();
-  MVT EltTy = VT.getVectorElementType();
-  unsigned NumElts = VT.getVectorNumElements();
-  SDValue N0 = Op.getOperand(0);
-  SDLoc dl(Op);
-
-  // Lower sdiv X, pow2-const.
-  BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
-  if (!C)
-    return SDValue();
-
-  APInt SplatValue, SplatUndef;
-  unsigned SplatBitSize;
-  bool HasAnyUndefs;
-  if (!C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
-                          HasAnyUndefs) ||
-      EltTy.getSizeInBits() < SplatBitSize)
-    return SDValue();
-
-  if ((SplatValue != 0) &&
-      (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
-    unsigned Lg2 = SplatValue.countTrailingZeros();
-    // Splat the sign bit.
-    SmallVector<SDValue, 16> Sz(NumElts,
-                                DAG.getConstant(EltTy.getSizeInBits() - 1,
-                                                EltTy));
-    SDValue SGN = DAG.getNode(ISD::SRA, dl, VT, N0,
-                              DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Sz[0],
-                                          NumElts));
-    // Add (N0 < 0) ? abs2 - 1 : 0;
-    SmallVector<SDValue, 16> Amt(NumElts,
-                                 DAG.getConstant(EltTy.getSizeInBits() - Lg2,
-                                                 EltTy));
-    SDValue SRL = DAG.getNode(ISD::SRL, dl, VT, SGN,
-                              DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Amt[0],
-                                          NumElts));
-    SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
-    SmallVector<SDValue, 16> Lg2Amt(NumElts, DAG.getConstant(Lg2, EltTy));
-    SDValue SRA = DAG.getNode(ISD::SRA, dl, VT, ADD,
-                              DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Lg2Amt[0],
-                                          NumElts));
-
-    // If we're dividing by a positive value, we're done.  Otherwise, we must
-    // negate the result.
-    if (SplatValue.isNonNegative())
-      return SRA;
-
-    SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy));
-    SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts);
-    return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA);
-  }
-  return SDValue();
-}
-
 static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget *Subtarget) {
  MVT VT = Op.getSimpleValueType();
@ -14255,7 +14192,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  case ISD::SUBE:               return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
  case ISD::ADD:                return LowerADD(Op, DAG);
  case ISD::SUB:                return LowerSUB(Op, DAG);
-  case ISD::SDIV:               return LowerSDIV(Op, DAG);
  case ISD::FSINCOS:            return LowerFSINCOS(Op, Subtarget, DAG);
  }
 }