Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2025-01-31 20:51:52 +01:00)
[VectorCombine] add/use pass-level IRBuilder
This saves creating/destroying a builder every time we perform some transform. The tests show instruction ordering diffs resulting from always inserting at the root instruction now, but those should be benign.
parent f7e3d3a72e
commit 4aa4c0ae7d
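
The shape of the change is easiest to see in isolation: the builder becomes a pass member constructed once from the function's LLVMContext, and run() re-aims it at each candidate instruction before invoking the transforms. Below is a minimal sketch of that pattern, not the pass itself; ExampleCombine and the commented-out tryTransform are placeholder names, not VectorCombine's real interface.

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

namespace {
class ExampleCombine {
  Function &F;
  IRBuilder<> Builder; // pass-level: constructed once, shared by every transform

public:
  ExampleCombine(Function &F) : F(F), Builder(F.getContext()) {}

  bool run() {
    bool MadeChange = false;
    for (BasicBlock &BB : F)
      for (Instruction &I : BB) {
        // Re-aim the shared builder so new IR is inserted before the root
        // instruction I, rather than next to whichever operand a helper visits.
        Builder.SetInsertPoint(&I);
        // MadeChange |= tryTransform(I); // placeholder for the fold* helpers
      }
    return MadeChange;
  }
};
} // namespace

Because every helper shares this one builder, new instructions are now materialized at the root instruction's position, which is what produces the benign ordering diffs in the updated tests below.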
@@ -51,12 +51,13 @@ class VectorCombine {
 public:
   VectorCombine(Function &F, const TargetTransformInfo &TTI,
                 const DominatorTree &DT)
-      : F(F), TTI(TTI), DT(DT) {}
+      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT) {}
 
   bool run();
 
 private:
   Function &F;
+  IRBuilder<> Builder;
   const TargetTransformInfo &TTI;
   const DominatorTree &DT;
 
@@ -64,6 +65,12 @@ private:
                              unsigned Opcode,
                              ExtractElementInst *&ConvertToShuffle,
                              unsigned PreferredExtractIndex);
+  ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
+                                       unsigned NewIndex);
+  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
+                     Instruction &I);
+  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
+                       Instruction &I);
   bool foldExtractExtract(Instruction &I);
   bool foldBitcastShuf(Instruction &I);
   bool scalarizeBinopOrCmp(Instruction &I);
@@ -182,12 +189,13 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
 /// the source vector (shift the scalar element) to a NewIndex for extraction.
 /// Return null if the input can be constant folded, so that we are not creating
 /// unnecessary instructions.
-static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
-                                            unsigned NewIndex) {
+ExtractElementInst *VectorCombine::translateExtract(ExtractElementInst *ExtElt,
+                                                    unsigned NewIndex) {
   // If the extract can be constant-folded, this code is unsimplified. Defer
   // to other passes to handle that.
   Value *X = ExtElt->getVectorOperand();
   Value *C = ExtElt->getIndexOperand();
+  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
   if (isa<Constant>(X))
     return nullptr;
 
@@ -196,11 +204,9 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
   // ShufMask = { 2, undef, undef, undef }
   auto *VecTy = cast<FixedVectorType>(X->getType());
   SmallVector<int, 32> Mask(VecTy->getNumElements(), -1);
-  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
   Mask[NewIndex] = cast<ConstantInt>(C)->getZExtValue();
 
   // extelt X, C --> extelt (shuffle X), NewIndex
-  IRBuilder<> Builder(ExtElt);
   Value *Shuf =
       Builder.CreateShuffleVector(X, UndefValue::get(VecTy), Mask, "shift");
   return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
@@ -209,8 +215,8 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
 /// Try to reduce extract element costs by converting scalar compares to vector
 /// compares followed by extract.
 /// cmp (ext0 V0, C), (ext1 V1, C)
-static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
-                          Instruction &I) {
+void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
+                                  ExtractElementInst *Ext1, Instruction &I) {
   assert(isa<CmpInst>(&I) && "Expected a compare");
   assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
              cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
@@ -218,7 +224,6 @@ static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
 
   // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
   ++NumVecCmp;
-  IRBuilder<> Builder(&I);
   CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
   Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
   Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
@@ -230,8 +235,8 @@ static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
 /// Try to reduce extract element costs by converting scalar binops to vector
 /// binops followed by extract.
 /// bo (ext0 V0, C), (ext1 V1, C)
-static void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
-                            Instruction &I) {
+void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
+                                    ExtractElementInst *Ext1, Instruction &I) {
   assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
   assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
              cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
@@ -239,7 +244,6 @@ static void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
 
   // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
   ++NumVecBO;
-  IRBuilder<> Builder(&I);
   Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
   Value *VecBO =
       Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
@@ -353,7 +357,6 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
   }
   // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
   ++NumShufOfBitcast;
-  IRBuilder<> Builder(&I);
   Value *CastV = Builder.CreateBitCast(V, DestTy);
   Value *Shuf =
       Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy), NewMask);
@@ -454,7 +457,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
   ++NumScalarBO;
 
   // For constant cases, extract the scalar element, this should constant fold.
-  IRBuilder<> Builder(&I);
   if (IsConst0)
     V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
   if (IsConst1)
@@ -498,6 +500,7 @@ bool VectorCombine::run() {
     for (Instruction &I : BB) {
       if (isa<DbgInfoIntrinsic>(I))
         continue;
+      Builder.SetInsertPoint(&I);
       MadeChange |= foldExtractExtract(I);
       MadeChange |= foldBitcastShuf(I);
       MadeChange |= scalarizeBinopOrCmp(I);
@@ -41,11 +41,11 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
 
 define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
+; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y:%.*]]
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SHIFT1]]
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[SHIFT2]]
 ; CHECK-NEXT:    [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
 ; CHECK-NEXT:    ret i32 [[X2Y210]]
@@ -454,12 +454,12 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @PR34724(
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
+; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 2>
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
 ; CHECK-NEXT:    [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
 ; CHECK-NEXT:    [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 2>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
 ; CHECK-NEXT:    [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
 ; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1