[GVN] Add phi-translate support in scalarpre.

Right now scalarpre doesn't have phi-translate support, so it will miss some simple pre opportunities. Like the following testcase, current scalarpre cannot recognize the last "a * b" is fully redundent because a and b used by the last "a * b" expr are both defined by phis. long a[100], b[100], g1, g2, g3; __attribute__((pure)) long goo(); void foo(long a, long b, long c, long d) { g1 = a * b; if (__builtin_expect(g2 > 3, 0)) { a = c; b = d; g2 = a * b; } g3 = a * b; // fully redundant. } The patch adds phi-translate support in scalarpre. This is only a temporary solution before the newpre based on newgvn is available. Differential Revision: https://reviews.llvm.org/D32252 llvm-svn: 303923
2024-11-23 19:23:23 +01:00 · 2017-05-25 21:49:02 +00:00 · 2017-05-25 21:49:02 +00:00 · 6fec4b3a5f
commit 6fec4b3a5f
parent 6f9f9c2fbe
5 changed files with 271 additions and 26 deletions
--- a/include/llvm/Transforms/Scalar/GVN.h
+++ b/include/llvm/Transforms/Scalar/GVN.h
@ -68,6 +68,24 @@ public:
  class ValueTable {
    DenseMap<Value *, uint32_t> valueNumbering;
    DenseMap<Expression, uint32_t> expressionNumbering;
+
+    // Expressions is the vector of Expression. ExprIdx is the mapping from
+    // value number to the index of Expression in Expressions. We use it
+    // instead of a DenseMap because filling such mapping is faster than
+    // filling a DenseMap and the compile time is a little better.
+    uint32_t nextExprNumber;
+    std::vector<Expression> Expressions;
+    std::vector<uint32_t> ExprIdx;
+    // Value number to PHINode mapping. Used for phi-translate in scalarpre.
+    DenseMap<uint32_t, PHINode *> NumberingPhi;
+    // Cache for phi-translate in scalarpre.
+    typedef DenseMap<std::pair<uint32_t, const BasicBlock *>, uint32_t>
+        PhiTranslateMap;
+    PhiTranslateMap PhiTranslateTable;
+    // Map the block to reversed postorder traversal number. It is used to
+    // find back edge easily.
+    DenseMap<const BasicBlock *, uint32_t> BlockRPONumber;
+
    AliasAnalysis *AA;
    MemoryDependenceResults *MD;
    DominatorTree *DT;
@ -79,6 +97,10 @@ public:
                             Value *LHS, Value *RHS);
    Expression createExtractvalueExpr(ExtractValueInst *EI);
    uint32_t lookupOrAddCall(CallInst *C);
+    uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock,
+                              uint32_t Num, GVN &Gvn);
+    std::pair<uint32_t, bool> assignExpNewValueNum(Expression &exp);
+    bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVN &Gvn);

  public:
    ValueTable();
@ -87,9 +109,12 @@ public:
    ~ValueTable();

    uint32_t lookupOrAdd(Value *V);
-    uint32_t lookup(Value *V) const;
+    uint32_t lookup(Value *V, bool Verify = true) const;
    uint32_t lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Pred,
                            Value *LHS, Value *RHS);
+    uint32_t phiTranslate(const BasicBlock *BB, const BasicBlock *PhiBlock,
+                          uint32_t Num, GVN &Gvn);
+    void assignBlockRPONumber(Function &F);
    bool exists(Value *V) const;
    void add(Value *V, uint32_t num);
    void clear();
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@ -80,9 +80,10 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
 struct llvm::GVN::Expression {
  uint32_t opcode;
  Type *type;
+  bool commutative;
  SmallVector<uint32_t, 4> varargs;

-  Expression(uint32_t o = ~2U) : opcode(o) {}
+  Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {}

  bool operator==(const Expression &other) const {
    if (opcode != other.opcode)
@ -246,6 +247,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
    assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
    if (e.varargs[0] > e.varargs[1])
      std::swap(e.varargs[0], e.varargs[1]);
+    e.commutative = true;
  }

  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
@ -256,6 +258,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
      Predicate = CmpInst::getSwappedPredicate(Predicate);
    }
    e.opcode = (C->getOpcode() << 8) | Predicate;
+    e.commutative = true;
  } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
    for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
         II != IE; ++II)
@ -281,6 +284,7 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
    Predicate = CmpInst::getSwappedPredicate(Predicate);
  }
  e.opcode = (Opcode << 8) | Predicate;
+  e.commutative = true;
  return e;
 }

@ -348,25 +352,25 @@ GVN::ValueTable::~ValueTable() = default;
 /// add - Insert a value into the table with a specified value number.
 void GVN::ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
+  if (PHINode *PN = dyn_cast<PHINode>(V))
+    NumberingPhi[num] = PN;
 }

 uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = createExpr(C);
-    uint32_t &e = expressionNumbering[exp];
-    if (!e) e = nextValueNumber++;
+    uint32_t e = assignExpNewValueNum(exp).first;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = createExpr(C);
-    uint32_t &e = expressionNumbering[exp];
-    if (!e) {
-      e = nextValueNumber++;
-      valueNumbering[C] = e;
-      return e;
+    auto ValNum = assignExpNewValueNum(exp);
+    if (ValNum.second) {
+      valueNumbering[C] = ValNum.first;
+      return ValNum.first;
    }
    if (!MD) {
-      e = nextValueNumber++;
+      uint32_t e = assignExpNewValueNum(exp).first;
      valueNumbering[C] = e;
      return e;
    }
@ -522,23 +526,29 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
    case Instruction::ExtractValue:
      exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
      break;
+    case Instruction::PHI:
+      valueNumbering[V] = nextValueNumber;
+      NumberingPhi[nextValueNumber] = cast<PHINode>(V);
+      return nextValueNumber++;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

-  uint32_t& e = expressionNumbering[exp];
-  if (!e) e = nextValueNumber++;
+  uint32_t e = assignExpNewValueNum(exp).first;
  valueNumbering[V] = e;
  return e;
 }

 /// Returns the value number of the specified value. Fails if
 /// the value has not yet been numbered.
-uint32_t GVN::ValueTable::lookup(Value *V) const {
+uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
-  assert(VI != valueNumbering.end() && "Value not numbered?");
-  return VI->second;
+  if (Verify) {
+    assert(VI != valueNumbering.end() && "Value not numbered?");
+    return VI->second;
+  }
+  return (VI != valueNumbering.end()) ? VI->second : 0;
 }

 /// Returns the value number of the given comparison,
@ -549,21 +559,29 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
                                         CmpInst::Predicate Predicate,
                                         Value *LHS, Value *RHS) {
  Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
-  uint32_t& e = expressionNumbering[exp];
-  if (!e) e = nextValueNumber++;
-  return e;
+  return assignExpNewValueNum(exp).first;
 }

 /// Remove all entries from the ValueTable.
 void GVN::ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
+  NumberingPhi.clear();
+  PhiTranslateTable.clear();
+  BlockRPONumber.clear();
  nextValueNumber = 1;
+  Expressions.clear();
+  ExprIdx.clear();
+  nextExprNumber = 0;
 }

 /// Remove a value from the value numbering.
 void GVN::ValueTable::erase(Value *V) {
+  uint32_t Num = valueNumbering.lookup(V);
  valueNumbering.erase(V);
+  // If V is PHINode, V <--> value number is an one-to-one mapping.
+  if (isa<PHINode>(V))
+    NumberingPhi.erase(Num);
 }

 /// verifyRemoved - Verify that the value is removed from all internal data
@ -1451,6 +1469,97 @@ bool GVN::processLoad(LoadInst *L) {
  return false;
 }

+/// Return a pair the first field showing the value number of \p Exp and the
+/// second field showing whether it is a value number newly created.
+std::pair<uint32_t, bool>
+GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
+  uint32_t &e = expressionNumbering[Exp];
+  bool CreateNewValNum = !e;
+  if (CreateNewValNum) {
+    Expressions.push_back(Exp);
+    if (ExprIdx.size() < nextValueNumber + 1)
+      ExprIdx.resize(nextValueNumber * 2);
+    e = nextValueNumber;
+    ExprIdx[nextValueNumber++] = nextExprNumber++;
+  }
+  return {e, CreateNewValNum};
+}
+
+void GVN::ValueTable::assignBlockRPONumber(Function &F) {
+  uint32_t NextBlockNumber = 1;
+  ReversePostOrderTraversal<Function *> RPOT(&F);
+  for (BasicBlock *BB : RPOT)
+    BlockRPONumber[BB] = NextBlockNumber++;
+}
+
+/// Return whether all the values related with the same \p num are
+/// defined in \p BB.
+bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
+                                     GVN &Gvn) {
+  LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
+  while (Vals && Vals->BB == BB)
+    Vals = Vals->Next;
+  return !Vals;
+}
+
+/// Wrap phiTranslateImpl to provide caching functionality.
+uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
+                                       const BasicBlock *PhiBlock, uint32_t Num,
+                                       GVN &Gvn) {
+  auto FindRes = PhiTranslateTable.find({Num, Pred});
+  if (FindRes != PhiTranslateTable.end())
+    return FindRes->second;
+  uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
+  PhiTranslateTable.insert({{Num, Pred}, NewNum});
+  return NewNum;
+}
+
+/// Translate value number \p Num using phis, so that it has the values of
+/// the phis in BB.
+uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
+                                           const BasicBlock *PhiBlock,
+                                           uint32_t Num, GVN &Gvn) {
+  if (PHINode *PN = NumberingPhi[Num]) {
+    if (BlockRPONumber[Pred] >= BlockRPONumber[PhiBlock])
+      return Num;
+    for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+      if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
+        if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
+          return TransVal;
+    }
+    return Num;
+  }
+
+  // If there is any value related with Num is defined in a BB other than
+  // PhiBlock, it cannot depend on a phi in PhiBlock without going through
+  // a backedge. We can do an early exit in that case to save compile time.
+  if (!areAllValsInBB(Num, PhiBlock, Gvn))
+    return Num;
+
+  if (ExprIdx[Num] == 0 || Num >= ExprIdx.size())
+    return Num;
+  Expression Exp = Expressions[ExprIdx[Num]];
+
+  for (unsigned i = 0; i < Exp.varargs.size(); i++)
+    Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
+
+  if (Exp.commutative) {
+    assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
+    if (Exp.varargs[0] > Exp.varargs[1]) {
+      std::swap(Exp.varargs[0], Exp.varargs[1]);
+      uint32_t Opcode = Exp.opcode >> 8;
+      if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
+        Exp.opcode = (Opcode << 8) |
+                     CmpInst::getSwappedPredicate(
+                         static_cast<CmpInst::Predicate>(Exp.opcode & 255));
+    }
+  }
+
+  if (uint32_t NewNum = expressionNumbering[Exp])
+    return NewNum;
+  return Num;
+}
+
 // In order to find a leader for a given value number at a
 // specific basic block, we first obtain the list of all Values for that number,
 // and then scan the list to find one whose block dominates the block in
@ -1856,6 +1965,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
    // Fabricate val-num for dead-code in order to suppress assertion in
    // performPRE().
    assignValNumForDeadCode();
+    VN.assignBlockRPONumber(F);
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
@ -1945,7 +2055,9 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
      success = false;
      break;
    }
-    if (Value *V = findLeader(Pred, VN.lookup(Op))) {
+    uint32_t TValNo =
+        VN.phiTranslate(Pred, Instr->getParent(), VN.lookup(Op), *this);
+    if (Value *V = findLeader(Pred, TValNo)) {
      Instr->setOperand(i, V);
    } else {
      success = false;
@ -1962,10 +2074,12 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
  Instr->insertBefore(Pred->getTerminator());
  Instr->setName(Instr->getName() + ".pre");
  Instr->setDebugLoc(Instr->getDebugLoc());
-  VN.add(Instr, ValNo);
+
+  unsigned Num = VN.lookupOrAdd(Instr);
+  VN.add(Instr, Num);

  // Update the availability map to include the new instruction.
-  addToLeaderTable(ValNo, Instr, Pred);
+  addToLeaderTable(Num, Instr, Pred);
  return true;
 }

@ -2014,7 +2128,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
      break;
    }

-    Value *predV = findLeader(P, ValNo);
+    uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
+    Value *predV = findLeader(P, TValNo);
    if (!predV) {
      predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
      PREPred = P;
--- a/test/Transforms/GVN/PRE/phi-translate-2.ll
+++ b/test/Transforms/GVN/PRE/phi-translate-2.ll
@ -0,0 +1,105 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@a = common global [100 x i64] zeroinitializer, align 16
+@b = common global [100 x i64] zeroinitializer, align 16
+@g1 = common global i64 0, align 8
+@g2 = common global i64 0, align 8
+@g3 = common global i64 0, align 8
+declare i64 @goo(...) local_unnamed_addr #1
+
+define void @test1(i64 %a, i64 %b, i64 %c, i64 %d) {
+entry:
+  %mul = mul nsw i64 %b, %a
+  store i64 %mul, i64* @g1, align 8
+  %t0 = load i64, i64* @g2, align 8
+  %cmp = icmp sgt i64 %t0, 3
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %mul2 = mul nsw i64 %d, %c
+  store i64 %mul2, i64* @g2, align 8
+  br label %if.end
+
+; Check phi-translate works and mul is removed.
+; CHECK-LABEL: @test1(
+; CHECK: if.end:
+; CHECK: %[[MULPHI:.*]] = phi i64 [ {{.*}}, %if.then ], [ %mul, %entry ]
+; CHECK-NOT: = mul
+; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8
+if.end:                                           ; preds = %if.then, %entry
+  %b.addr.0 = phi i64 [ %d, %if.then ], [ %b, %entry ]
+  %a.addr.0 = phi i64 [ %c, %if.then ], [ %a, %entry ]
+  %mul3 = mul nsw i64 %a.addr.0, %b.addr.0
+  store i64 %mul3, i64* @g3, align 8
+  ret void
+}
+
+define void @test2(i64 %i) {
+entry:
+  %arrayidx = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i
+  %t0 = load i64, i64* %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i
+  %t1 = load i64, i64* %arrayidx1, align 8
+  %mul = mul nsw i64 %t1, %t0
+  store i64 %mul, i64* @g1, align 8
+  %cmp = icmp sgt i64 %mul, 3
+  br i1 %cmp, label %if.then, label %if.end
+
+; Check phi-translate works for the phi generated by loadpre. A new mul will be
+; inserted in if.then block.
+; CHECK-LABEL: @test2(
+; CHECK: if.then:
+; CHECK: %[[MUL_THEN:.*]] = mul
+; CHECK: br label %if.end
+if.then:                                          ; preds = %entry
+  %call = tail call i64 (...) @goo() #2
+  store i64 %call, i64* @g2, align 8
+  br label %if.end
+
+; CHECK: if.end:
+; CHECK: %[[MULPHI:.*]] = phi i64 [ %[[MUL_THEN]], %if.then ], [ %mul, %entry ]
+; CHECK-NOT: = mul
+; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8
+if.end:                                           ; preds = %if.then, %entry
+  %i.addr.0 = phi i64 [ 3, %if.then ], [ %i, %entry ]
+  %arrayidx3 = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i.addr.0
+  %t2 = load i64, i64* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i.addr.0
+  %t3 = load i64, i64* %arrayidx4, align 8
+  %mul5 = mul nsw i64 %t3, %t2
+  store i64 %mul5, i64* @g3, align 8
+  ret void
+}
+
+; Check phi-translate doesn't go through backedge, which may lead to incorrect
+; pre transformation.
+; CHECK: for.end:
+; CHECK-NOT: %{{.*pre-phi}} = phi
+; CHECK: ret void
+define void @test3(i64 %N, i64* nocapture readonly %a) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %add, %for.body ]
+  %add = add nuw nsw i64 %i.0, 1
+  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %add
+  %tmp0 = load i64, i64* %arrayidx, align 8
+  %cmp = icmp slt i64 %i.0, %N
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %call = tail call i64 (...) @goo() #2
+  %add1 = sub nsw i64 0, %call
+  %tobool = icmp eq i64 %tmp0, %add1
+  br i1 %tobool, label %for.cond, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.cond
+  %i.0.lcssa = phi i64 [ %i.0, %for.body ], [ %i.0, %for.cond ]
+  %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %i.0.lcssa
+  %tmp1 = load i64, i64* %arrayidx2, align 8
+  store i64 %tmp1, i64* @g1, align 8
+  ret void
+}
+
--- a/test/Transforms/GVN/PRE/pre-gep-load.ll
+++ b/test/Transforms/GVN/PRE/pre-gep-load.ll
@ -37,7 +37,7 @@ sw.bb2:                                           ; preds = %if.end, %entry
  %3 = load double, double* %arrayidx5, align 8
 ; CHECK: sw.bb2:
 ; CHECK-NOT: sext
-; CHECK-NEXT: phi double [
+; CHECK: phi double [
 ; CHECK-NOT: load
  %sub6 = fsub double 3.000000e+00, %3
  br label %return
--- a/test/Transforms/GVN/PRE/pre-load.ll
+++ b/test/Transforms/GVN/PRE/pre-load.ll
@ -72,7 +72,7 @@ block4:
  %PRE = load i32, i32* %P3
  ret i32 %PRE
 ; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
 ; CHECK-NOT: load
 ; CHECK: ret i32
 }
@ -104,7 +104,7 @@ block4:
  %PRE = load i32, i32* %P3
  ret i32 %PRE
 ; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
 ; CHECK-NOT: load
 ; CHECK: ret i32
 }
@ -263,7 +263,7 @@ block4:
  %PRE = load i32, i32* %P3
  ret i32 %PRE
 ; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
 ; CHECK-NOT: load
 ; CHECK: ret i32
 }