2008-04-20 22:35:01 +02:00
|
|
|
//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2008-04-20 23:13:06 +02:00
|
|
|
// This file implements the Jump Threading pass.
|
2008-04-20 22:35:01 +02:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-06-14 02:51:09 +02:00
|
|
|
#include "llvm/Transforms/Scalar/JumpThreading.h"
|
2009-05-04 04:28:08 +02:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
2010-08-31 09:36:34 +02:00
|
|
|
#include "llvm/ADT/DenseSet.h"
|
2009-05-04 04:28:08 +02:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2012-12-03 17:50:05 +01:00
|
|
|
#include "llvm/ADT/Statistic.h"
|
2017-03-08 16:22:30 +01:00
|
|
|
#include "llvm/Analysis/AliasAnalysis.h"
|
2015-10-15 16:59:40 +02:00
|
|
|
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
|
2017-06-06 13:49:48 +02:00
|
|
|
#include "llvm/Analysis/CFG.h"
|
2012-12-03 17:50:05 +01:00
|
|
|
#include "llvm/Analysis/ConstantFolding.h"
|
2017-06-06 13:49:48 +02:00
|
|
|
#include "llvm/Analysis/GlobalsModRef.h"
|
2012-12-03 17:50:05 +01:00
|
|
|
#include "llvm/Analysis/InstructionSimplify.h"
|
|
|
|
#include "llvm/Analysis/Loads.h"
|
2015-10-15 16:59:40 +02:00
|
|
|
#include "llvm/Analysis/LoopInfo.h"
|
2015-10-28 22:27:08 +01:00
|
|
|
#include "llvm/Analysis/ValueTracking.h"
|
2017-06-23 07:41:35 +02:00
|
|
|
#include "llvm/IR/ConstantRange.h"
|
2013-01-02 12:36:10 +01:00
|
|
|
#include "llvm/IR/DataLayout.h"
|
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
|
|
#include "llvm/IR/LLVMContext.h"
|
2015-10-15 16:59:40 +02:00
|
|
|
#include "llvm/IR/MDBuilder.h"
|
2014-07-24 14:16:19 +02:00
|
|
|
#include "llvm/IR/Metadata.h"
|
2017-02-17 05:21:14 +01:00
|
|
|
#include "llvm/IR/PatternMatch.h"
|
2012-12-03 17:50:05 +01:00
|
|
|
#include "llvm/Pass.h"
|
2008-04-20 22:35:01 +02:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2008-04-20 23:13:06 +02:00
|
|
|
#include "llvm/Support/Debug.h"
|
2009-07-26 09:49:05 +02:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2017-06-06 13:49:48 +02:00
|
|
|
#include "llvm/Transforms/Scalar.h"
|
2012-12-03 17:50:05 +01:00
|
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
2017-02-17 05:21:14 +01:00
|
|
|
#include "llvm/Transforms/Utils/Cloning.h"
|
2012-12-03 17:50:05 +01:00
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
|
|
|
#include "llvm/Transforms/Utils/SSAUpdater.h"
|
2015-10-15 16:59:40 +02:00
|
|
|
#include <algorithm>
|
|
|
|
#include <memory>
|
2008-04-20 22:35:01 +02:00
|
|
|
using namespace llvm;
|
2016-06-14 02:51:09 +02:00
|
|
|
using namespace jumpthreading;
|
2008-04-20 22:35:01 +02:00
|
|
|
|
2014-04-22 04:55:47 +02:00
|
|
|
#define DEBUG_TYPE "jump-threading"
|
|
|
|
|
2008-04-21 00:39:42 +02:00
|
|
|
STATISTIC(NumThreads, "Number of jumps threaded");
|
|
|
|
STATISTIC(NumFolds, "Number of terminators folded");
|
2009-10-11 09:24:57 +02:00
|
|
|
STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
|
2008-04-20 22:35:01 +02:00
|
|
|
|
2008-04-20 23:13:06 +02:00
|
|
|
// Command-line knob bounding how large a block may be and still be duplicated
// by jump threading. JumpThreadingPass's constructor reads this value when it
// is handed a threshold of -1. Hidden from the default -help output.
static cl::opt<unsigned>
    BBDuplicateThreshold("jump-threading-threshold", cl::Hidden, cl::init(6),
                         cl::desc("Max block size to duplicate for jump threading"));
|
|
|
|
|
2015-10-28 22:27:08 +01:00
|
|
|
// Command-line knob limiting how many predecessors are searched for a
// stronger condition that can be used to thread over a weaker one. Hidden
// from the default -help output; defaults to 3.
static cl::opt<unsigned> ImplicationSearchThreshold(
    "jump-threading-implication-search-threshold", cl::Hidden, cl::init(3),
    cl::desc("The number of predecessors to search for a stronger "
             "condition to use to thread over a weaker condition"));
|
|
|
|
|
2008-04-20 22:35:01 +02:00
|
|
|
namespace {
|
2008-05-09 06:43:13 +02:00
|
|
|
/// This pass performs 'jump threading', which looks at blocks that have
|
|
|
|
/// multiple predecessors and multiple successors. If one or more of the
|
|
|
|
/// predecessors of the block can be proven to always jump to one of the
|
|
|
|
/// successors, we forward the edge from the predecessor to the successor by
|
|
|
|
/// duplicating the contents of this block.
|
|
|
|
///
|
|
|
|
/// An example of when this can occur is code like this:
|
|
|
|
///
|
|
|
|
/// if () { ...
|
|
|
|
/// X = 4;
|
|
|
|
/// }
|
|
|
|
/// if (X < 3) {
|
|
|
|
///
|
|
|
|
/// In this case, the unconditional branch at the end of the first if can be
|
|
|
|
/// revectored to the false side of the second if.
|
|
|
|
///
|
2009-09-02 08:11:42 +02:00
|
|
|
class JumpThreading : public FunctionPass {
|
2016-06-14 02:51:09 +02:00
|
|
|
JumpThreadingPass Impl;
|
|
|
|
|
2008-04-20 22:35:01 +02:00
|
|
|
public:
|
|
|
|
static char ID; // Pass identification
|
2016-06-14 02:51:09 +02:00
|
|
|
JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
|
2010-10-19 19:21:58 +02:00
|
|
|
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
|
|
|
|
}
|
2008-04-20 22:35:01 +02:00
|
|
|
|
2014-03-05 10:10:37 +01:00
|
|
|
bool runOnFunction(Function &F) override;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2014-03-05 10:10:37 +01:00
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
2017-03-08 16:22:30 +01:00
|
|
|
AU.addRequired<AAResultsWrapperPass>();
|
2016-06-14 00:01:25 +02:00
|
|
|
AU.addRequired<LazyValueInfoWrapperPass>();
|
|
|
|
AU.addPreserved<LazyValueInfoWrapperPass>();
|
2015-09-10 12:22:12 +02:00
|
|
|
AU.addPreserved<GlobalsAAWrapperPass>();
|
2015-01-15 11:41:28 +01:00
|
|
|
AU.addRequired<TargetLibraryInfoWrapperPass>();
|
2009-11-11 03:08:33 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-06-14 02:51:09 +02:00
|
|
|
void releaseMemory() override { Impl.releaseMemory(); }
|
2008-04-20 22:35:01 +02:00
|
|
|
};
|
2015-06-23 11:49:53 +02:00
|
|
|
}
|
2008-04-20 22:35:01 +02:00
|
|
|
|
2008-05-13 02:00:25 +02:00
|
|
|
char JumpThreading::ID = 0;
|
2010-10-12 21:48:12 +02:00
|
|
|
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
|
|
|
|
"Jump Threading", false, false)
|
2016-06-14 00:01:25 +02:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
|
2015-01-15 11:41:28 +01:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
|
2017-03-08 16:22:30 +01:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
|
2010-10-12 21:48:12 +02:00
|
|
|
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
|
2010-10-08 00:25:06 +02:00
|
|
|
"Jump Threading", false, false)
|
2008-05-13 02:00:25 +02:00
|
|
|
|
2008-04-20 22:35:01 +02:00
|
|
|
// Public interface to the Jump Threading pass
|
2014-09-24 06:59:06 +02:00
|
|
|
FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
  // Allocate the legacy wrapper, handing the threshold to its constructor.
  FunctionPass *NewPass = new JumpThreading(Threshold);
  return NewPass;
}
|
2008-04-20 22:35:01 +02:00
|
|
|
|
2016-06-14 02:51:09 +02:00
|
|
|
/// Selects the block-duplication threshold: a value of -1 means "use the
/// -jump-threading-threshold command-line option", anything else is taken
/// verbatim (converted to unsigned).
JumpThreadingPass::JumpThreadingPass(int T) {
  if (T == -1)
    BBDupThreshold = BBDuplicateThreshold;
  else
    BBDupThreshold = unsigned(T);
}
|
|
|
|
|
2008-04-20 22:35:01 +02:00
|
|
|
/// runOnFunction - Top level algorithm.
|
|
|
|
///
|
|
|
|
bool JumpThreading::runOnFunction(Function &F) {
  // Leave the function untouched when the pass framework asks us to skip it.
  if (skipFunction(F))
    return false;

  // Collect the analyses the shared implementation consumes.
  auto *LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  auto *LazyInfo = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
  auto *AliasInfo = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  // Block-frequency and branch-probability info are only built when the
  // function carries an entry count; otherwise both pointers stay null.
  std::unique_ptr<BlockFrequencyInfo> FreqInfo;
  std::unique_ptr<BranchProbabilityInfo> ProbInfo;
  const bool HasProfile = F.getEntryCount().hasValue();
  if (HasProfile) {
    LoopInfo Loops{DominatorTree(F)};
    ProbInfo.reset(new BranchProbabilityInfo(F, Loops, LibInfo));
    FreqInfo.reset(new BlockFrequencyInfo(F, *ProbInfo, Loops));
  }

  return Impl.runImpl(F, LibInfo, LazyInfo, AliasInfo, HasProfile,
                      std::move(FreqInfo), std::move(ProbInfo));
}
|
|
|
|
|
|
|
|
/// New pass-manager entry point: gathers the analyses from \p AM, runs the
/// shared implementation, and reports which analyses remain valid.
PreservedAnalyses JumpThreadingPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  auto &LibInfo = AM.getResult<TargetLibraryAnalysis>(F);
  auto &LazyInfo = AM.getResult<LazyValueAnalysis>(F);
  auto &AliasInfo = AM.getResult<AAManager>(F);

  // Block-frequency and branch-probability info are only built when the
  // function carries an entry count; otherwise both pointers stay null.
  std::unique_ptr<BlockFrequencyInfo> FreqInfo;
  std::unique_ptr<BranchProbabilityInfo> ProbInfo;
  const bool HasProfile = F.getEntryCount().hasValue();
  if (HasProfile) {
    LoopInfo Loops{DominatorTree(F)};
    ProbInfo.reset(new BranchProbabilityInfo(F, Loops, &LibInfo));
    FreqInfo.reset(new BlockFrequencyInfo(F, *ProbInfo, Loops));
  }

  const bool MadeChange =
      runImpl(F, &LibInfo, &LazyInfo, &AliasInfo, HasProfile,
              std::move(FreqInfo), std::move(ProbInfo));

  // When the IR was modified, only GlobalsAA is reported as preserved.
  if (MadeChange) {
    PreservedAnalyses Kept;
    Kept.preserve<GlobalsAA>();
    return Kept;
  }

  // Nothing was modified, so every analysis is still valid.
  return PreservedAnalyses::all();
}
|
|
|
|
|
|
|
|
bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
|
2017-03-08 16:22:30 +01:00
|
|
|
LazyValueInfo *LVI_, AliasAnalysis *AA_,
|
|
|
|
bool HasProfileData_,
|
2016-06-14 02:51:09 +02:00
|
|
|
std::unique_ptr<BlockFrequencyInfo> BFI_,
|
|
|
|
std::unique_ptr<BranchProbabilityInfo> BPI_) {
|
2014-02-06 01:07:05 +01:00
|
|
|
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
|
2016-06-14 02:51:09 +02:00
|
|
|
TLI = TLI_;
|
|
|
|
LVI = LVI_;
|
2017-03-08 16:22:30 +01:00
|
|
|
AA = AA_;
|
2015-10-15 16:59:40 +02:00
|
|
|
BFI.reset();
|
|
|
|
BPI.reset();
|
|
|
|
// When profile data is available, we need to update edge weights after
|
|
|
|
// successful jump threading, which requires both BPI and BFI being available.
|
2016-06-14 02:51:09 +02:00
|
|
|
HasProfileData = HasProfileData_;
|
2017-02-17 05:21:14 +01:00
|
|
|
auto *GuardDecl = F.getParent()->getFunction(
|
|
|
|
Intrinsic::getName(Intrinsic::experimental_guard));
|
|
|
|
HasGuards = GuardDecl && !GuardDecl->use_empty();
|
2015-10-15 16:59:40 +02:00
|
|
|
if (HasProfileData) {
|
2016-06-14 02:51:09 +02:00
|
|
|
BPI = std::move(BPI_);
|
|
|
|
BFI = std::move(BFI_);
|
2015-10-15 16:59:40 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2014-06-19 16:11:53 +02:00
|
|
|
// Remove unreachable blocks from function as they may result in infinite
|
|
|
|
// loop. We do threading if we found something profitable. Jump threading a
|
|
|
|
// branch can create other opportunities. If these opportunities form a cycle
|
2015-09-16 15:27:30 +02:00
|
|
|
// i.e. if any jump threading is undoing previous threading in the path, then
|
2014-06-19 16:11:53 +02:00
|
|
|
// we will loop forever. We take care of this issue by not jump threading for
|
|
|
|
// back edges. This works for normal cases but not for unreachable blocks as
|
|
|
|
// they may have cycle with no back edge.
|
2016-01-10 08:13:04 +01:00
|
|
|
bool EverChanged = false;
|
2016-06-16 18:25:53 +02:00
|
|
|
EverChanged |= removeUnreachableBlocks(F, LVI);
|
2014-06-17 16:34:19 +02:00
|
|
|
|
2009-05-04 04:28:08 +02:00
|
|
|
FindLoopHeaders(F);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-01-10 08:13:04 +01:00
|
|
|
bool Changed;
|
2010-01-07 14:50:07 +01:00
|
|
|
do {
|
|
|
|
Changed = false;
|
2008-12-03 08:48:08 +01:00
|
|
|
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
|
2015-10-13 20:26:00 +02:00
|
|
|
BasicBlock *BB = &*I;
|
2010-12-05 20:02:47 +01:00
|
|
|
// Thread all of the branches we can over this block.
|
2008-12-03 08:48:08 +01:00
|
|
|
while (ProcessBlock(BB))
|
2008-04-21 00:39:42 +02:00
|
|
|
Changed = true;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-12-03 08:48:08 +01:00
|
|
|
++I;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-12-03 08:48:08 +01:00
|
|
|
// If the block is trivially dead, zap it. This eliminates the successor
|
|
|
|
// edges which simplifies the CFG.
|
2015-01-13 04:46:47 +01:00
|
|
|
if (pred_empty(BB) &&
|
2008-12-08 23:44:07 +01:00
|
|
|
BB != &BB->getParent()->getEntryBlock()) {
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
|
2009-10-11 09:24:57 +02:00
|
|
|
<< "' with terminator: " << *BB->getTerminator() << '\n');
|
2009-05-04 04:28:08 +02:00
|
|
|
LoopHeaders.erase(BB);
|
2016-06-16 18:25:53 +02:00
|
|
|
LVI->eraseBlock(BB);
|
2009-05-04 18:29:24 +02:00
|
|
|
DeleteDeadBlock(BB);
|
2008-12-03 08:48:08 +01:00
|
|
|
Changed = true;
|
2010-12-13 03:38:13 +01:00
|
|
|
continue;
|
|
|
|
}
|
2011-04-14 23:35:50 +02:00
|
|
|
|
2010-12-13 03:38:13 +01:00
|
|
|
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
|
2011-04-14 23:35:50 +02:00
|
|
|
|
2010-12-13 03:38:13 +01:00
|
|
|
// Can't thread an unconditional jump, but if the block is "almost
|
|
|
|
// empty", we can replace uses of it with uses of the successor and make
|
|
|
|
// this dead.
|
[SimplifyCFG] Defer folding unconditional branches to LateSimplifyCFG if it can destroy canonical loop structure.
Summary:
When simplifying unconditional branches from empty blocks, we pre-test if the
BB belongs to a set of loop headers and keep the block to prevent passes from
destroying canonical loop structure. However, the current algorithm fails if
the destination of the branch is a loop header. Especially when such a loop's
latch block is folded into loop header it results in additional backedges and
LoopSimplify turns it into a nested loop which prevent later optimizations
from being applied (e.g., loop unrolling and loop interleaving).
This patch augments the existing algorithm by further checking if the
destination of the branch belongs to a set of loop headers and defer
eliminating it if yes to LateSimplifyCFG.
Fixes PR33605: https://bugs.llvm.org/show_bug.cgi?id=33605
Reviewers: efriedma, mcrosier, pacxx, hsung, davidxl
Reviewed By: efriedma
Subscribers: ashutosh.nema, gberry, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D35411
llvm-svn: 308422
2017-07-19 10:53:34 +02:00
|
|
|
// We should not eliminate the loop header or latch either, because
|
|
|
|
// eliminating a loop header or latch might later prevent LoopSimplify
|
|
|
|
// from transforming nested loops into simplified form. We will rely on
|
|
|
|
// later passes in backend to clean up empty blocks.
|
2010-12-13 03:38:13 +01:00
|
|
|
if (BI && BI->isUnconditional() &&
|
|
|
|
BB != &BB->getParent()->getEntryBlock() &&
|
2009-11-10 22:40:01 +01:00
|
|
|
// If the terminator is the only non-phi instruction, try to nuke it.
|
[SimplifyCFG] Defer folding unconditional branches to LateSimplifyCFG if it can destroy canonical loop structure.
Summary:
When simplifying unconditional branches from empty blocks, we pre-test if the
BB belongs to a set of loop headers and keep the block to prevent passes from
destroying canonical loop structure. However, the current algorithm fails if
the destination of the branch is a loop header. Especially when such a loop's
latch block is folded into loop header it results in additional backedges and
LoopSimplify turns it into a nested loop which prevent later optimizations
from being applied (e.g., loop unrolling and loop interleaving).
This patch augments the existing algorithm by further checking if the
destination of the branch belongs to a set of loop headers and defer
eliminating it if yes to LateSimplifyCFG.
Fixes PR33605: https://bugs.llvm.org/show_bug.cgi?id=33605
Reviewers: efriedma, mcrosier, pacxx, hsung, davidxl
Reviewed By: efriedma
Subscribers: ashutosh.nema, gberry, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D35411
llvm-svn: 308422
2017-07-19 10:53:34 +02:00
|
|
|
BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) &&
|
|
|
|
!LoopHeaders.count(BI->getSuccessor(0))) {
|
2010-12-13 03:38:13 +01:00
|
|
|
// FIXME: It is always conservatively correct to drop the info
|
|
|
|
// for a block even if it doesn't get erased. This isn't totally
|
|
|
|
// awesome, but it allows us to use AssertingVH to prevent nasty
|
|
|
|
// dangling pointer issues within LazyValueInfo.
|
2016-06-16 18:25:53 +02:00
|
|
|
LVI->eraseBlock(BB);
|
2017-02-01 20:06:55 +01:00
|
|
|
if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
|
2010-12-13 03:38:13 +01:00
|
|
|
Changed = true;
|
2008-12-03 08:48:08 +01:00
|
|
|
}
|
|
|
|
}
|
2008-04-21 00:39:42 +02:00
|
|
|
EverChanged |= Changed;
|
2010-01-07 14:50:07 +01:00
|
|
|
} while (Changed);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-05-04 04:28:08 +02:00
|
|
|
LoopHeaders.clear();
|
2008-04-21 00:39:42 +02:00
|
|
|
return EverChanged;
|
2008-04-20 22:35:01 +02:00
|
|
|
}
|
2008-04-20 23:13:06 +02:00
|
|
|
|
2017-05-23 15:36:25 +02:00
|
|
|
// Replace uses of Cond with ToVal when safe to do so. If all uses are
|
|
|
|
// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
|
|
|
|
// because we may incorrectly replace uses when guards/assumes are uses of
|
|
|
|
// `Cond` and we used the guards/assume to reason about the `Cond` value
|
|
|
|
// at the end of block. RAUW unconditionally replaces all uses
|
|
|
|
// including the guards/assumes themselves and the uses before the
|
|
|
|
// guard/assume.
|
|
|
|
static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) {
  assert(Cond->getType() == ToVal->getType());
  auto *CondBB = Cond->getParent();

  // Uses in other blocks (i.e. uses strictly dominated by CondBB) can be
  // replaced unconditionally, because the LVI fact holds from the terminator
  // of CondBB onwards.
  replaceNonLocalUsesWith(Cond, ToVal);

  // Walk the block bottom-up. Stop on reaching Cond itself (nothing above it
  // can use it) or on the first instruction that is not guaranteed to
  // transfer execution to its successor: only instructions that are certain
  // to reach the end of the block, where Cond is known to equal ToVal, may
  // have their uses rewritten.
  for (auto It = CondBB->rbegin(), End = CondBB->rend(); It != End; ++It) {
    Instruction &Inst = *It;
    if (&Inst == Cond || !isGuaranteedToTransferExecutionToSuccessor(&Inst))
      break;
    Inst.replaceUsesOfWith(Cond, ToVal);
  }

  // Cond is kept if anything still uses it or if it may have side effects.
  if (!Cond->use_empty() || Cond->mayHaveSideEffects())
    return;
  Cond->eraseFromParent();
}
|
|
|
|
|
2017-02-17 05:21:14 +01:00
|
|
|
/// Return the cost of duplicating a piece of this block from first non-phi
|
|
|
|
/// and before StopAt instruction to thread across it. Stop scanning the block
|
|
|
|
/// when exceeding the threshold. If duplication is impossible, returns ~0U.
|
|
|
|
static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
|
|
|
|
Instruction *StopAt,
|
2012-12-03 18:34:44 +01:00
|
|
|
unsigned Threshold) {
|
2017-02-17 05:21:14 +01:00
|
|
|
assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
|
2009-10-11 09:24:57 +02:00
|
|
|
/// Ignore PHI nodes, these will be flattened when duplication happens.
|
2015-10-13 20:26:00 +02:00
|
|
|
BasicBlock::const_iterator I(BB->getFirstNonPHI());
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-11 01:21:58 +01:00
|
|
|
// FIXME: THREADING will delete values that are just used to compute the
|
|
|
|
// branch, so they shouldn't count against the duplication cost.
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2015-12-29 19:10:16 +01:00
|
|
|
unsigned Bonus = 0;
|
2017-02-17 05:21:14 +01:00
|
|
|
if (BB->getTerminator() == StopAt) {
|
|
|
|
// Threading through a switch statement is particularly profitable. If this
|
|
|
|
// block ends in a switch, decrease its cost to make it more likely to
|
|
|
|
// happen.
|
|
|
|
if (isa<SwitchInst>(StopAt))
|
|
|
|
Bonus = 6;
|
|
|
|
|
|
|
|
// The same holds for indirect branches, but slightly more so.
|
|
|
|
if (isa<IndirectBrInst>(StopAt))
|
|
|
|
Bonus = 8;
|
|
|
|
}
|
2015-12-29 19:10:16 +01:00
|
|
|
|
|
|
|
// Bump the threshold up so the early exit from the loop doesn't skip the
|
|
|
|
// terminator-based Size adjustment at the end.
|
|
|
|
Threshold += Bonus;
|
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// Sum up the cost of each instruction until we get to the terminator. Don't
|
|
|
|
// include the terminator because the copy won't include it.
|
|
|
|
unsigned Size = 0;
|
2017-02-17 05:21:14 +01:00
|
|
|
for (; &*I != StopAt; ++I) {
|
2012-12-03 18:34:44 +01:00
|
|
|
|
|
|
|
// Stop scanning the block if we've reached the threshold.
|
|
|
|
if (Size > Threshold)
|
|
|
|
return Size;
|
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// Debugger intrinsics don't incur code size.
|
|
|
|
if (isa<DbgInfoIntrinsic>(I)) continue;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// If this is a pointer->pointer bitcast, it is free.
|
2010-02-16 12:11:14 +01:00
|
|
|
if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
|
2009-10-11 09:24:57 +02:00
|
|
|
continue;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2015-08-14 07:09:07 +02:00
|
|
|
// Bail out if this instruction gives back a token type, it is not possible
|
2015-09-16 15:27:30 +02:00
|
|
|
// to duplicate it if it is used outside this BB.
|
2015-08-14 07:09:07 +02:00
|
|
|
if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
|
|
|
|
return ~0U;
|
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// All other instructions count for at least one unit.
|
|
|
|
++Size;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// Calls are more expensive. If they are non-intrinsic calls, we model them
|
|
|
|
// as having cost of 4. If they are a non-vector intrinsic, we model them
|
|
|
|
// as having cost of 2 total, and if they are a vector intrinsic, we model
|
|
|
|
// them as having cost 1.
|
|
|
|
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
|
2015-08-31 08:10:27 +02:00
|
|
|
if (CI->cannotDuplicate() || CI->isConvergent())
|
2012-12-20 17:04:27 +01:00
|
|
|
// Blocks with NoDuplicate are modelled as having infinite cost, so they
|
|
|
|
// are never duplicated.
|
|
|
|
return ~0U;
|
|
|
|
else if (!isa<IntrinsicInst>(CI))
|
2009-10-11 09:24:57 +02:00
|
|
|
Size += 3;
|
2010-02-16 12:11:14 +01:00
|
|
|
else if (!CI->getType()->isVectorTy())
|
2009-10-11 09:24:57 +02:00
|
|
|
Size += 1;
|
|
|
|
}
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2015-12-29 19:10:16 +01:00
|
|
|
return Size > Bonus ? Size - Bonus : 0;
|
2009-10-11 09:24:57 +02:00
|
|
|
}
|
|
|
|
|
2009-05-04 18:29:24 +02:00
|
|
|
/// FindLoopHeaders - We do not want jump threading to turn proper loop
|
2009-05-04 04:28:08 +02:00
|
|
|
/// structures into irreducible loops. Doing this breaks up the loop nesting
|
|
|
|
/// hierarchy and pessimizes later transformations. To prevent this from
|
|
|
|
/// happening, we first have to find the loop headers. Here we approximate this
|
|
|
|
/// by finding targets of backedges in the CFG.
|
|
|
|
///
|
|
|
|
/// Note that there definitely are cases when we want to allow threading of
|
|
|
|
/// edges across a loop header. For example, threading a jump from outside the
|
|
|
|
/// loop (the preheader) to an exit block of the loop is definitely profitable.
|
|
|
|
/// It is also almost always profitable to thread backedges from within the loop
|
|
|
|
/// to exit blocks, and is often profitable to thread backedges to other blocks
|
|
|
|
/// within the loop (forming a nested loop). This simple analysis is not rich
|
|
|
|
/// enough to track all of these properties and keep it up-to-date as the CFG
|
|
|
|
/// mutates, so we don't allow any of these transformations.
|
|
|
|
///
|
2016-06-14 02:51:09 +02:00
|
|
|
void JumpThreadingPass::FindLoopHeaders(Function &F) {
  // Collect the backedges of F, then record the second block of each edge
  // pair in LoopHeaders.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> BackEdges;
  FindFunctionBackedges(F, BackEdges);

  for (unsigned Idx = 0, NumEdges = BackEdges.size(); Idx != NumEdges; ++Idx)
    LoopHeaders.insert(BackEdges[Idx].second);
}
|
|
|
|
|
2010-12-05 20:06:41 +01:00
|
|
|
/// getKnownConstant - Helper method to determine if we can thread over a
|
|
|
|
/// terminator with the given value as its condition, and if so what value to
|
2010-12-07 00:36:56 +01:00
|
|
|
/// use for that. What kind of value this is depends on whether we want an
|
|
|
|
/// integer or a block address, but an undef is always accepted.
|
2010-12-05 20:06:41 +01:00
|
|
|
/// Returns null if Val is null or not an appropriate constant.
|
2010-12-07 00:36:56 +01:00
|
|
|
static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
  // No value at all means nothing is known.
  if (!Val)
    return nullptr;

  // Undef counts as "known" regardless of the requested kind.
  if (isa<UndefValue>(Val))
    return cast<UndefValue>(Val);

  // Unless a block address was requested, only a ConstantInt qualifies.
  if (Preference != WantBlockAddress)
    return dyn_cast<ConstantInt>(Val);

  // For block addresses, look through pointer casts before checking.
  return dyn_cast<BlockAddress>(Val->stripPointerCasts());
}
|
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
|
2010-12-07 00:36:56 +01:00
|
|
|
/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
|
|
|
|
/// in any of our predecessors. If so, return the known list of value and pred
|
|
|
|
/// BB in the result vector.
|
2009-11-07 09:05:03 +01:00
|
|
|
///
|
|
|
|
/// This returns true if there were any known values.
|
|
|
|
///
|
2016-06-14 02:51:09 +02:00
|
|
|
bool JumpThreadingPass::ComputeValueKnownInPredecessors(
|
|
|
|
Value *V, BasicBlock *BB, PredValueInfo &Result,
|
|
|
|
ConstantPreference Preference, Instruction *CxtI) {
|
2010-08-31 21:24:27 +02:00
|
|
|
// This method walks up use-def chains recursively. Because of this, we could
|
|
|
|
// get into an infinite loop going around loops in the use-def chain. To
|
|
|
|
// prevent this, keep track of what (value, block) pairs we've already visited
|
|
|
|
// and terminate the search if we loop back to them
|
2010-08-31 09:36:34 +02:00
|
|
|
if (!RecursionSet.insert(std::make_pair(V, BB)).second)
|
|
|
|
return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-08-31 21:24:27 +02:00
|
|
|
// An RAII helper to remove this pair from the recursion set once the recursion
|
|
|
|
// stack pops back out again.
|
|
|
|
RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-12-05 20:06:41 +01:00
|
|
|
// If V is a constant, then it is known in all predecessors.
|
2010-12-07 00:36:56 +01:00
|
|
|
if (Constant *KC = getKnownConstant(V, Preference)) {
|
2016-01-09 19:43:01 +01:00
|
|
|
for (BasicBlock *Pred : predecessors(BB))
|
|
|
|
Result.push_back(std::make_pair(KC, Pred));
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-02-08 18:00:39 +01:00
|
|
|
return !Result.empty();
|
2009-11-07 09:05:03 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
// If V is a non-instruction value, or an instruction in a different block,
|
|
|
|
// then it can't be derived from a PHI.
|
2016-03-16 00:38:47 +01:00
|
|
|
Instruction *I = dyn_cast<Instruction>(V);
|
2014-04-25 07:29:35 +02:00
|
|
|
if (!I || I->getParent() != BB) {
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-11 03:08:33 +01:00
|
|
|
// Okay, if this is a live-in value, see if it has a known value at the end
|
|
|
|
// of any of our predecessors.
|
|
|
|
//
|
|
|
|
// FIXME: This should be an edge property, not a block end property.
|
|
|
|
/// TODO: Per PR2563, we could infer value range information about a
|
|
|
|
/// predecessor based on its terminator.
|
|
|
|
//
|
2010-09-14 22:57:41 +02:00
|
|
|
// FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
|
|
|
|
// "I" is a non-local compare-with-a-constant instruction. This would be
|
|
|
|
// able to handle value inequalities better, for example if the compare is
|
|
|
|
// "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
|
|
|
|
// Perhaps getConstantOnEdge should be smart enough to do this?
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-01-09 19:43:01 +01:00
|
|
|
for (BasicBlock *P : predecessors(BB)) {
|
2010-09-14 22:57:41 +02:00
|
|
|
// If the value is known by LazyValueInfo to be a constant in a
|
|
|
|
// predecessor, use that information to try to thread this block.
|
2014-09-07 22:29:59 +02:00
|
|
|
Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
|
2010-12-07 00:36:56 +01:00
|
|
|
if (Constant *KC = getKnownConstant(PredCst, Preference))
|
2010-12-05 20:06:41 +01:00
|
|
|
Result.push_back(std::make_pair(KC, P));
|
2009-11-11 03:08:33 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-09-14 22:57:41 +02:00
|
|
|
return !Result.empty();
|
2009-11-11 03:08:33 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
/// If I is a PHI node, then we know the incoming values for any constants.
|
|
|
|
if (PHINode *PN = dyn_cast<PHINode>(I)) {
|
|
|
|
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
|
|
|
|
Value *InVal = PN->getIncomingValue(i);
|
2010-12-07 00:36:56 +01:00
|
|
|
if (Constant *KC = getKnownConstant(InVal, Preference)) {
|
2010-12-05 20:06:41 +01:00
|
|
|
Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
|
2010-09-14 22:57:41 +02:00
|
|
|
} else {
|
2010-08-26 19:40:24 +02:00
|
|
|
Constant *CI = LVI->getConstantOnEdge(InVal,
|
2014-09-07 22:29:59 +02:00
|
|
|
PN->getIncomingBlock(i),
|
|
|
|
BB, CxtI);
|
2010-12-07 00:36:56 +01:00
|
|
|
if (Constant *KC = getKnownConstant(CI, Preference))
|
|
|
|
Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
|
2009-11-07 09:05:03 +01:00
|
|
|
}
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
return !Result.empty();
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-03-16 05:52:52 +01:00
|
|
|
// Handle Cast instructions. Only see through Cast when the source operand is
|
|
|
|
// PHI or Cmp and the source type is i1 to save the compilation time.
|
|
|
|
if (CastInst *CI = dyn_cast<CastInst>(I)) {
|
|
|
|
Value *Source = CI->getOperand(0);
|
|
|
|
if (!Source->getType()->isIntegerTy(1))
|
|
|
|
return false;
|
|
|
|
if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
|
|
|
|
return false;
|
|
|
|
ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
|
|
|
|
if (Result.empty())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Convert the known values.
|
|
|
|
for (auto &R : Result)
|
|
|
|
R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2010-12-05 20:06:41 +01:00
|
|
|
PredValueInfoTy LHSVals, RHSVals;
|
2009-11-07 09:05:03 +01:00
|
|
|
|
|
|
|
// Handle some boolean conditions.
|
2010-12-05 20:02:47 +01:00
|
|
|
if (I->getType()->getPrimitiveSizeInBits() == 1) {
|
2010-12-07 00:36:56 +01:00
|
|
|
assert(Preference == WantInteger && "One-bit non-integer type?");
|
2009-11-07 09:05:03 +01:00
|
|
|
// X | true -> true
|
|
|
|
// X & false -> false
|
|
|
|
if (I->getOpcode() == Instruction::Or ||
|
|
|
|
I->getOpcode() == Instruction::And) {
|
2010-12-07 00:36:56 +01:00
|
|
|
ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
|
2016-03-16 00:38:47 +01:00
|
|
|
WantInteger, CxtI);
|
2010-12-07 00:36:56 +01:00
|
|
|
ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
|
2016-03-16 00:38:47 +01:00
|
|
|
WantInteger, CxtI);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-08-31 21:24:27 +02:00
|
|
|
if (LHSVals.empty() && RHSVals.empty())
|
2009-11-07 09:05:03 +01:00
|
|
|
return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
ConstantInt *InterestingVal;
|
|
|
|
if (I->getOpcode() == Instruction::Or)
|
|
|
|
InterestingVal = ConstantInt::getTrue(I->getContext());
|
|
|
|
else
|
|
|
|
InterestingVal = ConstantInt::getFalse(I->getContext());
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-08-18 05:14:36 +02:00
|
|
|
SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-02-11 05:40:44 +01:00
|
|
|
// Scan for the sentinel. If we find an undef, force it to the
|
|
|
|
// interesting value: x|undef -> true and x&undef -> false.
|
2016-01-09 19:43:01 +01:00
|
|
|
for (const auto &LHSVal : LHSVals)
|
|
|
|
if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
|
|
|
|
Result.emplace_back(InterestingVal, LHSVal.second);
|
|
|
|
LHSKnownBBs.insert(LHSVal.second);
|
2010-02-11 05:40:44 +01:00
|
|
|
}
|
2016-01-09 19:43:01 +01:00
|
|
|
for (const auto &RHSVal : RHSVals)
|
|
|
|
if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
|
2010-07-12 02:47:34 +02:00
|
|
|
// If we already inferred a value for this block on the LHS, don't
|
|
|
|
// re-add it.
|
2016-01-09 19:43:01 +01:00
|
|
|
if (!LHSKnownBBs.count(RHSVal.second))
|
|
|
|
Result.emplace_back(InterestingVal, RHSVal.second);
|
2010-02-11 05:40:44 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
return !Result.empty();
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-10 23:39:16 +01:00
|
|
|
// Handle the NOT form of XOR.
|
|
|
|
if (I->getOpcode() == Instruction::Xor &&
|
|
|
|
isa<ConstantInt>(I->getOperand(1)) &&
|
|
|
|
cast<ConstantInt>(I->getOperand(1))->isOne()) {
|
2016-03-16 00:38:47 +01:00
|
|
|
ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
|
|
|
|
WantInteger, CxtI);
|
2010-08-31 21:24:27 +02:00
|
|
|
if (Result.empty())
|
2009-11-10 23:39:16 +01:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Invert the known values.
|
2016-01-09 19:43:01 +01:00
|
|
|
for (auto &R : Result)
|
|
|
|
R.first = ConstantExpr::getNot(R.first);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-10 23:39:16 +01:00
|
|
|
return true;
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-08-26 19:40:24 +02:00
|
|
|
// Try to simplify some other binary operator values.
|
|
|
|
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
|
2010-12-07 00:36:56 +01:00
|
|
|
assert(Preference != WantBlockAddress
|
|
|
|
&& "A binary operator creating a block address?");
|
2010-08-31 22:26:04 +02:00
|
|
|
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
|
2010-12-05 20:06:41 +01:00
|
|
|
PredValueInfoTy LHSVals;
|
2010-12-07 00:36:56 +01:00
|
|
|
ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
|
2016-03-16 00:38:47 +01:00
|
|
|
WantInteger, CxtI);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-08-31 09:36:34 +02:00
|
|
|
// Try to use constant folding to simplify the binary operator.
|
2016-01-09 19:43:01 +01:00
|
|
|
for (const auto &LHSVal : LHSVals) {
|
|
|
|
Constant *V = LHSVal.first;
|
2010-08-31 22:26:04 +02:00
|
|
|
Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-12-07 00:36:56 +01:00
|
|
|
if (Constant *KC = getKnownConstant(Folded, WantInteger))
|
2016-01-09 19:43:01 +01:00
|
|
|
Result.push_back(std::make_pair(KC, LHSVal.second));
|
2010-08-31 09:36:34 +02:00
|
|
|
}
|
2010-08-26 19:40:24 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-08-31 09:36:34 +02:00
|
|
|
return !Result.empty();
|
2009-11-07 09:05:03 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
// Handle compare with phi operand, where the PHI is defined in this block.
|
|
|
|
if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
|
2010-12-07 00:36:56 +01:00
|
|
|
assert(Preference == WantInteger && "Compares only produce integers");
|
2017-06-23 07:41:32 +02:00
|
|
|
Type *CmpType = Cmp->getType();
|
|
|
|
Value *CmpLHS = Cmp->getOperand(0);
|
|
|
|
Value *CmpRHS = Cmp->getOperand(1);
|
|
|
|
CmpInst::Predicate Pred = Cmp->getPredicate();
|
|
|
|
|
|
|
|
PHINode *PN = dyn_cast<PHINode>(CmpLHS);
|
2009-11-07 09:05:03 +01:00
|
|
|
if (PN && PN->getParent() == BB) {
|
2015-03-10 03:37:25 +01:00
|
|
|
const DataLayout &DL = PN->getModule()->getDataLayout();
|
2009-11-07 09:05:03 +01:00
|
|
|
// We can do this simplification if any comparisons fold to true or false.
|
|
|
|
// See if any do.
|
|
|
|
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
|
|
|
|
BasicBlock *PredBB = PN->getIncomingBlock(i);
|
|
|
|
Value *LHS = PN->getIncomingValue(i);
|
2017-06-23 07:41:32 +02:00
|
|
|
Value *RHS = CmpRHS->DoPHITranslation(BB, PredBB);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2017-06-23 07:41:32 +02:00
|
|
|
Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL});
|
2014-04-25 07:29:35 +02:00
|
|
|
if (!Res) {
|
2010-09-14 22:57:41 +02:00
|
|
|
if (!isa<Constant>(RHS))
|
2009-11-12 06:24:05 +01:00
|
|
|
continue;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
|
|
|
LazyValueInfo::Tristate
|
2017-06-23 07:41:32 +02:00
|
|
|
ResT = LVI->getPredicateOnEdge(Pred, LHS,
|
2014-09-07 22:29:59 +02:00
|
|
|
cast<Constant>(RHS), PredBB, BB,
|
|
|
|
CxtI ? CxtI : Cmp);
|
2009-11-12 06:24:05 +01:00
|
|
|
if (ResT == LazyValueInfo::Unknown)
|
|
|
|
continue;
|
|
|
|
Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-12-07 00:36:56 +01:00
|
|
|
if (Constant *KC = getKnownConstant(Res, WantInteger))
|
|
|
|
Result.push_back(std::make_pair(KC, PredBB));
|
2009-11-07 09:05:03 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
return !Result.empty();
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-11 23:31:38 +01:00
|
|
|
// If comparing a live-in value against a constant, see if we know the
|
|
|
|
// live-in value on any predecessors.
|
2017-06-23 07:41:32 +02:00
|
|
|
if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
|
|
|
|
Constant *CmpConst = cast<Constant>(CmpRHS);
|
2017-05-04 23:45:45 +02:00
|
|
|
|
2017-06-23 07:41:32 +02:00
|
|
|
if (!isa<Instruction>(CmpLHS) ||
|
|
|
|
cast<Instruction>(CmpLHS)->getParent() != BB) {
|
2016-01-09 19:43:01 +01:00
|
|
|
for (BasicBlock *P : predecessors(BB)) {
|
2010-08-26 19:40:24 +02:00
|
|
|
// If the value is known by LazyValueInfo to be a constant in a
|
|
|
|
// predecessor, use that information to try to thread this block.
|
|
|
|
LazyValueInfo::Tristate Res =
|
2017-06-23 07:41:32 +02:00
|
|
|
LVI->getPredicateOnEdge(Pred, CmpLHS,
|
2017-05-04 23:45:45 +02:00
|
|
|
CmpConst, P, BB, CxtI ? CxtI : Cmp);
|
2010-08-26 19:40:24 +02:00
|
|
|
if (Res == LazyValueInfo::Unknown)
|
|
|
|
continue;
|
2009-11-12 05:37:50 +01:00
|
|
|
|
2017-06-23 07:41:32 +02:00
|
|
|
Constant *ResC = ConstantInt::get(CmpType, Res);
|
2010-12-05 20:06:41 +01:00
|
|
|
Result.push_back(std::make_pair(ResC, P));
|
2010-08-26 19:40:24 +02:00
|
|
|
}
|
2010-07-12 16:10:24 +02:00
|
|
|
|
2010-08-26 19:40:24 +02:00
|
|
|
return !Result.empty();
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2017-06-23 07:41:35 +02:00
|
|
|
// InstCombine can fold some forms of constant range checks into
|
|
|
|
// (icmp (add (x, C1)), C2). See if we have such a thing with
|
|
|
|
// x as a live-in.
|
|
|
|
{
|
|
|
|
using namespace PatternMatch;
|
|
|
|
Value *AddLHS;
|
|
|
|
ConstantInt *AddConst;
|
|
|
|
if (isa<ConstantInt>(CmpConst) &&
|
|
|
|
match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
|
|
|
|
if (!isa<Instruction>(AddLHS) ||
|
|
|
|
cast<Instruction>(AddLHS)->getParent() != BB) {
|
|
|
|
for (BasicBlock *P : predecessors(BB)) {
|
|
|
|
// If the value is known by LazyValueInfo to be a ConstantRange in
|
|
|
|
// a predecessor, use that information to try to thread this
|
|
|
|
// block.
|
|
|
|
ConstantRange CR = LVI->getConstantRangeOnEdge(
|
|
|
|
AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
|
|
|
|
// Propagate the range through the addition.
|
|
|
|
CR = CR.add(AddConst->getValue());
|
|
|
|
|
|
|
|
// Get the range where the compare returns true.
|
|
|
|
ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(
|
|
|
|
Pred, cast<ConstantInt>(CmpConst)->getValue());
|
|
|
|
|
|
|
|
Constant *ResC;
|
|
|
|
if (CmpRange.contains(CR))
|
|
|
|
ResC = ConstantInt::getTrue(CmpType);
|
|
|
|
else if (CmpRange.inverse().contains(CR))
|
|
|
|
ResC = ConstantInt::getFalse(CmpType);
|
|
|
|
else
|
|
|
|
continue;
|
|
|
|
|
|
|
|
Result.push_back(std::make_pair(ResC, P));
|
|
|
|
}
|
|
|
|
|
|
|
|
return !Result.empty();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-08-31 09:36:34 +02:00
|
|
|
// Try to find a constant value for the LHS of a comparison,
|
2010-08-26 19:40:24 +02:00
|
|
|
// and evaluate it statically if we can.
|
2017-05-04 23:45:45 +02:00
|
|
|
PredValueInfoTy LHSVals;
|
|
|
|
ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
|
|
|
|
WantInteger, CxtI);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2017-05-04 23:45:45 +02:00
|
|
|
for (const auto &LHSVal : LHSVals) {
|
|
|
|
Constant *V = LHSVal.first;
|
2017-06-23 07:41:32 +02:00
|
|
|
Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
|
2017-05-04 23:45:45 +02:00
|
|
|
if (Constant *KC = getKnownConstant(Folded, WantInteger))
|
|
|
|
Result.push_back(std::make_pair(KC, LHSVal.second));
|
2010-08-26 19:40:24 +02:00
|
|
|
}
|
2017-05-04 23:45:45 +02:00
|
|
|
|
|
|
|
return !Result.empty();
|
2009-11-11 23:31:38 +01:00
|
|
|
}
|
2009-11-07 09:05:03 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-12-15 10:51:20 +01:00
|
|
|
if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
|
|
|
|
// Handle select instructions where at least one operand is a known constant
|
|
|
|
// and we can figure out the condition value for any predecessor block.
|
|
|
|
Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
|
|
|
|
Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
|
|
|
|
PredValueInfoTy Conds;
|
|
|
|
if ((TrueVal || FalseVal) &&
|
|
|
|
ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
|
2016-03-16 00:38:47 +01:00
|
|
|
WantInteger, CxtI)) {
|
2016-01-09 19:43:01 +01:00
|
|
|
for (auto &C : Conds) {
|
|
|
|
Constant *Cond = C.first;
|
2010-12-15 10:51:20 +01:00
|
|
|
|
|
|
|
// Figure out what value to use for the condition.
|
|
|
|
bool KnownCond;
|
|
|
|
if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
|
|
|
|
// A known boolean.
|
|
|
|
KnownCond = CI->isOne();
|
|
|
|
} else {
|
|
|
|
assert(isa<UndefValue>(Cond) && "Unexpected condition value");
|
|
|
|
// Either operand will do, so be sure to pick the one that's a known
|
|
|
|
// constant.
|
|
|
|
// FIXME: Do this more cleverly if both values are known constants?
|
2014-04-25 07:29:35 +02:00
|
|
|
KnownCond = (TrueVal != nullptr);
|
2010-12-15 10:51:20 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// See if the select has a known constant value for this predecessor.
|
|
|
|
if (Constant *Val = KnownCond ? TrueVal : FalseVal)
|
2016-01-09 19:43:01 +01:00
|
|
|
Result.push_back(std::make_pair(Val, C.second));
|
2010-12-15 10:51:20 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return !Result.empty();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-09-14 22:57:41 +02:00
|
|
|
// If all else fails, see if LVI can figure out a constant value for us.
|
2014-09-07 22:29:59 +02:00
|
|
|
Constant *CI = LVI->getConstant(V, BB, CxtI);
|
2010-12-07 00:36:56 +01:00
|
|
|
if (Constant *KC = getKnownConstant(CI, Preference)) {
|
2016-01-09 19:43:01 +01:00
|
|
|
for (BasicBlock *Pred : predecessors(BB))
|
|
|
|
Result.push_back(std::make_pair(KC, Pred));
|
2010-08-26 19:40:24 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-09-14 22:57:41 +02:00
|
|
|
return !Result.empty();
|
2009-11-07 09:05:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
Teach jump threading to thread through blocks like:
br (and X, phi(Y, Z, false)), label L1, label L2
This triggers once on 252.eon and 6 times on 176.gcc. Blocks
in question often look like this:
bb262: ; preds = %bb261, %bb248
%iftmp.251.0 = phi i1 [ true, %bb261 ], [ false, %bb248 ] ; <i1> [#uses=4]
%tmp270 = icmp eq %struct.rtx_def* %tmp.0.i, null ; <i1> [#uses=1]
%bothcond = or i1 %iftmp.251.0, %tmp270 ; <i1> [#uses=1]
br i1 %bothcond, label %bb288, label %bb273
In this case, it is clear that it doesn't matter if tmp.0.i is null when coming from bb261. When coming from bb248, it is all that matters.
Another random example:
check_asm_operands.exit: ; preds = %check_asm_operands.exit.thr_comm, %bb30.i, %bb12.i, %bb6.i413
%tmp.0.i420 = phi i1 [ true, %bb6.i413 ], [ true, %bb12.i ], [ true, %bb30.i ], [ false, %check_asm_operands.exit.thr_comm ; <i1> [#uses=1]
call void @llvm.stackrestore( i8* %savedstack ) nounwind
%tmp4389 = icmp eq i32 %added_sets_1.0, 0 ; <i1> [#uses=1]
%tmp4394 = icmp eq i32 %added_sets_2.0, 0 ; <i1> [#uses=1]
%bothcond80 = and i1 %tmp4389, %tmp4394 ; <i1> [#uses=1]
%bothcond81 = and i1 %bothcond80, %tmp.0.i420 ; <i1> [#uses=1]
br i1 %bothcond81, label %bb4398, label %bb4397
Here is the case from 252.eon:
bb290.i.i: ; preds = %bb23.i57.i.i, %bb8.i39.i.i, %bb100.i.i, %bb100.i.i, %bb85.i.i110
%myEOF.1.i.i = phi i1 [ true, %bb100.i.i ], [ true, %bb100.i.i ], [ true, %bb85.i.i110 ], [ true, %bb8.i39.i.i ], [ false, %bb23.i57.i.i ] ; <i1> [#uses=2]
%i.4.i.i = phi i32 [ %i.1.i.i, %bb85.i.i110 ], [ %i.0.i.i, %bb100.i.i ], [ %i.0.i.i, %bb100.i.i ], [ %i.3.i.i, %bb8.i39.i.i ], [ %i.3.i.i, %bb23.i57.i.i ] ; <i32> [#uses=3]
%tmp292.i.i = load i8* %tmp16.i.i100, align 1 ; <i8> [#uses=1]
%tmp293.not.i.i = icmp ne i8 %tmp292.i.i, 0 ; <i1> [#uses=1]
%bothcond.i.i = and i1 %tmp293.not.i.i, %myEOF.1.i.i ; <i1> [#uses=1]
br i1 %bothcond.i.i, label %bb202.i.i, label %bb301.i.i
Factoring out 3 common predecessors.
On the path from any blocks other than bb23.i57.i.i, the load and compare
are dead.
llvm-svn: 50096
2008-04-22 09:05:46 +02:00
|
|
|
|
2009-10-11 06:18:15 +02:00
|
|
|
/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
|
|
|
|
/// in an undefined jump, decide which block is best to revector to.
|
|
|
|
///
|
|
|
|
/// Since we can pick an arbitrary destination, we pick the successor with the
|
|
|
|
/// fewest predecessors. This should reduce the in-degree of the others.
|
|
|
|
///
|
|
|
|
static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
|
|
|
|
TerminatorInst *BBTerm = BB->getTerminator();
|
|
|
|
unsigned MinSucc = 0;
|
|
|
|
BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
|
|
|
|
// Compute the successor with the minimum number of predecessors.
|
|
|
|
unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
|
|
|
|
for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
|
|
|
|
TestBB = BBTerm->getSuccessor(i);
|
|
|
|
unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
|
2011-06-27 23:51:12 +02:00
|
|
|
if (NumPreds < MinNumPreds) {
|
2009-10-11 06:18:15 +02:00
|
|
|
MinSucc = i;
|
2011-06-27 23:51:12 +02:00
|
|
|
MinNumPreds = NumPreds;
|
|
|
|
}
|
2009-10-11 06:18:15 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 06:18:15 +02:00
|
|
|
return MinSucc;
|
|
|
|
}
|
|
|
|
|
2011-02-18 05:43:06 +01:00
|
|
|
/// Return true if the address of \p BB is taken and the resulting
/// BlockAddress still has users once its dead constant users are removed.
static bool hasAddressTakenAndUsed(BasicBlock *BB) {
  if (BB->hasAddressTaken()) {
    // A block whose address is taken may only have a tree of dead constants
    // hanging off of it. Those should not keep the block alive, so prune
    // them before looking at the remaining uses.
    BlockAddress *Addr = BlockAddress::get(BB);
    Addr->removeDeadConstantUsers();
    return !Addr->use_empty();
  }

  return false;
}
|
|
|
|
|
2008-11-27 08:20:04 +01:00
|
|
|
/// ProcessBlock - If there are any predecessors whose control can be threaded
|
2008-04-20 23:13:06 +02:00
|
|
|
/// through to a successor, transform them now.
|
2016-06-14 02:51:09 +02:00
|
|
|
bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
|
2010-01-23 19:56:07 +01:00
|
|
|
// If the block is trivially dead, just return and let the caller nuke it.
|
|
|
|
// This simplifies other transformations.
|
2015-01-13 04:46:47 +01:00
|
|
|
if (pred_empty(BB) &&
|
2010-01-23 19:56:07 +01:00
|
|
|
BB != &BB->getParent()->getEntryBlock())
|
|
|
|
return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// If this block has a single predecessor, and if that pred has a single
|
|
|
|
// successor, merge the blocks. This encourages recursive jump threading
|
|
|
|
// because now the condition in this block can be threaded through
|
|
|
|
// predecessors of our predecessor block.
|
2009-11-07 09:05:03 +01:00
|
|
|
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
|
2015-07-31 19:58:14 +02:00
|
|
|
const TerminatorInst *TI = SinglePred->getTerminator();
|
|
|
|
if (!TI->isExceptional() && TI->getNumSuccessors() == 1 &&
|
2011-02-18 05:43:06 +01:00
|
|
|
SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
|
2009-05-04 04:28:08 +02:00
|
|
|
// If SinglePred was a loop header, BB becomes one.
|
|
|
|
if (LoopHeaders.erase(SinglePred))
|
|
|
|
LoopHeaders.insert(BB);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-06-16 18:25:53 +02:00
|
|
|
LVI->eraseBlock(SinglePred);
|
2008-11-27 06:07:53 +01:00
|
|
|
MergeBasicBlockIntoOnlyPred(BB);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2017-06-19 17:23:33 +02:00
|
|
|
// Now that BB is merged into SinglePred (i.e. SinglePred Code followed by
|
|
|
|
// BB code within one basic block `BB`), we need to invalidate the LVI
|
|
|
|
// information associated with BB, because the LVI information need not be
|
|
|
|
// true for all of BB after the merge. For example,
|
|
|
|
// Before the merge, LVI info and code is as follows:
|
|
|
|
// SinglePred: <LVI info1 for %p val>
|
|
|
|
// %y = use of %p
|
|
|
|
// call @exit() // need not transfer execution to successor.
|
|
|
|
// assume(%p) // from this point on %p is true
|
|
|
|
// br label %BB
|
|
|
|
// BB: <LVI info2 for %p val, i.e. %p is true>
|
|
|
|
// %x = use of %p
|
|
|
|
// br label exit
|
|
|
|
//
|
|
|
|
// Note that this LVI info for blocks BB and SinglPred is correct for %p
|
|
|
|
// (info2 and info1 respectively). After the merge and the deletion of the
|
|
|
|
// LVI info1 for SinglePred. We have the following code:
|
|
|
|
// BB: <LVI info2 for %p val>
|
|
|
|
// %y = use of %p
|
|
|
|
// call @exit()
|
|
|
|
// assume(%p)
|
|
|
|
// %x = use of %p <-- LVI info2 is correct from here onwards.
|
|
|
|
// br label exit
|
|
|
|
// LVI info2 for BB is incorrect at the beginning of BB.
|
|
|
|
|
|
|
|
// Invalidate LVI information for BB if the LVI is not provably true for
|
|
|
|
// all of BB.
|
|
|
|
if (any_of(*BB, [](Instruction &I) {
|
|
|
|
return !isGuaranteedToTransferExecutionToSuccessor(&I);
|
|
|
|
}))
|
|
|
|
LVI->eraseBlock(BB);
|
2008-11-27 06:07:53 +01:00
|
|
|
return true;
|
|
|
|
}
|
2009-11-07 09:05:03 +01:00
|
|
|
}
|
|
|
|
|
[JumpThreading] Split select that has constant conditions coming from the PHI node
Look for PHI/Select in the same BB of the form
bb:
%p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
%s = select p, trueval, falseval
And expand the select into a branch structure. This later enables
jump-threading over bb in this pass.
Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
select if the associated PHI has at least one constant. If the unfolded
select is not jump-threaded, it will be folded again in the later
optimizations.
llvm-svn: 257198
2016-01-08 20:39:39 +01:00
|
|
|
if (TryToUnfoldSelectInCurrBB(BB))
|
|
|
|
return true;
|
|
|
|
|
2017-02-17 05:21:14 +01:00
|
|
|
// Look if we can propagate guards to predecessors.
|
|
|
|
if (HasGuards && ProcessGuards(BB))
|
|
|
|
return true;
|
|
|
|
|
2010-12-07 00:36:56 +01:00
|
|
|
// What kind of constant we're looking for.
|
|
|
|
ConstantPreference Preference = WantInteger;
|
|
|
|
|
|
|
|
// Look to see if the terminator is a conditional branch, switch or indirect
|
|
|
|
// branch, if not we can't thread it.
|
2008-04-20 23:13:06 +02:00
|
|
|
Value *Condition;
|
2010-12-07 00:36:56 +01:00
|
|
|
Instruction *Terminator = BB->getTerminator();
|
|
|
|
if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
|
2008-04-21 00:39:42 +02:00
|
|
|
// Can't thread an unconditional jump.
|
|
|
|
if (BI->isUnconditional()) return false;
|
2008-04-20 23:13:06 +02:00
|
|
|
Condition = BI->getCondition();
|
2010-12-07 00:36:56 +01:00
|
|
|
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
|
2008-04-20 23:13:06 +02:00
|
|
|
Condition = SI->getCondition();
|
2010-12-07 00:36:56 +01:00
|
|
|
} else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
|
2012-07-20 12:36:17 +02:00
|
|
|
// Can't thread indirect branch with no successors.
|
|
|
|
if (IB->getNumSuccessors() == 0) return false;
|
2010-12-07 00:36:56 +01:00
|
|
|
Condition = IB->getAddress()->stripPointerCasts();
|
|
|
|
Preference = WantBlockAddress;
|
|
|
|
} else {
|
2008-04-20 23:13:06 +02:00
|
|
|
return false; // Must be an invoke.
|
2010-12-07 00:36:56 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2011-04-14 23:35:50 +02:00
|
|
|
// Run constant folding to see if we can reduce the condition to a simple
|
|
|
|
// constant.
|
|
|
|
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
|
2015-03-10 03:37:25 +01:00
|
|
|
Value *SimpleVal =
|
|
|
|
ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
|
2011-04-14 23:35:50 +02:00
|
|
|
if (SimpleVal) {
|
|
|
|
I->replaceAllUsesWith(SimpleVal);
|
2016-07-22 06:54:44 +02:00
|
|
|
if (isInstructionTriviallyDead(I, TLI))
|
|
|
|
I->eraseFromParent();
|
2011-04-14 23:35:50 +02:00
|
|
|
Condition = SimpleVal;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-12-03 08:48:08 +01:00
|
|
|
// If the terminator is branching on an undef, we can pick any of the
|
2009-10-11 06:18:15 +02:00
|
|
|
// successors to branch to. Let GetBestDestForJumpOnUndef decide.
|
2008-12-03 08:48:08 +01:00
|
|
|
if (isa<UndefValue>(Condition)) {
|
2009-10-11 06:18:15 +02:00
|
|
|
unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-12-03 08:48:08 +01:00
|
|
|
// Fold the branch/switch.
|
2009-10-11 06:18:15 +02:00
|
|
|
TerminatorInst *BBTerm = BB->getTerminator();
|
2008-12-03 08:48:08 +01:00
|
|
|
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
|
2009-10-11 06:18:15 +02:00
|
|
|
if (i == BestSucc) continue;
|
2010-09-29 22:34:41 +02:00
|
|
|
BBTerm->getSuccessor(i)->removePredecessor(BB, true);
|
2008-12-03 08:48:08 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << " In block '" << BB->getName()
|
2009-10-11 09:24:57 +02:00
|
|
|
<< "' folding undef terminator: " << *BBTerm << '\n');
|
2009-10-11 06:18:15 +02:00
|
|
|
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
|
2008-12-03 08:48:08 +01:00
|
|
|
BBTerm->eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-12-05 20:06:41 +01:00
|
|
|
// If the terminator of this block is branching on a constant, simplify the
|
|
|
|
// terminator to an unconditional branch. This can occur due to threading in
|
|
|
|
// other blocks.
|
2010-12-07 00:36:56 +01:00
|
|
|
if (getKnownConstant(Condition, Preference)) {
|
2010-12-05 20:06:41 +01:00
|
|
|
DEBUG(dbgs() << " In block '" << BB->getName()
|
|
|
|
<< "' folding terminator: " << *BB->getTerminator() << '\n');
|
|
|
|
++NumFolds;
|
2011-05-22 18:24:18 +02:00
|
|
|
ConstantFoldTerminator(BB, true);
|
2010-12-05 20:06:41 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2008-12-03 08:48:08 +01:00
|
|
|
Instruction *CondInst = dyn_cast<Instruction>(Condition);
|
|
|
|
|
|
|
|
// All the rest of our checks depend on the condition being an instruction.
|
2014-04-25 07:29:35 +02:00
|
|
|
if (!CondInst) {
|
2009-11-12 02:41:34 +01:00
|
|
|
// FIXME: Unify this with code below.
|
2014-09-07 22:29:59 +02:00
|
|
|
if (ProcessThreadableEdges(Condition, BB, Preference, Terminator))
|
2009-11-12 02:41:34 +01:00
|
|
|
return true;
|
2008-12-03 08:48:08 +01:00
|
|
|
return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
}
|
|
|
|
|
2009-06-19 06:56:29 +02:00
|
|
|
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
|
2015-06-16 02:49:59 +02:00
|
|
|
// If we're branching on a conditional, LVI might be able to determine
|
2015-06-19 03:53:21 +02:00
|
|
|
// it's value at the branch instruction. We only handle comparisons
|
2015-06-16 02:49:59 +02:00
|
|
|
// against a constant at this time.
|
2015-09-18 21:14:35 +02:00
|
|
|
// TODO: This should be extended to handle switches as well.
|
2010-08-27 19:12:29 +02:00
|
|
|
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
|
|
|
|
Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
|
2017-03-07 19:59:09 +01:00
|
|
|
if (CondBr && CondConst) {
|
|
|
|
// We should have returned as soon as we turn a conditional branch to
|
|
|
|
// unconditional. Because its no longer interesting as far as jump
|
|
|
|
// threading is concerned.
|
|
|
|
assert(CondBr->isConditional() && "Threading on unconditional terminator");
|
|
|
|
|
2014-09-07 22:29:59 +02:00
|
|
|
LazyValueInfo::Tristate Ret =
|
|
|
|
LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
|
2015-06-16 02:49:59 +02:00
|
|
|
CondConst, CondBr);
|
2014-09-07 22:29:59 +02:00
|
|
|
if (Ret != LazyValueInfo::Unknown) {
|
|
|
|
unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0;
|
|
|
|
unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
|
|
|
|
CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
|
|
|
|
BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
|
|
|
|
CondBr->eraseFromParent();
|
2015-06-16 02:49:59 +02:00
|
|
|
if (CondCmp->use_empty())
|
|
|
|
CondCmp->eraseFromParent();
|
2017-05-23 15:36:25 +02:00
|
|
|
// We can safely replace *some* uses of the CondInst if it has
|
2017-05-18 15:12:18 +02:00
|
|
|
// exactly one value as returned by LVI. RAUW is incorrect in the
|
|
|
|
// presence of guards and assumes, that have the `Cond` as the use. This
|
|
|
|
// is because we use the guards/assume to reason about the `Cond` value
|
|
|
|
// at the end of block, but RAUW unconditionally replaces all uses
|
|
|
|
// including the guards/assumes themselves and the uses before the
|
|
|
|
// guard/assume.
|
2017-05-23 15:36:25 +02:00
|
|
|
else if (CondCmp->getParent() == BB) {
|
|
|
|
auto *CI = Ret == LazyValueInfo::True ?
|
|
|
|
ConstantInt::getTrue(CondCmp->getType()) :
|
|
|
|
ConstantInt::getFalse(CondCmp->getType());
|
|
|
|
ReplaceFoldableUses(CondCmp, CI);
|
|
|
|
}
|
2014-09-07 22:29:59 +02:00
|
|
|
return true;
|
|
|
|
}
|
JumpThreading: Turn a select instruction into branching if it allows to thread one half of the select.
This is a common pattern coming out of simplifycfg generating gross code.
a: ; preds = %entry
%sel = select i1 %cmp1, double %add, double 0.000000e+00
br label %b
b:
%cond5 = phi double [ %sel, %a ], [ %sub, %entry ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
becomes
a:
br i1 %cmp1, label %b, label %if.then
b:
%cond5 = phi double [ %sub, %entry ], [ %add, %a ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
Skipping block b completely if possible.
llvm-svn: 187880
2013-08-07 12:29:38 +02:00
|
|
|
|
2017-03-07 19:59:09 +01:00
|
|
|
// We did not manage to simplify this branch, try to see whether
|
|
|
|
// CondCmp depends on a known phi-select pattern.
|
|
|
|
if (TryToUnfoldSelect(CondCmp, BB))
|
|
|
|
return true;
|
|
|
|
}
|
2009-06-19 06:56:29 +02:00
|
|
|
}
|
2008-11-27 06:07:53 +01:00
|
|
|
|
|
|
|
// Check for some cases that are worth simplifying. Right now we want to look
|
|
|
|
// for loads that are used by a switch or by the condition for the branch. If
|
|
|
|
// we see one, check to see if it's partially redundant. If so, insert a PHI
|
|
|
|
// which can then be used to thread the values.
|
|
|
|
//
|
2008-12-03 08:48:08 +01:00
|
|
|
Value *SimplifyValue = CondInst;
|
2008-11-27 06:07:53 +01:00
|
|
|
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
|
|
|
|
if (isa<Constant>(CondCmp->getOperand(1)))
|
|
|
|
SimplifyValue = CondCmp->getOperand(0);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-15 20:58:31 +01:00
|
|
|
// TODO: There are other places where load PRE would be profitable, such as
|
|
|
|
// more complex comparisons.
|
2008-11-27 06:07:53 +01:00
|
|
|
if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
|
|
|
|
if (SimplifyPartiallyRedundantLoad(LI))
|
|
|
|
return true;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
// Handle a variety of cases where we are branching on something derived from
|
|
|
|
// a PHI node in the current block. If we can prove that any predecessors
|
|
|
|
// compute a predictable value based on a PHI node, thread those predecessors.
|
|
|
|
//
|
2014-09-07 22:29:59 +02:00
|
|
|
if (ProcessThreadableEdges(CondInst, BB, Preference, Terminator))
|
2009-11-11 03:08:33 +01:00
|
|
|
return true;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-12 00:41:09 +01:00
|
|
|
// If this is an otherwise-unfoldable branch on a phi node in the current
|
|
|
|
// block, see if we can simplify.
|
|
|
|
if (PHINode *PN = dyn_cast<PHINode>(CondInst))
|
|
|
|
if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
|
|
|
|
return ProcessBranchOnPHI(PN);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-12 03:07:17 +01:00
|
|
|
// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
|
|
|
|
if (CondInst->getOpcode() == Instruction::Xor &&
|
|
|
|
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
|
|
|
|
return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2015-10-28 22:27:08 +01:00
|
|
|
// Search for a stronger dominating condition that can be used to simplify a
|
|
|
|
// conditional branch leaving BB.
|
|
|
|
if (ProcessImpliedCondition(BB))
|
|
|
|
return true;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2015-10-28 22:27:08 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-06-14 02:51:09 +02:00
|
|
|
/// Try to fold the conditional branch that ends \p BB using a condition
/// tested on the way to it.
///
/// Starting at BB, this walks up the chain of single predecessors for at most
/// ImplicationSearchThreshold steps. At each step the predecessor must end in
/// a conditional branch that has the current block as one of its successors.
/// If isImpliedCondition reports that the predecessor's condition (taken on
/// the edge leading here) decides BB's condition, BB's branch is replaced by
/// an unconditional branch to the surviving successor.
///
/// \returns true if the branch in BB was folded.
bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
  auto *Branch = dyn_cast<BranchInst>(BB->getTerminator());
  if (!Branch || !Branch->isConditional())
    return false;

  Value *BranchCond = Branch->getCondition();
  auto &DL = BB->getModule()->getDataLayout();

  // Child is the block we arrived at; Parent is its only predecessor.
  BasicBlock *Child = BB;
  BasicBlock *Parent = BB->getSinglePredecessor();

  for (unsigned Depth = 0; Parent && Depth < ImplicationSearchThreshold;
       ++Depth) {
    auto *ParentBr = dyn_cast<BranchInst>(Parent->getTerminator());
    if (!ParentBr || !ParentBr->isConditional())
      return false;

    const bool ViaTrueEdge = ParentBr->getSuccessor(0) == Child;
    const bool ViaFalseEdge = ParentBr->getSuccessor(1) == Child;
    if (!ViaTrueEdge && !ViaFalseEdge)
      return false;

    // Ask whether the parent's condition, on the edge that leads to Child,
    // settles the value of our own branch condition.
    Optional<bool> Implied = isImpliedCondition(ParentBr->getCondition(),
                                                BranchCond, DL, ViaFalseEdge);
    if (Implied) {
      const unsigned LiveIdx = *Implied ? 0 : 1;
      const unsigned DeadIdx = *Implied ? 1 : 0;

      Branch->getSuccessor(DeadIdx)->removePredecessor(BB);
      BranchInst::Create(Branch->getSuccessor(LiveIdx), Branch);
      Branch->eraseFromParent();
      return true;
    }

    // Nothing learned at this level; move one step up the chain.
    Child = Parent;
    Parent = Child->getSinglePredecessor();
  }

  return false;
}
|
|
|
|
|
2017-03-19 16:30:53 +01:00
|
|
|
/// Return true if Op is an instruction defined in the given block.
|
|
|
|
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {
|
|
|
|
if (Instruction *OpInst = dyn_cast<Instruction>(Op))
|
|
|
|
if (OpInst->getParent() == BB)
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
|
|
|
|
/// load instruction, eliminate it by replacing it with a PHI node. This is an
|
|
|
|
/// important optimization that encourages jump threading, and needs to be run
|
|
|
|
/// interlaced with other jump threading tasks.
|
2016-06-14 02:51:09 +02:00
|
|
|
bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
|
2016-07-14 21:21:15 +02:00
|
|
|
// Don't hack volatile and ordered loads.
|
|
|
|
if (!LI->isUnordered()) return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// If the load is defined in a block with exactly one predecessor, it can't be
|
|
|
|
// partially redundant.
|
|
|
|
BasicBlock *LoadBB = LI->getParent();
|
|
|
|
if (LoadBB->getSinglePredecessor())
|
|
|
|
return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2015-08-04 10:21:40 +02:00
|
|
|
// If the load is defined in an EH pad, it can't be partially redundant,
|
|
|
|
// because the edges between the invoke and the EH pad cannot have other
|
2013-10-21 06:09:17 +02:00
|
|
|
// instructions between them.
|
2015-08-04 10:21:40 +02:00
|
|
|
if (LoadBB->isEHPad())
|
2013-10-21 06:09:17 +02:00
|
|
|
return false;
|
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
Value *LoadedPtr = LI->getOperand(0);
|
|
|
|
|
2017-03-19 16:30:53 +01:00
|
|
|
// If the loaded operand is defined in the LoadBB and its not a phi,
|
|
|
|
// it can't be available in predecessors.
|
|
|
|
if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
|
|
|
|
return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// Scan a few instructions up from the load, to see if it is obviously live at
|
|
|
|
// the entry to its block.
|
2015-10-13 20:26:00 +02:00
|
|
|
BasicBlock::iterator BBIt(LI);
|
2016-08-08 06:10:22 +02:00
|
|
|
bool IsLoadCSE;
|
2017-03-08 16:22:30 +01:00
|
|
|
if (Value *AvailableVal = FindAvailableLoadedValue(
|
|
|
|
LI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
|
2015-09-16 15:27:30 +02:00
|
|
|
// If the value of the load is locally available within the block, just use
|
2008-11-27 06:07:53 +01:00
|
|
|
// it. This frequently occurs for reg2mem'd allocas.
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-08-08 06:10:22 +02:00
|
|
|
if (IsLoadCSE) {
|
|
|
|
LoadInst *NLI = cast<LoadInst>(AvailableVal);
|
|
|
|
combineMetadataForCSE(NLI, LI);
|
|
|
|
};
|
|
|
|
|
2009-01-09 07:08:12 +01:00
|
|
|
// If the returned value is the load itself, replace with an undef. This can
|
|
|
|
// only happen in dead loops.
|
2009-07-31 01:03:37 +02:00
|
|
|
if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
|
2014-10-20 02:24:14 +02:00
|
|
|
if (AvailableVal->getType() != LI->getType())
|
2014-11-25 09:20:27 +01:00
|
|
|
AvailableVal =
|
|
|
|
CastInst::CreateBitOrPointerCast(AvailableVal, LI->getType(), "", LI);
|
2008-11-27 06:07:53 +01:00
|
|
|
LI->replaceAllUsesWith(AvailableVal);
|
|
|
|
LI->eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise, if we scanned the whole block and got to the top of the block,
|
|
|
|
// we know the block is locally transparent to the load. If not, something
|
|
|
|
// might clobber its value.
|
|
|
|
if (BBIt != LoadBB->begin())
|
|
|
|
return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2014-07-24 14:16:19 +02:00
|
|
|
// If all of the loads and stores that feed the value have the same AA tags,
|
|
|
|
// then we can propagate them onto any newly inserted loads.
|
|
|
|
AAMDNodes AATags;
|
|
|
|
LI->getAAMetadata(AATags);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
SmallPtrSet<BasicBlock*, 8> PredsScanned;
|
|
|
|
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
|
|
|
|
AvailablePredsTy AvailablePreds;
|
2014-04-25 07:29:35 +02:00
|
|
|
BasicBlock *OneUnavailablePred = nullptr;
|
2016-08-08 06:10:22 +02:00
|
|
|
SmallVector<LoadInst*, 8> CSELoads;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// If we got here, the loaded value is transparent through to the start of the
|
|
|
|
// block. Check to see if it is available in any of the predecessor blocks.
|
2016-01-09 19:43:01 +01:00
|
|
|
for (BasicBlock *PredBB : predecessors(LoadBB)) {
|
2008-11-27 06:07:53 +01:00
|
|
|
// If we already scanned this predecessor, skip it.
|
2014-11-19 08:49:26 +01:00
|
|
|
if (!PredsScanned.insert(PredBB).second)
|
2008-11-27 06:07:53 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
BBIt = PredBB->end();
|
2017-02-02 16:12:34 +01:00
|
|
|
unsigned NumScanedInst = 0;
|
2017-03-19 16:30:53 +01:00
|
|
|
Value *PredAvailable = nullptr;
|
|
|
|
// NOTE: We don't CSE load that is volatile or anything stronger than
|
|
|
|
// unordered, that should have been checked when we entered the function.
|
|
|
|
assert(LI->isUnordered() && "Attempting to CSE volatile or atomic loads");
|
|
|
|
// If this is a load on a phi pointer, phi-translate it and search
|
|
|
|
// for available load/store to the pointer in predecessors.
|
|
|
|
Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB);
|
|
|
|
PredAvailable = FindAvailablePtrLoadStore(
|
|
|
|
Ptr, LI->getType(), LI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
|
2017-03-19 16:41:46 +01:00
|
|
|
AA, &IsLoadCSE, &NumScanedInst);
|
2017-03-19 16:30:53 +01:00
|
|
|
|
|
|
|
// If PredBB has a single predecessor, continue scanning through the
|
|
|
|
// single precessor.
|
2017-02-02 16:12:34 +01:00
|
|
|
BasicBlock *SinglePredBB = PredBB;
|
|
|
|
while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
|
|
|
|
NumScanedInst < DefMaxInstsToScan) {
|
|
|
|
SinglePredBB = SinglePredBB->getSinglePredecessor();
|
|
|
|
if (SinglePredBB) {
|
|
|
|
BBIt = SinglePredBB->end();
|
2017-03-19 16:30:53 +01:00
|
|
|
PredAvailable = FindAvailablePtrLoadStore(
|
|
|
|
Ptr, LI->getType(), LI->isAtomic(), SinglePredBB, BBIt,
|
2017-03-19 16:41:46 +01:00
|
|
|
(DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
|
2017-03-19 16:30:53 +01:00
|
|
|
&NumScanedInst);
|
2017-02-02 16:12:34 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
if (!PredAvailable) {
|
|
|
|
OneUnavailablePred = PredBB;
|
|
|
|
continue;
|
|
|
|
}
|
2012-07-24 12:51:42 +02:00
|
|
|
|
2016-08-08 06:10:22 +02:00
|
|
|
if (IsLoadCSE)
|
|
|
|
CSELoads.push_back(cast<LoadInst>(PredAvailable));
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// If so, this load is partially redundant. Remember this info so that we
|
|
|
|
// can create a PHI node.
|
|
|
|
AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// If the loaded value isn't available in any predecessor, it isn't partially
|
|
|
|
// redundant.
|
|
|
|
if (AvailablePreds.empty()) return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// Okay, the loaded value is available in at least one (and maybe all!)
|
|
|
|
// predecessors. If the value is unavailable in more than one unique
|
|
|
|
// predecessor, we want to insert a merge block for those common predecessors.
|
|
|
|
// This ensures that we only have to insert one reload, thus not increasing
|
|
|
|
// code size.
|
2014-04-25 07:29:35 +02:00
|
|
|
BasicBlock *UnavailablePred = nullptr;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// If there is exactly one predecessor where the value is unavailable, the
|
|
|
|
// already computed 'OneUnavailablePred' block is it. If it ends in an
|
|
|
|
// unconditional branch, we know that it isn't a critical edge.
|
|
|
|
if (PredsScanned.size() == AvailablePreds.size()+1 &&
|
|
|
|
OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
|
|
|
|
UnavailablePred = OneUnavailablePred;
|
|
|
|
} else if (PredsScanned.size() != AvailablePreds.size()) {
|
|
|
|
// Otherwise, we had multiple unavailable predecessors or we had a critical
|
|
|
|
// edge from the one.
|
|
|
|
SmallVector<BasicBlock*, 8> PredsToSplit;
|
|
|
|
SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
|
|
|
|
|
2016-01-09 19:43:01 +01:00
|
|
|
for (const auto &AvailablePred : AvailablePreds)
|
|
|
|
AvailablePredSet.insert(AvailablePred.first);
|
2008-11-27 06:07:53 +01:00
|
|
|
|
|
|
|
// Add all the unavailable predecessors to the PredsToSplit list.
|
2016-01-09 19:43:01 +01:00
|
|
|
for (BasicBlock *P : predecessors(LoadBB)) {
|
2010-06-14 21:45:43 +02:00
|
|
|
// If the predecessor is an indirect goto, we can't split the edge.
|
2010-07-12 16:10:24 +02:00
|
|
|
if (isa<IndirectBrInst>(P->getTerminator()))
|
2010-06-14 21:45:43 +02:00
|
|
|
return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-07-12 16:10:24 +02:00
|
|
|
if (!AvailablePredSet.count(P))
|
|
|
|
PredsToSplit.push_back(P);
|
2010-06-14 21:45:43 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// Split them out to their own block.
|
2015-10-15 16:59:40 +02:00
|
|
|
UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
|
2008-11-27 06:07:53 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// If the value isn't available in all predecessors, then there will be
|
|
|
|
// exactly one where it isn't available. Insert a load on that edge and add
|
|
|
|
// it to the AvailablePreds list.
|
|
|
|
if (UnavailablePred) {
|
|
|
|
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
|
|
|
|
"Can't handle critical edge here!");
|
2017-03-19 16:30:53 +01:00
|
|
|
LoadInst *NewVal = new LoadInst(
|
|
|
|
LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
|
|
|
|
LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(),
|
2017-07-12 00:23:00 +02:00
|
|
|
LI->getSyncScopeID(), UnavailablePred->getTerminator());
|
2011-05-05 00:48:19 +02:00
|
|
|
NewVal->setDebugLoc(LI->getDebugLoc());
|
2014-07-24 14:16:19 +02:00
|
|
|
if (AATags)
|
|
|
|
NewVal->setAAMetadata(AATags);
|
2012-07-24 12:51:42 +02:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// Now we know that each predecessor of this block has a value in
|
|
|
|
// AvailablePreds, sort them for efficient access as we're walking the preds.
|
2008-12-01 07:52:57 +01:00
|
|
|
array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// Create a PHI node at the start of the block for the PRE'd load value.
|
2011-03-30 13:19:20 +02:00
|
|
|
pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
|
2011-03-30 13:28:46 +02:00
|
|
|
PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
|
2015-10-13 20:26:00 +02:00
|
|
|
&LoadBB->front());
|
2008-11-27 06:07:53 +01:00
|
|
|
PN->takeName(LI);
|
2011-05-05 00:48:19 +02:00
|
|
|
PN->setDebugLoc(LI->getDebugLoc());
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
// Insert new entries into the PHI for each predecessor. A single block may
|
|
|
|
// have multiple entries here.
|
2011-03-30 13:19:20 +02:00
|
|
|
for (pred_iterator PI = PB; PI != PE; ++PI) {
|
2010-07-12 16:10:24 +02:00
|
|
|
BasicBlock *P = *PI;
|
2010-12-05 20:02:47 +01:00
|
|
|
AvailablePredsTy::iterator I =
|
2008-11-27 06:07:53 +01:00
|
|
|
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
|
2014-04-25 07:29:35 +02:00
|
|
|
std::make_pair(P, (Value*)nullptr));
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-07-12 16:10:24 +02:00
|
|
|
assert(I != AvailablePreds.end() && I->first == P &&
|
2008-11-27 06:07:53 +01:00
|
|
|
"Didn't find entry for predecessor!");
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2014-10-20 02:24:14 +02:00
|
|
|
// If we have an available predecessor but it requires casting, insert the
|
2014-10-20 07:34:36 +02:00
|
|
|
// cast in the predecessor and use the cast. Note that we have to update the
|
|
|
|
// AvailablePreds vector as we go so that all of the PHI entries for this
|
|
|
|
// predecessor use the same bitcast.
|
|
|
|
Value *&PredV = I->second;
|
2014-10-20 02:24:14 +02:00
|
|
|
if (PredV->getType() != LI->getType())
|
2014-11-25 09:20:27 +01:00
|
|
|
PredV = CastInst::CreateBitOrPointerCast(PredV, LI->getType(), "",
|
|
|
|
P->getTerminator());
|
2014-10-20 02:24:14 +02:00
|
|
|
|
|
|
|
PN->addIncoming(PredV, I->first);
|
2008-11-27 06:07:53 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-08-08 06:10:22 +02:00
|
|
|
for (LoadInst *PredLI : CSELoads) {
|
|
|
|
combineMetadataForCSE(PredLI, LI);
|
|
|
|
}
|
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
LI->replaceAllUsesWith(PN);
|
|
|
|
LI->eraseFromParent();
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-11-27 06:07:53 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
/// FindMostPopularDest - The specified list contains multiple possible
|
|
|
|
/// threadable destinations. Pick the one that occurs the most frequently in
|
|
|
|
/// the list.
|
|
|
|
static BasicBlock *
|
|
|
|
FindMostPopularDest(BasicBlock *BB,
|
|
|
|
const SmallVectorImpl<std::pair<BasicBlock*,
|
|
|
|
BasicBlock*> > &PredToDestList) {
|
|
|
|
assert(!PredToDestList.empty());
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
// Determine popularity. If there are multiple possible destinations, we
|
|
|
|
// explicitly choose to ignore 'undef' destinations. We prefer to thread
|
|
|
|
// blocks with known and real destinations to threading undef. We'll handle
|
|
|
|
// them later if interesting.
|
|
|
|
DenseMap<BasicBlock*, unsigned> DestPopularity;
|
2016-01-09 19:43:01 +01:00
|
|
|
for (const auto &PredToDest : PredToDestList)
|
|
|
|
if (PredToDest.second)
|
|
|
|
DestPopularity[PredToDest.second]++;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
// Find the most popular dest.
|
|
|
|
DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
|
|
|
|
BasicBlock *MostPopularDest = DPI->first;
|
|
|
|
unsigned Popularity = DPI->second;
|
|
|
|
SmallVector<BasicBlock*, 4> SamePopularity;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
for (++DPI; DPI != DestPopularity.end(); ++DPI) {
|
|
|
|
// If the popularity of this entry isn't higher than the popularity we've
|
|
|
|
// seen so far, ignore it.
|
|
|
|
if (DPI->second < Popularity)
|
|
|
|
; // ignore.
|
|
|
|
else if (DPI->second == Popularity) {
|
|
|
|
// If it is the same as what we've seen so far, keep track of it.
|
|
|
|
SamePopularity.push_back(DPI->first);
|
|
|
|
} else {
|
|
|
|
// If it is more popular, remember it.
|
|
|
|
SamePopularity.clear();
|
|
|
|
MostPopularDest = DPI->first;
|
|
|
|
Popularity = DPI->second;
|
2010-12-05 20:02:47 +01:00
|
|
|
}
|
2009-10-11 09:24:57 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-12-16 13:16:00 +01:00
|
|
|
// Okay, now we know the most popular destination. If there is more than one
|
2009-11-07 09:05:03 +01:00
|
|
|
// destination, we need to determine one. This is arbitrary, but we need
|
|
|
|
// to make a deterministic decision. Pick the first one that appears in the
|
|
|
|
// successor list.
|
|
|
|
if (!SamePopularity.empty()) {
|
|
|
|
SamePopularity.push_back(MostPopularDest);
|
|
|
|
TerminatorInst *TI = BB->getTerminator();
|
|
|
|
for (unsigned i = 0; ; ++i) {
|
|
|
|
assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-08-12 00:21:41 +02:00
|
|
|
if (!is_contained(SamePopularity, TI->getSuccessor(i)))
|
2009-11-07 09:05:03 +01:00
|
|
|
continue;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
MostPopularDest = TI->getSuccessor(i);
|
2009-11-07 02:32:59 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
// Okay, we have finally picked the most popular destination.
|
|
|
|
return MostPopularDest;
|
|
|
|
}
|
|
|
|
|
2016-06-14 02:51:09 +02:00
|
|
|
/// ProcessThreadableEdges - Given the condition Cond that BB's terminator
/// branches on, work out in which predecessors its value is a known constant
/// and use that information either to fold BB's terminator into an
/// unconditional branch (when every predecessor agrees on one destination) or
/// to thread the most popular group of predecessors directly to their
/// destination.
///
/// \param Cond the value the terminator of BB depends on.
/// \param BB the block to thread through.
/// \param Preference forwarded to ComputeValueKnownInPredecessors.
/// \param CxtI forwarded to ComputeValueKnownInPredecessors.
/// \returns true if the IR was changed.
bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
                                               ConstantPreference Preference,
                                               Instruction *CxtI) {
  // If threading this would thread across a loop header, don't even try to
  // thread the edge.
  if (LoopHeaders.count(BB))
    return false;

  PredValueInfoTy PredValues;
  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
    return false;

  assert(!PredValues.empty() &&
         "ComputeValueKnownInPredecessors returned true with no values");

  DEBUG(dbgs() << "IN BB: " << *BB;
        for (const auto &PredValue : PredValues) {
          dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
            << *PredValue.first
            << " for pred '" << PredValue.second->getName() << "'.\n";
        });

  // Decide what we want to thread through. Convert our list of known values to
  // a list of known destinations for each pred. This also discards duplicate
  // predecessors and keeps track of the undefined inputs (which are represented
  // as a null dest in the PredToDestList).
  SmallPtrSet<BasicBlock*, 16> SeenPreds;
  SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;

  BasicBlock *OnlyDest = nullptr;
  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
  Constant *OnlyVal = nullptr;
  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;

  for (const auto &PredValue : PredValues) {
    BasicBlock *Pred = PredValue.second;
    if (!SeenPreds.insert(Pred).second)
      continue; // Duplicate predecessor entry.

    Constant *Val = PredValue.first;

    // Map the known value to the successor of BB it selects. An undef value
    // is recorded as a null destination.
    BasicBlock *DestBB;
    if (isa<UndefValue>(Val))
      DestBB = nullptr;
    else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
      DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
      DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
    } else {
      assert(isa<IndirectBrInst>(BB->getTerminator())
              && "Unexpected terminator");
      assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
      DestBB = cast<BlockAddress>(Val)->getBasicBlock();
    }

    // If we have exactly one destination, remember it for efficiency below.
    if (PredToDestList.empty()) {
      OnlyDest = DestBB;
      OnlyVal = Val;
    } else {
      if (OnlyDest != DestBB)
        OnlyDest = MultipleDestSentinel;
      // It possible we have same destination, but different value, e.g. default
      // case in switchinst.
      if (Val != OnlyVal)
        OnlyVal = MultipleVal;
    }

    // If the predecessor ends with an indirect goto, we can't change its
    // destination.
    if (isa<IndirectBrInst>(Pred->getTerminator()))
      continue;

    PredToDestList.push_back(std::make_pair(Pred, DestBB));
  }

  // If all edges were unthreadable, we fail.
  if (PredToDestList.empty())
    return false;

  // If all the predecessors go to a single known successor, we want to fold,
  // not thread. By doing so, we do not need to duplicate the current block and
  // also miss potential opportunities in case we dont/cant duplicate.
  if (OnlyDest && OnlyDest != MultipleDestSentinel) {
    // Only fold when every predecessor edge of BB is accounted for in
    // PredToDestList. OnlyDest/OnlyVal are (re)initialised for as long as
    // that list is empty, and predecessors ending in an indirectbr are never
    // added to it, so the destination of a leading indirectbr predecessor is
    // not reflected in OnlyDest. Counting such predecessors towards the fold
    // could redirect BB to a destination they do not agree with.
    if (PredToDestList.size() ==
        (size_t)std::distance(pred_begin(BB), pred_end(BB))) {
      bool SeenFirstBranchToOnlyDest = false;
      for (BasicBlock *SuccBB : successors(BB)) {
        if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest)
          SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
        else
          SuccBB->removePredecessor(BB, true); // This is unreachable successor.
      }

      // Finally update the terminator.
      TerminatorInst *Term = BB->getTerminator();
      BranchInst::Create(OnlyDest, Term);
      Term->eraseFromParent();

      // If the condition is now dead due to the removal of the old terminator,
      // erase it.
      if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
        if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
          CondInst->eraseFromParent();
        // We can safely replace *some* uses of the CondInst if it has
        // exactly one value as returned by LVI. RAUW is incorrect in the
        // presence of guards and assumes, that have the `Cond` as the use. This
        // is because we use the guards/assume to reason about the `Cond` value
        // at the end of block, but RAUW unconditionally replaces all uses
        // including the guards/assumes themselves and the uses before the
        // guard/assume.
        else if (OnlyVal && OnlyVal != MultipleVal &&
                 CondInst->getParent() == BB)
          ReplaceFoldableUses(CondInst, OnlyVal);
      }
      return true;
    }
  }

  // Determine which is the most common successor. If we have many inputs and
  // this block is a switch, we want to start by threading the batch that goes
  // to the most popular destination first. If we only know about one
  // threadable destination (the common case) we can avoid this.
  BasicBlock *MostPopularDest = OnlyDest;

  if (MostPopularDest == MultipleDestSentinel)
    MostPopularDest = FindMostPopularDest(BB, PredToDestList);

  // Now that we know what the most popular destination is, factor all
  // predecessors that will jump to it into a single predecessor.
  SmallVector<BasicBlock*, 16> PredsToFactor;
  for (const auto &PredToDest : PredToDestList)
    if (PredToDest.second == MostPopularDest) {
      BasicBlock *Pred = PredToDest.first;

      // This predecessor may be a switch or something else that has multiple
      // edges to the block. Factor each of these edges by listing them
      // according to # occurrences in PredsToFactor.
      for (BasicBlock *Succ : successors(Pred))
        if (Succ == BB)
          PredsToFactor.push_back(Pred);
    }

  // If the threadable edges are branching on an undefined value, we get to pick
  // the destination that these predecessors should get to.
  if (!MostPopularDest)
    MostPopularDest = BB->getTerminator()->
                            getSuccessor(GetBestDestForJumpOnUndef(BB));

  // Ok, try to thread it!
  return ThreadEdge(BB, PredsToFactor, MostPopularDest);
}
|
|
|
|
|
2010-01-12 00:41:09 +01:00
|
|
|
/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
|
|
|
|
/// a PHI node in the current block. See if there are any simplifications we
|
|
|
|
/// can do based on inputs to the phi node.
|
2010-12-05 20:02:47 +01:00
|
|
|
///
|
2016-06-14 02:51:09 +02:00
|
|
|
bool JumpThreadingPass::ProcessBranchOnPHI(PHINode *PN) {
|
2009-11-07 09:05:03 +01:00
|
|
|
BasicBlock *BB = PN->getParent();
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-12 03:07:17 +01:00
|
|
|
// TODO: We could make use of this to do it once for blocks with common PHI
|
|
|
|
// values.
|
|
|
|
SmallVector<BasicBlock*, 1> PredBBs;
|
|
|
|
PredBBs.resize(1);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
// If any of the predecessor blocks end in an unconditional branch, we can
|
2010-01-12 00:41:09 +01:00
|
|
|
// *duplicate* the conditional branch into that block in order to further
|
|
|
|
// encourage jump threading and to eliminate cases where we have branch on a
|
|
|
|
// phi of an icmp (branch on icmp is much better).
|
2009-11-07 09:05:03 +01:00
|
|
|
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
|
|
|
|
BasicBlock *PredBB = PN->getIncomingBlock(i);
|
|
|
|
if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
|
2010-01-12 03:07:17 +01:00
|
|
|
if (PredBr->isUnconditional()) {
|
|
|
|
PredBBs[0] = PredBB;
|
|
|
|
// Try to duplicate BB into PredBB.
|
|
|
|
if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
|
|
|
|
return true;
|
|
|
|
}
|
2009-11-07 09:05:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
2009-11-07 02:32:59 +01:00
|
|
|
}
|
|
|
|
|
2010-01-12 03:07:17 +01:00
|
|
|
/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
|
|
|
|
/// a xor instruction in the current block. See if there are any
|
|
|
|
/// simplifications we can do based on inputs to the xor.
|
2010-12-05 20:02:47 +01:00
|
|
|
///
|
2016-06-14 02:51:09 +02:00
|
|
|
bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) {
|
2010-01-12 03:07:17 +01:00
|
|
|
BasicBlock *BB = BO->getParent();
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-12 03:07:17 +01:00
|
|
|
// If either the LHS or RHS of the xor is a constant, don't do this
|
|
|
|
// optimization.
|
|
|
|
if (isa<ConstantInt>(BO->getOperand(0)) ||
|
|
|
|
isa<ConstantInt>(BO->getOperand(1)))
|
|
|
|
return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-23 20:16:25 +01:00
|
|
|
// If the first instruction in BB isn't a phi, we won't be able to infer
|
|
|
|
// anything special about any particular predecessor.
|
|
|
|
if (!isa<PHINode>(BB->front()))
|
|
|
|
return false;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-10-03 20:18:04 +02:00
|
|
|
// If this BB is a landing pad, we won't be able to split the edge into it.
|
|
|
|
if (BB->isEHPad())
|
|
|
|
return false;
|
|
|
|
|
2010-01-12 03:07:17 +01:00
|
|
|
// If we have a xor as the branch input to this block, and we know that the
|
|
|
|
// LHS or RHS of the xor in any predecessor is true/false, then we can clone
|
|
|
|
// the condition into the predecessor and fix that value to true, saving some
|
|
|
|
// logical ops on that path and encouraging other paths to simplify.
|
|
|
|
//
|
|
|
|
// This copies something like this:
|
|
|
|
//
|
|
|
|
// BB:
|
|
|
|
// %X = phi i1 [1], [%X']
|
|
|
|
// %Y = icmp eq i32 %A, %B
|
|
|
|
// %Z = xor i1 %X, %Y
|
|
|
|
// br i1 %Z, ...
|
|
|
|
//
|
|
|
|
// Into:
|
|
|
|
// BB':
|
|
|
|
// %Y = icmp ne i32 %A, %B
|
2015-09-16 15:27:30 +02:00
|
|
|
// br i1 %Y, ...
|
2010-01-12 03:07:17 +01:00
|
|
|
|
2010-12-05 20:06:41 +01:00
|
|
|
PredValueInfoTy XorOpValues;
|
2010-01-12 03:07:17 +01:00
|
|
|
bool isLHS = true;
|
2010-12-07 00:36:56 +01:00
|
|
|
if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
|
2016-03-16 00:38:47 +01:00
|
|
|
WantInteger, BO)) {
|
2010-01-12 03:07:17 +01:00
|
|
|
assert(XorOpValues.empty());
|
2010-12-07 00:36:56 +01:00
|
|
|
if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
|
2016-03-16 00:38:47 +01:00
|
|
|
WantInteger, BO))
|
|
|
|
return false;
|
2010-01-12 03:07:17 +01:00
|
|
|
isLHS = false;
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-12 03:07:17 +01:00
|
|
|
assert(!XorOpValues.empty() &&
|
|
|
|
"ComputeValueKnownInPredecessors returned true with no values");
|
|
|
|
|
|
|
|
// Scan the information to see which is most popular: true or false. The
|
|
|
|
// predecessors can be of the set true, false, or undef.
|
|
|
|
unsigned NumTrue = 0, NumFalse = 0;
|
2016-01-09 19:43:01 +01:00
|
|
|
for (const auto &XorOpValue : XorOpValues) {
|
|
|
|
if (isa<UndefValue>(XorOpValue.first))
|
2010-12-05 20:06:41 +01:00
|
|
|
// Ignore undefs for the count.
|
|
|
|
continue;
|
2016-01-09 19:43:01 +01:00
|
|
|
if (cast<ConstantInt>(XorOpValue.first)->isZero())
|
2010-01-12 03:07:17 +01:00
|
|
|
++NumFalse;
|
|
|
|
else
|
|
|
|
++NumTrue;
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-12 03:07:17 +01:00
|
|
|
// Determine which value to split on, true, false, or undef if neither.
|
2014-04-25 07:29:35 +02:00
|
|
|
ConstantInt *SplitVal = nullptr;
|
2010-01-12 03:07:17 +01:00
|
|
|
if (NumTrue > NumFalse)
|
|
|
|
SplitVal = ConstantInt::getTrue(BB->getContext());
|
|
|
|
else if (NumTrue != 0 || NumFalse != 0)
|
|
|
|
SplitVal = ConstantInt::getFalse(BB->getContext());
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-12 03:07:17 +01:00
|
|
|
// Collect all of the blocks that this can be folded into so that we can
|
|
|
|
// factor this once and clone it once.
|
|
|
|
SmallVector<BasicBlock*, 8> BlocksToFoldInto;
|
2016-01-09 19:43:01 +01:00
|
|
|
for (const auto &XorOpValue : XorOpValues) {
|
|
|
|
if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
|
2010-12-05 20:06:41 +01:00
|
|
|
continue;
|
2010-01-12 03:07:17 +01:00
|
|
|
|
2016-01-09 19:43:01 +01:00
|
|
|
BlocksToFoldInto.push_back(XorOpValue.second);
|
2010-01-12 03:07:17 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-23 20:16:25 +01:00
|
|
|
// If we inferred a value for all of the predecessors, then duplication won't
|
|
|
|
// help us. However, we can just replace the LHS or RHS with the constant.
|
|
|
|
if (BlocksToFoldInto.size() ==
|
|
|
|
cast<PHINode>(BB->front()).getNumIncomingValues()) {
|
2014-04-25 07:29:35 +02:00
|
|
|
if (!SplitVal) {
|
2010-01-23 20:16:25 +01:00
|
|
|
// If all preds provide undef, just nuke the xor, because it is undef too.
|
|
|
|
BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
|
|
|
|
BO->eraseFromParent();
|
|
|
|
} else if (SplitVal->isZero()) {
|
|
|
|
// If all preds provide 0, replace the xor with the other input.
|
|
|
|
BO->replaceAllUsesWith(BO->getOperand(isLHS));
|
|
|
|
BO->eraseFromParent();
|
|
|
|
} else {
|
|
|
|
// If all preds provide 1, set the computed value to 1.
|
|
|
|
BO->setOperand(!isLHS, SplitVal);
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-23 20:16:25 +01:00
|
|
|
return true;
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-12 03:07:17 +01:00
|
|
|
// Try to duplicate BB into PredBB.
|
2016-03-16 00:38:47 +01:00
|
|
|
return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
|
2010-01-12 03:07:17 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
|
|
|
|
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
|
|
|
|
/// NewPred using the entries from OldPred (suitably mapped).
|
|
|
|
static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
|
|
|
|
BasicBlock *OldPred,
|
|
|
|
BasicBlock *NewPred,
|
|
|
|
DenseMap<Instruction*, Value*> &ValueMap) {
|
|
|
|
for (BasicBlock::iterator PNI = PHIBB->begin();
|
|
|
|
PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
|
|
|
|
// Ok, we have a PHI node. Figure out what the incoming value was for the
|
|
|
|
// DestBlock.
|
|
|
|
Value *IV = PN->getIncomingValueForBlock(OldPred);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// Remap the value if necessary.
|
|
|
|
if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
|
|
|
|
DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
|
|
|
|
if (I != ValueMap.end())
|
|
|
|
IV = I->second;
|
2009-10-11 06:33:43 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
PN->addIncoming(IV, NewPred);
|
2009-10-11 06:33:43 +02:00
|
|
|
}
|
2009-05-04 04:28:08 +02:00
|
|
|
}
|
|
|
|
|
2009-11-07 09:05:03 +01:00
|
|
|
/// ThreadEdge - We have decided that it is safe and profitable to factor the
|
|
|
|
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
|
|
|
|
/// across BB. Transform the IR to reflect this change.
|
2016-06-14 02:51:09 +02:00
|
|
|
bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
|
|
|
|
const SmallVectorImpl<BasicBlock *> &PredBBs,
|
|
|
|
BasicBlock *SuccBB) {
|
Don't infininitely thread branches when a threaded edge
goes back to the block, e.g.:
Threading edge through bool from 'bb37.us.thread3829' to 'bb37.us' with cost: 1, across block:
bb37.us: ; preds = %bb37.us.thread3829, %bb37.us, %bb33
%D1361.1.us = phi i32 [ %tmp36, %bb33 ], [ %D1361.1.us, %bb37.us ], [ 0, %bb37.us.thread3829 ] ; <i32> [#uses=2]
%tmp39.us = icmp eq i32 %D1361.1.us, 0 ; <i1> [#uses=1]
br i1 %tmp39.us, label %bb37.us, label %bb42.us
llvm-svn: 50251
2008-04-25 06:12:29 +02:00
|
|
|
// If threading to the same block as we come from, we would infinite loop.
|
|
|
|
if (SuccBB == BB) {
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
|
2009-07-26 09:49:05 +02:00
|
|
|
<< "' - would thread to self!\n");
|
Don't infininitely thread branches when a threaded edge
goes back to the block, e.g.:
Threading edge through bool from 'bb37.us.thread3829' to 'bb37.us' with cost: 1, across block:
bb37.us: ; preds = %bb37.us.thread3829, %bb37.us, %bb33
%D1361.1.us = phi i32 [ %tmp36, %bb33 ], [ %D1361.1.us, %bb37.us ], [ 0, %bb37.us.thread3829 ] ; <i32> [#uses=2]
%tmp39.us = icmp eq i32 %D1361.1.us, 0 ; <i1> [#uses=1]
br i1 %tmp39.us, label %bb37.us, label %bb42.us
llvm-svn: 50251
2008-04-25 06:12:29 +02:00
|
|
|
return false;
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-05-04 04:28:08 +02:00
|
|
|
// If threading this would thread across a loop header, don't thread the edge.
|
|
|
|
// See the comments above FindLoopHeaders for justifications and caveats.
|
|
|
|
if (LoopHeaders.count(BB)) {
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName()
|
2009-07-26 09:49:05 +02:00
|
|
|
<< "' to dest BB '" << SuccBB->getName()
|
|
|
|
<< "' - it might create an irreducible loop!\n");
|
2009-05-04 04:28:08 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-02-17 05:21:14 +01:00
|
|
|
unsigned JumpThreadCost =
|
|
|
|
getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
|
2014-09-24 06:59:06 +02:00
|
|
|
if (JumpThreadCost > BBDupThreshold) {
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << " Not threading BB '" << BB->getName()
|
2009-10-11 09:24:57 +02:00
|
|
|
<< "' - Cost is too high: " << JumpThreadCost << "\n");
|
|
|
|
return false;
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2015-09-16 15:27:30 +02:00
|
|
|
// And finally, do it! Start by factoring the predecessors if needed.
|
2009-11-07 09:05:03 +01:00
|
|
|
BasicBlock *PredBB;
|
|
|
|
if (PredBBs.size() == 1)
|
|
|
|
PredBB = PredBBs[0];
|
|
|
|
else {
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
|
2009-11-07 09:05:03 +01:00
|
|
|
<< " common predecessors.\n");
|
2015-10-15 16:59:40 +02:00
|
|
|
PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
|
2009-11-07 09:05:03 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
Start doing the significantly useful part of jump threading: handle cases
where a comparison has a phi input and that phi is a constant. For example,
stuff like:
Threading edge through bool from 'bb2149' to 'bb2231' with cost: 1, across block:
bb2237: ; preds = %bb2231, %bb2149
%tmp2328.rle = phi i32 [ %tmp2232, %bb2231 ], [ %tmp2232439, %bb2149 ] ; <i32> [#uses=2]
%done.0 = phi i32 [ %done.2, %bb2231 ], [ 0, %bb2149 ] ; <i32> [#uses=1]
%tmp2239 = icmp eq i32 %done.0, 0 ; <i1> [#uses=1]
br i1 %tmp2239, label %bb2231, label %bb2327
or
bb38.i298: ; preds = %bb33.i295, %bb1693
%tmp39.i296.rle = phi %struct.ibox* [ null, %bb1693 ], [ %tmp39.i296.rle1109, %bb33.i295 ] ; <%struct.ibox*> [#uses=2]
%minspan.1.i291.reg2mem.1 = phi i32 [ 32000, %bb1693 ], [ %minspan.0.i288, %bb33.i295 ] ; <i32> [#uses=1]
%tmp40.i297 = icmp eq %struct.ibox* %tmp39.i296.rle, null ; <i1> [#uses=1]
br i1 %tmp40.i297, label %implfeeds.exit311, label %bb43.i301
This triggers thousands of times in spec.
llvm-svn: 50110
2008-04-22 23:40:39 +02:00
|
|
|
// And finally, do it!
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
|
2009-07-26 11:48:23 +02:00
|
|
|
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
|
2009-07-26 09:49:05 +02:00
|
|
|
<< ", across block:\n "
|
|
|
|
<< *BB << "\n");
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-09-14 22:57:41 +02:00
|
|
|
LVI->threadEdge(PredBB, BB, SuccBB);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-04-21 00:39:42 +02:00
|
|
|
// We are going to have to map operands from the original BB block to the new
|
|
|
|
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
|
|
|
|
// account for entry from PredBB.
|
|
|
|
DenseMap<Instruction*, Value*> ValueMapping;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
|
|
|
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
|
|
|
|
BB->getName()+".thread",
|
2009-08-13 23:58:54 +02:00
|
|
|
BB->getParent(), BB);
|
2008-04-21 00:39:42 +02:00
|
|
|
NewBB->moveAfter(PredBB);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2015-10-15 16:59:40 +02:00
|
|
|
// Set the block frequency of NewBB.
|
|
|
|
if (HasProfileData) {
|
|
|
|
auto NewBBFreq =
|
|
|
|
BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
|
|
|
|
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
|
|
|
|
}
|
|
|
|
|
2008-04-21 00:39:42 +02:00
|
|
|
BasicBlock::iterator BI = BB->begin();
|
|
|
|
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
|
|
|
|
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-04-21 00:39:42 +02:00
|
|
|
// Clone the non-phi instructions of BB into NewBB, keeping track of the
|
|
|
|
// mapping and using it to remap operands in the cloned instructions.
|
|
|
|
for (; !isa<TerminatorInst>(BI); ++BI) {
|
2009-09-27 09:38:41 +02:00
|
|
|
Instruction *New = BI->clone();
|
2009-07-26 11:48:23 +02:00
|
|
|
New->setName(BI->getName());
|
2008-04-21 00:39:42 +02:00
|
|
|
NewBB->getInstList().push_back(New);
|
2015-10-13 20:26:00 +02:00
|
|
|
ValueMapping[&*BI] = New;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-04-21 00:39:42 +02:00
|
|
|
// Remap operands to patch up intra-block references.
|
|
|
|
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
|
2009-07-02 02:17:47 +02:00
|
|
|
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
|
|
|
|
DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
|
|
|
|
if (I != ValueMapping.end())
|
|
|
|
New->setOperand(i, I->second);
|
|
|
|
}
|
2008-04-21 00:39:42 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-04-21 00:39:42 +02:00
|
|
|
// We didn't copy the terminator from BB over to NewBB, because there is now
|
|
|
|
// an unconditional jump to SuccBB. Insert the unconditional jump.
|
2015-10-15 16:59:40 +02:00
|
|
|
BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
|
2011-05-05 00:48:19 +02:00
|
|
|
NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-04-21 00:39:42 +02:00
|
|
|
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
|
|
|
|
// PHI nodes for NewBB now.
|
2009-10-11 09:24:57 +02:00
|
|
|
AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-10 11:05:58 +02:00
|
|
|
// If there were values defined in BB that are used outside the block, then we
|
|
|
|
// now have to update all uses of the value to use either the original value,
|
|
|
|
// the cloned value, or some PHI derived value. This can require arbitrary
|
|
|
|
// PHI insertion, of which we are prepared to do, clean these up now.
|
|
|
|
SSAUpdater SSAUpdate;
|
|
|
|
SmallVector<Use*, 16> UsesToRename;
|
2016-01-09 19:43:01 +01:00
|
|
|
for (Instruction &I : *BB) {
|
2009-10-10 11:05:58 +02:00
|
|
|
// Scan all uses of this instruction to see if it is used outside of its
|
|
|
|
// block, and if so, record them in UsesToRename.
|
2016-01-09 19:43:01 +01:00
|
|
|
for (Use &U : I.uses()) {
|
2014-03-09 04:16:01 +01:00
|
|
|
Instruction *User = cast<Instruction>(U.getUser());
|
2009-10-10 11:05:58 +02:00
|
|
|
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
|
2014-03-09 04:16:01 +01:00
|
|
|
if (UserPN->getIncomingBlock(U) == BB)
|
2009-10-10 11:05:58 +02:00
|
|
|
continue;
|
|
|
|
} else if (User->getParent() == BB)
|
|
|
|
continue;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2014-03-09 04:16:01 +01:00
|
|
|
UsesToRename.push_back(&U);
|
2009-10-10 11:05:58 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-10 11:05:58 +02:00
|
|
|
// If there are no uses outside the block, we're done with this instruction.
|
|
|
|
if (UsesToRename.empty())
|
|
|
|
continue;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-01-09 19:43:01 +01:00
|
|
|
DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
|
2009-10-10 11:05:58 +02:00
|
|
|
|
|
|
|
// We found a use of I outside of BB. Rename all uses of I that are outside
|
|
|
|
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
|
|
|
|
// with the two values we know.
|
2016-01-09 19:43:01 +01:00
|
|
|
SSAUpdate.Initialize(I.getType(), I.getName());
|
|
|
|
SSAUpdate.AddAvailableValue(BB, &I);
|
|
|
|
SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-10 11:05:58 +02:00
|
|
|
while (!UsesToRename.empty())
|
|
|
|
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << "\n");
|
2009-10-10 11:05:58 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
|
|
|
|
2008-12-01 05:48:07 +01:00
|
|
|
// Ok, NewBB is good to go. Update the terminator of PredBB to jump to
|
2008-04-21 00:39:42 +02:00
|
|
|
// NewBB instead of BB. This eliminates predecessors from BB, which requires
|
|
|
|
// us to simplify any PHI nodes in BB.
|
|
|
|
TerminatorInst *PredTerm = PredBB->getTerminator();
|
|
|
|
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
|
|
|
|
if (PredTerm->getSuccessor(i) == BB) {
|
2010-09-29 22:34:41 +02:00
|
|
|
BB->removePredecessor(PredBB, true);
|
2008-04-21 00:39:42 +02:00
|
|
|
PredTerm->setSuccessor(i, NewBB);
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2008-12-01 05:48:07 +01:00
|
|
|
// At this point, the IR is fully up to date and consistent. Do a quick scan
|
|
|
|
// over the new instructions and zap any that are constants or dead. This
|
|
|
|
// frequently happens because of phi translation.
|
2015-03-10 03:37:25 +01:00
|
|
|
SimplifyInstructionsInBlock(NewBB, TLI);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2015-10-15 16:59:40 +02:00
|
|
|
// Update the edge weight from BB to SuccBB, which should be less than before.
|
|
|
|
UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
|
|
|
|
|
2009-05-04 04:28:08 +02:00
|
|
|
// Threaded an edge!
|
|
|
|
++NumThreads;
|
|
|
|
return true;
|
2008-04-20 23:13:06 +02:00
|
|
|
}
|
2009-10-11 09:24:57 +02:00
|
|
|
|
2015-10-15 16:59:40 +02:00
|
|
|
/// Create a new basic block that will be the predecessor of BB and successor of
|
2016-11-20 14:19:49 +01:00
|
|
|
/// all blocks in Preds. When profile data is available, update the frequency of
|
2015-10-15 16:59:40 +02:00
|
|
|
/// this new block.
|
2016-06-14 02:51:09 +02:00
|
|
|
BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
|
|
|
|
ArrayRef<BasicBlock *> Preds,
|
|
|
|
const char *Suffix) {
|
2015-10-15 16:59:40 +02:00
|
|
|
// Collect the frequencies of all predecessors of BB, which will be used to
|
|
|
|
// update the edge weight on BB->SuccBB.
|
|
|
|
BlockFrequency PredBBFreq(0);
|
|
|
|
if (HasProfileData)
|
|
|
|
for (auto Pred : Preds)
|
|
|
|
PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB);
|
|
|
|
|
|
|
|
BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix);
|
|
|
|
|
|
|
|
// Set the block frequency of the newly created PredBB, which is the sum of
|
|
|
|
// frequencies of Preds.
|
|
|
|
if (HasProfileData)
|
|
|
|
BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency());
|
|
|
|
return PredBB;
|
|
|
|
}
|
|
|
|
|
2016-09-06 18:08:33 +02:00
|
|
|
/// Return true if BB's terminator carries !prof metadata named
/// "branch_weights" with exactly one weight per successor.  BB's terminator
/// must have more than one successor.
bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
  const TerminatorInst *Term = BB->getTerminator();
  assert(Term->getNumSuccessors() > 1 && "not a split");

  if (MDNode *ProfMD = Term->getMetadata(LLVMContext::MD_prof)) {
    // Operand 0 is the metadata's name; the weights follow it, so a complete
    // node has one operand more than the terminator has successors.
    MDString *ProfName = cast<MDString>(ProfMD->getOperand(0));
    return ProfName->getString() == "branch_weights" &&
           ProfMD->getNumOperands() == Term->getNumSuccessors() + 1;
  }

  return false;
}
|
|
|
|
|
2015-10-15 16:59:40 +02:00
|
|
|
/// Update the block frequency of BB and branch weight and the metadata on the
|
|
|
|
/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
|
|
|
|
/// Freq(PredBB->BB) / Freq(BB->SuccBB).
|
2016-06-14 02:51:09 +02:00
|
|
|
void JumpThreadingPass::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
|
|
|
|
BasicBlock *BB,
|
|
|
|
BasicBlock *NewBB,
|
|
|
|
BasicBlock *SuccBB) {
|
2015-10-15 16:59:40 +02:00
|
|
|
if (!HasProfileData)
|
|
|
|
return;
|
|
|
|
|
|
|
|
assert(BFI && BPI && "BFI & BPI should have been created here");
|
|
|
|
|
|
|
|
// As the edge from PredBB to BB is deleted, we have to update the block
|
|
|
|
// frequency of BB.
|
|
|
|
auto BBOrigFreq = BFI->getBlockFreq(BB);
|
|
|
|
auto NewBBFreq = BFI->getBlockFreq(NewBB);
|
|
|
|
auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
|
|
|
|
auto BBNewFreq = BBOrigFreq - NewBBFreq;
|
|
|
|
BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
|
|
|
|
|
|
|
|
// Collect updated outgoing edges' frequencies from BB and use them to update
|
2015-12-22 19:56:14 +01:00
|
|
|
// edge probabilities.
|
2015-10-15 16:59:40 +02:00
|
|
|
SmallVector<uint64_t, 4> BBSuccFreq;
|
2016-01-09 19:43:01 +01:00
|
|
|
for (BasicBlock *Succ : successors(BB)) {
|
|
|
|
auto SuccFreq = (Succ == SuccBB)
|
2015-10-15 16:59:40 +02:00
|
|
|
? BB2SuccBBFreq - NewBBFreq
|
2016-01-09 19:43:01 +01:00
|
|
|
: BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
|
2015-10-15 16:59:40 +02:00
|
|
|
BBSuccFreq.push_back(SuccFreq.getFrequency());
|
|
|
|
}
|
|
|
|
|
2015-12-22 19:56:14 +01:00
|
|
|
uint64_t MaxBBSuccFreq =
|
|
|
|
*std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
|
2015-10-15 16:59:40 +02:00
|
|
|
|
2015-12-23 00:45:55 +01:00
|
|
|
SmallVector<BranchProbability, 4> BBSuccProbs;
|
|
|
|
if (MaxBBSuccFreq == 0)
|
|
|
|
BBSuccProbs.assign(BBSuccFreq.size(),
|
|
|
|
{1, static_cast<unsigned>(BBSuccFreq.size())});
|
|
|
|
else {
|
|
|
|
for (uint64_t Freq : BBSuccFreq)
|
|
|
|
BBSuccProbs.push_back(
|
|
|
|
BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
|
|
|
|
// Normalize edge probabilities so that they sum up to one.
|
|
|
|
BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
|
|
|
|
BBSuccProbs.end());
|
|
|
|
}
|
2015-10-15 16:59:40 +02:00
|
|
|
|
2015-12-22 19:56:14 +01:00
|
|
|
// Update edge probabilities in BPI.
|
|
|
|
for (int I = 0, E = BBSuccProbs.size(); I < E; I++)
|
|
|
|
BPI->setEdgeProbability(BB, I, BBSuccProbs[I]);
|
|
|
|
|
2016-09-06 18:08:33 +02:00
|
|
|
// Update the profile metadata as well.
|
|
|
|
//
|
|
|
|
// Don't do this if the profile of the transformed blocks was statically
|
|
|
|
// estimated. (This could occur despite the function having an entry
|
|
|
|
// frequency in completely cold parts of the CFG.)
|
|
|
|
//
|
|
|
|
// In this case we don't want to suggest to subsequent passes that the
|
|
|
|
// calculated weights are fully consistent. Consider this graph:
|
|
|
|
//
|
|
|
|
// check_1
|
|
|
|
// 50% / |
|
|
|
|
// eq_1 | 50%
|
|
|
|
// \ |
|
|
|
|
// check_2
|
|
|
|
// 50% / |
|
|
|
|
// eq_2 | 50%
|
|
|
|
// \ |
|
|
|
|
// check_3
|
|
|
|
// 50% / |
|
|
|
|
// eq_3 | 50%
|
|
|
|
// \ |
|
|
|
|
//
|
|
|
|
// Assuming the blocks check_* all compare the same value against 1, 2 and 3,
|
|
|
|
// the overall probabilities are inconsistent; the total probability that the
|
|
|
|
// value is either 1, 2 or 3 is 150%.
|
|
|
|
//
|
|
|
|
// As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
|
|
|
|
// becomes 0%. This is even worse if the edge whose probability becomes 0% is
|
|
|
|
// the loop exit edge. Then based solely on static estimation we would assume
|
|
|
|
// the loop was extremely hot.
|
|
|
|
//
|
|
|
|
// FIXME this locally as well so that BPI and BFI are consistent as well. We
|
|
|
|
// shouldn't make edges extremely likely or unlikely based solely on static
|
|
|
|
// estimation.
|
|
|
|
if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
|
2015-12-22 19:56:14 +01:00
|
|
|
SmallVector<uint32_t, 4> Weights;
|
|
|
|
for (auto Prob : BBSuccProbs)
|
|
|
|
Weights.push_back(Prob.getNumerator());
|
2015-10-15 16:59:40 +02:00
|
|
|
|
|
|
|
auto TI = BB->getTerminator();
|
|
|
|
TI->setMetadata(
|
|
|
|
LLVMContext::MD_prof,
|
|
|
|
MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
|
|
|
|
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
|
|
|
|
/// If we can duplicate the contents of BB up into PredBB do so now, this
|
|
|
|
/// improves the odds that the branch will be on an analyzable instruction like
|
|
|
|
/// a compare.
|
2016-06-14 02:51:09 +02:00
|
|
|
bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
|
|
|
|
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
|
2010-01-12 03:07:17 +01:00
|
|
|
assert(!PredBBs.empty() && "Can't handle an empty set");
|
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// If BB is a loop header, then duplicating this block outside the loop would
|
|
|
|
// cause us to transform this into an irreducible loop, don't do this.
|
|
|
|
// See the comments above FindLoopHeaders for justifications and caveats.
|
|
|
|
if (LoopHeaders.count(BB)) {
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
|
2010-01-12 03:07:17 +01:00
|
|
|
<< "' into predecessor block '" << PredBBs[0]->getName()
|
2009-10-11 09:24:57 +02:00
|
|
|
<< "' - it might create an irreducible loop!\n");
|
|
|
|
return false;
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2017-02-17 05:21:14 +01:00
|
|
|
unsigned DuplicationCost =
|
|
|
|
getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
|
2014-09-24 06:59:06 +02:00
|
|
|
if (DuplicationCost > BBDupThreshold) {
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
|
2009-10-11 09:24:57 +02:00
|
|
|
<< "' - Cost is too high: " << DuplicationCost << "\n");
|
|
|
|
return false;
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2015-09-16 15:27:30 +02:00
|
|
|
// And finally, do it! Start by factoring the predecessors if needed.
|
2010-01-12 03:07:17 +01:00
|
|
|
BasicBlock *PredBB;
|
|
|
|
if (PredBBs.size() == 1)
|
|
|
|
PredBB = PredBBs[0];
|
|
|
|
else {
|
|
|
|
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
|
|
|
|
<< " common predecessors.\n");
|
2015-10-15 16:59:40 +02:00
|
|
|
PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
|
2010-01-12 03:07:17 +01:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// Okay, we decided to do this! Clone all the instructions in BB onto the end
|
|
|
|
// of PredBB.
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
|
2009-10-11 09:24:57 +02:00
|
|
|
<< PredBB->getName() << "' to eliminate branch on phi. Cost: "
|
|
|
|
<< DuplicationCost << " block is:" << *BB << "\n");
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2010-01-12 03:07:17 +01:00
|
|
|
// Unless PredBB ends with an unconditional branch, split the edge so that we
|
|
|
|
// can just clone the bits from BB into the end of the new PredBB.
|
2010-01-23 20:21:31 +01:00
|
|
|
BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2014-04-25 07:29:35 +02:00
|
|
|
if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
|
2015-01-19 13:36:53 +01:00
|
|
|
PredBB = SplitEdge(PredBB, BB);
|
2010-01-12 03:07:17 +01:00
|
|
|
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
|
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// We are going to have to map operands from the original BB block into the
|
|
|
|
// PredBB block. Evaluate PHI nodes in BB.
|
|
|
|
DenseMap<Instruction*, Value*> ValueMapping;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
BasicBlock::iterator BI = BB->begin();
|
|
|
|
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
|
|
|
|
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
|
|
|
|
// Clone the non-phi instructions of BB into PredBB, keeping track of the
|
|
|
|
// mapping and using it to remap operands in the cloned instructions.
|
|
|
|
for (; BI != BB->end(); ++BI) {
|
|
|
|
Instruction *New = BI->clone();
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// Remap operands to patch up intra-block references.
|
|
|
|
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
|
|
|
|
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
|
|
|
|
DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
|
|
|
|
if (I != ValueMapping.end())
|
|
|
|
New->setOperand(i, I->second);
|
|
|
|
}
|
2010-01-12 21:41:47 +01:00
|
|
|
|
|
|
|
// If this instruction can be simplified after the operands are updated,
|
|
|
|
// just use the simplified value instead. This frequently happens due to
|
|
|
|
// phi translation.
|
2017-04-28 21:55:38 +02:00
|
|
|
if (Value *IV = SimplifyInstruction(
|
|
|
|
New,
|
|
|
|
{BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
|
2015-10-13 20:26:00 +02:00
|
|
|
ValueMapping[&*BI] = IV;
|
2016-06-25 02:04:10 +02:00
|
|
|
if (!New->mayHaveSideEffects()) {
|
[IR] De-virtualize ~Value to save a vptr
Summary:
Implements PR889
Removing the virtual table pointer from Value saves 1% of RSS when doing
LTO of llc on Linux. The impact on time was positive, but too noisy to
conclusively say that performance improved. Here is a link to the
spreadsheet with the original data:
https://docs.google.com/spreadsheets/d/1F4FHir0qYnV0MEp2sYYp_BuvnJgWlWPhWOwZ6LbW7W4/edit?usp=sharing
This change makes it invalid to directly delete a Value, User, or
Instruction pointer. Instead, such code can be rewritten to a null check
and a call Value::deleteValue(). Value objects tend to have their
lifetimes managed through iplist, so for the most part, this isn't a big
deal. However, there are some places where LLVM deletes values, and
those places had to be migrated to deleteValue. I have also created
llvm::unique_value, which has a custom deleter, so it can be used in
place of std::unique_ptr<Value>.
I had to add the "DerivedUser" Deleter escape hatch for MemorySSA, which
derives from User outside of lib/IR. Code in IR cannot include MemorySSA
headers or call the MemoryAccess object destructors without introducing
a circular dependency, so we need some level of indirection.
Unfortunately, no class derived from User may have any virtual methods,
because adding a virtual method would break User::getHungOffOperands(),
which assumes that it can find the use list immediately prior to the
User object. I've added a static_assert to the appropriate OperandTraits
templates to help people avoid this trap.
Reviewers: chandlerc, mehdi_amini, pete, dberlin, george.burgess.iv
Reviewed By: chandlerc
Subscribers: krytarowski, eraman, george.burgess.iv, mzolotukhin, Prazek, nlewycky, hans, inglorion, pcc, tejohnson, dberlin, llvm-commits
Differential Revision: https://reviews.llvm.org/D31261
llvm-svn: 303362
2017-05-18 19:24:10 +02:00
|
|
|
New->deleteValue();
|
2016-06-25 02:04:10 +02:00
|
|
|
New = nullptr;
|
|
|
|
}
|
2010-01-12 21:41:47 +01:00
|
|
|
} else {
|
2016-06-25 02:04:10 +02:00
|
|
|
ValueMapping[&*BI] = New;
|
|
|
|
}
|
|
|
|
if (New) {
|
2010-01-12 21:41:47 +01:00
|
|
|
// Otherwise, insert the new instruction into the block.
|
|
|
|
New->setName(BI->getName());
|
2015-10-13 20:26:00 +02:00
|
|
|
PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
|
2010-01-12 21:41:47 +01:00
|
|
|
}
|
2009-10-11 09:24:57 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// Check to see if the targets of the branch had PHI nodes. If so, we need to
|
|
|
|
// add entries to the PHI nodes for branch from PredBB now.
|
|
|
|
BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
|
|
|
|
AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
|
|
|
|
ValueMapping);
|
|
|
|
AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
|
|
|
|
ValueMapping);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// If there were values defined in BB that are used outside the block, then we
|
|
|
|
// now have to update all uses of the value to use either the original value,
|
|
|
|
// the cloned value, or some PHI derived value. This can require arbitrary
|
|
|
|
// PHI insertion, of which we are prepared to do, clean these up now.
|
|
|
|
SSAUpdater SSAUpdate;
|
|
|
|
SmallVector<Use*, 16> UsesToRename;
|
2016-01-09 19:43:01 +01:00
|
|
|
for (Instruction &I : *BB) {
|
2009-10-11 09:24:57 +02:00
|
|
|
// Scan all uses of this instruction to see if it is used outside of its
|
|
|
|
// block, and if so, record them in UsesToRename.
|
2016-01-09 19:43:01 +01:00
|
|
|
for (Use &U : I.uses()) {
|
2014-03-09 04:16:01 +01:00
|
|
|
Instruction *User = cast<Instruction>(U.getUser());
|
2009-10-11 09:24:57 +02:00
|
|
|
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
|
2014-03-09 04:16:01 +01:00
|
|
|
if (UserPN->getIncomingBlock(U) == BB)
|
2009-10-11 09:24:57 +02:00
|
|
|
continue;
|
|
|
|
} else if (User->getParent() == BB)
|
|
|
|
continue;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2014-03-09 04:16:01 +01:00
|
|
|
UsesToRename.push_back(&U);
|
2009-10-11 09:24:57 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// If there are no uses outside the block, we're done with this instruction.
|
|
|
|
if (UsesToRename.empty())
|
|
|
|
continue;
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2016-01-09 19:43:01 +01:00
|
|
|
DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// We found a use of I outside of BB. Rename all uses of I that are outside
|
|
|
|
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
|
|
|
|
// with the two values we know.
|
2016-01-09 19:43:01 +01:00
|
|
|
SSAUpdate.Initialize(I.getType(), I.getName());
|
|
|
|
SSAUpdate.AddAvailableValue(BB, &I);
|
|
|
|
SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
while (!UsesToRename.empty())
|
|
|
|
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
|
2010-01-05 02:27:19 +01:00
|
|
|
DEBUG(dbgs() << "\n");
|
2009-10-11 09:24:57 +02:00
|
|
|
}
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
|
|
|
|
// that we nuked.
|
2010-09-29 22:34:41 +02:00
|
|
|
BB->removePredecessor(PredBB, true);
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
// Remove the unconditional branch at the end of the PredBB block.
|
|
|
|
OldPredBranch->eraseFromParent();
|
2010-12-05 20:02:47 +01:00
|
|
|
|
2009-10-11 09:24:57 +02:00
|
|
|
++NumDupes;
|
|
|
|
return true;
|
|
|
|
}
|
JumpThreading: Turn a select instruction into branching if it allows to thread one half of the select.
This is a common pattern coming out of simplifycfg generating gross code.
a: ; preds = %entry
%sel = select i1 %cmp1, double %add, double 0.000000e+00
br label %b
b:
%cond5 = phi double [ %sel, %a ], [ %sub, %entry ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
becomes
a:
br i1 %cmp1, label %b, label %if.then
b:
%cond5 = phi double [ %sub, %entry ], [ %add, %a ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
Skipping block b completely if possible.
llvm-svn: 187880
2013-08-07 12:29:38 +02:00
|
|
|
|
|
|
|
/// TryToUnfoldSelect - Look for blocks of the form
|
|
|
|
/// bb1:
|
|
|
|
/// %a = select
|
2017-02-26 20:08:44 +01:00
|
|
|
/// br bb2
|
JumpThreading: Turn a select instruction into branching if it allows to thread one half of the select.
This is a common pattern coming out of simplifycfg generating gross code.
a: ; preds = %entry
%sel = select i1 %cmp1, double %add, double 0.000000e+00
br label %b
b:
%cond5 = phi double [ %sel, %a ], [ %sub, %entry ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
becomes
a:
br i1 %cmp1, label %b, label %if.then
b:
%cond5 = phi double [ %sub, %entry ], [ %add, %a ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
Skipping block b completely if possible.
llvm-svn: 187880
2013-08-07 12:29:38 +02:00
|
|
|
///
|
|
|
|
/// bb2:
|
2017-02-26 20:08:44 +01:00
|
|
|
/// %p = phi [%a, %bb1] ...
|
JumpThreading: Turn a select instruction into branching if it allows to thread one half of the select.
This is a common pattern coming out of simplifycfg generating gross code.
a: ; preds = %entry
%sel = select i1 %cmp1, double %add, double 0.000000e+00
br label %b
b:
%cond5 = phi double [ %sel, %a ], [ %sub, %entry ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
becomes
a:
br i1 %cmp1, label %b, label %if.then
b:
%cond5 = phi double [ %sub, %entry ], [ %add, %a ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
Skipping block b completely if possible.
llvm-svn: 187880
2013-08-07 12:29:38 +02:00
|
|
|
/// %c = icmp %p
|
|
|
|
/// br i1 %c
|
|
|
|
///
|
|
|
|
/// And expand the select into a branch structure if one of its arms allows %c
|
|
|
|
/// to be folded. This later enables threading from bb1 over bb2.
|
2016-06-14 02:51:09 +02:00
|
|
|
bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
|
JumpThreading: Turn a select instruction into branching if it allows to thread one half of the select.
This is a common pattern coming out of simplifycfg generating gross code.
a: ; preds = %entry
%sel = select i1 %cmp1, double %add, double 0.000000e+00
br label %b
b:
%cond5 = phi double [ %sel, %a ], [ %sub, %entry ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
becomes
a:
br i1 %cmp1, label %b, label %if.then
b:
%cond5 = phi double [ %sub, %entry ], [ %add, %a ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
Skipping block b completely if possible.
llvm-svn: 187880
2013-08-07 12:29:38 +02:00
|
|
|
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
|
|
|
|
PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
|
|
|
|
Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
|
|
|
|
|
|
|
|
if (!CondBr || !CondBr->isConditional() || !CondLHS ||
|
|
|
|
CondLHS->getParent() != BB)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
|
|
|
|
BasicBlock *Pred = CondLHS->getIncomingBlock(I);
|
|
|
|
SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
|
|
|
|
|
|
|
|
// Look if one of the incoming values is a select in the corresponding
|
|
|
|
// predecessor.
|
|
|
|
if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
|
|
|
|
if (!PredTerm || !PredTerm->isUnconditional())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Now check if one of the select values would allow us to constant fold the
|
|
|
|
// terminator in BB. We don't do the transform if both sides fold, those
|
|
|
|
// cases will be threaded in any case.
|
|
|
|
LazyValueInfo::Tristate LHSFolds =
|
|
|
|
LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
|
2014-09-07 22:29:59 +02:00
|
|
|
CondRHS, Pred, BB, CondCmp);
|
JumpThreading: Turn a select instruction into branching if it allows to thread one half of the select.
This is a common pattern coming out of simplifycfg generating gross code.
a: ; preds = %entry
%sel = select i1 %cmp1, double %add, double 0.000000e+00
br label %b
b:
%cond5 = phi double [ %sel, %a ], [ %sub, %entry ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
becomes
a:
br i1 %cmp1, label %b, label %if.then
b:
%cond5 = phi double [ %sub, %entry ], [ %add, %a ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
Skipping block b completely if possible.
llvm-svn: 187880
2013-08-07 12:29:38 +02:00
|
|
|
LazyValueInfo::Tristate RHSFolds =
|
|
|
|
LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
|
2014-09-07 22:29:59 +02:00
|
|
|
CondRHS, Pred, BB, CondCmp);
|
JumpThreading: Turn a select instruction into branching if it allows to thread one half of the select.
This is a common pattern coming out of simplifycfg generating gross code.
a: ; preds = %entry
%sel = select i1 %cmp1, double %add, double 0.000000e+00
br label %b
b:
%cond5 = phi double [ %sel, %a ], [ %sub, %entry ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
becomes
a:
br i1 %cmp1, label %b, label %if.then
b:
%cond5 = phi double [ %sub, %entry ], [ %add, %a ]
%cmp6 = fcmp oeq double %cond5, 0.000000e+00
br i1 %cmp6, label %if.then, label %if.end
Skipping block b completely if possible.
llvm-svn: 187880
2013-08-07 12:29:38 +02:00
|
|
|
if ((LHSFolds != LazyValueInfo::Unknown ||
|
|
|
|
RHSFolds != LazyValueInfo::Unknown) &&
|
|
|
|
LHSFolds != RHSFolds) {
|
|
|
|
// Expand the select.
|
|
|
|
//
|
|
|
|
// Pred --
|
|
|
|
// | v
|
|
|
|
// | NewBB
|
|
|
|
// | |
|
|
|
|
// |-----
|
|
|
|
// v
|
|
|
|
// BB
|
|
|
|
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
|
|
|
|
BB->getParent(), BB);
|
|
|
|
// Move the unconditional branch to NewBB.
|
|
|
|
PredTerm->removeFromParent();
|
|
|
|
NewBB->getInstList().insert(NewBB->end(), PredTerm);
|
|
|
|
// Create a conditional branch and update PHI nodes.
|
|
|
|
BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
|
|
|
|
CondLHS->setIncomingValue(I, SI->getFalseValue());
|
|
|
|
CondLHS->addIncoming(SI->getTrueValue(), NewBB);
|
|
|
|
// The select is now dead.
|
|
|
|
SI->eraseFromParent();
|
|
|
|
|
|
|
|
// Update any other PHI nodes in BB.
|
|
|
|
for (BasicBlock::iterator BI = BB->begin();
|
|
|
|
PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
|
|
|
|
if (Phi != CondLHS)
|
|
|
|
Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
[JumpThreading] Split select that has constant conditions coming from the PHI node
Look for PHI/Select in the same BB of the form
bb:
%p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
%s = select p, trueval, falseval
And expand the select into a branch structure. This later enables
jump-threading over bb in this pass.
Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
select if the associated PHI has at least one constant. If the unfolded
select is not jump-threaded, it will be folded again in the later
optimizations.
llvm-svn: 257198
2016-01-08 20:39:39 +01:00
|
|
|
|
[JumpThreading] Add a pattern to TryToUnfoldSelectInCurrBB()
Add the following pattern to TryToUnfoldSelectInCurrBB()
bb:
%p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
%c = cmp %p, 0
%s = select %c, trueval, falseval
The Select in the above pattern will be unfolded and then jump-threaded. The
current implementation does not allow CMP in the middle of PHI and Select.
Differential Revision: https://reviews.llvm.org/D34762
llvm-svn: 308050
2017-07-14 21:16:47 +02:00
|
|
|
/// TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
|
|
|
|
/// same BB in the form
|
[JumpThreading] Split select that has constant conditions coming from the PHI node
Look for PHI/Select in the same BB of the form
bb:
%p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
%s = select p, trueval, falseval
And expand the select into a branch structure. This later enables
jump-threading over bb in this pass.
Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
select if the associated PHI has at least one constant. If the unfolded
select is not jump-threaded, it will be folded again in the later
optimizations.
llvm-svn: 257198
2016-01-08 20:39:39 +01:00
|
|
|
/// bb:
|
|
|
|
/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
|
[JumpThreading] Add a pattern to TryToUnfoldSelectInCurrBB()
Add the following pattern to TryToUnfoldSelectInCurrBB()
bb:
%p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
%c = cmp %p, 0
%s = select %c, trueval, falseval
The Select in the above pattern will be unfolded and then jump-threaded. The
current implementation does not allow CMP in the middle of PHI and Select.
Differential Revision: https://reviews.llvm.org/D34762
llvm-svn: 308050
2017-07-14 21:16:47 +02:00
|
|
|
/// %s = select %p, trueval, falseval
|
[JumpThreading] Split select that has constant conditions coming from the PHI node
Look for PHI/Select in the same BB of the form
bb:
%p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
%s = select p, trueval, falseval
And expand the select into a branch structure. This later enables
jump-threading over bb in this pass.
Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
select if the associated PHI has at least one constant. If the unfolded
select is not jump-threaded, it will be folded again in the later
optimizations.
llvm-svn: 257198
2016-01-08 20:39:39 +01:00
|
|
|
///
|
[JumpThreading] Add a pattern to TryToUnfoldSelectInCurrBB()
Add the following pattern to TryToUnfoldSelectInCurrBB()
bb:
%p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
%c = cmp %p, 0
%s = select %c, trueval, falseval
The Select in the above pattern will be unfolded and then jump-threaded. The
current implementation does not allow CMP in the middle of PHI and Select.
Differential Revision: https://reviews.llvm.org/D34762
llvm-svn: 308050
2017-07-14 21:16:47 +02:00
|
|
|
/// or
|
|
|
|
///
|
|
|
|
/// bb:
|
|
|
|
/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
|
|
|
|
/// %c = cmp %p, 0
|
|
|
|
/// %s = select %c, trueval, falseval
|
|
|
|
//
|
2016-11-15 16:42:23 +01:00
|
|
|
/// And expand the select into a branch structure. This later enables
|
[JumpThreading] Split select that has constant conditions coming from the PHI node
Look for PHI/Select in the same BB of the form
bb:
%p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
%s = select p, trueval, falseval
And expand the select into a branch structure. This later enables
jump-threading over bb in this pass.
Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
select if the associated PHI has at least one constant. If the unfolded
select is not jump-threaded, it will be folded again in the later
optimizations.
llvm-svn: 257198
2016-01-08 20:39:39 +01:00
|
|
|
/// jump-threading over bb in this pass.
|
|
|
|
///
|
2016-11-15 16:42:23 +01:00
|
|
|
/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
|
|
|
|
/// select if the associated PHI has at least one constant. If the unfolded
|
|
|
|
/// select is not jump-threaded, it will be folded again in the later
|
|
|
|
/// optimizations.
|
2016-06-14 02:51:09 +02:00
|
|
|
bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
|
[JumpThreading] Split select that has constant conditions coming from the PHI node
Look for PHI/Select in the same BB of the form
bb:
%p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
%s = select p, trueval, falseval
And expand the select into a branch structure. This later enables
jump-threading over bb in this pass.
Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
select if the associated PHI has at least one constant. If the unfolded
select is not jump-threaded, it will be folded again in the later
optimizations.
llvm-svn: 257198
2016-01-08 20:39:39 +01:00
|
|
|
// If threading this would thread across a loop header, don't thread the edge.
|
|
|
|
// See the comments above FindLoopHeaders for justifications and caveats.
|
|
|
|
if (LoopHeaders.count(BB))
|
|
|
|
return false;
|
|
|
|
|
2016-11-15 16:42:23 +01:00
|
|
|
for (BasicBlock::iterator BI = BB->begin();
|
|
|
|
PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
|
[JumpThreading] Add a pattern to TryToUnfoldSelectInCurrBB()
Add the following pattern to TryToUnfoldSelectInCurrBB()
bb:
%p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
%c = cmp %p, 0
%s = select %c, trueval, falseval
The Select in the above pattern will be unfolded and then jump-threaded. The
current implementation does not allow CMP in the middle of PHI and Select.
Differential Revision: https://reviews.llvm.org/D34762
llvm-svn: 308050
2017-07-14 21:16:47 +02:00
|
|
|
// Look for a Phi having at least one constant incoming value.
|
|
|
|
if (llvm::all_of(PN->incoming_values(),
|
|
|
|
[](Value *V) { return !isa<ConstantInt>(V); }))
|
2016-11-15 16:42:23 +01:00
|
|
|
continue;
|
[JumpThreading] Split select that has constant conditions coming from the PHI node
Look for PHI/Select in the same BB of the form
bb:
%p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
%s = select p, trueval, falseval
And expand the select into a branch structure. This later enables
jump-threading over bb in this pass.
Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
select if the associated PHI has at least one constant. If the unfolded
select is not jump-threaded, it will be folded again in the later
optimizations.
llvm-svn: 257198
2016-01-08 20:39:39 +01:00
|
|
|
|
[JumpThreading] Add a pattern to TryToUnfoldSelectInCurrBB()
Add the following pattern to TryToUnfoldSelectInCurrBB()
bb:
%p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
%c = cmp %p, 0
%s = select %c, trueval, falseval
The Select in the above pattern will be unfolded and then jump-threaded. The
current implementation does not allow CMP in the middle of PHI and Select.
Differential Revision: https://reviews.llvm.org/D34762
llvm-svn: 308050
2017-07-14 21:16:47 +02:00
|
|
|
auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
|
|
|
|
// Check if SI is in BB and use V as condition.
|
|
|
|
if (SI->getParent() != BB)
|
2016-11-15 16:42:23 +01:00
|
|
|
return false;
|
[JumpThreading] Add a pattern to TryToUnfoldSelectInCurrBB()
Add the following pattern to TryToUnfoldSelectInCurrBB()
bb:
%p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
%c = cmp %p, 0
%s = select %c, trueval, falseval
The Select in the above pattern will be unfolded and then jump-threaded. The
current implementation does not allow CMP in the middle of PHI and Select.
Differential Revision: https://reviews.llvm.org/D34762
llvm-svn: 308050
2017-07-14 21:16:47 +02:00
|
|
|
Value *Cond = SI->getCondition();
|
|
|
|
return (Cond && Cond == V && Cond->getType()->isIntegerTy(1));
|
|
|
|
};
|
2016-11-15 16:42:17 +01:00
|
|
|
|
[JumpThreading] Add a pattern to TryToUnfoldSelectInCurrBB()
Add the following pattern to TryToUnfoldSelectInCurrBB()
bb:
%p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
%c = cmp %p, 0
%s = select %c, trueval, falseval
The Select in the above pattern will be unfolded and then jump-threaded. The
current implementation does not allow CMP in the middle of PHI and Select.
Differential Revision: https://reviews.llvm.org/D34762
llvm-svn: 308050
2017-07-14 21:16:47 +02:00
|
|
|
SelectInst *SI = nullptr;
|
|
|
|
for (Use &U : PN->uses()) {
|
|
|
|
if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
|
|
|
|
// Look for a ICmp in BB that compares PN with a constant and is the
|
|
|
|
// condition of a Select.
|
|
|
|
if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
|
|
|
|
isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
|
|
|
|
if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
|
|
|
|
if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
|
|
|
|
SI = SelectI;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
|
|
|
|
// Look for a Select in BB that uses PN as condtion.
|
|
|
|
if (isUnfoldCandidate(SelectI, U.get())) {
|
|
|
|
SI = SelectI;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
[JumpThreading] Split select that has constant conditions coming from the PHI node
Look for PHI/Select in the same BB of the form
bb:
%p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
%s = select p, trueval, falseval
And expand the select into a branch structure. This later enables
jump-threading over bb in this pass.
Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
select if the associated PHI has at least one constant. If the unfolded
select is not jump-threaded, it will be folded again in the later
optimizations.
llvm-svn: 257198
2016-01-08 20:39:39 +01:00
|
|
|
}
|
[JumpThreading] Add a pattern to TryToUnfoldSelectInCurrBB()
Add the following pattern to TryToUnfoldSelectInCurrBB()
bb:
%p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
%c = cmp %p, 0
%s = select %c, trueval, falseval
The Select in the above pattern will be unfolded and then jump-threaded. The
current implementation does not allow CMP in the middle of PHI and Select.
Differential Revision: https://reviews.llvm.org/D34762
llvm-svn: 308050
2017-07-14 21:16:47 +02:00
|
|
|
|
|
|
|
if (!SI)
|
|
|
|
continue;
|
|
|
|
// Expand the select.
|
|
|
|
TerminatorInst *Term =
|
|
|
|
SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
|
|
|
|
PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
|
|
|
|
NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
|
|
|
|
NewPN->addIncoming(SI->getFalseValue(), BB);
|
|
|
|
SI->replaceAllUsesWith(NewPN);
|
|
|
|
SI->eraseFromParent();
|
|
|
|
return true;
|
[JumpThreading] Split select that has constant conditions coming from the PHI node
Look for PHI/Select in the same BB of the form
bb:
%p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
%s = select p, trueval, falseval
And expand the select into a branch structure. This later enables
jump-threading over bb in this pass.
Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
select if the associated PHI has at least one constant. If the unfolded
select is not jump-threaded, it will be folded again in the later
optimizations.
llvm-svn: 257198
2016-01-08 20:39:39 +01:00
|
|
|
}
|
2016-11-15 16:42:23 +01:00
|
|
|
return false;
|
[JumpThreading] Split select that has constant conditions coming from the PHI node
Look for PHI/Select in the same BB of the form
bb:
%p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
%s = select p, trueval, falseval
And expand the select into a branch structure. This later enables
jump-threading over bb in this pass.
Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
select if the associated PHI has at least one constant. If the unfolded
select is not jump-threaded, it will be folded again in the later
optimizations.
llvm-svn: 257198
2016-01-08 20:39:39 +01:00
|
|
|
}
|
2017-02-17 05:21:14 +01:00
|
|
|
|
|
|
|
/// Try to propagate a guard from the current BB into one of its predecessors
|
|
|
|
/// in case if another branch of execution implies that the condition of this
|
|
|
|
/// guard is always true. Currently we only process the simplest case that
|
|
|
|
/// looks like:
|
|
|
|
///
|
|
|
|
/// Start:
|
|
|
|
/// %cond = ...
|
|
|
|
/// br i1 %cond, label %T1, label %F1
|
|
|
|
/// T1:
|
|
|
|
/// br label %Merge
|
|
|
|
/// F1:
|
|
|
|
/// br label %Merge
|
|
|
|
/// Merge:
|
|
|
|
/// %condGuard = ...
|
|
|
|
/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
|
|
|
|
///
|
|
|
|
/// And cond either implies condGuard or !condGuard. In this case all the
|
|
|
|
/// instructions before the guard can be duplicated in both branches, and the
|
|
|
|
/// guard is then threaded to one of them.
|
|
|
|
bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
|
|
|
|
using namespace PatternMatch;
|
|
|
|
// We only want to deal with two predecessors.
|
|
|
|
BasicBlock *Pred1, *Pred2;
|
|
|
|
auto PI = pred_begin(BB), PE = pred_end(BB);
|
|
|
|
if (PI == PE)
|
|
|
|
return false;
|
|
|
|
Pred1 = *PI++;
|
|
|
|
if (PI == PE)
|
|
|
|
return false;
|
|
|
|
Pred2 = *PI++;
|
|
|
|
if (PI != PE)
|
|
|
|
return false;
|
|
|
|
if (Pred1 == Pred2)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Try to thread one of the guards of the block.
|
|
|
|
// TODO: Look up deeper than to immediate predecessor?
|
|
|
|
auto *Parent = Pred1->getSinglePredecessor();
|
|
|
|
if (!Parent || Parent != Pred2->getSinglePredecessor())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
|
|
|
|
for (auto &I : *BB)
|
|
|
|
if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
|
|
|
|
if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Try to propagate the guard from BB which is the lower block of a diamond
|
|
|
|
/// to one of its branches, in case if diamond's condition implies guard's
|
|
|
|
/// condition.
|
|
|
|
bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,
|
|
|
|
BranchInst *BI) {
|
|
|
|
assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
|
|
|
|
assert(BI->isConditional() && "Unconditional branch has 2 successors?");
|
|
|
|
Value *GuardCond = Guard->getArgOperand(0);
|
|
|
|
Value *BranchCond = BI->getCondition();
|
|
|
|
BasicBlock *TrueDest = BI->getSuccessor(0);
|
|
|
|
BasicBlock *FalseDest = BI->getSuccessor(1);
|
|
|
|
|
|
|
|
auto &DL = BB->getModule()->getDataLayout();
|
|
|
|
bool TrueDestIsSafe = false;
|
|
|
|
bool FalseDestIsSafe = false;
|
|
|
|
|
|
|
|
// True dest is safe if BranchCond => GuardCond.
|
|
|
|
auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
|
|
|
|
if (Impl && *Impl)
|
|
|
|
TrueDestIsSafe = true;
|
|
|
|
else {
|
|
|
|
// False dest is safe if !BranchCond => GuardCond.
|
|
|
|
Impl =
|
|
|
|
isImpliedCondition(BranchCond, GuardCond, DL, /* InvertAPred */ true);
|
|
|
|
if (Impl && *Impl)
|
|
|
|
FalseDestIsSafe = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!TrueDestIsSafe && !FalseDestIsSafe)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
BasicBlock *UnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
|
|
|
|
BasicBlock *GuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
|
|
|
|
|
|
|
|
ValueToValueMapTy UnguardedMapping, GuardedMapping;
|
|
|
|
Instruction *AfterGuard = Guard->getNextNode();
|
|
|
|
unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
|
|
|
|
if (Cost > BBDupThreshold)
|
|
|
|
return false;
|
|
|
|
// Duplicate all instructions before the guard and the guard itself to the
|
|
|
|
// branch where implication is not proved.
|
|
|
|
GuardedBlock = DuplicateInstructionsInSplitBetween(
|
|
|
|
BB, GuardedBlock, AfterGuard, GuardedMapping);
|
|
|
|
assert(GuardedBlock && "Could not create the guarded block?");
|
|
|
|
// Duplicate all instructions before the guard in the unguarded branch.
|
|
|
|
// Since we have successfully duplicated the guarded block and this block
|
|
|
|
// has fewer instructions, we expect it to succeed.
|
|
|
|
UnguardedBlock = DuplicateInstructionsInSplitBetween(BB, UnguardedBlock,
|
|
|
|
Guard, UnguardedMapping);
|
|
|
|
assert(UnguardedBlock && "Could not create the unguarded block?");
|
|
|
|
DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
|
|
|
|
<< GuardedBlock->getName() << "\n");
|
|
|
|
|
|
|
|
// Some instructions before the guard may still have uses. For them, we need
|
|
|
|
// to create Phi nodes merging their copies in both guarded and unguarded
|
|
|
|
// branches. Those instructions that have no uses can be just removed.
|
|
|
|
SmallVector<Instruction *, 4> ToRemove;
|
|
|
|
for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
|
|
|
|
if (!isa<PHINode>(&*BI))
|
|
|
|
ToRemove.push_back(&*BI);
|
|
|
|
|
|
|
|
Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
|
|
|
|
assert(InsertionPoint && "Empty block?");
|
|
|
|
// Substitute with Phis & remove.
|
|
|
|
for (auto *Inst : reverse(ToRemove)) {
|
|
|
|
if (!Inst->use_empty()) {
|
|
|
|
PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
|
|
|
|
NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
|
|
|
|
NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
|
|
|
|
NewPN->insertBefore(InsertionPoint);
|
|
|
|
Inst->replaceAllUsesWith(NewPN);
|
|
|
|
}
|
|
|
|
Inst->eraseFromParent();
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|