mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
CodeGen: Allow small copyable blocks to "break" the CFG.
When choosing the best successor for a block, ordinarily we would have preferred a block that preserves the CFG unless there is a strong probability in the other direction. For small blocks that can be duplicated we now skip that requirement as well, subject to some simple frequency calculations. Differential Revision: https://reviews.llvm.org/D28583 llvm-svn: 293716
This commit is contained in:
parent
4c57348a28
commit
9386601a22
@ -500,6 +500,11 @@ BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
|
||||
|
||||
void BranchFolder::MBFIWrapper::view(bool isSimple) { MBFI.view(isSimple); }
|
||||
|
||||
uint64_t
|
||||
BranchFolder::MBFIWrapper::getEntryFreq() const {
|
||||
return MBFI.getEntryFreq();
|
||||
}
|
||||
|
||||
/// CountTerminators - Count the number of terminators in the given
|
||||
/// block and set I to the position of the first non-terminator, if there
|
||||
/// is one, or MBB->end() otherwise.
|
||||
|
@ -123,6 +123,7 @@ namespace llvm {
|
||||
raw_ostream &printBlockFreq(raw_ostream &OS,
|
||||
const BlockFrequency Freq) const;
|
||||
void view(bool isSimple = true);
|
||||
uint64_t getEntryFreq() const;
|
||||
|
||||
private:
|
||||
const MachineBlockFrequencyInfo &MBFI;
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/MachinePostDominators.h"
|
||||
#include "llvm/CodeGen/TailDuplicator.h"
|
||||
#include "llvm/Support/Allocator.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
@ -50,6 +51,8 @@
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "block-placement"
|
||||
@ -137,13 +140,23 @@ BranchFoldPlacement("branch-fold-placement",
|
||||
cl::init(true), cl::Hidden);
|
||||
|
||||
// Heuristic for tail duplication.
|
||||
static cl::opt<unsigned> TailDuplicatePlacementThreshold(
|
||||
static cl::opt<unsigned> TailDupPlacementThreshold(
|
||||
"tail-dup-placement-threshold",
|
||||
cl::desc("Instruction cutoff for tail duplication during layout. "
|
||||
"Tail merging during layout is forced to have a threshold "
|
||||
"that won't conflict."), cl::init(2),
|
||||
cl::Hidden);
|
||||
|
||||
// Heuristic for tail duplication.
|
||||
static cl::opt<unsigned> TailDupPlacementPenalty(
|
||||
"tail-dup-placement-penalty",
|
||||
cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. "
|
||||
"Copying can increase fallthrough, but it also increases icache "
|
||||
"pressure. This parameter controls the penalty to account for that. "
|
||||
"Percent as integer."),
|
||||
cl::init(2),
|
||||
cl::Hidden);
|
||||
|
||||
extern cl::opt<unsigned> StaticLikelyProb;
|
||||
extern cl::opt<unsigned> ProfileLikelyProb;
|
||||
|
||||
@ -272,6 +285,12 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
||||
/// \brief A typedef for a block filter set.
|
||||
typedef SmallSetVector<MachineBasicBlock *, 16> BlockFilterSet;
|
||||
|
||||
/// Pair struct containing basic block and taildup profitiability
|
||||
struct BlockAndTailDupResult {
|
||||
MachineBasicBlock * BB;
|
||||
bool ShouldTailDup;
|
||||
};
|
||||
|
||||
/// \brief work lists of blocks that are ready to be laid out
|
||||
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
|
||||
SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
|
||||
@ -299,9 +318,12 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
||||
/// \brief A handle to the target's lowering info.
|
||||
const TargetLoweringBase *TLI;
|
||||
|
||||
/// \brief A handle to the post dominator tree.
|
||||
/// \brief A handle to the dominator tree.
|
||||
MachineDominatorTree *MDT;
|
||||
|
||||
/// \brief A handle to the post dominator tree.
|
||||
MachinePostDominatorTree *MPDT;
|
||||
|
||||
/// \brief Duplicator used to duplicate tails during placement.
|
||||
///
|
||||
/// Placement decisions can open up new tail duplication opportunities, but
|
||||
@ -374,9 +396,9 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
||||
BlockChain &SuccChain, BranchProbability SuccProb,
|
||||
BranchProbability RealSuccProb, BlockChain &Chain,
|
||||
const BlockFilterSet *BlockFilter);
|
||||
MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
|
||||
BlockChain &Chain,
|
||||
const BlockFilterSet *BlockFilter);
|
||||
BlockAndTailDupResult selectBestSuccessor(MachineBasicBlock *BB,
|
||||
BlockChain &Chain,
|
||||
const BlockFilterSet *BlockFilter);
|
||||
MachineBasicBlock *
|
||||
selectBestCandidateBlock(BlockChain &Chain,
|
||||
SmallVectorImpl<MachineBasicBlock *> &WorkList);
|
||||
@ -409,6 +431,18 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
||||
void buildCFGChains();
|
||||
void optimizeBranches();
|
||||
void alignBlocks();
|
||||
bool shouldTailDuplicate(MachineBasicBlock *BB);
|
||||
/// Check the edge frequencies to see if tail duplication will increase
|
||||
/// fallthroughs.
|
||||
bool isProfitableToTailDup(
|
||||
MachineBasicBlock *BB, MachineBasicBlock *Succ,
|
||||
BranchProbability AdjustedSumProb,
|
||||
BlockChain &Chain, const BlockFilterSet *BlockFilter);
|
||||
/// Returns true if a block can tail duplicate into all unplaced
|
||||
/// predecessors. Filters based on loop.
|
||||
bool canTailDuplicateUnplacedPreds(
|
||||
MachineBasicBlock *BB, MachineBasicBlock *Succ,
|
||||
BlockChain &Chain, const BlockFilterSet *BlockFilter);
|
||||
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid
|
||||
@ -422,6 +456,8 @@ public:
|
||||
AU.addRequired<MachineBranchProbabilityInfo>();
|
||||
AU.addRequired<MachineBlockFrequencyInfo>();
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
if (TailDupPlacement)
|
||||
AU.addRequired<MachinePostDominatorTree>();
|
||||
AU.addRequired<MachineLoopInfo>();
|
||||
AU.addRequired<TargetPassConfig>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
@ -436,6 +472,7 @@ INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
|
||||
INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
|
||||
"Branch Probability Basic Block Placement", false, false)
|
||||
@ -567,6 +604,201 @@ getAdjustedProbability(BranchProbability OrigProb,
|
||||
return SuccProb;
|
||||
}
|
||||
|
||||
/// Check if a block should be tail duplicated.
|
||||
/// \p BB Block to check.
|
||||
bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
|
||||
// Blocks with single successors don't create additional fallthrough
|
||||
// opportunities. Don't duplicate them. TODO: When conditional exits are
|
||||
// analyzable, allow them to be duplicated.
|
||||
bool IsSimple = TailDup.isSimpleBB(BB);
|
||||
|
||||
if (BB->succ_size() == 1)
|
||||
return false;
|
||||
return TailDup.shouldTailDuplicate(IsSimple, *BB);
|
||||
}
|
||||
|
||||
/// Compare 2 BlockFrequency's with a small penalty for \p A.
|
||||
/// In order to be conservative, we apply a X% penalty to account for
|
||||
/// increased icache pressure and static heuristics. For small frequencies
|
||||
/// we use only the numerators to improve accuracy. For simplicity, we assume the
|
||||
/// penalty is less than 100%
|
||||
/// TODO(iteratee): Use 64-bit fixed point edge frequencies everywhere.
|
||||
static bool greaterWithBias(BlockFrequency A, BlockFrequency B,
|
||||
uint64_t EntryFreq) {
|
||||
BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
|
||||
BlockFrequency Gain = A - B;
|
||||
return (Gain / ThresholdProb).getFrequency() >= EntryFreq;
|
||||
}
|
||||
|
||||
/// Check the edge frequencies to see if tail duplication will increase
|
||||
/// fallthroughs. It only makes sense to call this function when
|
||||
/// \p Succ would not be chosen otherwise. Tail duplication of \p Succ is
|
||||
/// always locally profitable if we would have picked \p Succ without
|
||||
/// considering duplication.
|
||||
bool MachineBlockPlacement::isProfitableToTailDup(
|
||||
MachineBasicBlock *BB, MachineBasicBlock *Succ,
|
||||
BranchProbability QProb,
|
||||
BlockChain &Chain, const BlockFilterSet *BlockFilter) {
|
||||
// We need to do a probability calculation to make sure this is profitable.
|
||||
// First: does succ have a successor that post-dominates? This affects the
|
||||
// calculation. The 2 relevant cases are:
|
||||
// BB BB
|
||||
// | \Qout | \Qout
|
||||
// P| C |P C
|
||||
// = C' = C'
|
||||
// | /Qin | /Qin
|
||||
// | / | /
|
||||
// Succ Succ
|
||||
// / \ | \ V
|
||||
// U/ =V |U \
|
||||
// / \ = D
|
||||
// D E | /
|
||||
// | /
|
||||
// |/
|
||||
// PDom
|
||||
// '=' : Branch taken for that CFG edge
|
||||
// In the second case, Placing Succ while duplicating it into C prevents the
|
||||
// fallthrough of Succ into either D or PDom, because they now have C as an
|
||||
// unplaced predecessor
|
||||
|
||||
// Start by figuring out which case we fall into
|
||||
MachineBasicBlock *PDom = nullptr;
|
||||
SmallVector<MachineBasicBlock *, 4> SuccSuccs;
|
||||
// Only scan the relevant successors
|
||||
auto AdjustedSuccSumProb =
|
||||
collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
|
||||
BranchProbability PProb = MBPI->getEdgeProbability(BB, Succ);
|
||||
auto BBFreq = MBFI->getBlockFreq(BB);
|
||||
auto SuccFreq = MBFI->getBlockFreq(Succ);
|
||||
BlockFrequency P = BBFreq * PProb;
|
||||
BlockFrequency Qout = BBFreq * QProb;
|
||||
uint64_t EntryFreq = MBFI->getEntryFreq();
|
||||
// If there are no more successors, it is profitable to copy, as it strictly
|
||||
// increases fallthrough.
|
||||
if (SuccSuccs.size() == 0)
|
||||
return greaterWithBias(P, Qout, EntryFreq);
|
||||
|
||||
auto BestSuccSucc = BranchProbability::getZero();
|
||||
// Find the PDom or the best Succ if no PDom exists.
|
||||
for (MachineBasicBlock *SuccSucc : SuccSuccs) {
|
||||
auto Prob = MBPI->getEdgeProbability(Succ, SuccSucc);
|
||||
if (Prob > BestSuccSucc)
|
||||
BestSuccSucc = Prob;
|
||||
if (PDom == nullptr)
|
||||
if (MPDT->dominates(SuccSucc, Succ)) {
|
||||
PDom = SuccSucc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// For the comparisons, we need to know Succ's best incoming edge that isn't
|
||||
// from BB.
|
||||
auto SuccBestPred = BlockFrequency(0);
|
||||
for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
|
||||
if (SuccPred == Succ || SuccPred == BB
|
||||
|| BlockToChain[SuccPred] == &Chain
|
||||
|| (BlockFilter && !BlockFilter->count(SuccPred)))
|
||||
continue;
|
||||
auto Freq = MBFI->getBlockFreq(SuccPred)
|
||||
* MBPI->getEdgeProbability(SuccPred, Succ);
|
||||
if (Freq > SuccBestPred)
|
||||
SuccBestPred = Freq;
|
||||
}
|
||||
// Qin is Succ's best unplaced incoming edge that isn't BB
|
||||
BlockFrequency Qin = SuccBestPred;
|
||||
// If it doesn't have a post-dominating successor, here is the calculation:
|
||||
// BB BB
|
||||
// | \Qout | \
|
||||
// P| C | =
|
||||
// = C' | C
|
||||
// | /Qin | |
|
||||
// | / | C' (+Succ)
|
||||
// Succ Succ /|
|
||||
// / \ | \/ |
|
||||
// U/ =V = /= =
|
||||
// / \ | / \|
|
||||
// D E D E
|
||||
// '=' : Branch taken for that CFG edge
|
||||
// Cost in the first case is: P + V
|
||||
// For this calculation, we always assume P > Qout. If Qout > P
|
||||
// The result of this function will be ignored at the caller.
|
||||
// Cost in the second case is: Qout + Qin * V + P * U + P * V
|
||||
// TODO(iteratee): If we lay out D after Succ, the P * U term
|
||||
// goes away. This logic is coming in D28522.
|
||||
|
||||
if (PDom == nullptr || !Succ->isSuccessor(PDom)) {
|
||||
BranchProbability UProb = BestSuccSucc;
|
||||
BranchProbability VProb = AdjustedSuccSumProb - UProb;
|
||||
BlockFrequency V = SuccFreq * VProb;
|
||||
BlockFrequency QinV = Qin * VProb;
|
||||
BlockFrequency BaseCost = P + V;
|
||||
BlockFrequency DupCost = Qout + QinV + P * AdjustedSuccSumProb;
|
||||
return greaterWithBias(BaseCost, DupCost, EntryFreq);
|
||||
}
|
||||
BranchProbability UProb = MBPI->getEdgeProbability(Succ, PDom);
|
||||
BranchProbability VProb = AdjustedSuccSumProb - UProb;
|
||||
BlockFrequency U = SuccFreq * UProb;
|
||||
BlockFrequency V = SuccFreq * VProb;
|
||||
// If there is a post-dominating successor, here is the calculation:
|
||||
// BB BB BB BB
|
||||
// | \Qout | \ | \Qout | \
|
||||
// |P C | = |P C | =
|
||||
// = C' |P C = C' |P C
|
||||
// | /Qin | | | /Qin | |
|
||||
// | / | C' (+Succ) | / | C' (+Succ)
|
||||
// Succ Succ /| Succ Succ /|
|
||||
// | \ V | \/ | | \ V | \/ |
|
||||
// |U \ |U /\ | |U = |U /\ |
|
||||
// = D = = =| | D | = =|
|
||||
// | / |/ D | / |/ D
|
||||
// | / | / | = | /
|
||||
// |/ | / |/ | =
|
||||
// Dom Dom Dom Dom
|
||||
// '=' : Branch taken for that CFG edge
|
||||
// The cost for taken branches in the first case is P + U
|
||||
// The cost in the second case (assuming independence), given the layout:
|
||||
// BB, Succ, (C+Succ), D, Dom
|
||||
// is Qout + P * V + Qin * U
|
||||
// compare P + U vs Qout + P + Qin * U.
|
||||
//
|
||||
// The 3rd and 4th cases cover when Dom would be chosen to follow Succ.
|
||||
//
|
||||
// For the 3rd case, the cost is P + 2 * V
|
||||
// For the 4th case, the cost is Qout + Qin * U + P * V + V
|
||||
// We choose 4 over 3 when (P + V) > Qout + Qin * U + P * V
|
||||
if (UProb > AdjustedSuccSumProb / 2
|
||||
&& !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom],
|
||||
UProb, UProb, Chain, BlockFilter)) {
|
||||
// Cases 3 & 4
|
||||
return greaterWithBias((P + V), (Qout + Qin * UProb + P * VProb),
|
||||
EntryFreq);
|
||||
}
|
||||
// Cases 1 & 2
|
||||
return greaterWithBias(
|
||||
(P + U), (Qout + Qin * UProb + P * AdjustedSuccSumProb), EntryFreq);
|
||||
}
|
||||
|
||||
|
||||
/// When the option TailDupPlacement is on, this method checks if the
|
||||
/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
|
||||
/// into all of its unplaced, unfiltered predecessors, that are not BB.
|
||||
bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
|
||||
MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
|
||||
const BlockFilterSet *BlockFilter) {
|
||||
if (!shouldTailDuplicate(Succ))
|
||||
return false;
|
||||
|
||||
for (MachineBasicBlock *Pred : Succ->predecessors()) {
|
||||
// Make sure all unplaced and unfiltered predecessors can be
|
||||
// tail-duplicated into.
|
||||
if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
|
||||
|| BlockToChain[Pred] == &Chain)
|
||||
continue;
|
||||
if (!TailDup.canTailDuplicate(Succ, Pred))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// When the option OutlineOptionalBranches is on, this method
|
||||
/// checks if the fallthrough candidate block \p Succ (of block
|
||||
/// \p BB) also has other unscheduled predecessor blocks which
|
||||
@ -615,11 +847,11 @@ static BranchProbability getLayoutSuccessorProbThreshold(
|
||||
if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
|
||||
/* See case 1 below for the cost analysis. For BB->Succ to
|
||||
* be taken with smaller cost, the following needs to hold:
|
||||
* Prob(BB->Succ) > 2* Prob(BB->Pred)
|
||||
* So the threshold T
|
||||
* T = 2 * (1-Prob(BB->Pred). Since T + Prob(BB->Pred) == 1,
|
||||
* We have T + T/2 = 1, i.e. T = 2/3. Also adding user specified
|
||||
* branch bias, we have
|
||||
* Prob(BB->Succ) > 2 * Prob(BB->Pred)
|
||||
* So the threshold T in the calculation below
|
||||
* (1-T) * Prob(BB->Succ) > T * Prob(BB->Pred)
|
||||
* So T / (1 - T) = 2, Yielding T = 2/3
|
||||
* Also adding user specified branch bias, we have
|
||||
* T = (2/3)*(ProfileLikelyProb/50)
|
||||
* = (2*ProfileLikelyProb)/150
|
||||
*/
|
||||
@ -631,6 +863,12 @@ static BranchProbability getLayoutSuccessorProbThreshold(
|
||||
|
||||
/// Checks to see if the layout candidate block \p Succ has a better layout
|
||||
/// predecessor than \c BB. If yes, returns true.
|
||||
/// \p SuccProb: The probability adjusted for only remaining blocks.
|
||||
/// Only used for logging
|
||||
/// \p RealSuccProb: The un-adjusted probability.
|
||||
/// \p Chain: The chain that BB belongs to and Succ is being considered for.
|
||||
/// \p BlockFilter: if non-null, the set of blocks that make up the loop being
|
||||
/// considered
|
||||
bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
||||
MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
|
||||
BranchProbability SuccProb, BranchProbability RealSuccProb,
|
||||
@ -762,13 +1000,15 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
||||
for (MachineBasicBlock *Pred : Succ->predecessors()) {
|
||||
if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
|
||||
(BlockFilter && !BlockFilter->count(Pred)) ||
|
||||
BlockToChain[Pred] == &Chain)
|
||||
BlockToChain[Pred] == &Chain ||
|
||||
// This check is redundant except for look ahead. This function is
|
||||
// called for lookahead by isProfitableToTailDup when BB hasn't been
|
||||
// placed yet.
|
||||
(Pred == BB))
|
||||
continue;
|
||||
// Do backward checking.
|
||||
// For all cases above, we need a backward checking to filter out edges that
|
||||
// are not 'strongly' biased. With profile data available, the check is
|
||||
// mostly redundant for case 2 (when threshold prob is set at 50%) unless S
|
||||
// has more than two successors.
|
||||
// are not 'strongly' biased.
|
||||
// BB Pred
|
||||
// \ /
|
||||
// Succ
|
||||
@ -804,14 +1044,15 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
|
||||
/// breaking CFG structure, but cave and break such structures in the case of
|
||||
/// very hot successor edges.
|
||||
///
|
||||
/// \returns The best successor block found, or null if none are viable.
|
||||
MachineBasicBlock *
|
||||
/// \returns The best successor block found, or null if none are viable, along
|
||||
/// with a boolean indicating if tail duplication is necessary.
|
||||
MachineBlockPlacement::BlockAndTailDupResult
|
||||
MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
|
||||
BlockChain &Chain,
|
||||
const BlockFilterSet *BlockFilter) {
|
||||
const BranchProbability HotProb(StaticLikelyProb, 100);
|
||||
|
||||
MachineBasicBlock *BestSucc = nullptr;
|
||||
BlockAndTailDupResult BestSucc = { nullptr, false };
|
||||
auto BestProb = BranchProbability::getZero();
|
||||
|
||||
SmallVector<MachineBasicBlock *, 4> Successors;
|
||||
@ -819,6 +1060,12 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
|
||||
collectViableSuccessors(BB, Chain, BlockFilter, Successors);
|
||||
|
||||
DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");
|
||||
|
||||
// For blocks with CFG violations, we may be able to lay them out anyway with
|
||||
// tail-duplication. We keep this vector so we can perform the probability
|
||||
// calculations the minimum number of times.
|
||||
SmallVector<std::tuple<BranchProbability, MachineBasicBlock *>, 4>
|
||||
DupCandidates;
|
||||
for (MachineBasicBlock *Succ : Successors) {
|
||||
auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
|
||||
BranchProbability SuccProb =
|
||||
@ -826,15 +1073,21 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
|
||||
|
||||
// This heuristic is off by default.
|
||||
if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
|
||||
HotProb))
|
||||
return Succ;
|
||||
HotProb)) {
|
||||
BestSucc.BB = Succ;
|
||||
return BestSucc;
|
||||
}
|
||||
|
||||
BlockChain &SuccChain = *BlockToChain[Succ];
|
||||
// Skip the edge \c BB->Succ if block \c Succ has a better layout
|
||||
// predecessor that yields lower global cost.
|
||||
if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
|
||||
Chain, BlockFilter))
|
||||
Chain, BlockFilter)) {
|
||||
// If tail duplication would make Succ profitable, place it.
|
||||
if (TailDupPlacement && shouldTailDuplicate(Succ))
|
||||
DupCandidates.push_back(std::make_tuple(SuccProb, Succ));
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG(
|
||||
dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
|
||||
@ -842,17 +1095,52 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
|
||||
<< (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
|
||||
<< "\n");
|
||||
|
||||
if (BestSucc && BestProb >= SuccProb) {
|
||||
if (BestSucc.BB && BestProb >= SuccProb) {
|
||||
DEBUG(dbgs() << " Not the best candidate, continuing\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << " Setting it as best candidate\n");
|
||||
BestSucc = Succ;
|
||||
BestSucc.BB = Succ;
|
||||
BestProb = SuccProb;
|
||||
}
|
||||
if (BestSucc)
|
||||
DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc) << "\n");
|
||||
// Handle the tail duplication candidates in order of decreasing probability.
|
||||
// Stop at the first one that is profitable. Also stop if they are less
|
||||
// profitable than BestSucc. Position is important because we preserve it and
|
||||
// prefer first best match. Here we aren't comparing in order, so we capture
|
||||
// the position instead.
|
||||
if (DupCandidates.size() != 0) {
|
||||
auto cmp =
|
||||
[](const std::tuple<BranchProbability, MachineBasicBlock *> &a,
|
||||
const std::tuple<BranchProbability, MachineBasicBlock *> &b) {
|
||||
return std::get<0>(a) > std::get<0>(b);
|
||||
};
|
||||
std::stable_sort(DupCandidates.begin(), DupCandidates.end(), cmp);
|
||||
}
|
||||
for(auto &Tup : DupCandidates) {
|
||||
BranchProbability DupProb;
|
||||
MachineBasicBlock *Succ;
|
||||
std::tie(DupProb, Succ) = Tup;
|
||||
if (DupProb < BestProb)
|
||||
break;
|
||||
if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
|
||||
// If tail duplication gives us fallthrough when we otherwise wouldn't
|
||||
// have it, that is a strict gain.
|
||||
&& (BestSucc.BB == nullptr
|
||||
|| isProfitableToTailDup(BB, Succ, BestProb, Chain,
|
||||
BlockFilter))) {
|
||||
DEBUG(
|
||||
dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
|
||||
<< DupProb
|
||||
<< " (Tail Duplicate)\n");
|
||||
BestSucc.BB = Succ;
|
||||
BestSucc.ShouldTailDup = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (BestSucc.BB)
|
||||
DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) << "\n");
|
||||
|
||||
return BestSucc;
|
||||
}
|
||||
@ -1001,7 +1289,11 @@ void MachineBlockPlacement::buildChain(
|
||||
|
||||
// Look for the best viable successor if there is one to place immediately
|
||||
// after this block.
|
||||
MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
|
||||
auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
|
||||
MachineBasicBlock* BestSucc = Result.BB;
|
||||
bool ShouldTailDup = Result.ShouldTailDup;
|
||||
if (TailDupPlacement)
|
||||
ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
|
||||
|
||||
// If an immediate successor isn't available, look for the best viable
|
||||
// block among those we've identified as not violating the loop's CFG at
|
||||
@ -1022,7 +1314,7 @@ void MachineBlockPlacement::buildChain(
|
||||
|
||||
// Placement may have changed tail duplication opportunities.
|
||||
// Check for that now.
|
||||
if (TailDupPlacement && BestSucc) {
|
||||
if (TailDupPlacement && BestSucc && ShouldTailDup) {
|
||||
// If the chosen successor was duplicated into all its predecessors,
|
||||
// don't bother laying it out, just go round the loop again with BB as
|
||||
// the chain end.
|
||||
@ -1914,13 +2206,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
|
||||
DuplicatedToLPred = false;
|
||||
DEBUG(dbgs() << "Redoing tail duplication for Succ#"
|
||||
<< BB->getNumber() << "\n");
|
||||
bool IsSimple = TailDup.isSimpleBB(BB);
|
||||
// Blocks with single successors don't create additional fallthrough
|
||||
// opportunities. Don't duplicate them. TODO: When conditional exits are
|
||||
// analyzable, allow them to be duplicated.
|
||||
if (!IsSimple && BB->succ_size() == 1)
|
||||
return false;
|
||||
if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
|
||||
|
||||
if (!shouldTailDuplicate(BB))
|
||||
return false;
|
||||
// This has to be a callback because none of it can be done after
|
||||
// BB is deleted.
|
||||
@ -1973,6 +2260,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
|
||||
llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
|
||||
|
||||
SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
|
||||
bool IsSimple = TailDup.isSimpleBB(BB);
|
||||
TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
|
||||
&DuplicatedPreds, &RemovalCallbackRef);
|
||||
|
||||
@ -2013,13 +2301,15 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
||||
TII = MF.getSubtarget().getInstrInfo();
|
||||
TLI = MF.getSubtarget().getTargetLowering();
|
||||
MDT = &getAnalysis<MachineDominatorTree>();
|
||||
MPDT = nullptr;
|
||||
|
||||
// Initialize PreferredLoopExit to nullptr here since it may never be set if
|
||||
// there are no MachineLoops.
|
||||
PreferredLoopExit = nullptr;
|
||||
|
||||
if (TailDupPlacement) {
|
||||
unsigned TailDupSize = TailDuplicatePlacementThreshold;
|
||||
MPDT = &getAnalysis<MachinePostDominatorTree>();
|
||||
unsigned TailDupSize = TailDupPlacementThreshold;
|
||||
if (MF.getFunction()->optForSize())
|
||||
TailDupSize = 1;
|
||||
TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
|
||||
@ -2038,7 +2328,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
||||
BranchFoldPlacement;
|
||||
// No tail merging opportunities if the block number is less than four.
|
||||
if (MF.size() > 3 && EnableTailMerge) {
|
||||
unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1;
|
||||
unsigned TailMergeSize = TailDupPlacementThreshold + 1;
|
||||
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
|
||||
*MBPI, TailMergeSize);
|
||||
|
||||
@ -2049,6 +2339,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
||||
BlockToChain.clear();
|
||||
// Must redo the dominator tree if blocks were changed.
|
||||
MDT->runOnMachineFunction(MF);
|
||||
if (MPDT)
|
||||
MPDT->runOnMachineFunction(MF);
|
||||
ChainAllocator.DestroyAll();
|
||||
buildCFGChains();
|
||||
}
|
||||
|
@ -9,10 +9,10 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
|
||||
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: [[FAILBB]]:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NEXT: [[EXITBB]]:
|
||||
; CHECK-NEXT: ret
|
||||
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
|
||||
%val = extractvalue { i32, i1 } %pair, 0
|
||||
ret i32 %val
|
||||
@ -27,10 +27,12 @@ define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
|
||||
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0]
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: mov x0, x[[ADDR]]
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: [[FAILBB]]:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NEXT: [[EXITBB]]:
|
||||
; CHECK-NEXT: mov x0, x[[ADDR]]
|
||||
; CHECK-NEXT: ret
|
||||
%new = load i32, i32* %pnew
|
||||
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
|
||||
%val = extractvalue { i32, i1 } %pair, 0
|
||||
@ -41,15 +43,15 @@ define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
|
||||
; CHECK-LABEL: val_compare_and_swap_rel:
|
||||
; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
|
||||
; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
|
||||
; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]
|
||||
; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cmp [[RESULT]], w1
|
||||
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]
|
||||
; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: [[FAILBB]]:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NEXT: [[EXITBB]]:
|
||||
; CHECK-NEXT: ret
|
||||
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic
|
||||
%val = extractvalue { i32, i1 } %pair, 0
|
||||
ret i32 %val
|
||||
@ -64,10 +66,10 @@ define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
|
||||
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: [[FAILBB]]:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NEXT: [[EXITBB]]:
|
||||
; CHECK-NEXT: ret
|
||||
%pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic
|
||||
%val = extractvalue { i64, i1 } %pair, 0
|
||||
ret i64 %val
|
||||
|
@ -346,19 +346,15 @@ entry:
|
||||
; CHECK-NEXT: sub w1, w1, #1
|
||||
; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]]
|
||||
; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
|
||||
; DISABLE-NEXT: b [[IFEND_LABEL]]
|
||||
;
|
||||
; DISABLE: [[ELSE_LABEL]]: ; %if.else
|
||||
; DISABLE: lsl w0, w1, #1
|
||||
;
|
||||
; CHECK: [[IFEND_LABEL]]:
|
||||
; CHECK-NEXT: [[IFEND_LABEL]]:
|
||||
; Epilogue code.
|
||||
; CHECK: add sp, sp, #16
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
; ENABLE: [[ELSE_LABEL]]: ; %if.else
|
||||
; ENABLE-NEXT: lsl w0, w1, #1
|
||||
; ENABLE_NEXT: ret
|
||||
; CHECK: [[ELSE_LABEL]]: ; %if.else
|
||||
; CHECK-NEXT: lsl w0, w1, #1
|
||||
; DISABLE-NEXT: add sp, sp, #16
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
|
||||
entry:
|
||||
%ap = alloca i8*, align 8
|
||||
|
@ -1,69 +0,0 @@
|
||||
; RUN: llc -O3 -o - -verify-machineinstrs %s | FileCheck %s
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
%struct.s1 = type { %struct.s3*, %struct.s1* }
|
||||
%struct.s2 = type opaque
|
||||
%struct.s3 = type { i32 }
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define internal fastcc i32 @repeated_dup_worklist(%struct.s1** %pp1, %struct.s2* %p2, i32 %state, i1 %i1_1, i32 %i32_1) unnamed_addr #0 {
|
||||
entry:
|
||||
br label %while.cond.outer
|
||||
|
||||
; The loop gets laid out:
|
||||
; %while.cond.outer
|
||||
; %(null)
|
||||
; %(null)
|
||||
; %dup2
|
||||
; and then %dup1 gets chosen as the next block.
|
||||
; when dup2 is duplicated into dup1, %worklist could erroneously be placed on
|
||||
; the worklist, because all of its current predecessors are now scheduled.
|
||||
; However, after dup2 is tail-duplicated, %worklist can't be on the worklist
|
||||
; because it now has unscheduled predecessors.
|
||||
; CHECK-LABEL: repeated_dup_worklist
|
||||
; CHECK: // %entry
|
||||
; CHECK: // %while.cond.outer
|
||||
; first %(null) block
|
||||
; CHECK: // in Loop:
|
||||
; CHECK: ldr
|
||||
; CHECK-NEXT: tbnz
|
||||
; second %(null) block
|
||||
; CHECK: // in Loop:
|
||||
; CHECK: // %dup2
|
||||
; CHECK: // %worklist
|
||||
; CHECK: // %if.then96.i
|
||||
while.cond.outer: ; preds = %dup1, %entry
|
||||
%progress.0.ph = phi i32 [ 0, %entry ], [ %progress.1, %dup1 ]
|
||||
%inc77 = add nsw i32 %progress.0.ph, 1
|
||||
%cmp = icmp slt i32 %progress.0.ph, %i32_1
|
||||
br i1 %cmp, label %dup2, label %dup1
|
||||
|
||||
dup2: ; preds = %if.then96.i, %worklist, %while.cond.outer
|
||||
%progress.1.ph = phi i32 [ 0, %while.cond.outer ], [ %progress.1, %if.then96.i ], [ %progress.1, %worklist ]
|
||||
%.pr = load %struct.s1*, %struct.s1** %pp1, align 8
|
||||
br label %dup1
|
||||
|
||||
dup1: ; preds = %dup2, %while.cond.outer
|
||||
%0 = phi %struct.s1* [ %.pr, %dup2 ], [ undef, %while.cond.outer ]
|
||||
%progress.1 = phi i32 [ %progress.1.ph, %dup2 ], [ %inc77, %while.cond.outer ]
|
||||
br i1 %i1_1, label %while.cond.outer, label %worklist
|
||||
|
||||
worklist: ; preds = %dup1
|
||||
%snode94 = getelementptr inbounds %struct.s1, %struct.s1* %0, i64 0, i32 0
|
||||
%1 = load %struct.s3*, %struct.s3** %snode94, align 8
|
||||
%2 = getelementptr inbounds %struct.s3, %struct.s3* %1, i32 0, i32 0
|
||||
%3 = load i32, i32* %2, align 4
|
||||
%tobool95.i = icmp eq i32 %3, 0
|
||||
br i1 %tobool95.i, label %if.then96.i, label %dup2
|
||||
|
||||
if.then96.i: ; preds = %worklist
|
||||
call fastcc void @free_s3(%struct.s2* %p2, %struct.s3* %1) #1
|
||||
br label %dup2
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare fastcc void @free_s3(%struct.s2*, %struct.s3*) unnamed_addr #0
|
||||
|
||||
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind }
|
@ -10,7 +10,7 @@ entry:
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
; CHECK: sub [[CMP:w[0-9]+]], w0, #12
|
||||
; CHECK: tbz [[CMP]], #31
|
||||
; CHECK: tbnz [[CMP]], #31
|
||||
|
||||
if.then:
|
||||
call void @t()
|
||||
@ -28,7 +28,7 @@ entry:
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
; CHECK: sub [[CMP:x[0-9]+]], x0, #12
|
||||
; CHECK: tbz [[CMP]], #63
|
||||
; CHECK: tbnz [[CMP]], #63
|
||||
|
||||
if.then:
|
||||
call void @t()
|
||||
@ -118,7 +118,7 @@ entry:
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
; CHECK: sub [[CMP:w[0-9]+]], w0, #12
|
||||
; CHECK: tbz [[CMP]], #31
|
||||
; CHECK: tbnz [[CMP]], #31
|
||||
|
||||
if.then:
|
||||
call void @t()
|
||||
@ -178,7 +178,7 @@ define void @test9(i64 %val1) {
|
||||
br i1 %tst, label %if.then, label %if.end
|
||||
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: tbz x0, #63
|
||||
; CHECK: tbnz x0, #63
|
||||
|
||||
if.then:
|
||||
call void @t()
|
||||
@ -194,7 +194,7 @@ define void @test10(i64 %val1) {
|
||||
br i1 %tst, label %if.then, label %if.end
|
||||
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: tbz x0, #63
|
||||
; CHECK: tbnz x0, #63
|
||||
|
||||
if.then:
|
||||
call void @t()
|
||||
@ -209,7 +209,7 @@ define void @test11(i64 %val1, i64* %ptr) {
|
||||
|
||||
; CHECK: ldr [[CMP:x[0-9]+]], [x1]
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: tbz [[CMP]], #63
|
||||
; CHECK: tbnz [[CMP]], #63
|
||||
|
||||
%val = load i64, i64* %ptr
|
||||
%tst = icmp slt i64 %val, 0
|
||||
@ -229,7 +229,7 @@ define void @test12(i64 %val1) {
|
||||
br i1 %tst, label %if.then, label %if.end
|
||||
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: tbz x0, #63
|
||||
; CHECK: tbnz x0, #63
|
||||
|
||||
if.then:
|
||||
call void @t()
|
||||
@ -247,7 +247,7 @@ define void @test13(i64 %val1, i64 %val2) {
|
||||
|
||||
; CHECK: orr [[CMP:x[0-9]+]], x0, x1
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: tbz [[CMP]], #63
|
||||
; CHECK: tbnz [[CMP]], #63
|
||||
|
||||
if.then:
|
||||
call void @t()
|
||||
|
@ -335,6 +335,12 @@ loop:
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
|
||||
; GCN-NEXT: [[BB3]]: ; %bb3
|
||||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: v_nop_e64
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: ;;#ASMSTART
|
||||
; GCN-NEXT: v_nop_e64
|
||||
; GCN-NEXT: ;;#ASMEND
|
||||
; GCN-NEXT: s_endpgm
|
||||
define void @expand_requires_expand(i32 %cond0) #0 {
|
||||
bb0:
|
||||
@ -356,6 +362,12 @@ bb2:
|
||||
br label %bb3
|
||||
|
||||
bb3:
|
||||
; These NOPs prevent tail-duplication-based outlining
|
||||
; from firing, which defeats the need to expand the branches and this test.
|
||||
call void asm sideeffect
|
||||
"v_nop_e64", ""() #0
|
||||
call void asm sideeffect
|
||||
"v_nop_e64", ""() #0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -385,6 +397,7 @@ bb3:
|
||||
|
||||
; GCN-NEXT: [[ENDIF]]: ; %endif
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
|
||||
; GCN-NEXT: s_sleep 5
|
||||
; GCN-NEXT: s_endpgm
|
||||
define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) #0 {
|
||||
entry:
|
||||
@ -402,6 +415,9 @@ if_uniform:
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
; layout can remove the split branch if it can copy the return block.
|
||||
; This call makes the return block long enough that it doesn't get copied.
|
||||
call void @llvm.amdgcn.s.sleep(i32 5);
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -252,10 +252,12 @@ ENDIF: ; preds = %IF, %main_body
|
||||
; GCN: s_cmp_lt_i32 [[COND]], 1
|
||||
; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
|
||||
; GCN: v_cmp_gt_i32_e64 vcc, [[COND]], 0{{$}}
|
||||
; GCN: s_cbranch_vccnz [[EXIT]]
|
||||
; GCN: buffer_store
|
||||
; GCN: s_cbranch_vccz [[BODY:[A-Za-z0-9_]+]]
|
||||
; GCN: {{^}}[[EXIT]]:
|
||||
; GCN: s_endpgm
|
||||
; GCN: {{^}}[[BODY]]:
|
||||
; GCN: buffer_store
|
||||
; GCN: s_endpgm
|
||||
define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
|
||||
bb:
|
||||
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
@ -302,9 +304,10 @@ done:
|
||||
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
|
||||
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
|
||||
; GCN: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
|
||||
; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0
|
||||
; GCN: s_cbranch_scc1 [[ENDIF_LABEL]]
|
||||
; GCN: s_cbranch_scc0 [[IF_UNIFORM_LABEL:[A-Z0-9_a-z]+]]
|
||||
; GCN: s_endpgm
|
||||
; GCN: {{^}}[[IF_UNIFORM_LABEL]]:
|
||||
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: buffer_store_dword [[ONE]]
|
||||
define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
|
||||
@ -328,14 +331,13 @@ endif:
|
||||
|
||||
; GCN-LABEL: {{^}}divergent_inside_uniform:
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
|
||||
; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
; GCN: [[IF_LABEL]]:
|
||||
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
|
||||
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
|
||||
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: buffer_store_dword [[ONE]]
|
||||
; GCN: [[ENDIF_LABEL]]:
|
||||
; GCN: s_endpgm
|
||||
define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
|
||||
entry:
|
||||
%u_cmp = icmp eq i32 %cond, 0
|
||||
@ -363,11 +365,11 @@ endif:
|
||||
; GCN: buffer_store_dword [[ONE]]
|
||||
; GCN: s_or_b64 exec, exec, [[MASK]]
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; GCN: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]
|
||||
; GCN: s_cbranch_scc0 [[IF_UNIFORM:[A-Z0-9_]+]]
|
||||
; GCN: s_endpgm
|
||||
; GCN: [[IF_UNIFORM]]:
|
||||
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN: buffer_store_dword [[TWO]]
|
||||
; GCN: [[EXIT]]:
|
||||
; GCN: s_endpgm
|
||||
define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
@ -49,9 +49,9 @@ tailrecurse.switch: ; preds = %tailrecurse
|
||||
; V8-NEXT: beq
|
||||
; V8-NEXT: %tailrecurse.switch
|
||||
; V8: cmp
|
||||
; V8-NEXT: bne
|
||||
; V8-NEXT: b
|
||||
; The trailing space in the last line checks that the branch is unconditional
|
||||
; V8-NEXT: beq
|
||||
; V8-NEXT: %sw.epilog
|
||||
; V8-NEXT: bx lr
|
||||
switch i32 %and, label %sw.epilog [
|
||||
i32 1, label %sw.bb
|
||||
i32 3, label %sw.bb6
|
||||
|
@ -320,10 +320,10 @@ define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
|
||||
; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
|
||||
; CHECK: cmp [[SUCCESS]], #0
|
||||
; CHECK: bne [[LOOP_BB]]
|
||||
; CHECK: b [[END_BB:\.?LBB[0-9]+_[0-9]+]]
|
||||
; CHECK: dmb ish
|
||||
; CHECK: bx lr
|
||||
; CHECK: [[FAIL_BB]]:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NEXT: [[END_BB]]:
|
||||
; CHECK: dmb ish
|
||||
; CHECK: bx lr
|
||||
|
||||
|
@ -1045,20 +1045,21 @@ define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind
|
||||
; function there.
|
||||
; CHECK-ARM-NEXT: cmp r[[OLD]], r0
|
||||
; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
|
||||
; CHECK-NEXT: BB#2:
|
||||
; As above, r1 is a reasonable guess.
|
||||
; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
|
||||
; CHECK-NEXT: cmp [[STATUS]], #0
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
|
||||
; CHECK-NEXT: b .LBB{{[0-9]+}}_4
|
||||
; CHECK-NEXT: .LBB{{[0-9]+}}_3:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-ARM: mov r0, r[[OLD]]
|
||||
; CHECK: bx lr
|
||||
; CHECK-NEXT: .LBB{{[0-9]+}}_4:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
|
||||
; CHECK-ARM: mov r0, r[[OLD]]
|
||||
; CHECK-ARM-NEXT: bx lr
|
||||
ret i8 %old
|
||||
}
|
||||
|
||||
@ -1078,20 +1079,21 @@ define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounw
|
||||
; function there.
|
||||
; CHECK-ARM-NEXT: cmp r[[OLD]], r0
|
||||
; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
|
||||
; CHECK-NEXT: BB#2:
|
||||
; As above, r1 is a reasonable guess.
|
||||
; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
|
||||
; CHECK-NEXT: cmp [[STATUS]], #0
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
|
||||
; CHECK-NEXT: b .LBB{{[0-9]+}}_4
|
||||
; CHECK-NEXT: .LBB{{[0-9]+}}_3:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-ARM: mov r0, r[[OLD]]
|
||||
; CHECK: bx lr
|
||||
; CHECK-NEXT: .LBB{{[0-9]+}}_4:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
|
||||
; CHECK-ARM: mov r0, r[[OLD]]
|
||||
; CHECK-ARM-NEXT: bx lr
|
||||
ret i16 %old
|
||||
}
|
||||
|
||||
@ -1110,20 +1112,21 @@ define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
|
||||
; r0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp r[[OLD]], r0
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
|
||||
; CHECK-NEXT: BB#2:
|
||||
; As above, r1 is a reasonable guess.
|
||||
; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
|
||||
; CHECK-NEXT: cmp [[STATUS]], #0
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
|
||||
; CHECK-NEXT: b .LBB{{[0-9]+}}_4
|
||||
; CHECK-NEXT: .LBB{{[0-9]+}}_3:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK: str{{(.w)?}} r[[OLD]],
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: .LBB{{[0-9]+}}_4:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
|
||||
; CHECK: str{{(.w)?}} r[[OLD]],
|
||||
; CHECK-ARM-NEXT: bx lr
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -1148,16 +1151,16 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
|
||||
; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
|
||||
; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]]
|
||||
; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]]
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_4
|
||||
; CHECK-NEXT: BB#2:
|
||||
; As above, r2, r3 is a reasonable guess.
|
||||
; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
|
||||
; CHECK-NEXT: cmp [[STATUS]], #0
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
|
||||
; CHECK-NEXT: b .LBB{{[0-9]+}}_4
|
||||
; CHECK-NEXT: .LBB{{[0-9]+}}_3:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
|
||||
; CHECK-NEXT: pop
|
||||
; CHECK-NEXT: .LBB{{[0-9]+}}_4:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NOT: mcr
|
||||
|
||||
|
@ -13,14 +13,16 @@ define void @test_cmpxchg_weak(i32 *%addr, i32 %desired, i32 %new) {
|
||||
; CHECK-NEXT: dmb ish
|
||||
; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0]
|
||||
; CHECK-NEXT: cmp [[SUCCESS]], #0
|
||||
; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: beq [[SUCCESSBB:LBB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: BB#2:
|
||||
; CHECK-NEXT: dmb ish
|
||||
; CHECK-NEXT: str r3, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: [[LDFAILBB]]:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NEXT: [[FAILBB]]:
|
||||
; CHECK-NEXT: str r3, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
; CHECK-NEXT: [[SUCCESSBB]]:
|
||||
; CHECK-NEXT: dmb ish
|
||||
; CHECK-NEXT: str r3, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
|
||||
|
@ -7,7 +7,7 @@ define void @test() nounwind {
|
||||
entry:
|
||||
%0 = load i32, i32* @j, align 4
|
||||
%cmp = icmp eq i32 %0, 0
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
br i1 %cmp, label %if.then, label %if.end, !prof !1
|
||||
|
||||
; 16: bnez ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]]
|
||||
; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}})
|
||||
@ -21,4 +21,4 @@ if.end: ; preds = %if.then, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
!1 = !{!"branch_weights", i32 2, i32 1}
|
||||
|
@ -6,7 +6,7 @@ entry:
|
||||
%x = alloca i32, align 4
|
||||
%0 = load i32, i32* %x, align 4
|
||||
%cmp = icmp eq i32 %0, 0
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
br i1 %cmp, label %if.then, label %if.end, !prof !1
|
||||
|
||||
if.then:
|
||||
store i32 10, i32* %x, align 4
|
||||
@ -17,3 +17,4 @@ if.end:
|
||||
}
|
||||
|
||||
; CHECK: bnezc
|
||||
!1 = !{!"branch_weights", i32 2, i32 1}
|
||||
|
@ -17,7 +17,7 @@ entry:
|
||||
%sum1 = add i32 %sumin, 1
|
||||
%val1 = load i32, i32* %ptr
|
||||
%p = icmp eq i32 %sumin, 0
|
||||
br i1 %p, label %true, label %end
|
||||
br i1 %p, label %true, label %end, !prof !1
|
||||
true:
|
||||
%sum2 = add i32 %sum1, 1
|
||||
%ptr2 = getelementptr i32, i32* %ptr, i32 1
|
||||
@ -53,3 +53,5 @@ end:
|
||||
ret i32 %valmerge
|
||||
}
|
||||
declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
|
||||
|
||||
!1 = !{!"branch_weights", i32 2, i32 1}
|
||||
|
140
test/CodeGen/PowerPC/tail-dup-break-cfg.ll
Normal file
140
test/CodeGen/PowerPC/tail-dup-break-cfg.ll
Normal file
@ -0,0 +1,140 @@
|
||||
; RUN: llc -O2 -o - %s | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64le-grtev4-linux-gnu"
|
||||
|
||||
; Intended layout:
|
||||
; The code for tail-duplication during layout will produce the layout:
|
||||
; test1
|
||||
; test2
|
||||
; body1 (with copy of test2)
|
||||
; body2
|
||||
; exit
|
||||
|
||||
;CHECK-LABEL: tail_dup_break_cfg:
|
||||
;CHECK: mr [[TAGREG:[0-9]+]], 3
|
||||
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
|
||||
;CHECK-NEXT: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]]
|
||||
;CHECK-NEXT: # %test2
|
||||
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
|
||||
;CHECK-NEXT: beq 0, [[EXITLABEL:[._0-9A-Za-z]+]]
|
||||
;CHECK-NEXT: b [[BODY2LABEL:[._0-9A-Za-z]+]]
|
||||
;CHECK-NEXT: [[BODY1LABEL]]
|
||||
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
|
||||
;CHECK-NEXT: beq 0, [[EXITLABEL]]
|
||||
;CHECK-NEXT: [[BODY2LABEL]]
|
||||
;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
|
||||
;CHECK: blr
|
||||
; Tests bit 0 of %tag in %test1 and bit 1 in %test2; each test guards a body of
; four calls (%body1 calls @a, %body2 calls @b). Both conditional branches use
; !prof !1, whose first weight is the larger one, so the first-listed successor
; (%test2, then %exit) is the likelier target in each case.
define void @tail_dup_break_cfg(i32 %tag) {
|
||||
entry:
|
||||
br label %test1
|
||||
test1:
|
||||
%tagbit1 = and i32 %tag, 1
|
||||
%tagbit1eq0 = icmp eq i32 %tagbit1, 0
|
||||
br i1 %tagbit1eq0, label %test2, label %body1, !prof !1 ; %test2 more likely
|
||||
body1:
|
||||
call void @a()
|
||||
call void @a()
|
||||
call void @a()
|
||||
call void @a()
|
||||
br label %test2
|
||||
; %test2 is reached both directly from %test1 and from the end of %body1.
test2:
|
||||
%tagbit2 = and i32 %tag, 2
|
||||
%tagbit2eq0 = icmp eq i32 %tagbit2, 0
|
||||
br i1 %tagbit2eq0, label %exit, label %body2, !prof !1 ; %exit more likely
|
||||
body2:
|
||||
call void @b()
|
||||
call void @b()
|
||||
call void @b()
|
||||
call void @b()
|
||||
br label %exit
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; The branch weights here hint that we shouldn't tail duplicate in this case.
|
||||
;CHECK-LABEL: tail_dup_dont_break_cfg:
|
||||
;CHECK: mr [[TAGREG:[0-9]+]], 3
|
||||
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
|
||||
;CHECK-NEXT: bc 4, 1, [[TEST2LABEL:[._0-9A-Za-z]+]]
|
||||
;CHECK-NEXT: # %body1
|
||||
;CHECK: [[TEST2LABEL]]: # %test2
|
||||
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
|
||||
;CHECK-NEXT: beq 0, [[EXITLABEL:[._0-9A-Za-z]+]]
|
||||
;CHECK-NEXT: # %body2
|
||||
;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
|
||||
;CHECK: blr
|
||||
; Same block structure as @tail_dup_break_cfg, except that the second test
; compares with "ne" and lists %body2 as the first (likelier) successor, so
; %test2 most often continues into %body2 rather than jumping to %exit.
define void @tail_dup_dont_break_cfg(i32 %tag) {
|
||||
entry:
|
||||
br label %test1
|
||||
test1:
|
||||
%tagbit1 = and i32 %tag, 1
|
||||
%tagbit1eq0 = icmp eq i32 %tagbit1, 0
|
||||
br i1 %tagbit1eq0, label %test2, label %body1, !prof !1 ; %test2 more likely
|
||||
body1:
|
||||
call void @a()
|
||||
call void @a()
|
||||
call void @a()
|
||||
call void @a()
|
||||
br label %test2
|
||||
test2:
|
||||
%tagbit2 = and i32 %tag, 2
|
||||
; NOTE: despite its name, %tagbit2eq0 is produced by "icmp ne", so it is true
; when bit 1 of %tag is set.
%tagbit2eq0 = icmp ne i32 %tagbit2, 0
|
||||
br i1 %tagbit2eq0, label %body2, label %exit, !prof !1 ; %body2 more likely
|
||||
body2:
|
||||
call void @b()
|
||||
call void @b()
|
||||
call void @b()
|
||||
call void @b()
|
||||
br label %exit
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
declare void @a()
|
||||
declare void @b()
|
||||
declare void @c()
|
||||
declare void @d()
|
||||
|
||||
; This function arranges for the successors of %succ to have already been laid
|
||||
; out. When we consider whether to lay out succ after bb and to tail-duplicate
|
||||
; it, v and ret have already been placed, so we tail-duplicate as it removes a
|
||||
; branch and strictly increases fallthrough
|
||||
; CHECK-LABEL: tail_dup_no_succ
|
||||
; CHECK: # %entry
|
||||
; CHECK: # %v
|
||||
; CHECK: # %ret
|
||||
; CHECK: # %bb
|
||||
; CHECK: # %succ
|
||||
; CHECK: # %c
|
||||
; CHECK: bl c
|
||||
; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
|
||||
; CHECK: beq
|
||||
; CHECK: b
|
||||
; Three bit tests on %tag. %entry branches to %v (weight 95) or %bb (weight 5);
; %bb branches to %succ (weight 7) or %c (weight 3), and %c falls into %succ;
; %succ branches to %ret (weight 5) or %v (weight 3). Both successors of %succ
; (%ret and %v) are also reachable from the heavily favored %entry -> %v path.
define void @tail_dup_no_succ(i32 %tag) {
|
||||
entry:
|
||||
%tagbit1 = and i32 %tag, 1
|
||||
%tagbit1eq0 = icmp eq i32 %tagbit1, 0
|
||||
br i1 %tagbit1eq0, label %v, label %bb, !prof !2 ; %v very much more likely
|
||||
bb:
|
||||
%tagbit2 = and i32 %tag, 2
|
||||
%tagbit2eq0 = icmp eq i32 %tagbit2, 0
|
||||
br i1 %tagbit2eq0, label %succ, label %c, !prof !3 ; %succ more likely
|
||||
c:
|
||||
call void @c()
|
||||
call void @c()
|
||||
br label %succ
|
||||
succ:
|
||||
%tagbit3 = and i32 %tag, 4
|
||||
%tagbit3eq0 = icmp eq i32 %tagbit3, 0
|
||||
br i1 %tagbit3eq0, label %ret, label %v, !prof !1 ; %ret more likely
|
||||
v:
|
||||
call void @d()
|
||||
call void @d()
|
||||
br label %ret
|
||||
ret:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
!1 = !{!"branch_weights", i32 5, i32 3}
|
||||
!2 = !{!"branch_weights", i32 95, i32 5}
|
||||
!3 = !{!"branch_weights", i32 7, i32 3}
|
@ -66,14 +66,15 @@ return: ; preds = %if.end, %if.then
|
||||
; CHECK: ba .LBB1_1
|
||||
; CHECK: nop
|
||||
; CHECK:.LBB1_1: ! %entry
|
||||
; CHECK: ba .LBB1_3
|
||||
; CHECK: mov %g0, %i0
|
||||
; CHECK: cmp %i0, 0
|
||||
; CHECK: bne .LBB1_4
|
||||
; CHECK: ba .LBB1_5
|
||||
; CHECK:.LBB1_2: ! Block address taken
|
||||
; CHECK: mov 1, %i0
|
||||
; CHECK:.LBB1_3: ! %entry
|
||||
; CHECK: cmp %i0, 0
|
||||
; CHECK: be .LBB1_5
|
||||
; CHECK: nop
|
||||
; CHECK:.LBB1_4:
|
||||
; CHECK: ba .LBB1_6
|
||||
}
|
||||
declare i8* @llvm.frameaddress(i32) #2
|
||||
|
||||
|
@ -473,8 +473,8 @@ entry:
|
||||
%xor = xor i32 %val, 1
|
||||
%add = add i32 %xor, 1000000
|
||||
call void @foo()
|
||||
%cmp = icmp ne i32 %add, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
%cmp = icmp eq i32 %add, 0
|
||||
br i1 %cmp, label %store, label %exit, !prof !1
|
||||
|
||||
store:
|
||||
store i32 %add, i32 *%ptr
|
||||
@ -888,3 +888,5 @@ store:
|
||||
exit:
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
!1 = !{!"branch_weights", i32 2, i32 1}
|
||||
|
@ -1,11 +1,12 @@
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V4T
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V5T
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V4T
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \
|
||||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V5T
|
||||
|
||||
;
|
||||
; Note: Lots of tests use inline asm instead of regular calls.
|
||||
; This allows better control over what the allocation will do.
|
||||
@ -15,6 +16,8 @@
|
||||
; edges.
|
||||
; Also disable the late if-converter as it makes it harder to reason about
|
||||
; the diffs.
|
||||
; Disable tail-duplication during placement, as v4t vs v5t get different
|
||||
; results due to branches not being analyzable under v5
|
||||
|
||||
; Initial motivating example: Simple diamond with a call just on one side.
|
||||
; CHECK-LABEL: foo:
|
||||
|
@ -26,7 +26,7 @@ t:
|
||||
call void @x()
|
||||
call void @x()
|
||||
call void @x()
|
||||
; CHECK: cbnz
|
||||
; CHECK: cbz
|
||||
%q = icmp eq i32 %y, 0
|
||||
br i1 %q, label %t2, label %f
|
||||
|
||||
|
@ -4,7 +4,7 @@ declare void @x()
|
||||
|
||||
define void @f0(i32 %x) optsize {
|
||||
; CHECK-LABEL: f0:
|
||||
; CHECK: cbnz
|
||||
; CHECK: cbz
|
||||
%p = icmp eq i32 %x, 0
|
||||
br i1 %p, label %t, label %f
|
||||
|
||||
|
@ -12,10 +12,11 @@
|
||||
|
||||
define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
|
||||
; CHECK-LABEL: _ZNKSs7compareERKSs:
|
||||
; CHECK: cbnz r0,
|
||||
; CHECK: cbz r0,
|
||||
; CHECK-NEXT: %bb1
|
||||
; CHECK-NEXT: pop.w
|
||||
; CHECK-NEXT: %bb
|
||||
; CHECK-NEXT: sub{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}}
|
||||
; CHECK-NEXT: %bb1
|
||||
; CHECK-NEXT: pop.w
|
||||
entry:
|
||||
%0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
|
||||
|
@ -8,8 +8,9 @@ target triple = "wasm32-unknown-unknown"
|
||||
; Basic phi triangle.
|
||||
|
||||
; CHECK-LABEL: test0:
|
||||
; CHECK: div_s $[[NUM0:[0-9]+]]=, $0, $pop[[NUM1:[0-9]+]]{{$}}
|
||||
; CHECK: return $[[NUM0]]{{$}}
|
||||
; CHECK: return $0
|
||||
; CHECK: div_s $push[[NUM0:[0-9]+]]=, $0, $pop[[NUM1:[0-9]+]]{{$}}
|
||||
; CHECK: return $pop[[NUM0]]{{$}}
|
||||
define i32 @test0(i32 %p) {
|
||||
entry:
|
||||
%t = icmp slt i32 %p, 0
|
||||
|
@ -69,13 +69,14 @@ define float @test5(float %p) #0 {
|
||||
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; ALL-NEXT: vucomiss %xmm1, %xmm0
|
||||
; ALL-NEXT: jne LBB3_1
|
||||
; ALL-NEXT: jnp LBB3_2
|
||||
; ALL-NEXT: jp LBB3_1
|
||||
; ALL-NEXT: ## BB#2: ## %return
|
||||
; ALL-NEXT: retq
|
||||
; ALL-NEXT: LBB3_1: ## %if.end
|
||||
; ALL-NEXT: seta %al
|
||||
; ALL-NEXT: movzbl %al, %eax
|
||||
; ALL-NEXT: leaq {{.*}}(%rip), %rcx
|
||||
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; ALL-NEXT: LBB3_2: ## %return
|
||||
; ALL-NEXT: retq
|
||||
entry:
|
||||
%cmp = fcmp oeq float %p, 0.000000e+00
|
||||
|
@ -43,7 +43,7 @@ define void @test2b(i32 %x, i32 %n) nounwind {
|
||||
; CHECK-LABEL: test2b:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: btl %esi, %edi
|
||||
; CHECK-NEXT: jb .LBB1_2
|
||||
; CHECK-NEXT: jae .LBB1_1
|
||||
;
|
||||
entry:
|
||||
%tmp29 = lshr i32 %x, %n
|
||||
@ -83,7 +83,7 @@ define void @atest2b(i32 %x, i32 %n) nounwind {
|
||||
; CHECK-LABEL: atest2b:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: btl %esi, %edi
|
||||
; CHECK-NEXT: jb .LBB3_2
|
||||
; CHECK-NEXT: jae .LBB3_1
|
||||
;
|
||||
entry:
|
||||
%tmp29 = ashr i32 %x, %n
|
||||
@ -103,7 +103,7 @@ define void @test3(i32 %x, i32 %n) nounwind {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: btl %esi, %edi
|
||||
; CHECK-NEXT: jb .LBB4_2
|
||||
; CHECK-NEXT: jae .LBB4_1
|
||||
;
|
||||
entry:
|
||||
%tmp29 = shl i32 1, %n
|
||||
@ -123,7 +123,7 @@ define void @test3b(i32 %x, i32 %n) nounwind {
|
||||
; CHECK-LABEL: test3b:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: btl %esi, %edi
|
||||
; CHECK-NEXT: jb .LBB5_2
|
||||
; CHECK-NEXT: jae .LBB5_1
|
||||
;
|
||||
entry:
|
||||
%tmp29 = shl i32 1, %n
|
||||
|
@ -36,8 +36,8 @@ define double @rdar_7859988(double %x, double %y) nounwind readnone optsize ssp
|
||||
|
||||
entry:
|
||||
%mul = fmul double %x, %y
|
||||
%cmp = fcmp une double %mul, 0.000000e+00
|
||||
br i1 %cmp, label %bb2, label %bb1
|
||||
%cmp = fcmp oeq double %mul, 0.000000e+00
|
||||
br i1 %cmp, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
%add = fadd double %mul, -1.000000e+00
|
||||
|
@ -6,7 +6,7 @@ entry:
|
||||
; CHECK: jns
|
||||
%tmp1 = add i32 %X, 1 ; <i32> [#uses=1]
|
||||
%tmp = icmp slt i32 %tmp1, 0 ; <i1> [#uses=1]
|
||||
br i1 %tmp, label %cond_true, label %cond_next
|
||||
br i1 %tmp, label %cond_true, label %cond_next, !prof !1
|
||||
|
||||
cond_true: ; preds = %entry
|
||||
%tmp2 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
|
||||
@ -303,3 +303,5 @@ if.then:
|
||||
if.end:
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
!1 = !{!"branch_weights", i32 2, i32 1}
|
||||
|
Loading…
x
Reference in New Issue
Block a user