1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[MachineOutliner] AArch64: Avoid saving + restoring LR if possible

This commit allows the outliner to avoid saving and restoring the link register
on AArch64 when it is dead within an entire class of candidates.

This introduces changes to the way the outliner interfaces with the target.
For example, the target now interfaces with the outliner using a
MachineOutlinerInfo struct rather than by using getOutliningCallOverhead and
getOutliningFrameOverhead.

This also improves several comments on the outliner's cost model.

https://reviews.llvm.org/D36721

llvm-svn: 314341
This commit is contained in:
Jessica Paquette 2017-09-27 20:47:39 +00:00
parent 8ca6c42318
commit 0fa8483b0c
9 changed files with 389 additions and 224 deletions

View File

@ -331,7 +331,8 @@ public:
///
/// \p Orig must not return true for MachineInstr::isNotDuplicable().
virtual MachineInstr &duplicate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const;
MachineBasicBlock::iterator InsertBefore,
const MachineInstr &Orig) const;
/// This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@ -1561,33 +1562,41 @@ public:
return false;
}
/// \brief Returns the number of instructions that will be taken to call a
/// function defined by the sequence on the closed interval [ \p StartIt, \p
/// EndIt].
///
/// \returns The number of instructions for the call in the first member,
/// and a target-defined unsigned describing what type of call to emit in the
/// second member.
virtual std::pair<size_t, unsigned>
getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt,
MachineBasicBlock::iterator &EndIt) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::getOutliningCallOverhead!");
}
/// \brief Describes the number of instructions that it will take to call and
/// construct a frame for a given outlining candidate.
struct MachineOutlinerInfo {
/// Number of instructions to call an outlined function for this candidate.
unsigned CallOverhead;
/// \brief Returns the number of instructions that will be taken to construct
/// an outlined function frame for a function defined on the closed interval
/// [ \p StartIt, \p EndIt].
///
/// \returns The number of instructions for the frame in the first member
/// and a target-defined unsigned describing what type of frame to construct
/// in the second member.
virtual std::pair<size_t, unsigned> getOutliningFrameOverhead(
/// \brief Number of instructions to construct an outlined function frame
/// for this candidate.
unsigned FrameOverhead;
/// \brief Represents the specific instructions that must be emitted to
/// construct a call to this candidate.
unsigned CallConstructionID;
/// \brief Represents the specific instructions that must be emitted to
/// construct a frame for this candidate's outlined function.
unsigned FrameConstructionID;
MachineOutlinerInfo() {}
MachineOutlinerInfo(unsigned CallOverhead, unsigned FrameOverhead,
unsigned CallConstructionID,
unsigned FrameConstructionID)
: CallOverhead(CallOverhead), FrameOverhead(FrameOverhead),
CallConstructionID(CallConstructionID),
FrameConstructionID(FrameConstructionID) {}
};
/// \brief Returns a \p MachineOutlinerInfo struct containing target-specific
/// information for a set of outlining candidates.
virtual MachineOutlinerInfo getOutlininingCandidateInfo(
std::vector<
std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
&CandidateClass) const {
&RepeatedSequenceLocs) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::getOutliningFrameOverhead!");
"Target didn't implement TargetInstrInfo::getOutliningOverhead!");
}
/// Represents how an instruction should be mapped by the outliner.
@ -1608,7 +1617,7 @@ public:
/// emitted.
virtual void insertOutlinerEpilogue(MachineBasicBlock &MBB,
MachineFunction &MF,
unsigned FrameClass) const {
const MachineOutlinerInfo &MInfo) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::insertOutlinerEpilogue!");
}
@ -1619,7 +1628,7 @@ public:
virtual MachineBasicBlock::iterator
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
unsigned CallClass) const {
const MachineOutlinerInfo &MInfo) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::insertOutlinedCall!");
}
@ -1628,13 +1637,15 @@ public:
/// This may be empty, in which case no prologue will be emitted.
virtual void insertOutlinerPrologue(MachineBasicBlock &MBB,
MachineFunction &MF,
unsigned FrameClass) const {
const MachineOutlinerInfo &MInfo) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::insertOutlinerPrologue!");
}
/// Return true if the function can safely be outlined from.
/// By default, this means that the function has no red zone.
/// A function \p MF is considered safe for outlining if an outlined function
/// produced from instructions in F will produce a program which produces the
/// same output for any set of given inputs.
virtual bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
llvm_unreachable("Target didn't implement "
"TargetInstrInfo::isFunctionSafeToOutlineFrom!");

View File

@ -15,6 +15,23 @@
/// instructions. If a sequence of instructions appears often, then it ought
/// to be beneficial to pull out into a function.
///
/// The MachineOutliner communicates with a given target using hooks defined in
/// TargetInstrInfo.h. The target supplies the outliner with information on how
/// a specific sequence of instructions should be outlined. This information
/// is used to deduce the number of instructions necessary to
///
/// * Create an outlined function
/// * Call that outlined function
///
/// Targets must implement
/// * getOutliningCandidateInfo
/// * insertOutlinerEpilogue
/// * insertOutlinedCall
/// * insertOutlinerPrologue
/// * isFunctionSafeToOutlineFrom
///
/// in order to make use of the MachineOutliner.
///
/// This was originally presented at the 2016 LLVM Developers' Meeting in the
/// talk "Reducing Code Size Using Outlining". For a high-level overview of
/// how this pass works, the talk is available on YouTube at
@ -80,17 +97,17 @@ struct Candidate {
bool InCandidateList = true;
/// The start index of this \p Candidate.
size_t StartIdx;
unsigned StartIdx;
/// The number of instructions in this \p Candidate.
size_t Len;
unsigned Len;
/// The index of this \p Candidate's \p OutlinedFunction in the list of
/// \p OutlinedFunctions.
size_t FunctionIdx;
unsigned FunctionIdx;
/// Target-defined unsigned defining how to emit a call for this candidate.
unsigned CallClass = 0;
/// Contains all target-specific information for this \p Candidate.
TargetInstrInfo::MachineOutlinerInfo MInfo;
/// \brief The number of instructions that would be saved by outlining every
/// candidate of this type.
@ -101,9 +118,8 @@ struct Candidate {
/// for some given candidate.
unsigned Benefit = 0;
Candidate(size_t StartIdx, size_t Len, size_t FunctionIdx, unsigned CallClass)
: StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx),
CallClass(CallClass) {}
Candidate(unsigned StartIdx, unsigned Len, unsigned FunctionIdx)
: StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {}
Candidate() {}
@ -120,11 +136,11 @@ struct OutlinedFunction {
/// This is initialized after we go through and create the actual function.
MachineFunction *MF = nullptr;
/// A numbefr assigned to this function which appears at the end of its name.
size_t Name;
/// A number assigned to this function which appears at the end of its name.
unsigned Name;
/// The number of candidates for this OutlinedFunction.
size_t OccurrenceCount = 0;
unsigned OccurrenceCount = 0;
/// \brief The sequence of integers corresponding to the instructions in this
/// function.
@ -133,18 +149,18 @@ struct OutlinedFunction {
/// The number of instructions this function would save.
unsigned Benefit = 0;
/// Target-defined unsigned defining how to emit the frame for this function.
unsigned FrameClass = 0;
/// Contains all target-specific information for this \p OutlinedFunction.
TargetInstrInfo::MachineOutlinerInfo MInfo;
OutlinedFunction(size_t Name, size_t OccurrenceCount,
OutlinedFunction(unsigned Name, unsigned OccurrenceCount,
const std::vector<unsigned> &Sequence, unsigned Benefit,
unsigned FrameClass)
TargetInstrInfo::MachineOutlinerInfo &MInfo)
: Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence),
Benefit(Benefit), FrameClass(FrameClass) {}
Benefit(Benefit), MInfo(MInfo) {}
};
/// Represents an undefined index in the suffix tree.
const size_t EmptyIdx = -1;
const unsigned EmptyIdx = -1;
/// A node in a suffix tree which represents a substring or suffix.
///
@ -174,7 +190,7 @@ struct SuffixTreeNode {
bool IsInTree = true;
/// The start index of this node's substring in the main string.
size_t StartIdx = EmptyIdx;
unsigned StartIdx = EmptyIdx;
/// The end index of this node's substring in the main string.
///
@ -182,12 +198,12 @@ struct SuffixTreeNode {
/// step in the construction algorithm. To avoid having to update O(N)
/// nodes individually at the end of every step, the end index is stored
/// as a pointer.
size_t *EndIdx = nullptr;
unsigned *EndIdx = nullptr;
/// For leaves, the start index of the suffix represented by this node.
///
/// For all other nodes, this is ignored.
size_t SuffixIdx = EmptyIdx;
unsigned SuffixIdx = EmptyIdx;
/// \brief For internal nodes, a pointer to the internal node representing
/// the same sequence with the first character chopped off.
@ -216,11 +232,11 @@ struct SuffixTreeNode {
///
/// This is equal to the number of leaf children of the string. It represents
/// the number of suffixes that the node's string is a prefix of.
size_t OccurrenceCount = 0;
unsigned OccurrenceCount = 0;
/// The length of the string formed by concatenating the edge labels from the
/// root to this node.
size_t ConcatLen = 0;
unsigned ConcatLen = 0;
/// Returns true if this node is a leaf.
bool isLeaf() const { return SuffixIdx != EmptyIdx; }
@ -242,7 +258,7 @@ struct SuffixTreeNode {
return *EndIdx - StartIdx + 1;
}
SuffixTreeNode(size_t StartIdx, size_t *EndIdx, SuffixTreeNode *Link,
SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link,
SuffixTreeNode *Parent)
: StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
@ -301,7 +317,7 @@ private:
BumpPtrAllocator InternalEndIdxAllocator;
/// The end index of each leaf in the tree.
size_t LeafEndIdx = -1;
unsigned LeafEndIdx = -1;
/// \brief Helper struct which keeps track of the next insertion point in
/// Ukkonen's algorithm.
@ -310,10 +326,10 @@ private:
SuffixTreeNode *Node;
/// The index of the first character in the substring currently being added.
size_t Idx = EmptyIdx;
unsigned Idx = EmptyIdx;
/// The length of the substring we have to add at the current step.
size_t Len = 0;
unsigned Len = 0;
};
/// \brief The point the next insertion will take place at in the
@ -327,7 +343,7 @@ private:
/// \param Edge The label on the edge leaving \p Parent to this node.
///
/// \returns A pointer to the allocated leaf node.
SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, size_t StartIdx,
SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
unsigned Edge) {
assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
@ -347,14 +363,14 @@ private:
/// \param Edge The label on the edge leaving \p Parent to this node.
///
/// \returns A pointer to the allocated internal node.
SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, size_t StartIdx,
size_t EndIdx, unsigned Edge) {
SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
unsigned EndIdx, unsigned Edge) {
assert(StartIdx <= EndIdx && "String can't start after it ends!");
assert(!(!Parent && StartIdx != EmptyIdx) &&
"Non-root internal nodes must have parents!");
size_t *E = new (InternalEndIdxAllocator) size_t(EndIdx);
unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
SuffixTreeNode *N = new (NodeAllocator.Allocate())
SuffixTreeNode(StartIdx, E, Root, Parent);
if (Parent)
@ -369,7 +385,7 @@ private:
///
/// \param[in] CurrNode The node currently being visited.
/// \param CurrIdx The current index of the string being visited.
void setSuffixIndices(SuffixTreeNode &CurrNode, size_t CurrIdx) {
void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) {
bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
@ -415,7 +431,7 @@ private:
///
/// \returns The number of suffixes that have not been added at the end of
/// this step.
unsigned extend(size_t EndIdx, size_t SuffixesToAdd) {
unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) {
SuffixTreeNode *NeedsLink = nullptr;
while (SuffixesToAdd > 0) {
@ -447,7 +463,7 @@ private:
// insert a new node.
SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
size_t SubstringLen = NextNode->size();
unsigned SubstringLen = NextNode->size();
// Is the current suffix we're trying to insert longer than the size of
// the child we want to move to?
@ -543,13 +559,14 @@ public:
// Keep track of the number of suffixes we have to add of the current
// prefix.
size_t SuffixesToAdd = 0;
unsigned SuffixesToAdd = 0;
Active.Node = Root;
// Construct the suffix tree iteratively on each prefix of the string.
// PfxEndIdx is the end index of the current prefix.
// End is one past the last element in the string.
for (size_t PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End;
PfxEndIdx++) {
SuffixesToAdd++;
LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
@ -747,10 +764,10 @@ struct MachineOutliner : public ModulePass {
/// type of candidate.
///
/// \returns The length of the longest candidate found.
size_t findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
InstructionMapper &Mapper,
std::vector<Candidate> &CandidateList,
std::vector<OutlinedFunction> &FunctionList);
unsigned findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
InstructionMapper &Mapper,
std::vector<Candidate> &CandidateList,
std::vector<OutlinedFunction> &FunctionList);
/// \brief Replace the sequences of instructions represented by the
/// \p Candidates in \p CandidateList with calls to \p MachineFunctions
@ -820,16 +837,15 @@ ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); }
INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
false)
size_t
unsigned
MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
InstructionMapper &Mapper,
std::vector<Candidate> &CandidateList,
std::vector<OutlinedFunction> &FunctionList) {
CandidateList.clear();
FunctionList.clear();
size_t FnIdx = 0;
size_t MaxLen = 0;
unsigned FnIdx = 0;
unsigned MaxLen = 0;
// FIXME: Visit internal nodes instead of leaves.
for (SuffixTreeNode *Leaf : ST.LeafVector) {
@ -845,7 +861,7 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
continue;
// Figure out if this candidate is beneficial.
size_t StringLen = Leaf->ConcatLen - Leaf->size();
unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size();
// Too short to be beneficial; skip it.
// FIXME: This isn't necessarily true for, say, X86. If we factor in
@ -853,18 +869,17 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
if (StringLen < 2)
continue;
size_t CallOverhead = 0;
size_t SequenceOverhead = StringLen;
// If this is a beneficial class of candidate, then every one is stored in
// this vector.
std::vector<Candidate> CandidatesForRepeatedSeq;
// Used for getOutliningFrameOverhead.
// Describes the start and end point of each candidate. This allows the
// target to infer some information about each occurrence of each repeated
// sequence.
// FIXME: CandidatesForRepeatedSeq and this should be combined.
std::vector<
std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
CandidateClass;
RepeatedSequenceLocs;
// Figure out the call overhead for each instance of the sequence.
for (auto &ChildPair : Parent.Children) {
@ -876,26 +891,21 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
MachineBasicBlock::iterator EndIt =
Mapper.InstrList[M->SuffixIdx + StringLen - 1];
// Get the overhead for calling a function for this sequence and any
// target-specified data for how to construct the call.
std::pair<size_t, unsigned> CallOverheadPair =
TII.getOutliningCallOverhead(StartIt, EndIt);
CallOverhead += CallOverheadPair.first;
CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen, FnIdx,
CallOverheadPair.second);
CandidateClass.emplace_back(std::make_pair(StartIt, EndIt));
CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen, FnIdx);
RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt));
// Never visit this leaf again.
M->IsInTree = false;
}
}
std::pair<size_t, unsigned> FrameOverheadPair =
TII.getOutliningFrameOverhead(CandidateClass);
size_t FrameOverhead = FrameOverheadPair.first;
unsigned SequenceOverhead = StringLen;
TargetInstrInfo::MachineOutlinerInfo MInfo =
TII.getOutlininingCandidateInfo(RepeatedSequenceLocs);
size_t OutliningCost = CallOverhead + FrameOverhead + SequenceOverhead;
size_t NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount;
unsigned OutliningCost =
(MInfo.CallOverhead * Parent.OccurrenceCount) + MInfo.FrameOverhead;
unsigned NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount;
// Is it better to outline this candidate than not?
if (NotOutliningCost <= OutliningCost) {
@ -903,14 +913,14 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
// outlining.
// Emit a remark explaining why we didn't outline this candidate.
std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator> C =
CandidateClass[0];
RepeatedSequenceLocs[0];
MachineOptimizationRemarkEmitter MORE(
*(C.first->getParent()->getParent()), nullptr);
MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
C.first->getDebugLoc(),
C.first->getParent());
R << "Did not outline " << NV("Length", StringLen) << " instructions"
<< " from " << NV("NumOccurrences", CandidateClass.size())
<< " from " << NV("NumOccurrences", RepeatedSequenceLocs.size())
<< " locations."
<< " Instructions from outlining all occurrences ("
<< NV("OutliningCost", OutliningCost) << ")"
@ -919,9 +929,9 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
<< " (Also found at: ";
// Tell the user the other places the candidate was found.
for (size_t i = 1, e = CandidateClass.size(); i < e; i++) {
for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) {
R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
CandidateClass[i].first->getDebugLoc());
RepeatedSequenceLocs[i].first->getDebugLoc());
if (i != e - 1)
R << ", ";
}
@ -933,7 +943,7 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
continue;
}
size_t Benefit = NotOutliningCost - OutliningCost;
unsigned Benefit = NotOutliningCost - OutliningCost;
if (StringLen > MaxLen)
MaxLen = StringLen;
@ -942,6 +952,7 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
// benefit values and save them in the candidate list.
for (Candidate &C : CandidatesForRepeatedSeq) {
C.Benefit = Benefit;
C.MInfo = MInfo;
CandidateList.push_back(C);
}
@ -951,8 +962,7 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
CandidateSequence.push_back(ST.Str[i]);
FunctionList.emplace_back(FnIdx, CandidatesForRepeatedSeq.size(),
CandidateSequence, Benefit,
FrameOverheadPair.second);
CandidateSequence, Benefit, MInfo);
// Move to the next function.
FnIdx++;
@ -1023,7 +1033,7 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
continue;
}
size_t C2End = C2.StartIdx + C2.Len - 1;
unsigned C2End = C2.StartIdx + C2.Len - 1;
// Do C1 and C2 overlap?
//
@ -1050,13 +1060,9 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
F2.OccurrenceCount--;
// Remove the call overhead from the removed sequence.
MachineBasicBlock::iterator StartIt = Mapper.InstrList[C2.StartIdx];
MachineBasicBlock::iterator EndIt =
Mapper.InstrList[C2.StartIdx + C2.Len - 1];
F2.Benefit += C2.MInfo.CallOverhead;
F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt).first;
// Add back one instance of the sequence.
if (F2.Sequence.size() > F2.Benefit)
F2.Benefit = 0;
else
@ -1077,11 +1083,7 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
F1.OccurrenceCount--;
// Remove the call overhead from the removed sequence.
MachineBasicBlock::iterator StartIt = Mapper.InstrList[C1.StartIdx];
MachineBasicBlock::iterator EndIt =
Mapper.InstrList[C1.StartIdx + C1.Len - 1];
F1.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt).first;
F1.Benefit += C1.MInfo.CallOverhead;
// Add back one instance of the sequence.
if (F1.Sequence.size() > F1.Benefit)
@ -1110,7 +1112,7 @@ MachineOutliner::buildCandidateList(std::vector<Candidate> &CandidateList,
const TargetInstrInfo &TII) {
std::vector<unsigned> CandidateSequence; // Current outlining candidate.
size_t MaxCandidateLen = 0; // Length of the longest candidate.
unsigned MaxCandidateLen = 0; // Length of the longest candidate.
MaxCandidateLen =
findCandidates(ST, TII, Mapper, CandidateList, FunctionList);
@ -1157,7 +1159,7 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
// Insert the new function into the module.
MF.insert(MF.begin(), &MBB);
TII.insertOutlinerPrologue(MBB, MF, OF.FrameClass);
TII.insertOutlinerPrologue(MBB, MF, OF.MInfo);
// Copy over the instructions for the function using the integer mappings in
// its sequence.
@ -1172,7 +1174,7 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
MBB.insert(MBB.end(), NewMI);
}
TII.insertOutlinerEpilogue(MBB, MF, OF.FrameClass);
TII.insertOutlinerEpilogue(MBB, MF, OF.MInfo);
return &MF;
}
@ -1183,7 +1185,6 @@ bool MachineOutliner::outline(Module &M,
InstructionMapper &Mapper) {
bool OutlinedSomething = false;
// Replace the candidates with calls to their respective outlined functions.
for (const Candidate &C : CandidateList) {
@ -1221,7 +1222,7 @@ bool MachineOutliner::outline(Module &M,
const TargetInstrInfo &TII = *STI.getInstrInfo();
// Insert a call to the new function and erase the old sequence.
TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.CallClass);
TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.MInfo);
StartIt = Mapper.InstrList[C.StartIdx];
MBB->erase(StartIt, EndIt);

View File

@ -19,6 +19,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@ -4534,38 +4535,124 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
// Constants defining how certain sequences should be outlined.
const unsigned MachineOutlinerDefaultFn = 0;
const unsigned MachineOutlinerTailCallFn = 1;
/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1 Save LR OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 Restore LR I2
/// I3
/// RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION I1
/// RET I2
/// RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 I2
/// I3
/// RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
enum MachineOutlinerClass {
MachineOutlinerDefault, /// Emit a save, restore, call, and return.
MachineOutlinerTailCall, /// Only emit a branch.
MachineOutlinerNoLRSave /// Emit a call and return.
};
std::pair<size_t, unsigned> AArch64InstrInfo::getOutliningCallOverhead(
MachineBasicBlock::iterator &StartIt,
MachineBasicBlock::iterator &EndIt) const {
// Is this a tail-call?
if (EndIt->isTerminator()) {
// Yes, so we only have to emit a call. Return a cost of 1 + signify that
// this candidate should be tail-called.
return std::make_pair(1, MachineOutlinerTailCallFn);
}
bool AArch64InstrInfo::canOutlineWithoutLRSave(
MachineBasicBlock::iterator &CallInsertionPt) const {
// Was LR saved in the function containing this basic block?
MachineBasicBlock &MBB = *(CallInsertionPt->getParent());
LiveRegUnits LRU(getRegisterInfo());
LRU.addLiveOuts(MBB);
// No, so save + restore LR.
return std::make_pair(3, MachineOutlinerDefaultFn);
// Get liveness information from the end of the block to the end of the
// prospective outlined region.
std::for_each(MBB.rbegin(),
(MachineBasicBlock::reverse_iterator)CallInsertionPt,
[&LRU](MachineInstr &MI) {LRU.stepBackward(MI);}
);
// If the link register is available at this point, then we can safely outline
// the region without saving/restoring LR. Otherwise, we must emit a save and
// restore.
return LRU.available(AArch64::LR);
}
std::pair<size_t, unsigned> AArch64InstrInfo::getOutliningFrameOverhead(
std::vector<std::pair<MachineBasicBlock::iterator,
MachineBasicBlock::iterator>> &CandidateClass) const {
AArch64GenInstrInfo::MachineOutlinerInfo
AArch64InstrInfo::getOutlininingCandidateInfo(
std::vector<
std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
&RepeatedSequenceLocs) const {
// Is the last instruction in this class a terminator?
if (CandidateClass[0].second->isTerminator())
return std::make_pair(0, MachineOutlinerTailCallFn);
unsigned CallID = MachineOutlinerDefault;
unsigned FrameID = MachineOutlinerDefault;
unsigned NumInstrsForCall = 3;
unsigned NumInstrsToCreateFrame = 1;
// No, so we have to add a return to the end.
return std::make_pair(1, MachineOutlinerDefaultFn);
auto DoesntNeedLRSave =
[this](std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>
&I) { return canOutlineWithoutLRSave(I.second); };
// If the last instruction in any candidate is a terminator, then we should
// tail call all of the candidates.
if (RepeatedSequenceLocs[0].second->isTerminator()) {
CallID = MachineOutlinerTailCall;
FrameID = MachineOutlinerTailCall;
NumInstrsForCall = 1;
NumInstrsToCreateFrame = 0;
}
else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
DoesntNeedLRSave)) {
CallID = MachineOutlinerNoLRSave;
FrameID = MachineOutlinerNoLRSave;
NumInstrsForCall = 1;
NumInstrsToCreateFrame = 1;
}
return MachineOutlinerInfo(NumInstrsForCall, NumInstrsToCreateFrame, CallID,
FrameID);
}
bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
// If MF has a red zone, then we ought not to outline from it, since outlined
// functions can modify/read from the stack.
// If MF's address is taken, then we don't want to outline from it either
// since we don't really know what the user is doing with it.
return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone) &&
!MF.getFunction()->hasAddressTaken();
}
AArch64GenInstrInfo::MachineOutlinerInstrType
@ -4607,6 +4694,10 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
MOP.isTargetIndex())
return MachineOutlinerInstrType::Illegal;
// Don't outline anything that uses the link register.
if (MOP.isReg() && getRegisterInfo().regsOverlap(MOP.getReg(), AArch64::LR))
return MachineOutlinerInstrType::Illegal;
}
// Does this use the stack?
@ -4676,12 +4767,12 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
}
}
void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
MachineFunction &MF,
unsigned FrameClass) const {
void AArch64InstrInfo::insertOutlinerEpilogue(
MachineBasicBlock &MBB, MachineFunction &MF,
const MachineOutlinerInfo &MInfo) const {
// If this is a tail call outlined function, then there's already a return.
if (FrameClass == MachineOutlinerTailCallFn)
if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
return;
// It's not a tail call, so we have to insert the return ourselves.
@ -4689,28 +4780,40 @@ void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
.addReg(AArch64::LR, RegState::Undef);
MBB.insert(MBB.end(), ret);
// Did we have to modify the stack by saving the link register?
if (MInfo.FrameConstructionID == MachineOutlinerNoLRSave)
return;
// We modified the stack.
// Walk over the basic block and fix up all the stack accesses.
fixupPostOutline(MBB);
}
void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
MachineFunction &MF,
unsigned FrameClass) const {}
void AArch64InstrInfo::insertOutlinerPrologue(
MachineBasicBlock &MBB, MachineFunction &MF,
const MachineOutlinerInfo &MInfo) const {}
MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
MachineFunction &MF, unsigned CallClass) const {
MachineFunction &MF, const MachineOutlinerInfo &MInfo) const {
// Are we tail calling?
if (CallClass == MachineOutlinerTailCallFn) {
if (MInfo.CallConstructionID == MachineOutlinerTailCall) {
// If yes, then we can just branch to the label.
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::B))
.addGlobalAddress(M.getNamedValue(MF.getName())));
return It;
}
// We're not tail calling, so we have to save LR before the call and restore
// it after.
// Are we saving the link register?
if (MInfo.CallConstructionID == MachineOutlinerNoLRSave) {
// No, so just insert the call.
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
.addGlobalAddress(M.getNamedValue(MF.getName())));
return It;
}
// We have a default call. Save the link register.
MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR)

View File

@ -350,24 +350,23 @@ public:
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
getSerializableMachineMemOperandTargetFlags() const override;
bool
canOutlineWithoutLRSave(MachineBasicBlock::iterator &CallInsertionPt) const;
bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
std::pair<size_t, unsigned>
getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt,
MachineBasicBlock::iterator &EndIt) const override;
std::pair<size_t, unsigned> getOutliningFrameOverhead(
MachineOutlinerInfo getOutlininingCandidateInfo(
std::vector<
std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
&CandidateClass) const override;
&RepeatedSequenceLocs) const override;
AArch64GenInstrInfo::MachineOutlinerInstrType
getOutliningType(MachineInstr &MI) const override;
void insertOutlinerEpilogue(MachineBasicBlock &MBB, MachineFunction &MF,
unsigned FrameClass) const override;
const MachineOutlinerInfo &MInfo) const override;
void insertOutlinerPrologue(MachineBasicBlock &MBB, MachineFunction &MF,
unsigned FrameClass) const override;
const MachineOutlinerInfo &MInfo) const override;
MachineBasicBlock::iterator
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
unsigned CallClass) const override;
const MachineOutlinerInfo &MInfo) const override;
/// Returns true if the instruction has a shift left that can be executed
/// more efficiently.
bool isExynosShiftLeftFast(const MachineInstr &MI) const;

View File

@ -10723,31 +10723,54 @@ char LDTLSCleanup::ID = 0;
FunctionPass*
llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
// Constants defining how certain sequences should be outlined.
const unsigned MachineOutlinerDefaultFn = 0;
const unsigned MachineOutlinerTailCallFn = 1;
/// Constants defining how certain sequences should be outlined.
///
/// \p MachineOutlinerDefault implies that the function is called with a call
/// instruction, and a return must be emitted for the outlined function frame.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> call OUTLINED_FUNCTION I1
/// I3 I2
/// I3
/// ret
///
/// * Call construction overhead: 1 (call instruction)
/// * Frame construction overhead: 1 (return instruction)
///
/// \p MachineOutlinerTailCall implies that the function is being tail called.
/// A jump is emitted instead of a call, and the return is already present in
/// the outlined sequence. That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> jmp OUTLINED_FUNCTION I1
/// ret I2
/// ret
///
/// * Call construction overhead: 1 (jump instruction)
/// * Frame construction overhead: 0 (don't need to return)
///
enum MachineOutlinerClass {
MachineOutlinerDefault,
MachineOutlinerTailCall
};
std::pair<size_t, unsigned> X86InstrInfo::getOutliningCallOverhead(
MachineBasicBlock::iterator &StartIt,
MachineBasicBlock::iterator &EndIt) const {
X86GenInstrInfo::MachineOutlinerInfo
X86InstrInfo::getOutlininingCandidateInfo(
std::vector<
std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
&RepeatedSequenceLocs) const {
// Is this a tail call? If it is, make note of it.
if (EndIt->isTerminator())
return std::make_pair(1, MachineOutlinerTailCallFn);
return std::make_pair(1, MachineOutlinerDefaultFn);
}
std::pair<size_t, unsigned> X86InstrInfo::getOutliningFrameOverhead(
std::vector<std::pair<MachineBasicBlock::iterator,
MachineBasicBlock::iterator>> &CandidateClass) const {
// Is this a tail-call?
// Is the last instruction in this class a terminator?
if (CandidateClass[0].second->isTerminator())
return std::make_pair(0, MachineOutlinerTailCallFn);
// No, so we have to add a return to the end.
return std::make_pair(1, MachineOutlinerDefaultFn);
if (RepeatedSequenceLocs[0].second->isTerminator())
return MachineOutlinerInfo(1, // Number of instructions to emit call.
0, // Number of instructions to emit frame.
MachineOutlinerTailCall, // Type of call.
MachineOutlinerTailCall // Type of frame.
);
return MachineOutlinerInfo(1, 1, MachineOutlinerDefault,
MachineOutlinerDefault);
}
bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
@ -10811,10 +10834,10 @@ X86InstrInfo::getOutliningType(MachineInstr &MI) const {
void X86InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
MachineFunction &MF,
unsigned FrameClass) const {
const MachineOutlinerInfo &MInfo)
const {
// If we're a tail call, we already have a return, so don't do anything.
if (FrameClass == MachineOutlinerTailCallFn)
if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
return;
// We're a normal call, so our sequence doesn't have a return instruction.
@ -10825,15 +10848,16 @@ void X86InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
void X86InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
MachineFunction &MF,
unsigned FrameClass) const {}
const MachineOutlinerInfo &MInfo)
const {}
MachineBasicBlock::iterator
X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It,
MachineFunction &MF,
unsigned CallClass) const {
const MachineOutlinerInfo &MInfo) const {
// Is it a tail call?
if (CallClass == MachineOutlinerTailCallFn) {
if (MInfo.CallConstructionID == MachineOutlinerTailCall) {
// Yes, just insert a JMP.
It = MBB.insert(It,
BuildMI(MF, DebugLoc(), get(X86::JMP_1))

View File

@ -559,14 +559,10 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
std::pair<size_t, unsigned>
getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt,
MachineBasicBlock::iterator &EndIt) const override;
std::pair<size_t, unsigned> getOutliningFrameOverhead(
virtual MachineOutlinerInfo getOutlininingCandidateInfo(
std::vector<
std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
&CandidateClass) const override;
&RepeatedSequenceLocs) const override;
bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
@ -574,15 +570,15 @@ public:
getOutliningType(MachineInstr &MI) const override;
void insertOutlinerEpilogue(MachineBasicBlock &MBB, MachineFunction &MF,
unsigned FrameClass) const override;
const MachineOutlinerInfo &MInfo) const override;
void insertOutlinerPrologue(MachineBasicBlock &MBB, MachineFunction &MF,
unsigned FrameClass) const override;
const MachineOutlinerInfo &MInfo) const override;
MachineBasicBlock::iterator
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
unsigned CallClass) const override;
const MachineOutlinerInfo &MInfo) const override;
protected:
/// Commutes the operands in the given instruction by changing the operands

View File

@ -1,7 +1,7 @@
; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -pass-remarks-missed=machine-outliner -o /dev/null 2>&1 | FileCheck %s
; CHECK: machine-outliner-remarks.ll:5:9:
; CHECK-SAME: Did not outline 2 instructions from 2 locations.
; CHECK-SAME: Instructions from outlining all occurrences (9) >=
; CHECK-SAME: Instructions from outlining all occurrences (7) >=
; CHECK-SAME: Unoutlined instruction count (4)
; CHECK-SAME: (Also found at: machine-outliner-remarks.ll:13:9)
; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -o /dev/null -pass-remarks-missed=machine-outliner -pass-remarks-output=%t.yaml
@ -19,7 +19,7 @@
; YAML-NEXT: - NumOccurrences: '2'
; YAML-NEXT: - String: ' locations.'
; YAML-NEXT: - String: ' Instructions from outlining all occurrences ('
; YAML-NEXT: - OutliningCost: '9'
; YAML-NEXT: - OutliningCost: '7'
; YAML-NEXT: - String: ')'
; YAML-NEXT: - String: ' >= Unoutlined instruction count ('
; YAML-NEXT: - NotOutliningCost: '4'

View File

@ -1,42 +1,38 @@
# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner %s -o - | FileCheck %s
--- |
target triple = "aarch64---"
define i32 @main() #0 {
entry:
ret i32 0
}
attributes #0 = { noinline noredzone nounwind optnone ssp uwtable }
# CHECK-LABEL: @OUTLINED_FUNCTION_0
define void @bar(i32 %a) #0 {
ret void
}
attributes #0 = { noinline noredzone "no-frame-pointer-elim"="true" }
...
---
# This test ensures that we
# - Create outlined functions
# - Don't outline anything to do with LR or W30
# - Save LR when it's not available
#
# CHECK-LABEL: name: main
# CHECK: BL @OUTLINED_FUNCTION_0
# CHECK: STRHHroW %w16, %x9, %w30, 1, 1
# CHECK: %lr = ORRXri %xzr, 1
# CHECK: BL @OUTLINED_FUNCTION_0
# CHECK: STRHHroW %w16, %x9, %w30, 1, 1
# CHECK: %lr = ORRXri %xzr, 1
# CHECK: BL @OUTLINED_FUNCTION_0
# CHECK: STRHHroW %w16, %x9, %w30, 1, 1
# CHECK: %lr = ORRXri %xzr, 1
# CHECK: BL @OUTLINED_FUNCTION_[[F0:[0-9]+]]
# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16
# CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1
# CHECK-NEXT: %lr = ORRXri %xzr, 1
# CHECK: BL @OUTLINED_FUNCTION_[[F0]]
# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16
# CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1
# CHECK-NEXT: %lr = ORRXri %xzr, 1
# CHECK: BL @OUTLINED_FUNCTION_[[F0]]
# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16
# CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1
# CHECK-NEXT: %lr = ORRXri %xzr, 1
name: main
alignment: 2
tracksRegLiveness: true
frameInfo:
stackSize: 16
maxAlignment: 4
maxCallFrameSize: 0
body: |
bb.0.entry:
bb.0:
%sp = frame-setup SUBXri %sp, 16, 0
%x9 = ORRXri %xzr, 1
%w16 = ORRWri %wzr, 1
@ -74,8 +70,43 @@ body: |
STRHHroW %w16, %x9, %w30, 1, 1
%lr = ORRXri %xzr, 1
%w5 = ORRWri %wzr, 1995
%sp = ADDXri %sp, 16, 0
RET undef %lr
...
---
# This test ensures that we can avoid saving LR when it's available.
# CHECK-LABEL: bb.1:
# CHECK: BL @OUTLINED_FUNCTION_[[F1:[0-9]+]], implicit-def %lr, implicit %sp
# CHECK-NEXT: %w17 = ORRWri %wzr, 2
# CHECK-NEXT: BL @OUTLINED_FUNCTION_[[F1]], implicit-def %lr, implicit %sp
# CHECK-NEXT: %w8 = ORRWri %wzr, 0
name: bar
tracksRegLiveness: true
body: |
bb.0:
liveins: %w0, %lr, %w8
%sp = frame-setup SUBXri %sp, 32, 0
%fp = frame-setup ADDXri %sp, 16, 0
bb.1:
%w17 = ORRWri %wzr, 1
%w17 = ORRWri %wzr, 1
%w17 = ORRWri %wzr, 1
%w17 = ORRWri %wzr, 1
%w17 = ORRWri %wzr, 2
%w17 = ORRWri %wzr, 1
%w17 = ORRWri %wzr, 1
%w17 = ORRWri %wzr, 1
%w17 = ORRWri %wzr, 1
%w8 = ORRWri %wzr, 0
bb.2:
%fp, %lr = LDPXi %sp, 2
%sp = ADDXri %sp, 32, 0
RET undef %lr
...
# CHECK-LABEL: name: OUTLINED_FUNCTION_{{[0-9]}}
# CHECK=LABEL: name: OUTLINED_FUNCTION_{{[1-9]}}

View File

@ -96,14 +96,14 @@ define i32 @main() #0 {
attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="true" }
; CHECK-LABEL: l_OUTLINED_FUNCTION_1:
; CHECK-LABEL: l_OUTLINED_FUNCTION_{{[0-9]+}}:
; CHECK: movl $1, -{{[0-9]+}}(%rbp)
; CHECK-NEXT: movl $2, -{{[0-9]+}}(%rbp)
; CHECK-NEXT: movl $3, -{{[0-9]+}}(%rbp)
; CHECK-NEXT: movl $4, -{{[0-9]+}}(%rbp)
; CHECK-NEXT: retq
; CHECK-LABEL: l_OUTLINED_FUNCTION_0:
; CHECK-LABEL: l_OUTLINED_FUNCTION_{{[0-9]+}}:
; CHECK: movl $1, -{{[0-9]+}}(%rbp)
; CHECK-NEXT: movl $2, -{{[0-9]+}}(%rbp)
; CHECK-NEXT: movl $3, -{{[0-9]+}}(%rbp)