mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[PGO][PGSO] Instrument the code gen / target passes.
Summary: Split off from D67120. Add the profile-guided size optimization instrumentation / queries in the code gen or target passes. This doesn't enable the size optimizations in those passes yet, as they are currently disabled in shouldOptimizeForSize (for non-IR pass queries). This is a second attempt after D71072 was reverted. Reviewers: davidxl Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71149
This commit is contained in:
parent
e7c754ad96
commit
1aa49b7b60
@ -48,6 +48,7 @@ class GlobalObject;
|
||||
class GlobalValue;
|
||||
class GlobalVariable;
|
||||
class MachineBasicBlock;
|
||||
class MachineBlockFrequencyInfo;
|
||||
class MachineConstantPoolValue;
|
||||
class MachineDominatorTree;
|
||||
class MachineFunction;
|
||||
@ -69,6 +70,7 @@ class MCSymbol;
|
||||
class MCTargetOptions;
|
||||
class MDNode;
|
||||
class Module;
|
||||
class ProfileSummaryInfo;
|
||||
class raw_ostream;
|
||||
class RemarkStreamer;
|
||||
class StackMaps;
|
||||
@ -108,6 +110,10 @@ public:
|
||||
/// Optimization remark emitter.
|
||||
MachineOptimizationRemarkEmitter *ORE;
|
||||
|
||||
MachineBlockFrequencyInfo *MBFI;
|
||||
|
||||
ProfileSummaryInfo *PSI;
|
||||
|
||||
/// The symbol for the current function. This is recalculated at the beginning
|
||||
/// of each call to runOnMachineFunction().
|
||||
MCSymbol *CurrentFnSym = nullptr;
|
||||
|
@ -182,6 +182,10 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
MachineBlockFrequencyInfo *getBFI() {
|
||||
return MBFI;
|
||||
}
|
||||
|
||||
private:
|
||||
MachineFunction &MF;
|
||||
|
||||
|
@ -25,11 +25,13 @@
|
||||
namespace llvm {
|
||||
|
||||
class MachineBasicBlock;
|
||||
class MachineBlockFrequencyInfo;
|
||||
class MachineBranchProbabilityInfo;
|
||||
class MachineFunction;
|
||||
class MachineInstr;
|
||||
class MachineModuleInfo;
|
||||
class MachineRegisterInfo;
|
||||
class ProfileSummaryInfo;
|
||||
class TargetRegisterInfo;
|
||||
|
||||
/// Utility class to perform tail duplication.
|
||||
@ -40,6 +42,8 @@ class TailDuplicator {
|
||||
const MachineModuleInfo *MMI;
|
||||
MachineRegisterInfo *MRI;
|
||||
MachineFunction *MF;
|
||||
const MachineBlockFrequencyInfo *MBFI;
|
||||
ProfileSummaryInfo *PSI;
|
||||
bool PreRegAlloc;
|
||||
bool LayoutMode;
|
||||
unsigned TailDupSize;
|
||||
@ -65,6 +69,8 @@ public:
|
||||
/// default implies using the command line value TailDupSize.
|
||||
void initMF(MachineFunction &MF, bool PreRegAlloc,
|
||||
const MachineBranchProbabilityInfo *MBPI,
|
||||
const MachineBlockFrequencyInfo *MBFI,
|
||||
ProfileSummaryInfo *PSI,
|
||||
bool LayoutMode, unsigned TailDupSize = 0);
|
||||
|
||||
bool tailDuplicateBlocks();
|
||||
|
@ -31,13 +31,16 @@
|
||||
#include "llvm/Analysis/ConstantFolding.h"
|
||||
#include "llvm/Analysis/EHPersonalities.h"
|
||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/BinaryFormat/COFF.h"
|
||||
#include "llvm/BinaryFormat/Dwarf.h"
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
#include "llvm/CodeGen/GCMetadata.h"
|
||||
#include "llvm/CodeGen/GCMetadataPrinter.h"
|
||||
#include "llvm/CodeGen/GCStrategy.h"
|
||||
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineConstantPool.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
@ -52,6 +55,7 @@
|
||||
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
|
||||
#include "llvm/CodeGen/MachineSizeOpts.h"
|
||||
#include "llvm/CodeGen/StackMaps.h"
|
||||
#include "llvm/CodeGen/TargetFrameLowering.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
@ -248,6 +252,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<MachineModuleInfoWrapperPass>();
|
||||
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
|
||||
AU.addRequired<GCModuleInfo>();
|
||||
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
}
|
||||
|
||||
bool AsmPrinter::doInitialization(Module &M) {
|
||||
@ -1684,6 +1690,13 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
|
||||
}
|
||||
|
||||
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
|
||||
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
MBFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
// ORE conditionally computes MBFI. If available, use it, otherwise
|
||||
// request it.
|
||||
(ORE->getBFI() ? ORE->getBFI() :
|
||||
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()) :
|
||||
nullptr;
|
||||
}
|
||||
|
||||
namespace {
|
||||
@ -2913,8 +2926,10 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
|
||||
void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
|
||||
MCCodePaddingContext &Context) const {
|
||||
assert(MF != nullptr && "Machine function must be valid");
|
||||
bool OptForSize = MF->getFunction().hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
|
||||
Context.IsPaddingActive = !MF->hasInlineAsm() &&
|
||||
!MF->getFunction().hasOptSize() &&
|
||||
!OptForSize &&
|
||||
TM.getOptLevel() != CodeGenOpt::None;
|
||||
Context.IsBasicBlockReachableViaFallthrough =
|
||||
std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/CodeGen/Analysis.h"
|
||||
#include "llvm/CodeGen/LivePhysRegs.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
@ -38,6 +39,7 @@
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineSizeOpts.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetOpcodes.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
@ -103,6 +105,7 @@ namespace {
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<MachineBlockFrequencyInfo>();
|
||||
AU.addRequired<MachineBranchProbabilityInfo>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
AU.addRequired<TargetPassConfig>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
@ -129,7 +132,8 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
BranchFolder::MBFIWrapper MBBFreqInfo(
|
||||
getAnalysis<MachineBlockFrequencyInfo>());
|
||||
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
|
||||
getAnalysis<MachineBranchProbabilityInfo>());
|
||||
getAnalysis<MachineBranchProbabilityInfo>(),
|
||||
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
|
||||
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
|
||||
return Folder.OptimizeFunction(
|
||||
MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(),
|
||||
@ -139,9 +143,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
|
||||
MBFIWrapper &FreqInfo,
|
||||
const MachineBranchProbabilityInfo &ProbInfo,
|
||||
ProfileSummaryInfo *PSI,
|
||||
unsigned MinTailLength)
|
||||
: EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
|
||||
MBBFreqInfo(FreqInfo), MBPI(ProbInfo) {
|
||||
MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) {
|
||||
if (MinCommonTailLength == 0)
|
||||
MinCommonTailLength = TailMergeSize;
|
||||
switch (FlagEnableTailMerge) {
|
||||
@ -585,7 +590,9 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
|
||||
MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
|
||||
MachineBasicBlock *PredBB,
|
||||
DenseMap<const MachineBasicBlock *, int> &EHScopeMembership,
|
||||
bool AfterPlacement) {
|
||||
bool AfterPlacement,
|
||||
BranchFolder::MBFIWrapper &MBBFreqInfo,
|
||||
ProfileSummaryInfo *PSI) {
|
||||
// It is never profitable to tail-merge blocks from two different EH scopes.
|
||||
if (!EHScopeMembership.empty()) {
|
||||
auto EHScope1 = EHScopeMembership.find(MBB1);
|
||||
@ -682,7 +689,11 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
|
||||
// branch instruction, which is likely to be smaller than the 2
|
||||
// instructions that would be deleted in the merge.
|
||||
MachineFunction *MF = MBB1->getParent();
|
||||
return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
|
||||
bool OptForSize =
|
||||
MF->getFunction().hasOptSize() ||
|
||||
(llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) &&
|
||||
llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI()));
|
||||
return EffectiveTailLen >= 2 && OptForSize &&
|
||||
(FullBlockTail1 || FullBlockTail2);
|
||||
}
|
||||
|
||||
@ -704,7 +715,7 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
|
||||
CommonTailLen, TrialBBI1, TrialBBI2,
|
||||
SuccBB, PredBB,
|
||||
EHScopeMembership,
|
||||
AfterBlockPlacement)) {
|
||||
AfterBlockPlacement, MBBFreqInfo, PSI)) {
|
||||
if (CommonTailLen > maxCommonTailLength) {
|
||||
SameTails.clear();
|
||||
maxCommonTailLength = CommonTailLen;
|
||||
@ -1534,8 +1545,10 @@ ReoptimizeBlock:
|
||||
}
|
||||
}
|
||||
|
||||
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
|
||||
MF.getFunction().hasOptSize()) {
|
||||
bool OptForSize =
|
||||
MF.getFunction().hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI());
|
||||
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
|
||||
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
|
||||
// direction, thereby defeating careful block placement and regressing
|
||||
// performance. Therefore, only consider this when optimizing for size.
|
||||
|
@ -27,6 +27,7 @@ class MachineFunction;
|
||||
class MachineLoopInfo;
|
||||
class MachineModuleInfo;
|
||||
class MachineRegisterInfo;
|
||||
class ProfileSummaryInfo;
|
||||
class raw_ostream;
|
||||
class TargetInstrInfo;
|
||||
class TargetRegisterInfo;
|
||||
@ -39,6 +40,7 @@ class TargetRegisterInfo;
|
||||
bool CommonHoist,
|
||||
MBFIWrapper &FreqInfo,
|
||||
const MachineBranchProbabilityInfo &ProbInfo,
|
||||
ProfileSummaryInfo *PSI,
|
||||
// Min tail length to merge. Defaults to commandline
|
||||
// flag. Ignored for optsize.
|
||||
unsigned MinTailLength = 0);
|
||||
@ -145,6 +147,7 @@ class TargetRegisterInfo;
|
||||
const BlockFrequency Freq) const;
|
||||
void view(const Twine &Name, bool isSimple = true);
|
||||
uint64_t getEntryFreq() const;
|
||||
const MachineBlockFrequencyInfo &getMBFI() { return MBFI; }
|
||||
|
||||
private:
|
||||
const MachineBlockFrequencyInfo &MBFI;
|
||||
@ -154,6 +157,7 @@ class TargetRegisterInfo;
|
||||
private:
|
||||
MBFIWrapper &MBBFreqInfo;
|
||||
const MachineBranchProbabilityInfo &MBPI;
|
||||
ProfileSummaryInfo *PSI;
|
||||
|
||||
bool TailMergeBlocks(MachineFunction &MF);
|
||||
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
|
||||
|
@ -90,6 +90,7 @@
|
||||
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
|
||||
#include "llvm/Transforms/Utils/SizeOpts.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
@ -256,6 +257,7 @@ class TypePromotionTransaction;
|
||||
const LoopInfo *LI;
|
||||
std::unique_ptr<BlockFrequencyInfo> BFI;
|
||||
std::unique_ptr<BranchProbabilityInfo> BPI;
|
||||
ProfileSummaryInfo *PSI;
|
||||
|
||||
/// As we scan instructions optimizing them, this is the next instruction
|
||||
/// to optimize. Transforms that can invalidate this should update it.
|
||||
@ -298,7 +300,7 @@ class TypePromotionTransaction;
|
||||
/// Keep track of SExt promoted.
|
||||
ValueToSExts ValToSExtendedUses;
|
||||
|
||||
/// True if optimizing for size.
|
||||
/// True if the function has the OptSize attribute.
|
||||
bool OptSize;
|
||||
|
||||
/// DataLayout for the Function being processed.
|
||||
@ -435,10 +437,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
|
||||
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
||||
BPI.reset(new BranchProbabilityInfo(F, *LI));
|
||||
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
|
||||
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
OptSize = F.hasOptSize();
|
||||
|
||||
ProfileSummaryInfo *PSI =
|
||||
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
if (ProfileGuidedSectionPrefix) {
|
||||
if (PSI->isFunctionHotInCallGraph(&F, *BFI))
|
||||
F.setSectionPrefix(".hot");
|
||||
@ -457,7 +457,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
|
||||
// bypassSlowDivision may create new BBs, but we don't want to reapply the
|
||||
// optimization to those blocks.
|
||||
BasicBlock* Next = BB->getNextNode();
|
||||
EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
|
||||
// F.hasOptSize is already checked in the outer if statement.
|
||||
if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
|
||||
EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
|
||||
BB = Next;
|
||||
}
|
||||
}
|
||||
@ -1938,7 +1940,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
|
||||
// cold block. This interacts with our handling for loads and stores to
|
||||
// ensure that we can fold all uses of a potential addressing computation
|
||||
// into their uses. TODO: generalize this to work over profiling data
|
||||
if (!OptSize && CI->hasFnAttr(Attribute::Cold))
|
||||
bool OptForSize = OptSize || llvm::shouldOptimizeForSize(BB, PSI, BFI.get());
|
||||
if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
|
||||
for (auto &Arg : CI->arg_operands()) {
|
||||
if (!Arg->getType()->isPointerTy())
|
||||
continue;
|
||||
@ -2875,16 +2878,24 @@ class AddressingModeMatcher {
|
||||
/// When true, IsProfitableToFoldIntoAddressingMode always returns true.
|
||||
bool IgnoreProfitability;
|
||||
|
||||
/// True if we are optimizing for size.
|
||||
bool OptSize;
|
||||
|
||||
ProfileSummaryInfo *PSI;
|
||||
BlockFrequencyInfo *BFI;
|
||||
|
||||
AddressingModeMatcher(
|
||||
SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
|
||||
const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
|
||||
ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
|
||||
InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
|
||||
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
|
||||
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
|
||||
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
|
||||
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
|
||||
DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
|
||||
MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
|
||||
PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
|
||||
PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
|
||||
OptSize(OptSize), PSI(PSI), BFI(BFI) {
|
||||
IgnoreProfitability = false;
|
||||
}
|
||||
|
||||
@ -2902,12 +2913,14 @@ public:
|
||||
const TargetLowering &TLI, const TargetRegisterInfo &TRI,
|
||||
const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
|
||||
TypePromotionTransaction &TPT,
|
||||
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
|
||||
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
|
||||
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
|
||||
ExtAddrMode Result;
|
||||
|
||||
bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
|
||||
MemoryInst, Result, InsertedInsts,
|
||||
PromotedInsts, TPT, LargeOffsetGEP)
|
||||
PromotedInsts, TPT, LargeOffsetGEP,
|
||||
OptSize, PSI, BFI)
|
||||
.matchAddr(V, 0);
|
||||
(void)Success; assert(Success && "Couldn't select *anything*?");
|
||||
return Result;
|
||||
@ -4518,7 +4531,8 @@ static bool FindAllMemoryUses(
|
||||
Instruction *I,
|
||||
SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
|
||||
SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
|
||||
const TargetRegisterInfo &TRI, int SeenInsts = 0) {
|
||||
const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
|
||||
BlockFrequencyInfo *BFI, int SeenInsts = 0) {
|
||||
// If we already considered this instruction, we're done.
|
||||
if (!ConsideredInsts.insert(I).second)
|
||||
return false;
|
||||
@ -4527,8 +4541,6 @@ static bool FindAllMemoryUses(
|
||||
if (!MightBeFoldableInst(I))
|
||||
return true;
|
||||
|
||||
const bool OptSize = I->getFunction()->hasOptSize();
|
||||
|
||||
// Loop over all the uses, recursively processing them.
|
||||
for (Use &U : I->uses()) {
|
||||
// Conservatively return true if we're seeing a large number or a deep chain
|
||||
@ -4569,7 +4581,9 @@ static bool FindAllMemoryUses(
|
||||
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
|
||||
// If this is a cold call, we can sink the addressing calculation into
|
||||
// the cold path. See optimizeCallInst
|
||||
if (!OptSize && CI->hasFnAttr(Attribute::Cold))
|
||||
bool OptForSize = OptSize ||
|
||||
llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
|
||||
if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
|
||||
continue;
|
||||
|
||||
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
|
||||
@ -4581,8 +4595,8 @@ static bool FindAllMemoryUses(
|
||||
continue;
|
||||
}
|
||||
|
||||
if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
|
||||
SeenInsts))
|
||||
if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
|
||||
PSI, BFI, SeenInsts))
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -4670,7 +4684,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
|
||||
// the use is just a particularly nice way of sinking it.
|
||||
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
|
||||
SmallPtrSet<Instruction*, 16> ConsideredInsts;
|
||||
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
|
||||
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
|
||||
PSI, BFI))
|
||||
return false; // Has a non-memory, non-foldable use!
|
||||
|
||||
// Now that we know that all uses of this instruction are part of a chain of
|
||||
@ -4706,7 +4721,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
|
||||
TPT.getRestorationPoint();
|
||||
AddressingModeMatcher Matcher(
|
||||
MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
|
||||
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
|
||||
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);
|
||||
Matcher.IgnoreProfitability = true;
|
||||
bool Success = Matcher.matchAddr(Address, 0);
|
||||
(void)Success; assert(Success && "Couldn't select *anything*?");
|
||||
@ -4812,7 +4827,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
|
||||
0);
|
||||
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
|
||||
V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
|
||||
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
|
||||
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
|
||||
BFI.get());
|
||||
|
||||
GetElementPtrInst *GEP = LargeOffsetGEP.first;
|
||||
if (GEP && !NewGEPBases.count(GEP)) {
|
||||
@ -6030,7 +6046,9 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
|
||||
/// turn it into a branch.
|
||||
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
|
||||
// If branch conversion isn't desirable, exit early.
|
||||
if (DisableSelectToBranch || OptSize || !TLI)
|
||||
if (DisableSelectToBranch ||
|
||||
OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()) ||
|
||||
!TLI)
|
||||
return false;
|
||||
|
||||
// Find all consecutive select instructions that share the same condition.
|
||||
|
@ -13,6 +13,8 @@
|
||||
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/ConstantFolding.h"
|
||||
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
@ -21,6 +23,7 @@
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/Transforms/Utils/SizeOpts.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -721,7 +724,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
|
||||
/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
|
||||
/// ret i32 %phi.res
|
||||
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
|
||||
const TargetLowering *TLI, const DataLayout *DL) {
|
||||
const TargetLowering *TLI, const DataLayout *DL,
|
||||
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
|
||||
NumMemCmpCalls++;
|
||||
|
||||
// Early exit from expansion if -Oz.
|
||||
@ -742,18 +746,20 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
|
||||
// TTI call to check if target would like to expand memcmp. Also, get the
|
||||
// available load sizes.
|
||||
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
|
||||
auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(),
|
||||
bool OptForSize = CI->getFunction()->hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
|
||||
auto Options = TTI->enableMemCmpExpansion(OptForSize,
|
||||
IsUsedForZeroCmp);
|
||||
if (!Options) return false;
|
||||
|
||||
if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
|
||||
Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
|
||||
|
||||
if (CI->getFunction()->hasOptSize() &&
|
||||
if (OptForSize &&
|
||||
MaxLoadsPerMemcmpOptSize.getNumOccurrences())
|
||||
Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
|
||||
|
||||
if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences())
|
||||
if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
|
||||
Options.MaxNumLoads = MaxLoadsPerMemcmp;
|
||||
|
||||
MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
|
||||
@ -799,7 +805,11 @@ public:
|
||||
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
|
||||
const TargetTransformInfo *TTI =
|
||||
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
||||
auto PA = runImpl(F, TLI, TTI, TL);
|
||||
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
|
||||
nullptr;
|
||||
auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI);
|
||||
return !PA.areAllPreserved();
|
||||
}
|
||||
|
||||
@ -807,22 +817,26 @@ private:
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<TargetLibraryInfoWrapperPass>();
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
|
||||
FunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
|
||||
const TargetTransformInfo *TTI,
|
||||
const TargetLowering* TL);
|
||||
const TargetLowering* TL,
|
||||
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI);
|
||||
// Returns true if a change was made.
|
||||
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
|
||||
const TargetTransformInfo *TTI, const TargetLowering* TL,
|
||||
const DataLayout& DL);
|
||||
const DataLayout& DL, ProfileSummaryInfo *PSI,
|
||||
BlockFrequencyInfo *BFI);
|
||||
};
|
||||
|
||||
bool ExpandMemCmpPass::runOnBlock(
|
||||
BasicBlock &BB, const TargetLibraryInfo *TLI,
|
||||
const TargetTransformInfo *TTI, const TargetLowering* TL,
|
||||
const DataLayout& DL) {
|
||||
const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
|
||||
for (Instruction& I : BB) {
|
||||
CallInst *CI = dyn_cast<CallInst>(&I);
|
||||
if (!CI) {
|
||||
@ -831,7 +845,7 @@ bool ExpandMemCmpPass::runOnBlock(
|
||||
LibFunc Func;
|
||||
if (TLI->getLibFunc(ImmutableCallSite(CI), Func) &&
|
||||
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
|
||||
expandMemCmp(CI, TTI, TL, &DL)) {
|
||||
expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -841,11 +855,12 @@ bool ExpandMemCmpPass::runOnBlock(
|
||||
|
||||
PreservedAnalyses ExpandMemCmpPass::runImpl(
|
||||
Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
|
||||
const TargetLowering* TL) {
|
||||
const TargetLowering* TL, ProfileSummaryInfo *PSI,
|
||||
BlockFrequencyInfo *BFI) {
|
||||
const DataLayout& DL = F.getParent()->getDataLayout();
|
||||
bool MadeChanges = false;
|
||||
for (auto BBIt = F.begin(); BBIt != F.end();) {
|
||||
if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) {
|
||||
if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) {
|
||||
MadeChanges = true;
|
||||
// If changes were made, restart the function from the beginning, since
|
||||
// the structure of the function was changed.
|
||||
@ -864,6 +879,8 @@ INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
|
||||
"Expand memcmp() to load/stores", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
|
||||
"Expand memcmp() to load/stores", false, false)
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "llvm/ADT/SparseSet.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/iterator_range.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/CodeGen/LivePhysRegs.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
@ -213,6 +214,7 @@ namespace {
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<MachineBlockFrequencyInfo>();
|
||||
AU.addRequired<MachineBranchProbabilityInfo>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
@ -434,6 +436,7 @@ char &llvm::IfConverterID = IfConverter::ID;
|
||||
|
||||
INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false)
|
||||
|
||||
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
|
||||
@ -446,6 +449,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
|
||||
TRI = ST.getRegisterInfo();
|
||||
BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
|
||||
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
|
||||
ProfileSummaryInfo *PSI =
|
||||
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
MRI = &MF.getRegInfo();
|
||||
SchedModel.init(&ST);
|
||||
|
||||
@ -456,7 +461,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
|
||||
bool BFChange = false;
|
||||
if (!PreRegAlloc) {
|
||||
// Tail merging tends to expose more if-conversion opportunities.
|
||||
BranchFolder BF(true, false, MBFI, *MBPI);
|
||||
BranchFolder BF(true, false, MBFI, *MBPI, PSI);
|
||||
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
|
||||
BFChange = BF.OptimizeFunction(
|
||||
MF, TII, ST.getRegisterInfo(),
|
||||
@ -598,7 +603,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
|
||||
BBAnalysis.clear();
|
||||
|
||||
if (MadeChange && IfCvtBranchFold) {
|
||||
BranchFolder BF(false, false, MBFI, *MBPI);
|
||||
BranchFolder BF(false, false, MBFI, *MBPI, PSI);
|
||||
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
|
||||
BF.OptimizeFunction(
|
||||
MF, TII, MF.getSubtarget().getRegisterInfo(),
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
||||
@ -41,6 +42,7 @@
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/MachinePostDominators.h"
|
||||
#include "llvm/CodeGen/MachineSizeOpts.h"
|
||||
#include "llvm/CodeGen/TailDuplicator.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetLowering.h"
|
||||
@ -363,6 +365,8 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
||||
/// A handle to the post dominator tree.
|
||||
MachinePostDominatorTree *MPDT;
|
||||
|
||||
ProfileSummaryInfo *PSI;
|
||||
|
||||
/// Duplicator used to duplicate tails during placement.
|
||||
///
|
||||
/// Placement decisions can open up new tail duplication opportunities, but
|
||||
@ -538,6 +542,7 @@ public:
|
||||
if (TailDupPlacement)
|
||||
AU.addRequired<MachinePostDominatorTree>();
|
||||
AU.addRequired<MachineLoopInfo>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
AU.addRequired<TargetPassConfig>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
@ -555,6 +560,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
|
||||
"Branch Probability Basic Block Placement", false, false)
|
||||
|
||||
@ -2075,7 +2081,10 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
|
||||
// i.e. when the layout predecessor does not fallthrough to the loop header.
|
||||
// In practice this never happens though: there always seems to be a preheader
|
||||
// that can fallthrough and that is also placed before the header.
|
||||
if (F->getFunction().hasOptSize())
|
||||
bool OptForSize = F->getFunction().hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(L.getHeader(), PSI,
|
||||
&MBFI->getMBFI());
|
||||
if (OptForSize)
|
||||
return L.getHeader();
|
||||
|
||||
MachineBasicBlock *OldTop = nullptr;
|
||||
@ -2831,6 +2840,11 @@ void MachineBlockPlacement::alignBlocks() {
|
||||
if (Freq < (LoopHeaderFreq * ColdProb))
|
||||
continue;
|
||||
|
||||
// If the global profile indicates so, don't align it.
|
||||
if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) &&
|
||||
!TLI->alignLoopsWithOptSize())
|
||||
continue;
|
||||
|
||||
// Check for the existence of a non-layout predecessor which would benefit
|
||||
// from aligning this block.
|
||||
MachineBasicBlock *LayoutPred =
|
||||
@ -3038,6 +3052,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
||||
TII = MF.getSubtarget().getInstrInfo();
|
||||
TLI = MF.getSubtarget().getTargetLowering();
|
||||
MPDT = nullptr;
|
||||
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
|
||||
// Initialize PreferredLoopExit to nullptr here since it may never be set if
|
||||
// there are no MachineLoops.
|
||||
@ -3068,10 +3083,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
if (allowTailDupPlacement()) {
|
||||
MPDT = &getAnalysis<MachinePostDominatorTree>();
|
||||
if (MF.getFunction().hasOptSize())
|
||||
bool OptForSize = MF.getFunction().hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI());
|
||||
if (OptForSize)
|
||||
TailDupSize = 1;
|
||||
bool PreRegAlloc = false;
|
||||
TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize);
|
||||
TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI,
|
||||
/* LayoutMode */ true, TailDupSize);
|
||||
precomputeTriangleChains();
|
||||
}
|
||||
|
||||
@ -3087,7 +3105,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (MF.size() > 3 && EnableTailMerge) {
|
||||
unsigned TailMergeSize = TailDupSize + 1;
|
||||
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
|
||||
*MBPI, TailMergeSize);
|
||||
*MBPI, PSI, TailMergeSize);
|
||||
|
||||
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
|
||||
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
|
||||
|
@ -12,11 +12,14 @@
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineSizeOpts.h"
|
||||
#include "llvm/CodeGen/MachineTraceMetrics.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
@ -67,6 +70,8 @@ class MachineCombiner : public MachineFunctionPass {
|
||||
MachineLoopInfo *MLI; // Current MachineLoopInfo
|
||||
MachineTraceMetrics *Traces;
|
||||
MachineTraceMetrics::Ensemble *MinInstr;
|
||||
MachineBlockFrequencyInfo *MBFI;
|
||||
ProfileSummaryInfo *PSI;
|
||||
|
||||
TargetSchedModel TSchedModel;
|
||||
|
||||
@ -83,7 +88,7 @@ public:
|
||||
StringRef getPassName() const override { return "Machine InstCombiner"; }
|
||||
|
||||
private:
|
||||
bool doSubstitute(unsigned NewSize, unsigned OldSize);
|
||||
bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize);
|
||||
bool combineInstructions(MachineBasicBlock *);
|
||||
MachineInstr *getOperandDef(const MachineOperand &MO);
|
||||
unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
|
||||
@ -132,6 +137,8 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addPreserved<MachineLoopInfo>();
|
||||
AU.addRequired<MachineTraceMetrics>();
|
||||
AU.addPreserved<MachineTraceMetrics>();
|
||||
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
@ -409,8 +416,9 @@ bool MachineCombiner::preservesResourceLen(
|
||||
|
||||
/// \returns true when new instruction sequence should be generated
|
||||
/// regardless of whether it lengthens the critical path.
|
||||
bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
|
||||
if (OptSize && (NewSize < OldSize))
|
||||
bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize,
|
||||
bool OptForSize) {
|
||||
if (OptForSize && (NewSize < OldSize))
|
||||
return true;
|
||||
if (!TSchedModel.hasInstrSchedModelOrItineraries())
|
||||
return true;
|
||||
@ -508,6 +516,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
|
||||
SparseSet<LiveRegUnit> RegUnits;
|
||||
RegUnits.setUniverse(TRI->getNumRegUnits());
|
||||
|
||||
bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
|
||||
|
||||
while (BlockIter != MBB->end()) {
|
||||
auto &MI = *BlockIter++;
|
||||
SmallVector<MachineCombinerPattern, 16> Patterns;
|
||||
@ -584,7 +594,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
|
||||
// fewer instructions OR
|
||||
// the new sequence neither lengthens the critical path nor increases
|
||||
// resource pressure.
|
||||
if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
|
||||
if (SubstituteAlways ||
|
||||
doSubstitute(NewInstCount, OldInstCount, OptForSize)) {
|
||||
insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
|
||||
RegUnits, IncrementalUpdate);
|
||||
// Eagerly stop after the first pattern fires.
|
||||
@ -639,6 +650,10 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
|
||||
MRI = &MF.getRegInfo();
|
||||
MLI = &getAnalysis<MachineLoopInfo>();
|
||||
Traces = &getAnalysis<MachineTraceMetrics>();
|
||||
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
MBFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
|
||||
nullptr;
|
||||
MinInstr = nullptr;
|
||||
OptSize = MF.getFunction().hasOptSize();
|
||||
|
||||
|
@ -27,8 +27,10 @@
|
||||
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/Analysis/EHPersonalities.h"
|
||||
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
|
||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/CodeGen/FastISel.h"
|
||||
@ -334,6 +336,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
if (UseMBPI && OptLevel != CodeGenOpt::None)
|
||||
AU.addRequired<BranchProbabilityInfoWrapperPass>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
@ -436,14 +440,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
|
||||
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
|
||||
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
|
||||
LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
|
||||
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
|
||||
nullptr;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
|
||||
|
||||
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
|
||||
|
||||
CurDAG->init(*MF, *ORE, this, LibInfo,
|
||||
getAnalysisIfAvailable<LegacyDivergenceAnalysis>(),
|
||||
nullptr, nullptr);
|
||||
getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI);
|
||||
FuncInfo->set(Fn, *MF, CurDAG);
|
||||
SwiftError->setFunction(*MF);
|
||||
|
||||
|
@ -12,6 +12,8 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
@ -38,6 +40,8 @@ public:
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<MachineBranchProbabilityInfo>();
|
||||
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
@ -75,7 +79,11 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
|
||||
return false;
|
||||
|
||||
auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
|
||||
Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false);
|
||||
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
|
||||
nullptr;
|
||||
Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false);
|
||||
|
||||
bool MadeChange = false;
|
||||
while (Duplicator.tailDuplicateBlocks())
|
||||
|
@ -19,13 +19,16 @@
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineSizeOpts.h"
|
||||
#include "llvm/CodeGen/MachineSSAUpdater.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
@ -77,6 +80,8 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
|
||||
|
||||
void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
|
||||
const MachineBranchProbabilityInfo *MBPIin,
|
||||
const MachineBlockFrequencyInfo *MBFIin,
|
||||
ProfileSummaryInfo *PSIin,
|
||||
bool LayoutModeIn, unsigned TailDupSizeIn) {
|
||||
MF = &MFin;
|
||||
TII = MF->getSubtarget().getInstrInfo();
|
||||
@ -84,6 +89,8 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
|
||||
MRI = &MF->getRegInfo();
|
||||
MMI = &MF->getMMI();
|
||||
MBPI = MBPIin;
|
||||
MBFI = MBFIin;
|
||||
PSI = PSIin;
|
||||
TailDupSize = TailDupSizeIn;
|
||||
|
||||
assert(MBPI != nullptr && "Machine Branch Probability Info required");
|
||||
@ -555,14 +562,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
|
||||
// duplicate only one, because one branch instruction can be eliminated to
|
||||
// compensate for the duplication.
|
||||
unsigned MaxDuplicateCount;
|
||||
if (TailDupSize == 0 &&
|
||||
TailDuplicateSize.getNumOccurrences() == 0 &&
|
||||
MF->getFunction().hasOptSize())
|
||||
MaxDuplicateCount = 1;
|
||||
else if (TailDupSize == 0)
|
||||
bool OptForSize = MF->getFunction().hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI);
|
||||
if (TailDupSize == 0)
|
||||
MaxDuplicateCount = TailDuplicateSize;
|
||||
else
|
||||
MaxDuplicateCount = TailDupSize;
|
||||
if (OptForSize)
|
||||
MaxDuplicateCount = 1;
|
||||
|
||||
// If the block to be duplicated ends in an unanalyzable fallthrough, don't
|
||||
// duplicate it.
|
||||
|
@ -48,11 +48,14 @@
|
||||
#include "X86InstrInfo.h"
|
||||
#include "X86Subtarget.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/LivePhysRegs.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineSizeOpts.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
@ -113,6 +116,8 @@ public:
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<MachineLoopInfo>(); // Machine loop info is used to
|
||||
// guide some heuristics.
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
@ -140,6 +145,9 @@ private:
|
||||
|
||||
/// Register Liveness information after the current instruction.
|
||||
LivePhysRegs LiveRegs;
|
||||
|
||||
ProfileSummaryInfo *PSI;
|
||||
MachineBlockFrequencyInfo *MBFI;
|
||||
};
|
||||
char FixupBWInstPass::ID = 0;
|
||||
}
|
||||
@ -154,8 +162,11 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
this->MF = &MF;
|
||||
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
|
||||
OptForSize = MF.getFunction().hasOptSize();
|
||||
MLI = &getAnalysis<MachineLoopInfo>();
|
||||
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
MBFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
|
||||
nullptr;
|
||||
LiveRegs.init(TII->getRegisterInfo());
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Start X86FixupBWInsts\n";);
|
||||
@ -426,6 +437,9 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
|
||||
// We run after PEI, so we need to AddPristinesAndCSRs.
|
||||
LiveRegs.addLiveOuts(MBB);
|
||||
|
||||
OptForSize = MF.getFunction().hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
|
||||
|
||||
for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) {
|
||||
MachineInstr *MI = &*I;
|
||||
|
||||
|
@ -25,6 +25,8 @@
|
||||
#include "llvm/ADT/Hashing.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
@ -32,6 +34,7 @@
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineSizeOpts.h"
|
||||
#include "llvm/CodeGen/TargetOpcodes.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
#include "llvm/IR/DebugInfoMetadata.h"
|
||||
@ -247,6 +250,12 @@ public:
|
||||
|
||||
static char ID;
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
private:
|
||||
using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>;
|
||||
|
||||
@ -681,6 +690,11 @@ bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
MRI = &MF.getRegInfo();
|
||||
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
|
||||
TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
|
||||
auto *PSI =
|
||||
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
|
||||
nullptr;
|
||||
|
||||
// Process all basic blocks.
|
||||
for (auto &MBB : MF) {
|
||||
@ -699,7 +713,9 @@ bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
// Remove redundant address calculations. Do it only for -Os/-Oz since only
|
||||
// a code size gain is expected from this part of the pass.
|
||||
if (MF.getFunction().hasOptSize())
|
||||
bool OptForSize = MF.getFunction().hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
|
||||
if (OptForSize)
|
||||
Changed |= removeRedundantAddrCalc(LEAs);
|
||||
}
|
||||
|
||||
|
@ -17,8 +17,11 @@
|
||||
#include "X86InstrInfo.h"
|
||||
#include "X86Subtarget.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineSizeOpts.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/CodeGen/TargetSchedule.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
@ -52,6 +55,12 @@ namespace {
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
MachineFunctionProperties getRequiredProperties() const override {
|
||||
return MachineFunctionProperties().set(
|
||||
MachineFunctionProperties::Property::NoVRegs);
|
||||
@ -105,6 +114,12 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
TSM.init(&MF.getSubtarget());
|
||||
|
||||
auto *PSI =
|
||||
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
|
||||
nullptr;
|
||||
|
||||
// Search through basic blocks and mark the ones that have early returns
|
||||
ReturnBBs.clear();
|
||||
VisitedBBs.clear();
|
||||
@ -118,6 +133,11 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
|
||||
MachineBasicBlock *MBB = I->first;
|
||||
unsigned Cycles = I->second;
|
||||
|
||||
// Function::hasOptSize is already checked above.
|
||||
bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
|
||||
if (OptForSize)
|
||||
continue;
|
||||
|
||||
if (Cycles < Threshold) {
|
||||
// BB ends in a return. Skip over any DBG_VALUE instructions
|
||||
// trailing the terminator.
|
||||
|
@ -11,6 +11,7 @@
|
||||
; CHECK-NEXT: Scoped NoAlias Alias Analysis
|
||||
; CHECK-NEXT: Assumption Cache Tracker
|
||||
; CHECK-NEXT: Create Garbage Collector Module Metadata
|
||||
; CHECK-NEXT: Profile summary info
|
||||
; CHECK-NEXT: Machine Branch Probability Analysis
|
||||
; CHECK-NEXT: ModulePass Manager
|
||||
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
|
||||
@ -45,6 +46,10 @@
|
||||
; CHECK-NEXT: Analysis for ComputingKnownBits
|
||||
; CHECK-NEXT: InstructionSelect
|
||||
; CHECK-NEXT: ResetMachineFunction
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: AArch64 Instruction Selection
|
||||
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
|
||||
; CHECK-NEXT: Local Stack Slot Allocation
|
||||
|
@ -10,8 +10,8 @@
|
||||
; CHECK-NEXT: Assumption Cache Tracker
|
||||
; CHECK-NEXT: Type-Based Alias Analysis
|
||||
; CHECK-NEXT: Scoped NoAlias Alias Analysis
|
||||
; CHECK-NEXT: Create Garbage Collector Module Metadata
|
||||
; CHECK-NEXT: Profile summary info
|
||||
; CHECK-NEXT: Create Garbage Collector Module Metadata
|
||||
; CHECK-NEXT: Machine Branch Probability Analysis
|
||||
; CHECK-NEXT: ModulePass Manager
|
||||
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
|
||||
@ -35,6 +35,9 @@
|
||||
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
; CHECK-NEXT: Merge contiguous icmps into a memcmp
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: Expand memcmp() to load/stores
|
||||
; CHECK-NEXT: Lower Garbage Collection Instructions
|
||||
; CHECK-NEXT: Shadow Stack GC Lowering
|
||||
@ -78,10 +81,13 @@
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
; CHECK-NEXT: Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: AArch64 Instruction Selection
|
||||
; CHECK-NEXT: MachineDominator Tree Construction
|
||||
; CHECK-NEXT: AArch64 Local Dynamic TLS Access Clean-up
|
||||
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Early Tail Duplication
|
||||
; CHECK-NEXT: Optimize machine instruction PHIs
|
||||
; CHECK-NEXT: Slot index numbering
|
||||
@ -93,6 +99,7 @@
|
||||
; CHECK-NEXT: Machine Natural Loop Construction
|
||||
; CHECK-NEXT: Machine Trace Metrics
|
||||
; CHECK-NEXT: AArch64 Conditional Compares
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine InstCombiner
|
||||
; CHECK-NEXT: AArch64 Conditional Branch Tuning
|
||||
; CHECK-NEXT: Machine Trace Metrics
|
||||
@ -149,6 +156,7 @@
|
||||
; CHECK-NEXT: Shrink Wrapping analysis
|
||||
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
|
||||
; CHECK-NEXT: Control Flow Optimizer
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Tail Duplication
|
||||
; CHECK-NEXT: Machine Copy Propagation Pass
|
||||
; CHECK-NEXT: Post-RA pseudo instruction expansion pass
|
||||
|
@ -19,6 +19,9 @@
|
||||
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
; CHECK-NEXT: Merge contiguous icmps into a memcmp
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: Expand memcmp() to load/stores
|
||||
; CHECK-NEXT: Lower Garbage Collection Instructions
|
||||
; CHECK-NEXT: Shadow Stack GC Lowering
|
||||
@ -67,8 +70,11 @@
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
; CHECK-NEXT: Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: ARM Instruction Selection
|
||||
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Early Tail Duplication
|
||||
; CHECK-NEXT: Optimize machine instruction PHIs
|
||||
; CHECK-NEXT: Slot index numbering
|
||||
@ -124,6 +130,7 @@
|
||||
; CHECK-NEXT: Shrink Wrapping analysis
|
||||
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
|
||||
; CHECK-NEXT: Control Flow Optimizer
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Tail Duplication
|
||||
; CHECK-NEXT: Machine Copy Propagation Pass
|
||||
; CHECK-NEXT: Post-RA pseudo instruction expansion pass
|
||||
|
@ -14,6 +14,7 @@
|
||||
; CHECK-NEXT: Scoped NoAlias Alias Analysis
|
||||
; CHECK-NEXT: Assumption Cache Tracker
|
||||
; CHECK-NEXT: Create Garbage Collector Module Metadata
|
||||
; CHECK-NEXT: Profile summary info
|
||||
; CHECK-NEXT: Machine Branch Probability Analysis
|
||||
; CHECK-NEXT: ModulePass Manager
|
||||
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
|
||||
@ -37,6 +38,10 @@
|
||||
; CHECK-NEXT: Safe Stack instrumentation pass
|
||||
; CHECK-NEXT: Insert stack protectors
|
||||
; CHECK-NEXT: Module Verifier
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: X86 DAG->DAG Instruction Selection
|
||||
; CHECK-NEXT: X86 PIC Global Base Reg Initialization
|
||||
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
|
||||
|
@ -13,8 +13,8 @@
|
||||
; CHECK-NEXT: Type-Based Alias Analysis
|
||||
; CHECK-NEXT: Scoped NoAlias Alias Analysis
|
||||
; CHECK-NEXT: Assumption Cache Tracker
|
||||
; CHECK-NEXT: Create Garbage Collector Module Metadata
|
||||
; CHECK-NEXT: Profile summary info
|
||||
; CHECK-NEXT: Create Garbage Collector Module Metadata
|
||||
; CHECK-NEXT: Machine Branch Probability Analysis
|
||||
; CHECK-NEXT: ModulePass Manager
|
||||
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
|
||||
@ -32,6 +32,9 @@
|
||||
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
; CHECK-NEXT: Merge contiguous icmps into a memcmp
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: Expand memcmp() to load/stores
|
||||
; CHECK-NEXT: Lower Garbage Collection Instructions
|
||||
; CHECK-NEXT: Shadow Stack GC Lowering
|
||||
@ -64,12 +67,15 @@
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
; CHECK-NEXT: Natural Loop Information
|
||||
; CHECK-NEXT: Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: X86 DAG->DAG Instruction Selection
|
||||
; CHECK-NEXT: MachineDominator Tree Construction
|
||||
; CHECK-NEXT: Local Dynamic TLS Access Clean-up
|
||||
; CHECK-NEXT: X86 PIC Global Base Reg Initialization
|
||||
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
|
||||
; CHECK-NEXT: X86 Domain Reassignment Pass
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Early Tail Duplication
|
||||
; CHECK-NEXT: Optimize machine instruction PHIs
|
||||
; CHECK-NEXT: Slot index numbering
|
||||
@ -80,6 +86,7 @@
|
||||
; CHECK-NEXT: Machine Natural Loop Construction
|
||||
; CHECK-NEXT: Machine Trace Metrics
|
||||
; CHECK-NEXT: Early If-Conversion
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine InstCombiner
|
||||
; CHECK-NEXT: X86 cmov Conversion
|
||||
; CHECK-NEXT: MachineDominator Tree Construction
|
||||
@ -94,6 +101,7 @@
|
||||
; CHECK-NEXT: Remove dead machine instructions
|
||||
; CHECK-NEXT: Live Range Shrink
|
||||
; CHECK-NEXT: X86 Fixup SetCC
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: X86 LEA Optimize
|
||||
; CHECK-NEXT: X86 Optimize Call Frame
|
||||
; CHECK-NEXT: X86 Avoid Store Forwarding Block
|
||||
@ -139,6 +147,7 @@
|
||||
; CHECK-NEXT: Shrink Wrapping analysis
|
||||
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
|
||||
; CHECK-NEXT: Control Flow Optimizer
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Tail Duplication
|
||||
; CHECK-NEXT: Machine Copy Propagation Pass
|
||||
; CHECK-NEXT: Post-RA pseudo instruction expansion pass
|
||||
@ -157,7 +166,9 @@
|
||||
; CHECK-NEXT: X86 vzeroupper inserter
|
||||
; CHECK-NEXT: MachineDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Natural Loop Construction
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: X86 Byte/Word Instruction Fixup
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: X86 Atom pad short functions
|
||||
; CHECK-NEXT: X86 LEA Fixup
|
||||
; CHECK-NEXT: Compressing EVEX instrs to VEX encoding when possible
|
||||
|
Loading…
Reference in New Issue
Block a user