1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[PGO][PGSO] Instrument the code gen / target passes.

Summary:
Split off of D67120.

Add the profile guided size optimization instrumentation / queries in the code
gen or target passes. This doesn't enable the size optimizations in those passes
yet as they are currently disabled in shouldOptimizeForSize (for non-IR pass
queries).

A second try after reverted D71072.

Reviewers: davidxl

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71149
This commit is contained in:
Hiroshi Yamauchi 2019-12-05 09:39:37 -08:00
parent e7c754ad96
commit 1aa49b7b60
22 changed files with 290 additions and 61 deletions

View File

@ -48,6 +48,7 @@ class GlobalObject;
class GlobalValue;
class GlobalVariable;
class MachineBasicBlock;
class MachineBlockFrequencyInfo;
class MachineConstantPoolValue;
class MachineDominatorTree;
class MachineFunction;
@ -69,6 +70,7 @@ class MCSymbol;
class MCTargetOptions;
class MDNode;
class Module;
class ProfileSummaryInfo;
class raw_ostream;
class RemarkStreamer;
class StackMaps;
@ -108,6 +110,10 @@ public:
/// Optimization remark emitter.
MachineOptimizationRemarkEmitter *ORE;
MachineBlockFrequencyInfo *MBFI;
ProfileSummaryInfo *PSI;
/// The symbol for the current function. This is recalculated at the beginning
/// of each call to runOnMachineFunction().
MCSymbol *CurrentFnSym = nullptr;

View File

@ -182,6 +182,10 @@ public:
}
}
MachineBlockFrequencyInfo *getBFI() {
return MBFI;
}
private:
MachineFunction &MF;

View File

@ -25,11 +25,13 @@
namespace llvm {
class MachineBasicBlock;
class MachineBlockFrequencyInfo;
class MachineBranchProbabilityInfo;
class MachineFunction;
class MachineInstr;
class MachineModuleInfo;
class MachineRegisterInfo;
class ProfileSummaryInfo;
class TargetRegisterInfo;
/// Utility class to perform tail duplication.
@ -40,6 +42,8 @@ class TailDuplicator {
const MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
MachineFunction *MF;
const MachineBlockFrequencyInfo *MBFI;
ProfileSummaryInfo *PSI;
bool PreRegAlloc;
bool LayoutMode;
unsigned TailDupSize;
@ -65,6 +69,8 @@ public:
/// default implies using the command line value TailDupSize.
void initMF(MachineFunction &MF, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPI,
const MachineBlockFrequencyInfo *MBFI,
ProfileSummaryInfo *PSI,
bool LayoutMode, unsigned TailDupSize = 0);
bool tailDuplicateBlocks();

View File

@ -31,13 +31,16 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@ -52,6 +55,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@ -248,6 +252,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
bool AsmPrinter::doInitialization(Module &M) {
@ -1684,6 +1690,13 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
}
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
MBFI = (PSI && PSI->hasProfileSummary()) ?
// ORE conditionally computes MBFI. If available, use it, otherwise
// request it.
(ORE->getBFI() ? ORE->getBFI() :
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()) :
nullptr;
}
namespace {
@ -2913,8 +2926,10 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
MCCodePaddingContext &Context) const {
assert(MF != nullptr && "Machine function must be valid");
bool OptForSize = MF->getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
Context.IsPaddingActive = !MF->hasInlineAsm() &&
!MF->getFunction().hasOptSize() &&
!OptForSize &&
TM.getOptLevel() != CodeGenOpt::None;
Context.IsBasicBlockReachableViaFallthrough =
std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=

View File

@ -24,6 +24,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@ -38,6 +39,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@ -103,6 +105,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@ -129,7 +132,8 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
BranchFolder::MBFIWrapper MBBFreqInfo(
getAnalysis<MachineBlockFrequencyInfo>());
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
getAnalysis<MachineBranchProbabilityInfo>());
getAnalysis<MachineBranchProbabilityInfo>(),
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
return Folder.OptimizeFunction(
MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(),
@ -139,9 +143,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
MBFIWrapper &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo,
ProfileSummaryInfo *PSI,
unsigned MinTailLength)
: EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
MBBFreqInfo(FreqInfo), MBPI(ProbInfo) {
MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) {
if (MinCommonTailLength == 0)
MinCommonTailLength = TailMergeSize;
switch (FlagEnableTailMerge) {
@ -585,7 +590,9 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB,
DenseMap<const MachineBasicBlock *, int> &EHScopeMembership,
bool AfterPlacement) {
bool AfterPlacement,
BranchFolder::MBFIWrapper &MBBFreqInfo,
ProfileSummaryInfo *PSI) {
// It is never profitable to tail-merge blocks from two different EH scopes.
if (!EHScopeMembership.empty()) {
auto EHScope1 = EHScopeMembership.find(MBB1);
@ -682,7 +689,11 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// branch instruction, which is likely to be smaller than the 2
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
bool OptForSize =
MF->getFunction().hasOptSize() ||
(llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) &&
llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI()));
return EffectiveTailLen >= 2 && OptForSize &&
(FullBlockTail1 || FullBlockTail2);
}
@ -704,7 +715,7 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
CommonTailLen, TrialBBI1, TrialBBI2,
SuccBB, PredBB,
EHScopeMembership,
AfterBlockPlacement)) {
AfterBlockPlacement, MBBFreqInfo, PSI)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
@ -1534,8 +1545,10 @@ ReoptimizeBlock:
}
}
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
MF.getFunction().hasOptSize()) {
bool OptForSize =
MF.getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI());
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
// direction, thereby defeating careful block placement and regressing
// performance. Therefore, only consider this for optsize functions.

View File

@ -27,6 +27,7 @@ class MachineFunction;
class MachineLoopInfo;
class MachineModuleInfo;
class MachineRegisterInfo;
class ProfileSummaryInfo;
class raw_ostream;
class TargetInstrInfo;
class TargetRegisterInfo;
@ -39,6 +40,7 @@ class TargetRegisterInfo;
bool CommonHoist,
MBFIWrapper &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo,
ProfileSummaryInfo *PSI,
// Min tail length to merge. Defaults to commandline
// flag. Ignored for optsize.
unsigned MinTailLength = 0);
@ -145,6 +147,7 @@ class TargetRegisterInfo;
const BlockFrequency Freq) const;
void view(const Twine &Name, bool isSimple = true);
uint64_t getEntryFreq() const;
const MachineBlockFrequencyInfo &getMBFI() { return MBFI; }
private:
const MachineBlockFrequencyInfo &MBFI;
@ -154,6 +157,7 @@ class TargetRegisterInfo;
private:
MBFIWrapper &MBBFreqInfo;
const MachineBranchProbabilityInfo &MBPI;
ProfileSummaryInfo *PSI;
bool TailMergeBlocks(MachineFunction &MF);
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,

View File

@ -90,6 +90,7 @@
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@ -256,6 +257,7 @@ class TypePromotionTransaction;
const LoopInfo *LI;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
ProfileSummaryInfo *PSI;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
@ -298,7 +300,7 @@ class TypePromotionTransaction;
/// Keep track of SExt promoted.
ValueToSExts ValToSExtendedUses;
/// True if optimizing for size.
/// True if the function has the OptSize attribute.
bool OptSize;
/// DataLayout for the Function being processed.
@ -435,10 +437,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.reset(new BranchProbabilityInfo(F, *LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
OptSize = F.hasOptSize();
ProfileSummaryInfo *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
if (PSI->isFunctionHotInCallGraph(&F, *BFI))
F.setSectionPrefix(".hot");
@ -457,7 +457,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// bypassSlowDivision may create new BBs, but we don't want to reapply the
// optimization to those blocks.
BasicBlock* Next = BB->getNextNode();
EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
// F.hasOptSize is already checked in the outer if statement.
if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
BB = Next;
}
}
@ -1938,7 +1940,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// cold block. This interacts with our handling for loads and stores to
// ensure that we can fold all uses of a potential addressing computation
// into their uses. TODO: generalize this to work over profiling data
if (!OptSize && CI->hasFnAttr(Attribute::Cold))
bool OptForSize = OptSize || llvm::shouldOptimizeForSize(BB, PSI, BFI.get());
if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
for (auto &Arg : CI->arg_operands()) {
if (!Arg->getType()->isPointerTy())
continue;
@ -2875,16 +2878,24 @@ class AddressingModeMatcher {
/// When true, IsProfitableToFoldIntoAddressingMode always returns true.
bool IgnoreProfitability;
/// True if we are optimizing for size.
bool OptSize;
ProfileSummaryInfo *PSI;
BlockFrequencyInfo *BFI;
AddressingModeMatcher(
SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
OptSize(OptSize), PSI(PSI), BFI(BFI) {
IgnoreProfitability = false;
}
@ -2902,12 +2913,14 @@ public:
const TargetLowering &TLI, const TargetRegisterInfo &TRI,
const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
ExtAddrMode Result;
bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT, LargeOffsetGEP)
PromotedInsts, TPT, LargeOffsetGEP,
OptSize, PSI, BFI)
.matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
@ -4518,7 +4531,8 @@ static bool FindAllMemoryUses(
Instruction *I,
SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, int SeenInsts = 0) {
const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, int SeenInsts = 0) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@ -4527,8 +4541,6 @@ static bool FindAllMemoryUses(
if (!MightBeFoldableInst(I))
return true;
const bool OptSize = I->getFunction()->hasOptSize();
// Loop over all the uses, recursively processing them.
for (Use &U : I->uses()) {
// Conservatively return true if we're seeing a large number or a deep chain
@ -4569,7 +4581,9 @@ static bool FindAllMemoryUses(
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
// If this is a cold call, we can sink the addressing calculation into
// the cold path. See optimizeCallInst
if (!OptSize && CI->hasFnAttr(Attribute::Cold))
bool OptForSize = OptSize ||
llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
continue;
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
@ -4581,8 +4595,8 @@ static bool FindAllMemoryUses(
continue;
}
if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
SeenInsts))
if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
PSI, BFI, SeenInsts))
return true;
}
@ -4670,7 +4684,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// the use is just a particularly nice way of sinking it.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
PSI, BFI))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@ -4706,7 +4721,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
TPT.getRestorationPoint();
AddressingModeMatcher Matcher(
MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.matchAddr(Address, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@ -4812,7 +4827,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
0);
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
BFI.get());
GetElementPtrInst *GEP = LargeOffsetGEP.first;
if (GEP && !NewGEPBases.count(GEP)) {
@ -6030,7 +6046,9 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// If branch conversion isn't desirable, exit early.
if (DisableSelectToBranch || OptSize || !TLI)
if (DisableSelectToBranch ||
OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()) ||
!TLI)
return false;
// Find all consecutive select instructions that share the same condition.

View File

@ -13,6 +13,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@ -21,6 +23,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
using namespace llvm;
@ -721,7 +724,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
/// ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
const TargetLowering *TLI, const DataLayout *DL) {
const TargetLowering *TLI, const DataLayout *DL,
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
@ -742,18 +746,20 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(),
bool OptForSize = CI->getFunction()->hasOptSize() ||
llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
auto Options = TTI->enableMemCmpExpansion(OptForSize,
IsUsedForZeroCmp);
if (!Options) return false;
if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
if (CI->getFunction()->hasOptSize() &&
if (OptForSize &&
MaxLoadsPerMemcmpOptSize.getNumOccurrences())
Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences())
if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
Options.MaxNumLoads = MaxLoadsPerMemcmp;
MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
@ -799,7 +805,11 @@ public:
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
const TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto PA = runImpl(F, TLI, TTI, TL);
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
nullptr;
auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI);
return !PA.areAllPreserved();
}
@ -807,22 +817,26 @@ private:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
FunctionPass::getAnalysisUsage(AU);
}
PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
const TargetLowering* TL);
const TargetLowering* TL,
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI);
// Returns true if a change was made.
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, const TargetLowering* TL,
const DataLayout& DL);
const DataLayout& DL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI);
};
bool ExpandMemCmpPass::runOnBlock(
BasicBlock &BB, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, const TargetLowering* TL,
const DataLayout& DL) {
const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
for (Instruction& I : BB) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI) {
@ -831,7 +845,7 @@ bool ExpandMemCmpPass::runOnBlock(
LibFunc Func;
if (TLI->getLibFunc(ImmutableCallSite(CI), Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
expandMemCmp(CI, TTI, TL, &DL)) {
expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) {
return true;
}
}
@ -841,11 +855,12 @@ bool ExpandMemCmpPass::runOnBlock(
PreservedAnalyses ExpandMemCmpPass::runImpl(
Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
const TargetLowering* TL) {
const TargetLowering* TL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) {
const DataLayout& DL = F.getParent()->getDataLayout();
bool MadeChanges = false;
for (auto BBIt = F.begin(); BBIt != F.end();) {
if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) {
if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) {
MadeChanges = true;
// If changes were made, restart the function from the beginning, since
// the structure of the function was changed.
@ -864,6 +879,8 @@ INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
"Expand memcmp() to load/stores", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
"Expand memcmp() to load/stores", false, false)

View File

@ -19,6 +19,7 @@
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@ -213,6 +214,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@ -434,6 +436,7 @@ char &llvm::IfConverterID = IfConverter::ID;
INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
@ -446,6 +449,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TRI = ST.getRegisterInfo();
BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
ProfileSummaryInfo *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
MRI = &MF.getRegInfo();
SchedModel.init(&ST);
@ -456,7 +461,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
bool BFChange = false;
if (!PreRegAlloc) {
// Tail merge tend to expose more if-conversion opportunities.
BranchFolder BF(true, false, MBFI, *MBPI);
BranchFolder BF(true, false, MBFI, *MBPI, PSI);
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
BFChange = BF.OptimizeFunction(
MF, TII, ST.getRegisterInfo(),
@ -598,7 +603,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
BBAnalysis.clear();
if (MadeChange && IfCvtBranchFold) {
BranchFolder BF(false, false, MBFI, *MBPI);
BranchFolder BF(false, false, MBFI, *MBPI, PSI);
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
BF.OptimizeFunction(
MF, TII, MF.getSubtarget().getRegisterInfo(),

View File

@ -33,6 +33,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@ -41,6 +42,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@ -363,6 +365,8 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// A handle to the post dominator tree.
MachinePostDominatorTree *MPDT;
ProfileSummaryInfo *PSI;
/// Duplicator used to duplicate tails during placement.
///
/// Placement decisions can open up new tail duplication opportunities, but
@ -538,6 +542,7 @@ public:
if (TailDupPlacement)
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@ -555,6 +560,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
@ -2075,7 +2081,10 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
// i.e. when the layout predecessor does not fallthrough to the loop header.
// In practice this never happens though: there always seems to be a preheader
// that can fallthrough and that is also placed before the header.
if (F->getFunction().hasOptSize())
bool OptForSize = F->getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(L.getHeader(), PSI,
&MBFI->getMBFI());
if (OptForSize)
return L.getHeader();
MachineBasicBlock *OldTop = nullptr;
@ -2831,6 +2840,11 @@ void MachineBlockPlacement::alignBlocks() {
if (Freq < (LoopHeaderFreq * ColdProb))
continue;
// If the global profiles indicates so, don't align it.
if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) &&
!TLI->alignLoopsWithOptSize())
continue;
// Check for the existence of a non-layout predecessor which would benefit
// from aligning this block.
MachineBasicBlock *LayoutPred =
@ -3038,6 +3052,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
MPDT = nullptr;
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
// Initialize PreferredLoopExit to nullptr here since it may never be set if
// there are no MachineLoops.
@ -3068,10 +3083,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (allowTailDupPlacement()) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
if (MF.getFunction().hasOptSize())
bool OptForSize = MF.getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI());
if (OptForSize)
TailDupSize = 1;
bool PreRegAlloc = false;
TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize);
TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI,
/* LayoutMode */ true, TailDupSize);
precomputeTriangleChains();
}
@ -3087,7 +3105,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (MF.size() > 3 && EnableTailMerge) {
unsigned TailMergeSize = TailDupSize + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI, TailMergeSize);
*MBPI, PSI, TailMergeSize);
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),

View File

@ -12,11 +12,14 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@ -67,6 +70,8 @@ class MachineCombiner : public MachineFunctionPass {
MachineLoopInfo *MLI; // Current MachineLoopInfo
MachineTraceMetrics *Traces;
MachineTraceMetrics::Ensemble *MinInstr;
MachineBlockFrequencyInfo *MBFI;
ProfileSummaryInfo *PSI;
TargetSchedModel TSchedModel;
@ -83,7 +88,7 @@ public:
StringRef getPassName() const override { return "Machine InstCombiner"; }
private:
bool doSubstitute(unsigned NewSize, unsigned OldSize);
bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize);
bool combineInstructions(MachineBasicBlock *);
MachineInstr *getOperandDef(const MachineOperand &MO);
unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
@ -132,6 +137,8 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<MachineTraceMetrics>();
AU.addPreserved<MachineTraceMetrics>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@ -409,8 +416,9 @@ bool MachineCombiner::preservesResourceLen(
/// \returns true when new instruction sequence should be generated
/// independent if it lengthens critical path or not
bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
if (OptSize && (NewSize < OldSize))
bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize,
bool OptForSize) {
if (OptForSize && (NewSize < OldSize))
return true;
if (!TSchedModel.hasInstrSchedModelOrItineraries())
return true;
@ -508,6 +516,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
SparseSet<LiveRegUnit> RegUnits;
RegUnits.setUniverse(TRI->getNumRegUnits());
bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
SmallVector<MachineCombinerPattern, 16> Patterns;
@ -584,7 +594,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
if (SubstituteAlways ||
doSubstitute(NewInstCount, OldInstCount, OptForSize)) {
insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
RegUnits, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
@ -639,6 +650,10 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
MLI = &getAnalysis<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
MinInstr = nullptr;
OptSize = MF.getFunction().hasOptSize();

View File

@ -27,8 +27,10 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/FastISel.h"
@ -334,6 +336,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@ -436,14 +440,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
nullptr;
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
CurDAG->init(*MF, *ORE, this, LibInfo,
getAnalysisIfAvailable<LegacyDivergenceAnalysis>(),
nullptr, nullptr);
getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI);
FuncInfo->set(Fn, *MF, CurDAG);
SwiftError->setFunction(*MF);

View File

@ -12,6 +12,8 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@ -38,6 +40,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@ -75,7 +79,11 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
return false;
auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false);
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false);
bool MadeChange = false;
while (Duplicator.tailDuplicateBlocks())

View File

@ -19,13 +19,16 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@ -77,6 +80,8 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPIin,
const MachineBlockFrequencyInfo *MBFIin,
ProfileSummaryInfo *PSIin,
bool LayoutModeIn, unsigned TailDupSizeIn) {
MF = &MFin;
TII = MF->getSubtarget().getInstrInfo();
@ -84,6 +89,8 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
MRI = &MF->getRegInfo();
MMI = &MF->getMMI();
MBPI = MBPIin;
MBFI = MBFIin;
PSI = PSIin;
TailDupSize = TailDupSizeIn;
assert(MBPI != nullptr && "Machine Branch Probability Info required");
@ -555,14 +562,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
unsigned MaxDuplicateCount;
if (TailDupSize == 0 &&
TailDuplicateSize.getNumOccurrences() == 0 &&
MF->getFunction().hasOptSize())
MaxDuplicateCount = 1;
else if (TailDupSize == 0)
bool OptForSize = MF->getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI);
if (TailDupSize == 0)
MaxDuplicateCount = TailDuplicateSize;
else
MaxDuplicateCount = TailDupSize;
if (OptForSize)
MaxDuplicateCount = 1;
// If the block to be duplicated ends in an unanalyzable fallthrough, don't
// duplicate it.

View File

@ -48,11 +48,14 @@
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
@ -113,6 +116,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>(); // Machine loop info is used to
// guide some heuristics.
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@ -140,6 +145,9 @@ private:
/// Register Liveness information after the current instruction.
LivePhysRegs LiveRegs;
ProfileSummaryInfo *PSI;
MachineBlockFrequencyInfo *MBFI;
};
char FixupBWInstPass::ID = 0;
}
@ -154,8 +162,11 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
OptForSize = MF.getFunction().hasOptSize();
MLI = &getAnalysis<MachineLoopInfo>();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
LiveRegs.init(TII->getRegisterInfo());
LLVM_DEBUG(dbgs() << "Start X86FixupBWInsts\n";);
@ -426,6 +437,9 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
// We run after PEI, so we need to AddPristinesAndCSRs.
LiveRegs.addLiveOuts(MBB);
OptForSize = MF.getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) {
MachineInstr *MI = &*I;

View File

@ -25,6 +25,8 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@ -32,6 +34,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
@ -247,6 +250,12 @@ public:
static char ID;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
private:
using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>;
@ -681,6 +690,11 @@ bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
auto *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
// Process all basic blocks.
for (auto &MBB : MF) {
@ -699,7 +713,9 @@ bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
// Remove redundant address calculations. Do it only for -Os/-Oz since only
// a code size gain is expected from this part of the pass.
if (MF.getFunction().hasOptSize())
bool OptForSize = MF.getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
if (OptForSize)
Changed |= removeRedundantAddrCalc(LEAs);
}

View File

@ -17,8 +17,11 @@
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Function.h"
@ -52,6 +55,12 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@ -105,6 +114,12 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
TSM.init(&MF.getSubtarget());
auto *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
// Search through basic blocks and mark the ones that have early returns
ReturnBBs.clear();
VisitedBBs.clear();
@ -118,6 +133,11 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB = I->first;
unsigned Cycles = I->second;
// Function::hasOptSize is already checked above.
bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
if (OptForSize)
continue;
if (Cycles < Threshold) {
// BB ends in a return. Skip over any DBG_VALUE instructions
// trailing the terminator.

View File

@ -11,6 +11,7 @@
; CHECK-NEXT: Scoped NoAlias Alias Analysis
; CHECK-NEXT: Assumption Cache Tracker
; CHECK-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
@ -45,6 +46,10 @@
; CHECK-NEXT: Analysis for ComputingKnownBits
; CHECK-NEXT: InstructionSelect
; CHECK-NEXT: ResetMachineFunction
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: AArch64 Instruction Selection
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
; CHECK-NEXT: Local Stack Slot Allocation

View File

@ -10,8 +10,8 @@
; CHECK-NEXT: Assumption Cache Tracker
; CHECK-NEXT: Type-Based Alias Analysis
; CHECK-NEXT: Scoped NoAlias Alias Analysis
; CHECK-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
@ -35,6 +35,9 @@
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Merge contiguous icmps into a memcmp
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Expand memcmp() to load/stores
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
@ -78,10 +81,13 @@
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Branch Probability Analysis
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: AArch64 Instruction Selection
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: AArch64 Local Dynamic TLS Access Clean-up
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Early Tail Duplication
; CHECK-NEXT: Optimize machine instruction PHIs
; CHECK-NEXT: Slot index numbering
@ -93,6 +99,7 @@
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Machine Trace Metrics
; CHECK-NEXT: AArch64 Conditional Compares
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine InstCombiner
; CHECK-NEXT: AArch64 Conditional Branch Tuning
; CHECK-NEXT: Machine Trace Metrics
@ -149,6 +156,7 @@
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: Post-RA pseudo instruction expansion pass

View File

@ -19,6 +19,9 @@
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Merge contiguous icmps into a memcmp
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Expand memcmp() to load/stores
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
@ -67,8 +70,11 @@
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Branch Probability Analysis
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: ARM Instruction Selection
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Early Tail Duplication
; CHECK-NEXT: Optimize machine instruction PHIs
; CHECK-NEXT: Slot index numbering
@ -124,6 +130,7 @@
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: Post-RA pseudo instruction expansion pass

View File

@ -14,6 +14,7 @@
; CHECK-NEXT: Scoped NoAlias Alias Analysis
; CHECK-NEXT: Assumption Cache Tracker
; CHECK-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
@ -37,6 +38,10 @@
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: X86 DAG->DAG Instruction Selection
; CHECK-NEXT: X86 PIC Global Base Reg Initialization
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions

View File

@ -13,8 +13,8 @@
; CHECK-NEXT: Type-Based Alias Analysis
; CHECK-NEXT: Scoped NoAlias Alias Analysis
; CHECK-NEXT: Assumption Cache Tracker
; CHECK-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
@ -32,6 +32,9 @@
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Merge contiguous icmps into a memcmp
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Expand memcmp() to load/stores
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
@ -64,12 +67,15 @@
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Branch Probability Analysis
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: X86 DAG->DAG Instruction Selection
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Local Dynamic TLS Access Clean-up
; CHECK-NEXT: X86 PIC Global Base Reg Initialization
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
; CHECK-NEXT: X86 Domain Reassignment Pass
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Early Tail Duplication
; CHECK-NEXT: Optimize machine instruction PHIs
; CHECK-NEXT: Slot index numbering
@ -80,6 +86,7 @@
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Machine Trace Metrics
; CHECK-NEXT: Early If-Conversion
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine InstCombiner
; CHECK-NEXT: X86 cmov Conversion
; CHECK-NEXT: MachineDominator Tree Construction
@ -94,6 +101,7 @@
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: Live Range Shrink
; CHECK-NEXT: X86 Fixup SetCC
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: X86 LEA Optimize
; CHECK-NEXT: X86 Optimize Call Frame
; CHECK-NEXT: X86 Avoid Store Forwarding Block
@ -139,6 +147,7 @@
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: Post-RA pseudo instruction expansion pass
@ -157,7 +166,9 @@
; CHECK-NEXT: X86 vzeroupper inserter
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: X86 Byte/Word Instruction Fixup
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: X86 Atom pad short functions
; CHECK-NEXT: X86 LEA Fixup
; CHECK-NEXT: Compressing EVEX instrs to VEX encoding when possible