2014-12-08 19:02:35 +01:00
|
|
|
//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
|
|
|
|
//
|
2019-01-19 09:50:56 +01:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2014-12-08 19:02:35 +01:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2015-11-18 19:14:55 +01:00
|
|
|
// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
|
|
|
|
// It also builds the data structures and initialization code needed for
|
|
|
|
// updating execution counts and emitting the profile at runtime.
|
2014-12-08 19:02:35 +01:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2018-03-23 23:11:06 +01:00
|
|
|
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
|
|
#include "llvm/ADT/StringRef.h"
|
2014-12-08 19:02:35 +01:00
|
|
|
#include "llvm/ADT/Triple.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/ADT/Twine.h"
|
2019-02-27 18:24:33 +01:00
|
|
|
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
|
|
|
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
2017-06-25 02:26:43 +02:00
|
|
|
#include "llvm/Analysis/LoopInfo.h"
|
2016-11-21 12:57:19 +01:00
|
|
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/IR/Attributes.h"
|
|
|
|
#include "llvm/IR/BasicBlock.h"
|
|
|
|
#include "llvm/IR/Constant.h"
|
|
|
|
#include "llvm/IR/Constants.h"
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
2017-06-25 02:26:43 +02:00
|
|
|
#include "llvm/IR/Dominators.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/IR/Function.h"
|
|
|
|
#include "llvm/IR/GlobalValue.h"
|
|
|
|
#include "llvm/IR/GlobalVariable.h"
|
2017-06-06 13:49:48 +02:00
|
|
|
#include "llvm/IR/IRBuilder.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/IR/Instruction.h"
|
|
|
|
#include "llvm/IR/Instructions.h"
|
2014-12-08 19:02:35 +01:00
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
|
|
#include "llvm/IR/Module.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/IR/Type.h"
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-13 22:15:01 +01:00
|
|
|
#include "llvm/InitializePasses.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/Pass.h"
|
2015-11-18 19:14:55 +01:00
|
|
|
#include "llvm/ProfileData/InstrProf.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/Support/Casting.h"
|
|
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
#include "llvm/Support/Error.h"
|
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2017-06-25 02:26:43 +02:00
|
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
2014-12-08 19:02:35 +01:00
|
|
|
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
2017-06-25 02:26:43 +02:00
|
|
|
#include "llvm/Transforms/Utils/SSAUpdater.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include <algorithm>
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstddef>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <string>
|
2014-12-08 19:02:35 +01:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "instrprof"
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2017-01-11 21:19:41 +01:00
|
|
|
// Command-line knobs for the instrprof lowering pass. These are file-local
// (anonymous namespace) and are consulted by the lowering and counter
// promotion logic below.

// Rename a comdat function's counter variable using its CFG hash so that
// functions with the same name but different control flow get distinct
// counters.
cl::opt<bool> DoHashBasedCounterSplit(
    "hash-based-counter-split",
    cl::desc("Rename counter variable of a comdat function based on cfg hash"),
    cl::init(true));

// Make counter addresses relocatable at runtime (see
// isRuntimeCounterRelocationEnabled() for the platform defaults).
cl::opt<bool> RuntimeCounterRelocation(
    "runtime-counter-relocation",
    cl::desc("Enable relocating counters at runtime."),
    cl::init(false));

// Statically allocate value-profiling counter storage instead of allocating
// it at runtime.
cl::opt<bool> ValueProfileStaticAlloc(
    "vp-static-alloc",
    cl::desc("Do static counter allocation for value profiler"),
    cl::init(true));

// Average number of counters reserved per value profiling site.
cl::opt<double> NumCountersPerValueSite(
    "vp-counters-per-site",
    cl::desc("The average number of profile counters allocated "
             "per value profiling site."),
    // This is set to a very small value because in real programs, only
    // a very small percentage of value sites have non-zero targets, e.g, 1/30.
    // For those sites with non-zero profile, the average number of targets
    // is usually smaller than 2.
    cl::init(1.0));

// Force every counter update to be atomic. Intended for testing only; the
// atomic RMW traffic is expensive for production use.
cl::opt<bool> AtomicCounterUpdateAll(
    "instrprof-atomic-counter-update-all", cl::ZeroOrMore,
    cl::desc("Make all profile counter updates atomic (for testing only)"),
    cl::init(false));

// Use atomic fetch-add only for counters that were promoted out of loops
// (see PGOCounterPromoterHelper below).
cl::opt<bool> AtomicCounterUpdatePromoted(
    "atomic-counter-update-promoted", cl::ZeroOrMore,
    cl::desc("Do counter update using atomic fetch add "
             " for promoted counters only"),
    cl::init(false));

// Use an atomic update for just the first counter of each function.
cl::opt<bool> AtomicFirstCounter(
    "atomic-first-counter", cl::ZeroOrMore,
    cl::desc("Use atomic fetch add for first counter in a function (usually "
             "the entry counter)"),
    cl::init(false));

// If the option is not specified, the default behavior about whether
// counter promotion is done depends on how instrumentation lowering
// pipeline is setup, i.e., the default value of true of this option
// does not mean the promotion will be done by default. Explicitly
// setting this option can override the default behavior.
cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
                                 cl::desc("Do counter register promotion"),
                                 cl::init(false));

// Cap on promotions within a single loop, to bound register pressure.
cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
    cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
    cl::desc("Max number counter promotions per loop to avoid"
             " increasing register pressure too much"));

// A debug option: global cap on promotions (-1 means unlimited).
cl::opt<int>
    MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
                       cl::desc("Max number of allowed counter promotions"));

// Loops with more exiting blocks than this are not considered for
// speculative counter promotion.
cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
    cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
    cl::desc("The max number of exiting blocks of a loop to allow "
             " speculative counter promotion"));

// Controls whether a counter may be promoted into an exit block that is
// itself inside another loop.
cl::opt<bool> SpeculativeCounterPromotionToLoop(
    cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
    cl::desc("When the option is false, if the target block is in a loop, "
             "the promotion will be disallowed unless the promoted counter "
             " update can be further/iteratively promoted into an acyclic "
             " region."));

// Allow promoted updates to be re-promoted out of enclosing loops.
cl::opt<bool> IterativeCounterPromotion(
    cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
    cl::desc("Allow counter promotion across the whole loop nest."));

// Suppress promotion when a loop exit block contains a `ret`, to avoid
// dumping incomplete profiles from long-running loops.
cl::opt<bool> SkipRetExitBlock(
    cl::ZeroOrMore, "skip-ret-exit-block", cl::init(true),
    cl::desc("Suppress counter promotion if exit blocks contain ret."));
|
|
|
|
|
2016-04-18 19:47:38 +02:00
|
|
|
/// Legacy pass manager wrapper around the InstrProfiling implementation.
/// All real work is delegated to the wrapped InstrProfiling object; this
/// class only adapts it to the ModulePass interface.
class InstrProfilingLegacyPass : public ModulePass {
  // The new-PM style implementation that performs the actual lowering.
  InstrProfiling InstrProf;

public:
  static char ID;

  InstrProfilingLegacyPass() : ModulePass(ID) {}
  /// \p IsCS selects the context-sensitive instrumentation variant that is
  /// forwarded to the InstrProfiling implementation.
  InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false)
      : ModulePass(ID), InstrProf(Options, IsCS) {
    initializeInstrProfilingLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override {
    return "Frontend instrumentation-based coverage lowering";
  }

  bool runOnModule(Module &M) override {
    // TLI is requested lazily, per function, via this callback.
    auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    };
    return InstrProf.run(M, GetTLI);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }
};
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2017-07-13 01:27:44 +02:00
|
|
|
///
/// A helper class to promote one counter RMW operation in the loop
/// into register update.
///
/// RMW update for the counter will be sinked out of the loop after
/// the transformation.
///
class PGOCounterPromoterHelper : public LoadAndStorePromoter {
public:
  /// \p L / \p S are the counter load/store pair being promoted, \p Init is
  /// the value available in preheader \p PH, and \p InsertPts[i] is the
  /// insertion point for the sunk update in \p ExitBlocks[i].
  PGOCounterPromoterHelper(
      Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
      BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
      ArrayRef<Instruction *> InsertPts,
      DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
      LoopInfo &LI)
      : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
        InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
    assert(isa<LoadInst>(L));
    assert(isa<StoreInst>(S));
    // Seed the SSA updater: the counter is zero-initialized (Init) on entry
    // from the preheader.
    SSA.AddAvailableValue(PH, Init);
  }

  /// Called by LoadAndStorePromoter before the in-loop load/store pair is
  /// deleted: materialize one counter update in every exit block.
  void doExtraRewritesBeforeFinalDeletion() override {
    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
      BasicBlock *ExitBlock = ExitBlocks[i];
      Instruction *InsertPos = InsertPts[i];
      // Get LiveIn value into the ExitBlock. If there are multiple
      // predecessors, the value is defined by a PHI node in this
      // block.
      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
      Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
      Type *Ty = LiveInValue->getType();
      IRBuilder<> Builder(InsertPos);
      if (AtomicCounterUpdatePromoted)
        // atomic update currently can only be promoted across the current
        // loop, not the whole loop nest.
        Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
                                MaybeAlign(),
                                AtomicOrdering::SequentiallyConsistent);
      else {
        LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
        auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
        auto *NewStore = Builder.CreateStore(NewVal, Addr);

        // Now update the parent loop's candidate list: the non-atomic
        // load/add/store we just emitted may itself be promotable out of
        // the enclosing loop on a later iteration.
        if (IterativeCounterPromotion) {
          auto *TargetLoop = LI.getLoopFor(ExitBlock);
          if (TargetLoop)
            LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
        }
      }
    }
  }

private:
  Instruction *Store;                     // The in-loop counter store.
  ArrayRef<BasicBlock *> ExitBlocks;      // Loop exits needing sunk updates.
  ArrayRef<Instruction *> InsertPts;      // One insertion point per exit.
  // Shared map feeding iterative (cross-loop-nest) promotion.
  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
  LoopInfo &LI;
};
|
|
|
|
|
|
|
|
/// A helper class to do register promotion for all profile counter
/// updates in a loop.
///
class PGOCounterPromoter {
public:
  /// Gather the exit blocks / insertion points of \p CurLoop up front; if
  /// the loop cannot have counters promoted, ExitBlocks stays empty and
  /// run() will bail out immediately.
  PGOCounterPromoter(
      DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
      Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
      : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
        LI(LI), BFI(BFI) {

    // Skip collection of ExitBlocks and InsertPts for loops that will not be
    // able to have counters promoted.
    SmallVector<BasicBlock *, 8> LoopExitBlocks;
    SmallPtrSet<BasicBlock *, 8> BlockSet;

    L.getExitBlocks(LoopExitBlocks);
    if (!isPromotionPossible(&L, LoopExitBlocks))
      return;

    // Deduplicate exit blocks (getExitBlocks may list one block per exiting
    // edge) and record a valid insertion point in each.
    for (BasicBlock *ExitBlock : LoopExitBlocks) {
      if (BlockSet.insert(ExitBlock).second) {
        ExitBlocks.push_back(ExitBlock);
        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
      }
    }
  }

  /// Promote as many counter load/store pairs of this loop as the caps
  /// allow. \p NumPromoted accumulates the global promotion count used by
  /// the -max-counter-promotions limit. Returns true if anything changed.
  bool run(int64_t *NumPromoted) {
    // Skip 'infinite' loops:
    if (ExitBlocks.size() == 0)
      return false;

    // Skip if any of the ExitBlocks contains a ret instruction.
    // This is to prevent dumping of incomplete profile -- if the
    // the loop is a long running loop and dump is called in the middle
    // of the loop, the result profile is incomplete.
    // FIXME: add other heuristics to detect long running loops.
    if (SkipRetExitBlock) {
      for (auto BB : ExitBlocks)
        if (isa<ReturnInst>(BB->getTerminator()))
          return false;
    }

    unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
    if (MaxProm == 0)
      return false;

    unsigned Promoted = 0;
    for (auto &Cand : LoopToCandidates[&L]) {

      SmallVector<PHINode *, 4> NewPHIs;
      SSAUpdater SSA(&NewPHIs);
      Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);

      // If BFI is set, we will use it to guide the promotions.
      if (BFI) {
        auto *BB = Cand.first->getParent();
        auto InstrCount = BFI->getBlockProfileCount(BB);
        if (!InstrCount)
          continue;
        auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
        // If the average loop trip count is not greater than 1.5, we skip
        // promotion.
        if (PreheaderCount &&
            (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2))
          continue;
      }

      PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
                                        L.getLoopPreheader(), ExitBlocks,
                                        InsertPts, LoopToCandidates, LI);
      Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
      Promoted++;
      // NOTE(review): when the per-loop cap is hit we break *before*
      // bumping *NumPromoted, so the final promotion of a capped loop is
      // not counted against the global limit — looks intentional, but
      // confirm before relying on exact counts.
      if (Promoted >= MaxProm)
        break;

      (*NumPromoted)++;

      if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
        break;
    }

    LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
                      << L.getLoopDepth() << ")\n");
    return Promoted != 0;
  }

private:
  /// Whether promotion out of this loop counts as "speculative" and, if so,
  /// whether the number of exiting blocks is small enough to allow it.
  bool allowSpeculativeCounterPromotion(Loop *LP) {
    SmallVector<BasicBlock *, 8> ExitingBlocks;
    L.getExitingBlocks(ExitingBlocks);
    // Not considered speculative.
    if (ExitingBlocks.size() == 1)
      return true;
    if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
      return false;
    return true;
  }

  // Check whether the loop satisfies the basic conditions needed to perform
  // Counter Promotions.
  bool isPromotionPossible(Loop *LP,
                           const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
    // We can't insert into a catchswitch.
    if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
          return isa<CatchSwitchInst>(Exit->getTerminator());
        }))
      return false;

    if (!LP->hasDedicatedExits())
      return false;

    BasicBlock *PH = LP->getLoopPreheader();
    if (!PH)
      return false;

    return true;
  }

  // Returns the max number of Counter Promotions for LP.
  unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
    SmallVector<BasicBlock *, 8> LoopExitBlocks;
    LP->getExitBlocks(LoopExitBlocks);
    if (!isPromotionPossible(LP, LoopExitBlocks))
      return 0;

    SmallVector<BasicBlock *, 8> ExitingBlocks;
    LP->getExitingBlocks(ExitingBlocks);

    // If BFI is set, we do more aggressive promotions based on BFI.
    if (BFI)
      return (unsigned)-1;

    // Not considered speculative.
    if (ExitingBlocks.size() == 1)
      return MaxNumOfPromotionsPerLoop;

    if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
      return 0;

    // Whether the target block is in a loop does not matter:
    if (SpeculativeCounterPromotionToLoop)
      return MaxNumOfPromotionsPerLoop;

    // Now check the target block: recurse into each exit block's loop and
    // leave room for the candidates already pending there.
    unsigned MaxProm = MaxNumOfPromotionsPerLoop;
    for (auto *TargetBlock : LoopExitBlocks) {
      auto *TargetLoop = LI.getLoopFor(TargetBlock);
      if (!TargetLoop)
        continue;
      unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
      unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
      MaxProm =
          std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
                                PendingCandsInTarget);
    }
    return MaxProm;
  }

  // Per-loop promotion candidates, shared across the whole loop nest.
  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
  SmallVector<BasicBlock *, 8> ExitBlocks;   // Deduplicated loop exits.
  SmallVector<Instruction *, 8> InsertPts;   // Parallel to ExitBlocks.
  Loop &L;
  LoopInfo &LI;
  BlockFrequencyInfo *BFI;                   // May be null (no BFI guidance).
};
|
|
|
|
|
2020-08-03 19:35:47 +02:00
|
|
|
/// Which runtime value-profiling entry point a lowered
/// instrprof_value_profile intrinsic should call.
enum class ValueProfilingCallType {
  // Individual values are tracked. Currently used for indirect call target
  // profiling.
  Default,

  // MemOp: the memop size value profiling.
  MemOp
};
|
|
|
|
|
2017-01-18 01:57:48 +01:00
|
|
|
} // end anonymous namespace
|
2015-02-11 03:52:44 +01:00
|
|
|
|
2016-08-09 02:28:38 +02:00
|
|
|
/// New pass manager entry point: wire up per-function TLI access through
/// the module proxy, then delegate to the shared run(Module&, GetTLI)
/// implementation.
PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
  auto &FAM =
      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
    return FAM.getResult<TargetLibraryAnalysis>(F);
  };

  // Nothing lowered => all analyses remain valid.
  const bool Changed = run(M, GetTLI);
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
|
2016-02-08 19:13:49 +01:00
|
|
|
|
2016-04-18 19:47:38 +02:00
|
|
|
char InstrProfilingLegacyPass::ID = 0;

// Register the legacy pass and declare its dependency on the
// TargetLibraryInfo wrapper analysis.
INITIALIZE_PASS_BEGIN(
    InstrProfilingLegacyPass, "instrprof",
    "Frontend instrumentation-based coverage lowering.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    InstrProfilingLegacyPass, "instrprof",
    "Frontend instrumentation-based coverage lowering.", false, false)
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2016-06-21 04:39:08 +02:00
|
|
|
/// Factory for the legacy-PM instrprof lowering pass. \p IsCS is forwarded
/// to the InstrProfiling implementation (context-sensitive variant flag).
ModulePass *
llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
                                     bool IsCS) {
  return new InstrProfilingLegacyPass(Options, IsCS);
}
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2016-09-18 20:34:07 +02:00
|
|
|
/// Return \p Instr as an increment intrinsic, or null if it is neither an
/// instrprof_increment nor an instrprof_increment_step call. The step
/// variant is a subclass, so it is checked first.
static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
  if (auto *StepInc = dyn_cast<InstrProfIncrementInstStep>(Instr))
    return StepInc;
  return dyn_cast<InstrProfIncrementInst>(Instr);
}
|
|
|
|
|
2017-06-25 02:26:43 +02:00
|
|
|
/// Lower every instrprof_increment{,_step} and instrprof_value_profile
/// intrinsic in \p F, then run counter load/store promotion over the
/// candidates collected during lowering. Returns true if anything changed.
bool InstrProfiling::lowerIntrinsics(Function *F) {
  bool Changed = false;
  PromotionCandidates.clear();
  for (BasicBlock &BB : *F) {
    auto It = BB.begin();
    while (It != BB.end()) {
      // Advance the iterator before lowering, because lowering erases the
      // intrinsic instruction it handles.
      auto CurIt = It++;
      if (InstrProfIncrementInst *Inc = castToIncrementInst(&*CurIt)) {
        lowerIncrement(Inc);
        Changed = true;
      } else if (auto *ValueProf = dyn_cast<InstrProfValueProfileInst>(CurIt)) {
        lowerValueProfileInst(ValueProf);
        Changed = true;
      }
    }
  }

  if (!Changed)
    return false;

  promoteCounterLoadStores(F);
  return true;
}
|
|
|
|
|
2019-10-04 22:29:56 +02:00
|
|
|
bool InstrProfiling::isRuntimeCounterRelocationEnabled() const {
|
2021-07-08 22:44:05 +02:00
|
|
|
// Mach-O don't support weak external references.
|
|
|
|
if (TT.isOSBinFormatMachO())
|
|
|
|
return false;
|
|
|
|
|
2019-10-04 22:29:56 +02:00
|
|
|
if (RuntimeCounterRelocation.getNumOccurrences() > 0)
|
|
|
|
return RuntimeCounterRelocation;
|
|
|
|
|
2021-07-08 22:44:05 +02:00
|
|
|
// Fuchsia uses runtime counter relocation by default.
|
2019-10-04 22:29:56 +02:00
|
|
|
return TT.isOSFuchsia();
|
|
|
|
}
|
|
|
|
|
2017-06-25 02:26:43 +02:00
|
|
|
bool InstrProfiling::isCounterPromotionEnabled() const {
|
|
|
|
if (DoCounterPromotion.getNumOccurrences() > 0)
|
|
|
|
return DoCounterPromotion;
|
|
|
|
|
|
|
|
return Options.DoCounterPromotion;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Promote the counter load/store pairs recorded in PromotionCandidates
/// (populated by lowerIncrement) out of the loops that contain them.
void InstrProfiling::promoteCounterLoadStores(Function *F) {
  if (!isCounterPromotionEnabled())
    return;

  // Fresh analyses for this function; lowering has already mutated the IR.
  DominatorTree DT(*F);
  LoopInfo LI(DT);
  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;

  // BFI (built on a locally-computed BPI) is optional and only used to
  // guide promotion decisions when the options request it.
  std::unique_ptr<BlockFrequencyInfo> BFI;
  if (Options.UseBFIInPromotion) {
    std::unique_ptr<BranchProbabilityInfo> BPI;
    BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
    BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
  }

  // Bucket each candidate pair by its innermost containing loop; pairs not
  // inside any loop have nothing to be promoted out of.
  for (const auto &LoadStore : PromotionCandidates) {
    auto *CounterLoad = LoadStore.first;
    auto *CounterStore = LoadStore.second;
    BasicBlock *BB = CounterLoad->getParent();
    Loop *ParentLoop = LI.getLoopFor(BB);
    if (!ParentLoop)
      continue;
    LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
  }

  SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();

  // Do a post-order traversal of the loops so that counter updates can be
  // iteratively hoisted outside the loop nest.
  for (auto *Loop : llvm::reverse(Loops)) {
    PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
    Promoter.run(&TotalCountersPromoted);
  }
}
|
|
|
|
|
2018-01-27 00:54:24 +01:00
|
|
|
/// Check if the module contains uses of any profiling intrinsics.
|
|
|
|
static bool containsProfilingIntrinsics(Module &M) {
|
|
|
|
if (auto *F = M.getFunction(
|
|
|
|
Intrinsic::getName(llvm::Intrinsic::instrprof_increment)))
|
2018-01-27 01:01:04 +01:00
|
|
|
if (!F->use_empty())
|
|
|
|
return true;
|
2018-01-27 00:54:24 +01:00
|
|
|
if (auto *F = M.getFunction(
|
|
|
|
Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step)))
|
2018-01-27 01:01:04 +01:00
|
|
|
if (!F->use_empty())
|
|
|
|
return true;
|
2018-01-27 00:54:24 +01:00
|
|
|
if (auto *F = M.getFunction(
|
|
|
|
Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile)))
|
2018-01-27 01:01:04 +01:00
|
|
|
if (!F->use_empty())
|
|
|
|
return true;
|
2018-01-27 00:54:24 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
Change TargetLibraryInfo analysis passes to always require Function
Summary:
This is the first change to enable the TLI to be built per-function so
that -fno-builtin* handling can be migrated to use function attributes.
See discussion on D61634 for background. This is an enabler for fixing
handling of these options for LTO, for example.
This change should not affect behavior, as the provided function is not
yet used to build a specifically per-function TLI, but rather enables
that migration.
Most of the changes were very mechanical, e.g. passing a Function to the
legacy analysis pass's getTLI interface, or in Module level cases,
adding a callback. This is similar to the way the per-function TTI
analysis works.
There was one place where we were looking for builtins but not in the
context of a specific function. See FindCXAAtExit in
lib/Transforms/IPO/GlobalOpt.cpp. I'm somewhat concerned my workaround
could provide the wrong behavior in some corner cases. Suggestions
welcome.
Reviewers: chandlerc, hfinkel
Subscribers: arsenm, dschuff, jvesely, nhaehnle, mehdi_amini, javed.absar, sbc100, jgravelle-google, eraman, aheejin, steven_wu, george.burgess.iv, dexonsmith, jfb, asbirlea, gchatelet, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66428
llvm-svn: 371284
2019-09-07 05:09:36 +02:00
|
|
|
/// Lower all instrprof intrinsics in \p M and emit the supporting data
/// structures (name table, value nodes, registration, uses, initialization).
/// Returns true if the module was changed.
bool InstrProfiling::run(
    Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
  // Reset all per-module state; the pass object may be reused across modules.
  this->M = &M;
  this->GetTLI = std::move(GetTLI);
  NamesVar = nullptr;
  NamesSize = 0;
  ProfileDataMap.clear();
  CompilerUsedVars.clear();
  UsedVars.clear();
  TT = Triple(M.getTargetTriple());

  // Emit the runtime hook even if no counters are present.
  bool MadeChange = emitRuntimeHook();

  // Improve compile time by avoiding linear scans when there is no work.
  GlobalVariable *CoverageNamesVar =
      M.getNamedGlobal(getCoverageUnusedNamesVarName());
  if (!containsProfilingIntrinsics(M) && !CoverageNamesVar)
    return MadeChange;

  // We did not know how many value sites there would be inside
  // the instrumented function. This is counting the number of instrumented
  // target value sites to enter it as field in the profile data variable.
  for (Function &F : M) {
    InstrProfIncrementInst *FirstProfIncInst = nullptr;
    for (BasicBlock &BB : F)
      for (auto I = BB.begin(), E = BB.end(); I != E; I++)
        if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
          computeNumValueSiteCounts(Ind);
        else if (FirstProfIncInst == nullptr)
          // Remember one increment per function; it anchors the per-function
          // profile data variable created below.
          FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);

    // Value profiling intrinsic lowering requires per-function profile data
    // variable to be created first.
    if (FirstProfIncInst != nullptr)
      static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
  }

  // Now that site counts and data variables exist, lower the intrinsics.
  for (Function &F : M)
    MadeChange |= lowerIntrinsics(&F);

  if (CoverageNamesVar) {
    lowerCoverageData(CoverageNamesVar);
    MadeChange = true;
  }

  if (!MadeChange)
    return false;

  // Emit the module-level support structures only when something was lowered.
  emitVNodes();
  emitNameData();
  emitRegistration();
  emitUses();
  emitInitialization();
  return true;
}
|
|
|
|
|
2020-08-03 19:35:47 +02:00
|
|
|
/// Declare (or find) the value-profiling runtime entry point in \p M and
/// return a callee for it. \p CallType selects between the default
/// value-profiling function and the memop-size variant.
static FunctionCallee getOrInsertValueProfilingCall(
    Module &M, const TargetLibraryInfo &TLI,
    ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
  // NOTE: Ctx is referenced by the VALUE_PROF_FUNC_PARAM macro expansion
  // below (via InstrProfData.inc); do not remove it as "unused".
  LLVMContext &Ctx = M.getContext();
  auto *ReturnTy = Type::getVoidTy(M.getContext());

  // Match the runtime's zero/sign-extension convention for the i32 param.
  AttributeList AL;
  if (auto AK = TLI.getExtAttrForI32Param(false))
    AL = AL.addParamAttribute(M.getContext(), 2, AK);

  assert((CallType == ValueProfilingCallType::Default ||
          CallType == ValueProfilingCallType::MemOp) &&
         "Must be Default or MemOp");
  // Parameter list is generated from the shared profiling data description.
  Type *ParamTypes[] = {
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *ValueProfilingCallTy =
      FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
  StringRef FuncName = CallType == ValueProfilingCallType::Default
                           ? getInstrProfValueProfFuncName()
                           : getInstrProfValueProfMemOpFuncName();
  return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL);
}
|
|
|
|
|
|
|
|
/// Record how many value-profiling sites of each kind a function has, keyed
/// by its name variable. Site counts are the max site index seen plus one.
void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
  GlobalVariable *NameVar = Ind->getName();
  uint64_t Kind = Ind->getValueKind()->getZExtValue();
  uint64_t SiteIdx = Ind->getIndex()->getZExtValue();

  auto Lookup = ProfileDataMap.find(NameVar);
  if (Lookup != ProfileDataMap.end()) {
    // Grow the recorded count only when this site index extends it.
    if (Lookup->second.NumValueSites[Kind] <= SiteIdx)
      Lookup->second.NumValueSites[Kind] = SiteIdx + 1;
    return;
  }

  // First site seen for this function: create a fresh record.
  PerFunctionProfileData PD;
  PD.NumValueSites[Kind] = SiteIdx + 1;
  ProfileDataMap[NameVar] = PD;
}
|
|
|
|
|
|
|
|
/// Replace a value-profiling intrinsic with a call into the profiling
/// runtime, using the memop-size entry point for IPVK_MemOPSize sites.
/// Fixes: corrected a typo in the assert message, and hoisted the call
/// arguments and value-kind computation that were duplicated in both
/// branches of the MemOp check.
void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
  GlobalVariable *Name = Ind->getName();
  auto It = ProfileDataMap.find(Name);
  assert(It != ProfileDataMap.end() && It->second.DataVar &&
         "value profiling detected in function with no counter increment");

  GlobalVariable *DataVar = It->second.DataVar;
  uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
  uint64_t Index = Ind->getIndex()->getZExtValue();
  // Flatten (kind, index) into a single running site index: sites of earlier
  // kinds precede this one in the per-function site array.
  for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
    Index += It->second.NumValueSites[Kind];

  IRBuilder<> Builder(Ind);
  bool IsMemOpSize = ValueKind == llvm::InstrProfValueKind::IPVK_MemOPSize;

  // To support value profiling calls within Windows exception handlers, funclet
  // information contained within operand bundles needs to be copied over to
  // the library call. This is required for the IR to be processed by the
  // WinEHPrepare pass.
  SmallVector<OperandBundleDef, 1> OpBundles;
  Ind->getOperandBundlesAsDefs(OpBundles);

  // Both entry points share the same (target value, data, site index)
  // argument list; only the callee differs.
  Value *Args[3] = {Ind->getTargetValue(),
                    Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
                    Builder.getInt32(Index)};
  auto *TLI = &GetTLI(*Ind->getFunction());
  ValueProfilingCallType CallType = IsMemOpSize
                                        ? ValueProfilingCallType::MemOp
                                        : ValueProfilingCallType::Default;
  CallInst *Call = Builder.CreateCall(
      getOrInsertValueProfilingCall(*M, *TLI, CallType), Args, OpBundles);

  // Match the runtime's zero/sign-extension convention for the i32 param.
  if (auto AK = TLI->getExtAttrForI32Param(false))
    Call->addParamAttr(2, AK);
  Ind->replaceAllUsesWith(Call);
  Ind->eraseFromParent();
}
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
/// Lower an instrprof.increment(.step) intrinsic into a load/add/store (or an
/// atomic add) on the function's counter array. When runtime counter
/// relocation is enabled, the counter address is adjusted by a per-module
/// bias loaded once at function entry.
/// Fixes: removed a redundant inner re-declaration of Int64Ty that shadowed
/// the identical outer declaration, and renamed the inner IRBuilder so it no
/// longer shadows the outer one.
void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
  GlobalVariable *Counters = getOrCreateRegionCounters(Inc);

  IRBuilder<> Builder(Inc);
  uint64_t Index = Inc->getIndex()->getZExtValue();
  Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(),
                                                   Counters, 0, Index);

  if (isRuntimeCounterRelocationEnabled()) {
    Type *Int64Ty = Type::getInt64Ty(M->getContext());
    Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext());
    Function *Fn = Inc->getParent()->getParent();
    Instruction &I = Fn->getEntryBlock().front();
    // NOTE(review): this assumes that any load at the entry-block front is a
    // previously emitted bias load for this function; emit one if absent.
    LoadInst *LI = dyn_cast<LoadInst>(&I);
    if (!LI) {
      IRBuilder<> EntryBuilder(&I);
      GlobalVariable *Bias =
          M->getGlobalVariable(getInstrProfCounterBiasVarName());
      if (!Bias) {
        // Compiler must define this variable when runtime counter relocation
        // is being used. Runtime has a weak external reference that is used
        // to check whether that's the case or not.
        Bias = new GlobalVariable(*M, Int64Ty, false,
                                  GlobalValue::LinkOnceODRLinkage,
                                  Constant::getNullValue(Int64Ty),
                                  getInstrProfCounterBiasVarName());
        Bias->setVisibility(GlobalVariable::HiddenVisibility);
        // A definition that's weak (linkonce_odr) without being in a COMDAT
        // section wouldn't lead to link errors, but it would lead to a dead
        // data word from every TU but one. Putting it in COMDAT ensures there
        // will be exactly one data slot in the link.
        if (TT.supportsCOMDAT())
          Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
      }
      LI = EntryBuilder.CreateLoad(Int64Ty, Bias);
    }
    // Rebase the counter address by the runtime-provided bias.
    auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
    Addr = Builder.CreateIntToPtr(Add, Int64PtrTy);
  }

  if (Options.Atomic || AtomicCounterUpdateAll ||
      (Index == 0 && AtomicFirstCounter)) {
    // Monotonic is sufficient: counters need atomicity, not ordering.
    Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
                            MaybeAlign(), AtomicOrdering::Monotonic);
  } else {
    Value *IncStep = Inc->getStep();
    Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
    auto *Count = Builder.CreateAdd(Load, Inc->getStep());
    auto *Store = Builder.CreateStore(Count, Addr);
    // Remember the load/store pair so counter promotion can hoist it later.
    if (isCounterPromotionEnabled())
      PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
  }
  Inc->eraseFromParent();
}
|
|
|
|
|
2016-01-07 21:05:49 +01:00
|
|
|
/// Collect the function-name globals referenced by the coverage names
/// variable, privatize them, and delete the now-unneeded variable.
void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
  auto *NameArray = cast<ConstantArray>(CoverageNamesVar->getInitializer());
  for (unsigned Idx = 0, Count = NameArray->getNumOperands(); Idx < Count;
       ++Idx) {
    Constant *Entry = NameArray->getOperand(Idx);
    Value *Stripped = Entry->stripPointerCasts();
    assert(isa<GlobalVariable>(Stripped) &&
           "Missing reference to function name");
    auto *NameVar = cast<GlobalVariable>(Stripped);

    // Name variables are only needed at profile-emission time; privatize
    // them and queue them for the name data section.
    NameVar->setLinkage(GlobalValue::PrivateLinkage);
    ReferencedNames.push_back(NameVar);
    Entry->dropAllReferences();
  }
  CoverageNamesVar->eraseFromParent();
}
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
/// Get the name of a profiling variable for a particular function.
|
2015-10-22 22:32:12 +02:00
|
|
|
/// Get the name of a profiling variable for a particular function: the
/// given prefix plus the function name, optionally suffixed with the
/// function hash when hash-based counter splitting applies.
static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
  // Strip the name-variable prefix to recover the raw function name.
  StringRef VarPrefix = getInstrProfNameVarPrefix();
  StringRef FuncName = Inc->getName()->getName().substr(VarPrefix.size());
  Function *Fn = Inc->getParent()->getParent();
  Module *Mod = Fn->getParent();
  if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(Mod) ||
      !canRenameComdatFunc(*Fn))
    return (Prefix + FuncName).str();

  uint64_t FuncHash = Inc->getHash()->getZExtValue();
  SmallVector<char, 24> Suffix;
  // Avoid doubling the hash when the name already carries ".<hash>".
  if (FuncName.endswith((Twine(".") + Twine(FuncHash)).toStringRef(Suffix)))
    return (Prefix + FuncName).str();
  return (Prefix + FuncName + "." + Twine(FuncHash)).str();
}
|
|
|
|
|
2021-06-04 22:27:56 +02:00
|
|
|
/// Read an integer module flag, treating an absent (or non-constant) flag
/// as zero.
static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
  auto *FlagMD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag));
  // If the flag is a ConstantAsMetadata, it should be an integer
  // representable in 64 bits.
  return FlagMD ? cast<ConstantInt>(FlagMD->getValue())->getZExtValue() : 0;
}
|
|
|
|
|
2021-06-04 22:27:56 +02:00
|
|
|
static bool enablesValueProfiling(const Module &M) {
|
|
|
|
return isIRPGOFlagSet(&M) ||
|
|
|
|
getIntModuleFlagOrZero(M, "EnableValueProfiling") != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Conservatively returns true if data variables may be referenced by code.
static bool profDataReferencedByCode(const Module &M) {
  // Value profiling is the only feature here that emits code referencing the
  // per-function data variables, so this currently reduces to that check.
  return enablesValueProfiling(M);
}
|
|
|
|
|
2015-11-18 19:14:55 +01:00
|
|
|
/// Decide whether the profile data for \p F should record the function's
/// address (used to map runtime pointers back to names for value profiling).
static inline bool shouldRecordFunctionAddr(Function *F) {
  // Only record function addresses if IR PGO is enabled or if clang value
  // profiling is enabled. Recording function addresses greatly increases
  // object file size, because it prevents the inliner from deleting functions
  // that have been inlined everywhere.
  if (!profDataReferencedByCode(*F->getParent()))
    return false;

  // Strong-linkage functions can always have their address recorded.
  bool IsAvailExternally = F->hasAvailableExternallyLinkage();
  if (!(F->hasLinkOnceLinkage() || F->hasLocalLinkage() || IsAvailExternally))
    return true;

  // A function marked 'alwaysinline' with available_externally linkage can't
  // have its address taken. Doing so would create an undefined external ref
  // to the function, which would fail to link.
  if (IsAvailExternally && F->hasFnAttribute(Attribute::AlwaysInline))
    return false;

  // Prohibit function address recording if the function is both internal and
  // COMDAT. This avoids the profile data variable referencing internal
  // symbols in COMDAT.
  if (F->hasLocalLinkage() && F->hasComdat())
    return false;

  // Check uses of this function for other than direct calls or invokes to it.
  // Inline virtual functions have linkeOnceODR linkage. When a key method
  // exists, the vtable will only be emitted in the TU where the key method
  // is defined. In a TU where vtable is not available, the function won't
  // be 'addresstaken'. If its address is not recorded here, the profile data
  // with missing address may be picked by the linker leading to missing
  // indirect call target info.
  return F->hasAddressTaken() || F->hasLinkOnceLinkage();
}
|
|
|
|
|
2019-02-07 19:16:22 +01:00
|
|
|
static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
|
2016-05-22 00:55:34 +02:00
|
|
|
// Don't do this for Darwin. compiler-rt uses linker magic.
|
2019-02-07 19:16:22 +01:00
|
|
|
if (TT.isOSDarwin())
|
2016-05-22 00:55:34 +02:00
|
|
|
return false;
|
|
|
|
// Use linker script magic to get data/cnts/name start/end.
|
2019-02-07 19:16:22 +01:00
|
|
|
if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
|
2019-06-20 23:27:06 +02:00
|
|
|
TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() ||
|
|
|
|
TT.isOSWindows())
|
2016-05-22 00:55:34 +02:00
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
GlobalVariable *
|
|
|
|
InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
|
2015-11-05 01:47:26 +01:00
|
|
|
GlobalVariable *NamePtr = Inc->getName();
|
2015-11-18 19:14:55 +01:00
|
|
|
auto It = ProfileDataMap.find(NamePtr);
|
|
|
|
PerFunctionProfileData PD;
|
|
|
|
if (It != ProfileDataMap.end()) {
|
|
|
|
if (It->second.RegionCounters)
|
|
|
|
return It->second.RegionCounters;
|
|
|
|
PD = It->second;
|
|
|
|
}
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2021-06-03 01:49:54 +02:00
|
|
|
// Match the linkage and visibility of the name global.
|
2015-05-27 21:34:01 +02:00
|
|
|
Function *Fn = Inc->getParent()->getParent();
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
|
|
|
|
GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
|
|
|
|
|
|
|
|
// Move the name variable to the right section. Place them in a COMDAT group
|
|
|
|
// if the associated function is a COMDAT. This will make sure that only one
|
[InstrProf] Use separate comdat group for data and counters
Summary:
I hadn't realized that instrumentation runs before inlining, so we can't
use the function as the comdat group. Doing so can create relocations
against discarded sections when references to discarded __profc_
variables are inlined into functions outside the function's comdat
group.
In the future, perhaps we should consider standardizing the comdat group
names that ELF and COFF use. It will save object file size, since
__profv_$sym won't appear in the symbol table again.
Reviewers: xur, vsk
Subscribers: eraman, hiraditya, cfe-commits, #sanitizers, llvm-commits
Tags: #clang, #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D58737
llvm-svn: 355044
2019-02-28 00:38:44 +01:00
|
|
|
// copy of counters of the COMDAT function will be emitted after linking. Keep
|
|
|
|
// in mind that this pass may run before the inliner, so we need to create a
|
|
|
|
// new comdat group for the counters and profiling data. If we use the comdat
|
|
|
|
// of the parent function, that will result in relocations against discarded
|
|
|
|
// sections.
|
2021-06-03 01:49:54 +02:00
|
|
|
//
|
2021-06-04 22:27:56 +02:00
|
|
|
// If the data variable is referenced by code, counters and data have to be
|
|
|
|
// in different comdats for COFF because the Visual C++ linker will report
|
|
|
|
// duplicate symbol errors if there are multiple external symbols with the
|
|
|
|
// same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE.
|
2021-06-03 01:49:54 +02:00
|
|
|
//
|
|
|
|
// For ELF, when not using COMDAT, put counters, data and values into a
|
2021-07-20 21:47:10 +02:00
|
|
|
// nodeduplicate COMDAT which is lowered to a zero-flag section group. This
|
2021-06-03 01:49:54 +02:00
|
|
|
// allows -z start-stop-gc to discard the entire group when the function is
|
|
|
|
// discarded.
|
2021-06-04 22:27:56 +02:00
|
|
|
bool DataReferencedByCode = profDataReferencedByCode(*M);
|
2019-09-17 23:10:49 +02:00
|
|
|
bool NeedComdat = needsComdatForCounter(*Fn, *M);
|
2021-06-04 22:27:56 +02:00
|
|
|
std::string CntsVarName = getVarName(Inc, getInstrProfCountersVarPrefix());
|
2020-08-04 05:35:50 +02:00
|
|
|
std::string DataVarName = getVarName(Inc, getInstrProfDataVarPrefix());
|
2021-06-04 22:27:56 +02:00
|
|
|
auto MaybeSetComdat = [&](GlobalVariable *GV) {
|
2019-07-13 23:02:07 +02:00
|
|
|
bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
|
|
|
|
if (UseComdat) {
|
2021-06-04 22:27:56 +02:00
|
|
|
StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
|
|
|
|
? GV->getName()
|
|
|
|
: CntsVarName;
|
2019-07-13 23:02:07 +02:00
|
|
|
Comdat *C = M->getOrInsertComdat(GroupName);
|
|
|
|
if (!NeedComdat)
|
2021-07-20 21:47:10 +02:00
|
|
|
C->setSelectionKind(Comdat::NoDeduplicate);
|
2019-07-13 23:02:07 +02:00
|
|
|
GV->setComdat(C);
|
|
|
|
}
|
2019-09-17 23:10:49 +02:00
|
|
|
};
|
2014-12-08 19:02:35 +01:00
|
|
|
|
|
|
|
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
|
|
|
|
LLVMContext &Ctx = M->getContext();
|
|
|
|
ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
|
|
|
|
|
|
|
|
// Create the counters variable.
|
2015-11-05 01:47:26 +01:00
|
|
|
auto *CounterPtr =
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
new GlobalVariable(*M, CounterTy, false, Linkage,
|
2021-06-04 22:27:56 +02:00
|
|
|
Constant::getNullValue(CounterTy), CntsVarName);
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
CounterPtr->setVisibility(Visibility);
|
2017-04-15 02:09:57 +02:00
|
|
|
CounterPtr->setSection(
|
|
|
|
getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
|
2019-10-15 13:24:36 +02:00
|
|
|
CounterPtr->setAlignment(Align(8));
|
2019-09-17 23:10:49 +02:00
|
|
|
MaybeSetComdat(CounterPtr);
|
[PGO] Use linkonce_odr linkage for __profd_ variables in comdat groups
This fixes relocations against __profd_ symbols in discarded sections,
which is PR41380.
In general, instrumentation happens very early, and optimization and
inlining happens afterwards. The counters for a function are calculated
early, and after inlining, counters for an inlined function may be
widely referenced by other functions.
For C++ inline functions of all kinds (linkonce_odr &
available_externally mainly), instr profiling wants to deduplicate these
__profc_ and __profd_ globals. Otherwise the binary would be quite
large.
I made __profd_ and __profc_ comdat in r355044, but I chose to make
__profd_ internal. At the time, I was only dealing with coverage, and in
that case, none of the instrumentation needs to reference __profd_.
However, if you use PGO, then instrumentation passes add calls to
__llvm_profile_instrument_range which reference __profd_ globals. The
solution is to make these globals externally visible by using
linkonce_odr linkage for data as was done for counters.
This is safe because PGO adds a CFG hash to the names of the data and
counter globals, so if different TUs have different globals, they will
get different data and counter arrays.
Reviewers: xur, hans
Differential Revision: https://reviews.llvm.org/D67579
llvm-svn: 372020
2019-09-16 20:49:09 +02:00
|
|
|
CounterPtr->setLinkage(Linkage);
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2015-11-18 19:14:55 +01:00
|
|
|
auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
|
2016-05-22 00:55:34 +02:00
|
|
|
// Allocate statically the array of pointers to value profile nodes for
|
|
|
|
// the current function.
|
|
|
|
Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
|
2021-06-19 02:01:17 +02:00
|
|
|
uint64_t NS = 0;
|
|
|
|
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
|
|
|
|
NS += PD.NumValueSites[Kind];
|
|
|
|
if (NS > 0 && ValueProfileStaticAlloc &&
|
|
|
|
!needsRuntimeRegistrationOfSectionRange(TT)) {
|
|
|
|
ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
|
|
|
|
auto *ValuesVar = new GlobalVariable(
|
|
|
|
*M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
|
|
|
|
getVarName(Inc, getInstrProfValuesVarPrefix()));
|
|
|
|
ValuesVar->setVisibility(Visibility);
|
|
|
|
ValuesVar->setSection(
|
|
|
|
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
|
|
|
|
ValuesVar->setAlignment(Align(8));
|
|
|
|
MaybeSetComdat(ValuesVar);
|
|
|
|
ValuesPtrExpr =
|
|
|
|
ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
|
2016-05-22 00:55:34 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create data variable.
|
2015-11-18 19:14:55 +01:00
|
|
|
auto *Int16Ty = Type::getInt16Ty(Ctx);
|
2016-05-22 00:55:34 +02:00
|
|
|
auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
|
2015-11-05 01:47:26 +01:00
|
|
|
Type *DataTypes[] = {
|
2016-06-21 04:39:08 +02:00
|
|
|
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
|
|
|
|
#include "llvm/ProfileData/InstrProfData.inc"
|
2015-11-05 01:47:26 +01:00
|
|
|
};
|
2014-12-08 19:02:35 +01:00
|
|
|
auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
|
2015-11-05 01:47:26 +01:00
|
|
|
|
2016-06-21 04:39:08 +02:00
|
|
|
Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
|
|
|
|
? ConstantExpr::getBitCast(Fn, Int8PtrTy)
|
|
|
|
: ConstantPointerNull::get(Int8PtrTy);
|
2015-11-18 19:14:55 +01:00
|
|
|
|
2016-06-21 04:39:08 +02:00
|
|
|
Constant *Int16ArrayVals[IPVK_Last + 1];
|
2015-11-18 19:14:55 +01:00
|
|
|
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
|
|
|
|
Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
Constant *DataVals[] = {
|
2016-06-21 04:39:08 +02:00
|
|
|
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
|
|
|
|
#include "llvm/ProfileData/InstrProfData.inc"
|
2015-11-05 01:47:26 +01:00
|
|
|
};
|
2021-06-19 02:01:17 +02:00
|
|
|
// If the data variable is not referenced by code (if we don't emit
|
|
|
|
// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
|
|
|
|
// data variable live under linker GC, the data variable can be private. This
|
|
|
|
// optimization applies to ELF.
|
|
|
|
//
|
|
|
|
// On COFF, a comdat leader cannot be local so we require DataReferencedByCode
|
|
|
|
// to be false.
|
|
|
|
if (NS == 0 && (TT.isOSBinFormatELF() ||
|
|
|
|
(!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
|
2021-06-04 22:27:56 +02:00
|
|
|
Linkage = GlobalValue::PrivateLinkage;
|
|
|
|
Visibility = GlobalValue::DefaultVisibility;
|
|
|
|
}
|
2020-08-04 05:35:50 +02:00
|
|
|
auto *Data =
|
|
|
|
new GlobalVariable(*M, DataTy, false, Linkage,
|
|
|
|
ConstantStruct::get(DataTy, DataVals), DataVarName);
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
Data->setVisibility(Visibility);
|
2017-04-15 02:09:57 +02:00
|
|
|
Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
|
2019-10-15 13:24:36 +02:00
|
|
|
Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
|
2019-09-17 23:10:49 +02:00
|
|
|
MaybeSetComdat(Data);
|
[PGO] Use linkonce_odr linkage for __profd_ variables in comdat groups
This fixes relocations against __profd_ symbols in discarded sections,
which is PR41380.
In general, instrumentation happens very early, and optimization and
inlining happens afterwards. The counters for a function are calculated
early, and after inlining, counters for an inlined function may be
widely referenced by other functions.
For C++ inline functions of all kinds (linkonce_odr &
available_externally mainly), instr profiling wants to deduplicate these
__profc_ and __profd_ globals. Otherwise the binary would be quite
large.
I made __profd_ and __profc_ comdat in r355044, but I chose to make
__profd_ internal. At the time, I was only dealing with coverage, and in
that case, none of the instrumentation needs to reference __profd_.
However, if you use PGO, then instrumentation passes add calls to
__llvm_profile_instrument_range which reference __profd_ globals. The
solution is to make these globals externally visible by using
linkonce_odr linkage for data as was done for counters.
This is safe because PGO adds a CFG hash to the names of the data and
counter globals, so if different TUs have different globals, they will
get different data and counter arrays.
Reviewers: xur, hans
Differential Revision: https://reviews.llvm.org/D67579
llvm-svn: 372020
2019-09-16 20:49:09 +02:00
|
|
|
Data->setLinkage(Linkage);
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2015-11-18 19:14:55 +01:00
|
|
|
PD.RegionCounters = CounterPtr;
|
|
|
|
PD.DataVar = Data;
|
|
|
|
ProfileDataMap[NamePtr] = PD;
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
// Mark the data variable as used so that it isn't stripped out.
|
2021-03-03 20:32:24 +01:00
|
|
|
CompilerUsedVars.push_back(Data);
|
2016-02-08 19:13:49 +01:00
|
|
|
// Now that the linkage set by the FE has been passed to the data and counter
|
|
|
|
// variables, reset Name variable's linkage and visibility to private so that
|
|
|
|
// it can be removed later by the compiler.
|
|
|
|
NamePtr->setLinkage(GlobalValue::PrivateLinkage);
|
|
|
|
// Collect the referenced names to be used by emitNameData.
|
|
|
|
ReferencedNames.push_back(NamePtr);
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2015-11-05 01:47:26 +01:00
|
|
|
return CounterPtr;
|
2014-12-08 19:02:35 +01:00
|
|
|
}
|
|
|
|
|
2016-05-22 00:55:34 +02:00
|
|
|
// Statically allocate the array of value profile nodes (the __llvm_prf_vnds
// section) so the runtime can record value profile data without dynamic
// allocation. No-op unless static value profile allocation is enabled and
// the platform can discover section start/end without runtime registration.
void InstrProfiling::emitVNodes() {
  if (!ValueProfileStaticAlloc)
    return;

  // For now only support this on platforms that do
  // not require runtime registration to discover
  // named section start/end.
  if (needsRuntimeRegistrationOfSectionRange(TT))
    return;

  // Total number of value sites across all instrumented functions, summed
  // over every value profile kind.
  size_t TotalNS = 0;
  for (auto &PD : ProfileDataMap) {
    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
      TotalNS += PD.second.NumValueSites[Kind];
  }

  // Nothing to allocate if no function has any value sites.
  if (!TotalNS)
    return;

  uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
  // Heuristic for small programs with very few total value sites.
  // The default value of vp-counters-per-site is chosen based on
  // the observation that large apps usually have a low percentage
  // of value sites that actually have any profile data, and thus
  // the average number of counters per site is low. For small
  // apps with very few sites, this may not be true. Bump up the
  // number of counters in this case.
#define INSTR_PROF_MIN_VAL_COUNTS 10
  if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
    NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);

  auto &Ctx = M->getContext();
  // Build the node struct type from the field list in InstrProfData.inc so
  // the layout stays in sync with the runtime's definition.
  Type *VNodeTypes[] = {
#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes));

  // One module-private, zero-initialized array of nodes shared by all
  // functions; the runtime carves nodes out of it at profiling time.
  ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
  auto *VNodesVar = new GlobalVariable(
      *M, VNodesTy, false, GlobalValue::PrivateLinkage,
      Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
  VNodesVar->setSection(
      getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
  // VNodesVar is used by runtime but not referenced via relocation by other
  // sections. Conservatively make it linker retained.
  UsedVars.push_back(VNodesVar);
}
|
|
|
|
|
2016-02-08 19:13:49 +01:00
|
|
|
void InstrProfiling::emitNameData() {
|
|
|
|
std::string UncompressedData;
|
|
|
|
|
|
|
|
if (ReferencedNames.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
std::string CompressedNameStr;
|
2016-05-19 05:54:45 +02:00
|
|
|
if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
|
2019-10-21 20:48:38 +02:00
|
|
|
DoInstrProfNameCompression)) {
|
2017-01-18 01:57:48 +01:00
|
|
|
report_fatal_error(toString(std::move(E)), false);
|
2016-05-03 18:53:17 +02:00
|
|
|
}
|
2016-02-08 19:13:49 +01:00
|
|
|
|
|
|
|
auto &Ctx = M->getContext();
|
2017-01-18 01:57:48 +01:00
|
|
|
auto *NamesVal = ConstantDataArray::getString(
|
2016-02-08 19:13:49 +01:00
|
|
|
Ctx, StringRef(CompressedNameStr), false);
|
2017-01-18 01:57:48 +01:00
|
|
|
NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
|
|
|
|
GlobalValue::PrivateLinkage, NamesVal,
|
|
|
|
getInstrProfNamesVarName());
|
2016-02-08 19:13:49 +01:00
|
|
|
NamesSize = CompressedNameStr.size();
|
2017-04-15 02:09:57 +02:00
|
|
|
NamesVar->setSection(
|
|
|
|
getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
// On COFF, it's important to reduce the alignment down to 1 to prevent the
|
|
|
|
// linker from inserting padding before the start of the names section or
|
|
|
|
// between names entries.
|
[Alignment][NFC] Deprecate Align::None()
Summary:
This is a follow up on https://reviews.llvm.org/D71473#inline-647262.
There's a caveat here that `Align(1)` relies on the compiler understanding of `Log2_64` implementation to produce good code. One could use `Align()` as a replacement but I believe it is less clear that the alignment is one in that case.
Reviewers: xbolva00, courbet, bollu
Subscribers: arsenm, dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, hiraditya, kbarton, jrtc27, atanasyan, jsji, Jim, kerbowa, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D73099
2020-01-21 15:00:04 +01:00
|
|
|
NamesVar->setAlignment(Align(1));
|
2021-03-03 20:32:24 +01:00
|
|
|
// NamesVar is used by runtime but not referenced via relocation by other
|
|
|
|
// sections. Conservatively make it linker retained.
|
2016-02-08 19:13:49 +01:00
|
|
|
UsedVars.push_back(NamesVar);
|
2017-02-14 21:03:48 +01:00
|
|
|
|
|
|
|
for (auto *NamePtr : ReferencedNames)
|
|
|
|
NamePtr->eraseFromParent();
|
2016-02-08 19:13:49 +01:00
|
|
|
}
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
// Emit the internal registration function for targets that cannot discover
// the profile metadata sections via section start/end symbols. The emitted
// function calls the runtime's register hook once per profile global, and
// registers the names blob together with its size.
void InstrProfiling::emitRegistration() {
  if (!needsRuntimeRegistrationOfSectionRange(TT))
    return;

  // Construct the function.
  auto *VoidTy = Type::getVoidTy(M->getContext());
  auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
  auto *Int64Ty = Type::getInt64Ty(M->getContext());
  auto *RegisterFTy = FunctionType::get(VoidTy, false);
  auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
                                     getInstrProfRegFuncsName(), M);
  RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  if (Options.NoRedZone)
    RegisterF->addFnAttr(Attribute::NoRedZone);

  // External runtime entry point taking a single i8* (the global to
  // register).
  auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
  auto *RuntimeRegisterF =
      Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
                       getInstrProfRegFuncName(), M);

  // Register every profile global from both retention lists. Functions are
  // skipped (only data globals are registered); NamesVar is handled
  // separately below because its registration also needs the size.
  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
  for (Value *Data : CompilerUsedVars)
    if (!isa<Function>(Data))
      IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
  for (Value *Data : UsedVars)
    if (Data != NamesVar && !isa<Function>(Data))
      IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));

  if (NamesVar) {
    Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
    auto *NamesRegisterTy =
        FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
    auto *NamesRegisterF =
        Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
                         getInstrProfNamesRegFuncName(), M);
    IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
                                    IRB.getInt64(NamesSize)});
  }

  IRB.CreateRetVoid();
}
|
|
|
|
|
2018-02-28 20:00:08 +01:00
|
|
|
// Emit a reference to the profile runtime hook variable so that linking in
// any instrumented object pulls in the profile runtime. Returns true if the
// hook-user function was emitted, false if it is unnecessary on this target
// or the module already provides the hook variable itself.
bool InstrProfiling::emitRuntimeHook() {
  // We expect the linker to be invoked with -u<hook_var> flag for Linux or
  // Fuchsia, in which case there is no need to emit the user function.
  if (TT.isOSLinux() || TT.isOSFuchsia())
    return false;

  // If the module's provided its own runtime, we don't need to do anything.
  if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
    return false;

  // Declare an external variable that will pull in the runtime initialization.
  auto *Int32Ty = Type::getInt32Ty(M->getContext());
  auto *Var =
      new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
                         nullptr, getInstrProfRuntimeHookVarName());

  // Make a function that uses it. LinkOnceODR + hidden + comdat so that
  // duplicate copies across translation units are folded by the linker.
  auto *User = Function::Create(FunctionType::get(Int32Ty, false),
                                GlobalValue::LinkOnceODRLinkage,
                                getInstrProfRuntimeHookVarUseFuncName(), M);
  // NoInline keeps the load of the hook variable (and thus the undefined
  // reference to it) from being optimized away into callers.
  User->addFnAttr(Attribute::NoInline);
  if (Options.NoRedZone)
    User->addFnAttr(Attribute::NoRedZone);
  User->setVisibility(GlobalValue::HiddenVisibility);
  if (TT.supportsCOMDAT())
    User->setComdat(M->getOrInsertComdat(User->getName()));

  // Body: simply load and return the hook variable's value.
  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
  auto *Load = IRB.CreateLoad(Int32Ty, Var);
  IRB.CreateRet(Load);

  // Mark the hook-user function as used so that it isn't stripped out.
  CompilerUsedVars.push_back(User);
  return true;
}
|
|
|
|
|
|
|
|
// Append the emitted profile globals to llvm.compiler.used or llvm.used so
// that neither the compiler nor the linker discards them, choosing the
// weaker (compiler-only) retention where the object format's GC semantics
// make that safe.
void InstrProfiling::emitUses() {
  // The metadata sections are parallel arrays. Optimizers (e.g.
  // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
  // we conservatively retain all unconditionally in the compiler.
  //
  // On ELF, the linker can guarantee the associated sections will be retained
  // or discarded as a unit, so llvm.compiler.used is sufficient. Similarly on
  // COFF, if prof data is not referenced by code we use one comdat and ensure
  // this GC property as well. Otherwise, we have to conservatively make all of
  // the sections retained by the linker.
  if (TT.isOSBinFormatELF() ||
      (TT.isOSBinFormatCOFF() && !profDataReferencedByCode(*M)))
    appendToCompilerUsed(*M, CompilerUsedVars);
  else
    appendToUsed(*M, CompilerUsedVars);

  // We do not add proper references from used metadata sections to NamesVar and
  // VNodesVar, so we have to be conservative and place them in llvm.used
  // regardless of the target.
  appendToUsed(*M, UsedVars);
}
|
|
|
|
|
|
|
|
// Emit the profiling initialization: create the profile file name variable
// (non-CS lowering only) and, if a registration function was emitted, a
// global constructor that invokes it.
void InstrProfiling::emitInitialization() {
  // Create ProfileFileName variable. Don't do this for the
  // context-sensitive instrumentation lowering: This lowering is after
  // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
  // have already create the variable before LTO/ThinLTO linking.
  if (!IsCS)
    createProfileFileNameVar(*M, Options.InstrProfileOutput);
  // Nothing else to do when no registration function was emitted (targets
  // that locate sections via start/end symbols).
  Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
  if (!RegisterF)
    return;

  // Create the initialization function.
  auto *VoidTy = Type::getVoidTy(M->getContext());
  auto *F = Function::Create(FunctionType::get(VoidTy, false),
                             GlobalValue::InternalLinkage,
                             getInstrProfInitFuncName(), M);
  F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  F->addFnAttr(Attribute::NoInline);
  if (Options.NoRedZone)
    F->addFnAttr(Attribute::NoRedZone);

  // Add the basic block and the necessary calls.
  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
  IRB.CreateCall(RegisterF, {});
  IRB.CreateRetVoid();

  // Run the registration at program startup via a global constructor.
  appendToGlobalCtors(*M, F, 0);
}
|