2014-12-08 19:02:35 +01:00
|
|
|
//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
|
|
|
|
//
|
2019-01-19 09:50:56 +01:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2014-12-08 19:02:35 +01:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2015-11-18 19:14:55 +01:00
|
|
|
// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
|
|
|
|
// It also builds the data structures and initialization code needed for
|
|
|
|
// updating execution counts and emitting the profile at runtime.
|
2014-12-08 19:02:35 +01:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2018-03-23 23:11:06 +01:00
|
|
|
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
|
|
#include "llvm/ADT/StringRef.h"
|
2014-12-08 19:02:35 +01:00
|
|
|
#include "llvm/ADT/Triple.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/ADT/Twine.h"
|
2019-02-27 18:24:33 +01:00
|
|
|
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
|
|
|
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
2017-06-25 02:26:43 +02:00
|
|
|
#include "llvm/Analysis/LoopInfo.h"
|
2016-11-21 12:57:19 +01:00
|
|
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/IR/Attributes.h"
|
|
|
|
#include "llvm/IR/BasicBlock.h"
|
|
|
|
#include "llvm/IR/Constant.h"
|
|
|
|
#include "llvm/IR/Constants.h"
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
2017-06-25 02:26:43 +02:00
|
|
|
#include "llvm/IR/Dominators.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/IR/Function.h"
|
|
|
|
#include "llvm/IR/GlobalValue.h"
|
|
|
|
#include "llvm/IR/GlobalVariable.h"
|
2017-06-06 13:49:48 +02:00
|
|
|
#include "llvm/IR/IRBuilder.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/IR/Instruction.h"
|
|
|
|
#include "llvm/IR/Instructions.h"
|
2014-12-08 19:02:35 +01:00
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
|
|
#include "llvm/IR/Module.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/IR/Type.h"
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-13 22:15:01 +01:00
|
|
|
#include "llvm/InitializePasses.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/Pass.h"
|
2015-11-18 19:14:55 +01:00
|
|
|
#include "llvm/ProfileData/InstrProf.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include "llvm/Support/Casting.h"
|
|
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
#include "llvm/Support/Error.h"
|
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2017-06-25 02:26:43 +02:00
|
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
2014-12-08 19:02:35 +01:00
|
|
|
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
2017-06-25 02:26:43 +02:00
|
|
|
#include "llvm/Transforms/Utils/SSAUpdater.h"
|
2017-01-18 01:57:48 +01:00
|
|
|
#include <algorithm>
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstddef>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <string>
|
2014-12-08 19:02:35 +01:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "instrprof"
|
|
|
|
|
2017-04-04 18:42:20 +02:00
|
|
|
// The start and end values of precise value profile range for memory
|
|
|
|
// intrinsic sizes
|
|
|
|
cl::opt<std::string> MemOPSizeRange(
|
|
|
|
"memop-size-range",
|
|
|
|
cl::desc("Set the range of size in memory intrinsic calls to be profiled "
|
|
|
|
"precisely, in a format of <start_val>:<end_val>"),
|
|
|
|
cl::init(""));
|
|
|
|
|
|
|
|
// The value that considered to be large value in memory intrinsic.
|
|
|
|
cl::opt<unsigned> MemOPSizeLarge(
|
|
|
|
"memop-size-large",
|
|
|
|
cl::desc("Set large value thresthold in memory intrinsic size profiling. "
|
|
|
|
"Value of 0 disables the large value profiling."),
|
|
|
|
cl::init(8192));
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
namespace {
|
|
|
|
|
2019-12-04 19:35:14 +01:00
|
|
|
// Toggles compression of the name strings (on by default).
cl::opt<bool> DoNameCompression(
    "enable-name-compression",
    cl::desc("Enable name string compression"),
    cl::init(true));

// Toggles renaming of comdat-function counter variables based on the CFG hash
// (on by default).
cl::opt<bool> DoHashBasedCounterSplit(
    "hash-based-counter-split",
    cl::desc("Rename counter variable of a comdat function "
             "based on cfg hash"),
    cl::init(true));

// Explicit switch for runtime counter relocation (off unless requested).
cl::opt<bool> RuntimeCounterRelocation(
    "runtime-counter-relocation",
    cl::desc("Enable relocating counters at runtime."),
    cl::init(false));

// Toggles static counter allocation for the value profiler (on by default).
cl::opt<bool> ValueProfileStaticAlloc(
    "vp-static-alloc",
    cl::desc("Do static counter allocation for value profiler"),
    cl::init(true));

// Average number of counters reserved per value profiling site.
//
// The value is kept small because, in real programs, only a very small
// percentage of value sites have non-zero targets (e.g. 1/30), and for the
// sites that do have a non-zero profile the average number of targets is
// usually smaller than 2.
cl::opt<double> NumCountersPerValueSite(
    "vp-counters-per-site",
    cl::desc("The average number of profile counters "
             "allocated per value profiling site."),
    cl::init(1.0));

// Testing aid: make every profile counter update atomic.
cl::opt<bool> AtomicCounterUpdateAll(
    "instrprof-atomic-counter-update-all", cl::ZeroOrMore,
    cl::desc("Make all profile counter updates atomic (for testing only)"),
    cl::init(false));

// Use an atomic fetch-add when updating counters that were promoted.
cl::opt<bool> AtomicCounterUpdatePromoted(
    "atomic-counter-update-promoted", cl::ZeroOrMore,
    cl::desc("Do counter update using atomic "
             "fetch add  for promoted counters only"),
    cl::init(false));
|
|
|
|
|
|
|
|
// If the option is not specified, the default behavior about whether
|
|
|
|
// counter promotion is done depends on how instrumentaiton lowering
|
|
|
|
// pipeline is setup, i.e., the default value of true of this option
|
|
|
|
// does not mean the promotion will be done by default. Explicitly
|
|
|
|
// setting this option can override the default behavior.
|
|
|
|
cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
|
|
|
|
cl::desc("Do counter register promotion"),
|
|
|
|
cl::init(false));
|
|
|
|
cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
|
2017-07-13 01:27:44 +02:00
|
|
|
cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
|
2017-06-25 02:26:43 +02:00
|
|
|
cl::desc("Max number counter promotions per loop to avoid"
|
|
|
|
" increasing register pressure too much"));
|
|
|
|
|
|
|
|
// A debug option
|
|
|
|
cl::opt<int>
|
|
|
|
MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
|
|
|
|
cl::desc("Max number of allowed counter promotions"));
|
|
|
|
|
2017-07-13 01:27:44 +02:00
|
|
|
cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
|
|
|
|
cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
|
|
|
|
cl::desc("The max number of exiting blocks of a loop to allow "
|
|
|
|
" speculative counter promotion"));
|
|
|
|
|
|
|
|
cl::opt<bool> SpeculativeCounterPromotionToLoop(
|
|
|
|
cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
|
|
|
|
cl::desc("When the option is false, if the target block is in a loop, "
|
|
|
|
"the promotion will be disallowed unless the promoted counter "
|
|
|
|
" update can be further/iteratively promoted into an acyclic "
|
|
|
|
" region."));
|
|
|
|
|
|
|
|
cl::opt<bool> IterativeCounterPromotion(
|
|
|
|
cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
|
|
|
|
cl::desc("Allow counter promotion across the whole loop nest."));
|
2017-06-25 02:26:43 +02:00
|
|
|
|
2016-04-18 19:47:38 +02:00
|
|
|
class InstrProfilingLegacyPass : public ModulePass {
|
|
|
|
InstrProfiling InstrProf;
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
public:
|
|
|
|
static char ID;
|
2017-01-18 01:57:48 +01:00
|
|
|
|
|
|
|
InstrProfilingLegacyPass() : ModulePass(ID) {}
|
2019-02-27 18:24:33 +01:00
|
|
|
InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false)
|
|
|
|
: ModulePass(ID), InstrProf(Options, IsCS) {}
|
2017-01-18 01:57:48 +01:00
|
|
|
|
2016-10-01 04:56:57 +02:00
|
|
|
StringRef getPassName() const override {
|
2014-12-08 19:02:35 +01:00
|
|
|
return "Frontend instrumentation-based coverage lowering";
|
|
|
|
}
|
|
|
|
|
2016-11-21 12:57:19 +01:00
|
|
|
bool runOnModule(Module &M) override {
|
Change TargetLibraryInfo analysis passes to always require Function
Summary:
This is the first change to enable the TLI to be built per-function so
that -fno-builtin* handling can be migrated to use function attributes.
See discussion on D61634 for background. This is an enabler for fixing
handling of these options for LTO, for example.
This change should not affect behavior, as the provided function is not
yet used to build a specifically per-function TLI, but rather enables
that migration.
Most of the changes were very mechanical, e.g. passing a Function to the
legacy analysis pass's getTLI interface, or in Module level cases,
adding a callback. This is similar to the way the per-function TTI
analysis works.
There was one place where we were looking for builtins but not in the
context of a specific function. See FindCXAAtExit in
lib/Transforms/IPO/GlobalOpt.cpp. I'm somewhat concerned my workaround
could provide the wrong behavior in some corner cases. Suggestions
welcome.
Reviewers: chandlerc, hfinkel
Subscribers: arsenm, dschuff, jvesely, nhaehnle, mehdi_amini, javed.absar, sbc100, jgravelle-google, eraman, aheejin, steven_wu, george.burgess.iv, dexonsmith, jfb, asbirlea, gchatelet, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66428
llvm-svn: 371284
2019-09-07 05:09:36 +02:00
|
|
|
auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
|
|
|
|
return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
|
|
|
|
};
|
|
|
|
return InstrProf.run(M, GetTLI);
|
2016-11-21 12:57:19 +01:00
|
|
|
}
|
2014-12-08 19:02:35 +01:00
|
|
|
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
|
|
AU.setPreservesCFG();
|
2016-11-21 12:57:19 +01:00
|
|
|
AU.addRequired<TargetLibraryInfoWrapperPass>();
|
2014-12-08 19:02:35 +01:00
|
|
|
}
|
2016-04-18 19:47:38 +02:00
|
|
|
};
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2017-07-13 01:27:44 +02:00
|
|
|
///
|
2017-06-25 02:26:43 +02:00
|
|
|
/// A helper class to promote one counter RMW operation in the loop
|
|
|
|
/// into register update.
|
|
|
|
///
|
|
|
|
/// RWM update for the counter will be sinked out of the loop after
|
|
|
|
/// the transformation.
|
|
|
|
///
|
|
|
|
class PGOCounterPromoterHelper : public LoadAndStorePromoter {
|
|
|
|
public:
|
2017-07-13 01:27:44 +02:00
|
|
|
PGOCounterPromoterHelper(
|
|
|
|
Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
|
|
|
|
BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
|
|
|
|
ArrayRef<Instruction *> InsertPts,
|
|
|
|
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
|
|
|
|
LoopInfo &LI)
|
2017-06-25 02:26:43 +02:00
|
|
|
: LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
|
2017-07-13 01:27:44 +02:00
|
|
|
InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
|
2017-06-25 02:26:43 +02:00
|
|
|
assert(isa<LoadInst>(L));
|
|
|
|
assert(isa<StoreInst>(S));
|
|
|
|
SSA.AddAvailableValue(PH, Init);
|
|
|
|
}
|
2017-07-13 01:27:44 +02:00
|
|
|
|
[LICM/MSSA] Add promotion to scalars by building an AliasSetTracker with MemorySSA.
Summary:
Experimentally we found that promotion to scalars carries less benefits
than sinking and hoisting in LICM. When using MemorySSA, we build an
AliasSetTracker on demand in order to reuse the current infrastructure.
We only build it if less than AccessCapForMSSAPromotion exist in the
loop, a cap that is by default set to 250. This value ensures there are
no runtime regressions, and there are small compile time gains for
pathological cases. A much lower value (20) was found to yield a single
regression in the llvm-test-suite and much higher benefits for compile
times. Conservatively we set the current cap to a high value, but we will
explore lowering it when MemorySSA is enabled by default.
Reviewers: sanjoy, chandlerc
Subscribers: nemanjai, jlebar, Prazek, george.burgess.iv, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D56625
llvm-svn: 353339
2019-02-06 21:25:17 +01:00
|
|
|
void doExtraRewritesBeforeFinalDeletion() override {
|
2017-06-25 02:26:43 +02:00
|
|
|
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
|
|
|
|
BasicBlock *ExitBlock = ExitBlocks[i];
|
|
|
|
Instruction *InsertPos = InsertPts[i];
|
|
|
|
// Get LiveIn value into the ExitBlock. If there are multiple
|
|
|
|
// predecessors, the value is defined by a PHI node in this
|
|
|
|
// block.
|
|
|
|
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
|
|
|
|
Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
|
2019-02-01 21:44:24 +01:00
|
|
|
Type *Ty = LiveInValue->getType();
|
2017-06-25 02:26:43 +02:00
|
|
|
IRBuilder<> Builder(InsertPos);
|
|
|
|
if (AtomicCounterUpdatePromoted)
|
2017-07-13 01:27:44 +02:00
|
|
|
// automic update currently can only be promoted across the current
|
|
|
|
// loop, not the whole loop nest.
|
2017-06-25 02:26:43 +02:00
|
|
|
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
|
|
|
|
AtomicOrdering::SequentiallyConsistent);
|
|
|
|
else {
|
2019-02-01 21:44:24 +01:00
|
|
|
LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
|
2017-06-25 02:26:43 +02:00
|
|
|
auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
|
2017-07-13 01:27:44 +02:00
|
|
|
auto *NewStore = Builder.CreateStore(NewVal, Addr);
|
|
|
|
|
|
|
|
// Now update the parent loop's candidate list:
|
|
|
|
if (IterativeCounterPromotion) {
|
|
|
|
auto *TargetLoop = LI.getLoopFor(ExitBlock);
|
|
|
|
if (TargetLoop)
|
|
|
|
LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
|
|
|
|
}
|
2017-06-25 02:26:43 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
Instruction *Store;
|
|
|
|
ArrayRef<BasicBlock *> ExitBlocks;
|
|
|
|
ArrayRef<Instruction *> InsertPts;
|
2017-07-13 01:27:44 +02:00
|
|
|
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
|
|
|
|
LoopInfo &LI;
|
2017-06-25 02:26:43 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/// A helper class to do register promotion for all profile counter
|
|
|
|
/// updates in a loop.
|
|
|
|
///
|
|
|
|
class PGOCounterPromoter {
|
|
|
|
public:
|
2017-07-13 01:27:44 +02:00
|
|
|
PGOCounterPromoter(
|
|
|
|
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
|
2019-02-27 18:24:33 +01:00
|
|
|
Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
|
2017-07-13 01:27:44 +02:00
|
|
|
: LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
|
2019-02-27 18:24:33 +01:00
|
|
|
LI(LI), BFI(BFI) {
|
2017-06-25 02:26:43 +02:00
|
|
|
|
|
|
|
SmallVector<BasicBlock *, 8> LoopExitBlocks;
|
|
|
|
SmallPtrSet<BasicBlock *, 8> BlockSet;
|
2017-07-13 01:27:44 +02:00
|
|
|
L.getExitBlocks(LoopExitBlocks);
|
2017-06-25 02:26:43 +02:00
|
|
|
|
|
|
|
for (BasicBlock *ExitBlock : LoopExitBlocks) {
|
|
|
|
if (BlockSet.insert(ExitBlock).second) {
|
|
|
|
ExitBlocks.push_back(ExitBlock);
|
|
|
|
InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool run(int64_t *NumPromoted) {
|
2017-11-30 20:16:25 +01:00
|
|
|
// Skip 'infinite' loops:
|
|
|
|
if (ExitBlocks.size() == 0)
|
|
|
|
return false;
|
2017-07-13 01:27:44 +02:00
|
|
|
unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
|
|
|
|
if (MaxProm == 0)
|
2017-06-25 02:26:43 +02:00
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Promoted = 0;
|
2017-07-13 01:27:44 +02:00
|
|
|
for (auto &Cand : LoopToCandidates[&L]) {
|
2017-06-25 02:26:43 +02:00
|
|
|
|
|
|
|
SmallVector<PHINode *, 4> NewPHIs;
|
|
|
|
SSAUpdater SSA(&NewPHIs);
|
|
|
|
Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
|
2017-07-13 01:27:44 +02:00
|
|
|
|
2019-02-27 18:24:33 +01:00
|
|
|
// If BFI is set, we will use it to guide the promotions.
|
|
|
|
if (BFI) {
|
|
|
|
auto *BB = Cand.first->getParent();
|
|
|
|
auto InstrCount = BFI->getBlockProfileCount(BB);
|
|
|
|
if (!InstrCount)
|
|
|
|
continue;
|
|
|
|
auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
|
|
|
|
// If the average loop trip count is not greater than 1.5, we skip
|
|
|
|
// promotion.
|
|
|
|
if (PreheaderCount &&
|
|
|
|
(PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2))
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2017-06-25 02:26:43 +02:00
|
|
|
PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
|
2017-07-13 01:27:44 +02:00
|
|
|
L.getLoopPreheader(), ExitBlocks,
|
|
|
|
InsertPts, LoopToCandidates, LI);
|
2017-06-25 02:26:43 +02:00
|
|
|
Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
|
|
|
|
Promoted++;
|
2017-07-13 01:27:44 +02:00
|
|
|
if (Promoted >= MaxProm)
|
2017-06-25 02:26:43 +02:00
|
|
|
break;
|
2017-07-13 01:27:44 +02:00
|
|
|
|
2017-06-25 02:26:43 +02:00
|
|
|
(*NumPromoted)++;
|
|
|
|
if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
|
|
|
|
<< L.getLoopDepth() << ")\n");
|
2017-06-25 02:26:43 +02:00
|
|
|
return Promoted != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2017-07-13 01:27:44 +02:00
|
|
|
bool allowSpeculativeCounterPromotion(Loop *LP) {
|
|
|
|
SmallVector<BasicBlock *, 8> ExitingBlocks;
|
|
|
|
L.getExitingBlocks(ExitingBlocks);
|
|
|
|
// Not considierered speculative.
|
|
|
|
if (ExitingBlocks.size() == 1)
|
|
|
|
return true;
|
|
|
|
if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the max number of Counter Promotions for LP.
|
|
|
|
unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
|
|
|
|
// We can't insert into a catchswitch.
|
|
|
|
SmallVector<BasicBlock *, 8> LoopExitBlocks;
|
|
|
|
LP->getExitBlocks(LoopExitBlocks);
|
|
|
|
if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
|
|
|
|
return isa<CatchSwitchInst>(Exit->getTerminator());
|
|
|
|
}))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!LP->hasDedicatedExits())
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
BasicBlock *PH = LP->getLoopPreheader();
|
|
|
|
if (!PH)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
SmallVector<BasicBlock *, 8> ExitingBlocks;
|
|
|
|
LP->getExitingBlocks(ExitingBlocks);
|
2019-02-27 18:24:33 +01:00
|
|
|
|
|
|
|
// If BFI is set, we do more aggressive promotions based on BFI.
|
|
|
|
if (BFI)
|
|
|
|
return (unsigned)-1;
|
|
|
|
|
2017-07-13 01:27:44 +02:00
|
|
|
// Not considierered speculative.
|
|
|
|
if (ExitingBlocks.size() == 1)
|
|
|
|
return MaxNumOfPromotionsPerLoop;
|
|
|
|
|
|
|
|
if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
// Whether the target block is in a loop does not matter:
|
|
|
|
if (SpeculativeCounterPromotionToLoop)
|
|
|
|
return MaxNumOfPromotionsPerLoop;
|
|
|
|
|
|
|
|
// Now check the target block:
|
|
|
|
unsigned MaxProm = MaxNumOfPromotionsPerLoop;
|
|
|
|
for (auto *TargetBlock : LoopExitBlocks) {
|
|
|
|
auto *TargetLoop = LI.getLoopFor(TargetBlock);
|
|
|
|
if (!TargetLoop)
|
|
|
|
continue;
|
|
|
|
unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
|
|
|
|
unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
|
|
|
|
MaxProm =
|
|
|
|
std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
|
|
|
|
PendingCandsInTarget);
|
|
|
|
}
|
|
|
|
return MaxProm;
|
|
|
|
}
|
|
|
|
|
|
|
|
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
|
2017-06-25 02:26:43 +02:00
|
|
|
SmallVector<BasicBlock *, 8> ExitBlocks;
|
|
|
|
SmallVector<Instruction *, 8> InsertPts;
|
2017-07-13 01:27:44 +02:00
|
|
|
Loop &L;
|
|
|
|
LoopInfo &LI;
|
2019-02-27 18:24:33 +01:00
|
|
|
BlockFrequencyInfo *BFI;
|
2017-06-25 02:26:43 +02:00
|
|
|
};
|
|
|
|
|
2017-01-18 01:57:48 +01:00
|
|
|
} // end anonymous namespace
|
2015-02-11 03:52:44 +01:00
|
|
|
|
2016-08-09 02:28:38 +02:00
|
|
|
/// New pass manager entry point. Forwards to the callback-based run()
/// overload, supplying TargetLibraryInfo through the function analysis
/// manager, and reports all analyses preserved only when that overload
/// returns false.
PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
  auto &FuncAM =
      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  auto LookupTLI = [&FuncAM](Function &Fn) -> TargetLibraryInfo & {
    return FuncAM.getResult<TargetLibraryAnalysis>(Fn);
  };

  const bool Changed = run(M, LookupTLI);
  if (Changed)
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}
|
2016-02-08 19:13:49 +01:00
|
|
|
|
2016-04-18 19:47:38 +02:00
|
|
|
char InstrProfilingLegacyPass::ID = 0;
|
2016-11-21 12:57:19 +01:00
|
|
|
INITIALIZE_PASS_BEGIN(
|
|
|
|
InstrProfilingLegacyPass, "instrprof",
|
|
|
|
"Frontend instrumentation-based coverage lowering.", false, false)
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
|
|
|
|
INITIALIZE_PASS_END(
|
|
|
|
InstrProfilingLegacyPass, "instrprof",
|
|
|
|
"Frontend instrumentation-based coverage lowering.", false, false)
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2016-06-21 04:39:08 +02:00
|
|
|
/// Factory for the legacy pass. Returns a newly allocated
/// InstrProfilingLegacyPass constructed from \p Options and \p IsCS.
ModulePass *llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
                                                 bool IsCS) {
  auto *LegacyPass = new InstrProfilingLegacyPass(Options, IsCS);
  return LegacyPass;
}
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2016-09-18 20:34:07 +02:00
|
|
|
/// Returns \p Instr viewed as an InstrProfIncrementInst, trying the
/// InstrProfIncrementInstStep form first and the plain increment form second.
/// Yields whatever the final dyn_cast produces when neither form matches.
static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
  if (InstrProfIncrementInst *StepInc =
          dyn_cast<InstrProfIncrementInstStep>(Instr))
    return StepInc;
  return dyn_cast<InstrProfIncrementInst>(Instr);
}
|
|
|
|
|
2017-06-25 02:26:43 +02:00
|
|
|
/// Lowers every increment and value-profile intrinsic found in \p F, then
/// runs counter load/store promotion if anything was lowered.
/// Returns true if at least one intrinsic was lowered.
bool InstrProfiling::lowerIntrinsics(Function *F) {
  PromotionCandidates.clear();

  bool Lowered = false;
  for (BasicBlock &Block : *F) {
    auto Next = Block.begin();
    const auto End = Block.end();
    while (Next != End) {
      // Step past the current instruction before handing it to the lowering
      // routines, so iteration does not depend on it afterwards.
      auto Cur = Next++;
      if (InstrProfIncrementInst *Inc = castToIncrementInst(&*Cur)) {
        lowerIncrement(Inc);
        Lowered = true;
        continue;
      }
      if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Cur)) {
        lowerValueProfileInst(Ind);
        Lowered = true;
      }
    }
  }

  if (Lowered)
    promoteCounterLoadStores(F);
  return Lowered;
}
|
|
|
|
|
2019-10-04 22:29:56 +02:00
|
|
|
bool InstrProfiling::isRuntimeCounterRelocationEnabled() const {
|
|
|
|
if (RuntimeCounterRelocation.getNumOccurrences() > 0)
|
|
|
|
return RuntimeCounterRelocation;
|
|
|
|
|
|
|
|
return TT.isOSFuchsia();
|
|
|
|
}
|
|
|
|
|
2017-06-25 02:26:43 +02:00
|
|
|
bool InstrProfiling::isCounterPromotionEnabled() const {
|
|
|
|
if (DoCounterPromotion.getNumOccurrences() > 0)
|
|
|
|
return DoCounterPromotion;
|
|
|
|
|
|
|
|
return Options.DoCounterPromotion;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Promotes the counter load/store pairs collected in PromotionCandidates
/// out of the loops of \p F. Does nothing when counter promotion is disabled.
///
/// A dominator tree and loop info are built locally for \p F. When
/// Options.UseBFIInPromotion is set, block frequency info is also computed
/// and handed to the per-loop promoter.
void InstrProfiling::promoteCounterLoadStores(Function *F) {
  if (!isCounterPromotionEnabled())
    return;

  DominatorTree DT(*F);
  LoopInfo LI(DT);
  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;

  // BPI is declared next to (and before) BFI so that it is destroyed after
  // it: BFI is constructed from *BPI and is used until the end of this
  // function, so BPI must not go out of scope earlier in case BFI keeps
  // referring to it.
  std::unique_ptr<BranchProbabilityInfo> BPI;
  std::unique_ptr<BlockFrequencyInfo> BFI;
  if (Options.UseBFIInPromotion) {
    BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
    BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
  }

  // Group the candidates by the innermost loop containing the counter load;
  // candidates that are not inside any loop are dropped.
  for (const auto &LoadStore : PromotionCandidates) {
    auto *CounterLoad = LoadStore.first;
    auto *CounterStore = LoadStore.second;
    BasicBlock *BB = CounterLoad->getParent();
    Loop *ParentLoop = LI.getLoopFor(BB);
    if (!ParentLoop)
      continue;
    LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
  }

  SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();

  // Do a post-order traversal of the loops so that counter updates can be
  // iteratively hoisted outside the loop nest.
  for (auto *CurLoop : llvm::reverse(Loops)) {
    PGOCounterPromoter Promoter(LoopPromotionCandidates, *CurLoop, LI,
                                BFI.get());
    Promoter.run(&TotalCountersPromoted);
  }
}
|
|
|
|
|
2018-01-27 00:54:24 +01:00
|
|
|
/// Check if the module contains uses of any profiling intrinsics.
|
|
|
|
static bool containsProfilingIntrinsics(Module &M) {
|
|
|
|
if (auto *F = M.getFunction(
|
|
|
|
Intrinsic::getName(llvm::Intrinsic::instrprof_increment)))
|
2018-01-27 01:01:04 +01:00
|
|
|
if (!F->use_empty())
|
|
|
|
return true;
|
2018-01-27 00:54:24 +01:00
|
|
|
if (auto *F = M.getFunction(
|
|
|
|
Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step)))
|
2018-01-27 01:01:04 +01:00
|
|
|
if (!F->use_empty())
|
|
|
|
return true;
|
2018-01-27 00:54:24 +01:00
|
|
|
if (auto *F = M.getFunction(
|
|
|
|
Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile)))
|
2018-01-27 01:01:04 +01:00
|
|
|
if (!F->use_empty())
|
|
|
|
return true;
|
2018-01-27 00:54:24 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
Change TargetLibraryInfo analysis passes to always require Function
Summary:
This is the first change to enable the TLI to be built per-function so
that -fno-builtin* handling can be migrated to use function attributes.
See discussion on D61634 for background. This is an enabler for fixing
handling of these options for LTO, for example.
This change should not affect behavior, as the provided function is not
yet used to build a specifically per-function TLI, but rather enables
that migration.
Most of the changes were very mechanical, e.g. passing a Function to the
legacy analysis pass's getTLI interface, or in Module level cases,
adding a callback. This is similar to the way the per-function TTI
analysis works.
There was one place where we were looking for builtins but not in the
context of a specific function. See FindCXAAtExit in
lib/Transforms/IPO/GlobalOpt.cpp. I'm somewhat concerned my workaround
could provide the wrong behavior in some corner cases. Suggestions
welcome.
Reviewers: chandlerc, hfinkel
Subscribers: arsenm, dschuff, jvesely, nhaehnle, mehdi_amini, javed.absar, sbc100, jgravelle-google, eraman, aheejin, steven_wu, george.burgess.iv, dexonsmith, jfb, asbirlea, gchatelet, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66428
llvm-svn: 371284
2019-09-07 05:09:36 +02:00
|
|
|
/// Lower the instrprof intrinsics in \p M and emit the supporting data.
///
/// \param M       The module to process.
/// \param GetTLI  Callback yielding the TargetLibraryInfo for a function.
/// \returns true if the module was changed.
bool InstrProfiling::run(
    Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
  // Reset all per-module state before doing any work.
  this->M = &M;
  this->GetTLI = std::move(GetTLI);
  NamesVar = nullptr;
  NamesSize = 0;
  ProfileDataMap.clear();
  UsedVars.clear();
  getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart,
                              MemOPSizeRangeLast);
  TT = Triple(M.getTargetTriple());

  // The runtime hook is emitted even when the module carries no counters.
  bool Changed = emitRuntimeHook();

  // Bail out early when there is nothing to lower; this avoids the linear
  // scans over every instruction below.
  GlobalVariable *UnusedNamesVar =
      M.getNamedGlobal(getCoverageUnusedNamesVarName());
  if (!(containsProfilingIntrinsics(M) || UnusedNamesVar))
    return Changed;

  // The number of value sites per function is not known up front. Count the
  // instrumented value sites first so the total can be recorded as a field of
  // the profile data variable.
  for (Function &F : M) {
    InstrProfIncrementInst *FirstInc = nullptr;
    for (BasicBlock &BB : F) {
      for (Instruction &I : BB) {
        if (auto *ValueProf = dyn_cast<InstrProfValueProfileInst>(&I)) {
          computeNumValueSiteCounts(ValueProf);
          continue;
        }
        if (!FirstInc)
          FirstInc = dyn_cast<InstrProfIncrementInst>(&I);
      }
    }

    // Lowering value profiling intrinsics requires the per-function profile
    // data variable to exist already, so create it now.
    if (FirstInc)
      static_cast<void>(getOrCreateRegionCounters(FirstInc));
  }

  for (Function &F : M)
    Changed |= lowerIntrinsics(&F);

  if (UnusedNamesVar) {
    lowerCoverageData(UnusedNamesVar);
    Changed = true;
  }

  if (!Changed)
    return false;

  emitVNodes();
  emitNameData();
  emitRegistration();
  emitUses();
  emitInitialization();
  return true;
}
|
|
|
|
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 03:28:03 +01:00
|
|
|
/// Get or insert the declaration of the value profiling runtime function.
///
/// \param M        Module that receives the declaration.
/// \param TLI      Supplies the extension attribute for the i32 parameter.
/// \param IsRange  When true, declare the range variant of the runtime
///                 function instead of the plain one.
/// \returns the callee to pass to a call-creation routine.
static FunctionCallee
getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI,
                              bool IsRange = false) {
  // NOTE: the name `Ctx` is referenced by the parameter type expressions
  // expanded from InstrProfData.inc below.
  LLVMContext &Ctx = M.getContext();
  Type *VoidTy = Type::getVoidTy(Ctx);

  // Parameter 2 may need a target-specific extension attribute.
  AttributeList Attrs;
  if (auto ExtKind = TLI.getExtAttrForI32Param(false))
    Attrs = Attrs.addParamAttribute(Ctx, 2, ExtKind);

  if (!IsRange) {
    Type *ParamTypes[] = {
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
#include "llvm/ProfileData/InstrProfData.inc"
    };
    FunctionType *FnTy =
        FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
    return M.getOrInsertFunction(getInstrProfValueProfFuncName(), FnTy, Attrs);
  }

  Type *RangeParamTypes[] = {
#define VALUE_RANGE_PROF 1
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
#include "llvm/ProfileData/InstrProfData.inc"
#undef VALUE_RANGE_PROF
  };
  FunctionType *RangeFnTy =
      FunctionType::get(VoidTy, makeArrayRef(RangeParamTypes), false);
  return M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(), RangeFnTy,
                               Attrs);
}
|
|
|
|
|
|
|
|
/// Record the value site referenced by \p Ind so that the per-function,
/// per-kind site count covers its index (i.e. is at least index + 1).
void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
  GlobalVariable *FuncName = Ind->getName();
  const uint64_t Kind = Ind->getValueKind()->getZExtValue();
  const uint64_t SiteIndex = Ind->getIndex()->getZExtValue();

  auto Entry = ProfileDataMap.find(FuncName);
  if (Entry != ProfileDataMap.end()) {
    // Known function: only grow the count, never shrink it.
    auto &Recorded = Entry->second.NumValueSites[Kind];
    if (Recorded <= SiteIndex)
      Recorded = SiteIndex + 1;
    return;
  }

  // First value site seen for this function: start a fresh record.
  PerFunctionProfileData Fresh;
  Fresh.NumValueSites[Kind] = SiteIndex + 1;
  ProfileDataMap[FuncName] = Fresh;
}
|
|
|
|
|
|
|
|
/// Replace the value profiling intrinsic \p Ind with a call to the value
/// profiling runtime function (the range variant for IPVK_MemOPSize).
///
/// The function's profile data variable must already have been created; this
/// is asserted.
void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
  GlobalVariable *Name = Ind->getName();
  auto It = ProfileDataMap.find(Name);
  assert(It != ProfileDataMap.end() && It->second.DataVar &&
         "value profiling detected in function with no counter increment");

  GlobalVariable *DataVar = It->second.DataVar;
  uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
  uint64_t Index = Ind->getIndex()->getZExtValue();
  // The index passed to the runtime is offset by the number of sites recorded
  // for all value kinds that precede this one.
  for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
    Index += It->second.NumValueSites[Kind];

  IRBuilder<> Builder(Ind);
  // Reuse the kind extracted above rather than querying the intrinsic again.
  bool IsRange = (ValueKind == llvm::InstrProfValueKind::IPVK_MemOPSize);
  CallInst *Call = nullptr;
  auto *TLI = &GetTLI(*Ind->getFunction());
  if (!IsRange) {
    Value *Args[3] = {Ind->getTargetValue(),
                      Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
                      Builder.getInt32(Index)};
    Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args);
  } else {
    Value *Args[6] = {
        Ind->getTargetValue(),
        Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
        Builder.getInt32(Index),
        Builder.getInt64(MemOPSizeRangeStart),
        Builder.getInt64(MemOPSizeRangeLast),
        // A MemOPSizeLarge of 0 is passed to the runtime as INT64_MIN.
        Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)};
    Call =
        Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args);
  }
  // Mirror the declaration's extension attribute on the i32 index argument.
  if (auto AK = TLI->getExtAttrForI32Param(false))
    Call->addParamAttr(2, AK);
  Ind->replaceAllUsesWith(Call);
  Ind->eraseFromParent();
}
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
/// Replace the counter increment intrinsic \p Inc with code that updates the
/// corresponding slot of the function's counter array.
///
/// With runtime counter relocation enabled, the counter address is offset by
/// the value of the counter bias global, which is loaded once at the start of
/// the function's entry block. The update is either an atomic add or a plain
/// load/add/store sequence; the latter is registered as a promotion candidate
/// when counter promotion is enabled.
void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
  GlobalVariable *Counters = getOrCreateRegionCounters(Inc);

  IRBuilder<> Builder(Inc);
  uint64_t Index = Inc->getIndex()->getZExtValue();
  Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(),
                                                   Counters, 0, Index);

  if (isRuntimeCounterRelocationEnabled()) {
    Type *Int64Ty = Type::getInt64Ty(M->getContext());
    Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext());
    Function *Fn = Inc->getParent()->getParent();
    Instruction &I = Fn->getEntryBlock().front();
    GlobalVariable *Bias =
        M->getGlobalVariable(getInstrProfCounterBiasVarName());

    // Reuse the first entry-block instruction only if it really is a load of
    // the bias variable. An unrelated load that merely happens to come first
    // must not be mistaken for the cached bias load.
    LoadInst *LI = dyn_cast<LoadInst>(&I);
    if (LI && (!Bias || LI->getPointerOperand() != Bias))
      LI = nullptr;

    if (!LI) {
      // Insert the bias load at the very start of the entry block so later
      // increments lowered in this function find it there.
      IRBuilder<> EntryBuilder(&I);
      if (!Bias)
        Bias = new GlobalVariable(*M, Int64Ty, false,
                                  GlobalValue::LinkOnceODRLinkage,
                                  Constant::getNullValue(Int64Ty),
                                  getInstrProfCounterBiasVarName());
      LI = EntryBuilder.CreateLoad(Int64Ty, Bias);
    }
    auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
    Addr = Builder.CreateIntToPtr(Add, Int64PtrTy);
  }

  if (Options.Atomic || AtomicCounterUpdateAll) {
    Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
                            AtomicOrdering::Monotonic);
  } else {
    Value *IncStep = Inc->getStep();
    Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
    auto *Count = Builder.CreateAdd(Load, IncStep);
    auto *Store = Builder.CreateStore(Count, Addr);
    if (isCounterPromotionEnabled())
      PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
  }
  Inc->eraseFromParent();
}
|
|
|
|
|
2016-01-07 21:05:49 +01:00
|
|
|
/// Collect the function name variables referenced by the initializer of
/// \p CoverageNamesVar into ReferencedNames, making each one private, and
/// then erase \p CoverageNamesVar from the module.
void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
  auto *NameArray = cast<ConstantArray>(CoverageNamesVar->getInitializer());
  const unsigned NumNames = NameArray->getNumOperands();

  for (unsigned Idx = 0; Idx < NumNames; ++Idx) {
    Constant *Entry = NameArray->getOperand(Idx);
    Value *Stripped = Entry->stripPointerCasts();
    assert(isa<GlobalVariable>(Stripped) && "Missing reference to function name");

    auto *NameVar = cast<GlobalVariable>(Stripped);
    NameVar->setLinkage(GlobalValue::PrivateLinkage);
    ReferencedNames.push_back(NameVar);

    // Drop the entry's operands so it no longer references the name variable.
    Entry->dropAllReferences();
  }

  CoverageNamesVar->eraseFromParent();
}
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
/// Get the name of a profiling variable for a particular function.
|
2015-10-22 22:32:12 +02:00
|
|
|
static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
|
2015-12-12 18:28:03 +01:00
|
|
|
StringRef NamePrefix = getInstrProfNameVarPrefix();
|
|
|
|
StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
|
2017-01-11 21:19:41 +01:00
|
|
|
Function *F = Inc->getParent()->getParent();
|
|
|
|
Module *M = F->getParent();
|
|
|
|
if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
|
|
|
|
!canRenameComdatFunc(*F))
|
|
|
|
return (Prefix + Name).str();
|
|
|
|
uint64_t FuncHash = Inc->getHash()->getZExtValue();
|
|
|
|
SmallVector<char, 24> HashPostfix;
|
|
|
|
if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
|
|
|
|
return (Prefix + Name).str();
|
|
|
|
return (Prefix + Name + "." + Twine(FuncHash)).str();
|
2014-12-08 19:02:35 +01:00
|
|
|
}
|
|
|
|
|
2015-11-18 19:14:55 +01:00
|
|
|
/// Decide whether the address of \p F should be recorded in its profile data.
static inline bool shouldRecordFunctionAddr(Function *F) {
  const bool IsAvailableExternally = F->hasAvailableExternallyLinkage();
  const bool IsLinkOnce = F->hasLinkOnceLinkage();
  const bool IsLocal = F->hasLocalLinkage();

  // Linkage check: anything that is not linkonce, local, or
  // available_externally always gets its address recorded.
  if (!(IsLinkOnce || IsLocal || IsAvailableExternally))
    return true;

  // An 'alwaysinline' function with available_externally linkage can't have
  // its address taken: that would create an undefined external reference to
  // the function and the link would fail.
  if (IsAvailableExternally && F->hasFnAttribute(Attribute::AlwaysInline))
    return false;

  // Do not record the address of a function that is both internal and in a
  // COMDAT, so the profile data variable never references internal symbols in
  // a COMDAT.
  if (IsLocal && F->hasComdat())
    return false;

  // Otherwise look for uses other than direct calls or invokes.
  // Inline virtual functions have linkonce_odr linkage. When a key method
  // exists, the vtable is emitted only in the TU that defines the key method,
  // so in other TUs the function is not 'addresstaken'. If the address were
  // not recorded here, the linker could pick the profile data lacking the
  // address, losing indirect call target information.
  return F->hasAddressTaken() || IsLinkOnce;
}
|
|
|
|
|
2019-02-07 19:16:22 +01:00
|
|
|
static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
|
2016-05-22 00:55:34 +02:00
|
|
|
// Don't do this for Darwin. compiler-rt uses linker magic.
|
2019-02-07 19:16:22 +01:00
|
|
|
if (TT.isOSDarwin())
|
2016-05-22 00:55:34 +02:00
|
|
|
return false;
|
|
|
|
// Use linker script magic to get data/cnts/name start/end.
|
2019-02-07 19:16:22 +01:00
|
|
|
if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
|
2019-06-20 23:27:06 +02:00
|
|
|
TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() ||
|
|
|
|
TT.isOSWindows())
|
2016-05-22 00:55:34 +02:00
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
GlobalVariable *
|
|
|
|
InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
|
2015-11-05 01:47:26 +01:00
|
|
|
GlobalVariable *NamePtr = Inc->getName();
|
2015-11-18 19:14:55 +01:00
|
|
|
auto It = ProfileDataMap.find(NamePtr);
|
|
|
|
PerFunctionProfileData PD;
|
|
|
|
if (It != ProfileDataMap.end()) {
|
|
|
|
if (It->second.RegionCounters)
|
|
|
|
return It->second.RegionCounters;
|
|
|
|
PD = It->second;
|
|
|
|
}
|
2014-12-08 19:02:35 +01:00
|
|
|
|
[PGO] Use linkonce_odr linkage for __profd_ variables in comdat groups
This fixes relocations against __profd_ symbols in discarded sections,
which is PR41380.
In general, instrumentation happens very early, and optimization and
inlining happens afterwards. The counters for a function are calculated
early, and after inlining, counters for an inlined function may be
widely referenced by other functions.
For C++ inline functions of all kinds (linkonce_odr &
available_externally mainly), instr profiling wants to deduplicate these
__profc_ and __profd_ globals. Otherwise the binary would be quite
large.
I made __profd_ and __profc_ comdat in r355044, but I chose to make
__profd_ internal. At the time, I was only dealing with coverage, and in
that case, none of the instrumentation needs to reference __profd_.
However, if you use PGO, then instrumentation passes add calls to
__llvm_profile_instrument_range which reference __profd_ globals. The
solution is to make these globals externally visible by using
linkonce_odr linkage for data as was done for counters.
This is safe because PGO adds a CFG hash to the names of the data and
counter globals, so if different TUs have different globals, they will
get different data and counter arrays.
Reviewers: xur, hans
Differential Revision: https://reviews.llvm.org/D67579
llvm-svn: 372020
2019-09-16 20:49:09 +02:00
|
|
|
// Match the linkage and visibility of the name global. COFF supports using
|
|
|
|
// comdats with internal symbols, so do that if we can.
|
2015-05-27 21:34:01 +02:00
|
|
|
Function *Fn = Inc->getParent()->getParent();
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
|
|
|
|
GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
|
|
|
|
if (TT.isOSBinFormatCOFF()) {
|
|
|
|
Linkage = GlobalValue::InternalLinkage;
|
|
|
|
Visibility = GlobalValue::DefaultVisibility;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Move the name variable to the right section. Place them in a COMDAT group
|
|
|
|
// if the associated function is a COMDAT. This will make sure that only one
|
[InstrProf] Use separate comdat group for data and counters
Summary:
I hadn't realized that instrumentation runs before inlining, so we can't
use the function as the comdat group. Doing so can create relocations
against discarded sections when references to discarded __profc_
variables are inlined into functions outside the function's comdat
group.
In the future, perhaps we should consider standardizing the comdat group
names that ELF and COFF use. It will save object file size, since
__profv_$sym won't appear in the symbol table again.
Reviewers: xur, vsk
Subscribers: eraman, hiraditya, cfe-commits, #sanitizers, llvm-commits
Tags: #clang, #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D58737
llvm-svn: 355044
2019-02-28 00:38:44 +01:00
|
|
|
// copy of counters of the COMDAT function will be emitted after linking. Keep
|
|
|
|
// in mind that this pass may run before the inliner, so we need to create a
|
|
|
|
// new comdat group for the counters and profiling data. If we use the comdat
|
|
|
|
// of the parent function, that will result in relocations against discarded
|
|
|
|
// sections.
|
2019-09-17 23:10:49 +02:00
|
|
|
bool NeedComdat = needsComdatForCounter(*Fn, *M);
|
|
|
|
if (NeedComdat) {
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
if (TT.isOSBinFormatCOFF()) {
|
2019-09-17 23:10:49 +02:00
|
|
|
// For COFF, put the counters, data, and values each into their own
|
|
|
|
// comdats. We can't use a group because the Visual C++ linker will
|
|
|
|
// report duplicate symbol errors if there are multiple external symbols
|
|
|
|
// with the same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE.
|
[PGO] Use linkonce_odr linkage for __profd_ variables in comdat groups
This fixes relocations against __profd_ symbols in discarded sections,
which is PR41380.
In general, instrumentation happens very early, and optimization and
inlining happens afterwards. The counters for a function are calculated
early, and after inlining, counters for an inlined function may be
widely referenced by other functions.
For C++ inline functions of all kinds (linkonce_odr &
available_externally mainly), instr profiling wants to deduplicate these
__profc_ and __profd_ globals. Otherwise the binary would be quite
large.
I made __profd_ and __profc_ comdat in r355044, but I chose to make
__profd_ internal. At the time, I was only dealing with coverage, and in
that case, none of the instrumentation needs to reference __profd_.
However, if you use PGO, then instrumentation passes add calls to
__llvm_profile_instrument_range which reference __profd_ globals. The
solution is to make these globals externally visible by using
linkonce_odr linkage for data as was done for counters.
This is safe because PGO adds a CFG hash to the names of the data and
counter globals, so if different TUs have different globals, they will
get different data and counter arrays.
Reviewers: xur, hans
Differential Revision: https://reviews.llvm.org/D67579
llvm-svn: 372020
2019-09-16 20:49:09 +02:00
|
|
|
Linkage = GlobalValue::LinkOnceODRLinkage;
|
|
|
|
Visibility = GlobalValue::HiddenVisibility;
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
}
|
|
|
|
}
|
2019-09-17 23:10:49 +02:00
|
|
|
auto MaybeSetComdat = [=](GlobalVariable *GV) {
|
|
|
|
if (NeedComdat)
|
2019-09-30 20:11:22 +02:00
|
|
|
GV->setComdat(M->getOrInsertComdat(GV->getName()));
|
2019-09-17 23:10:49 +02:00
|
|
|
};
|
2014-12-08 19:02:35 +01:00
|
|
|
|
|
|
|
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
|
|
|
|
LLVMContext &Ctx = M->getContext();
|
|
|
|
ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
|
|
|
|
|
|
|
|
// Create the counters variable.
|
2015-11-05 01:47:26 +01:00
|
|
|
auto *CounterPtr =
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
new GlobalVariable(*M, CounterTy, false, Linkage,
|
2015-10-22 22:32:12 +02:00
|
|
|
Constant::getNullValue(CounterTy),
|
|
|
|
getVarName(Inc, getInstrProfCountersVarPrefix()));
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
CounterPtr->setVisibility(Visibility);
|
2017-04-15 02:09:57 +02:00
|
|
|
CounterPtr->setSection(
|
|
|
|
getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
|
2019-10-15 13:24:36 +02:00
|
|
|
CounterPtr->setAlignment(Align(8));
|
2019-09-17 23:10:49 +02:00
|
|
|
MaybeSetComdat(CounterPtr);
|
[PGO] Use linkonce_odr linkage for __profd_ variables in comdat groups
This fixes relocations against __profd_ symbols in discarded sections,
which is PR41380.
In general, instrumentation happens very early, and optimization and
inlining happens afterwards. The counters for a function are calculated
early, and after inlining, counters for an inlined function may be
widely referenced by other functions.
For C++ inline functions of all kinds (linkonce_odr &
available_externally mainly), instr profiling wants to deduplicate these
__profc_ and __profd_ globals. Otherwise the binary would be quite
large.
I made __profd_ and __profc_ comdat in r355044, but I chose to make
__profd_ internal. At the time, I was only dealing with coverage, and in
that case, none of the instrumentation needs to reference __profd_.
However, if you use PGO, then instrumentation passes add calls to
__llvm_profile_instrument_range which reference __profd_ globals. The
solution is to make these globals externally visible by using
linkonce_odr linkage for data as was done for counters.
This is safe because PGO adds a CFG hash to the names of the data and
counter globals, so if different TUs have different globals, they will
get different data and counter arrays.
Reviewers: xur, hans
Differential Revision: https://reviews.llvm.org/D67579
llvm-svn: 372020
2019-09-16 20:49:09 +02:00
|
|
|
CounterPtr->setLinkage(Linkage);
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2015-11-18 19:14:55 +01:00
|
|
|
auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
|
2016-05-22 00:55:34 +02:00
|
|
|
// Allocate statically the array of pointers to value profile nodes for
|
|
|
|
// the current function.
|
|
|
|
Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
|
2019-02-07 19:16:22 +01:00
|
|
|
if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(TT)) {
|
2016-05-22 00:55:34 +02:00
|
|
|
uint64_t NS = 0;
|
|
|
|
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
|
|
|
|
NS += PD.NumValueSites[Kind];
|
|
|
|
if (NS) {
|
|
|
|
ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
|
|
|
|
|
|
|
|
auto *ValuesVar =
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
new GlobalVariable(*M, ValuesTy, false, Linkage,
|
2016-05-22 00:55:34 +02:00
|
|
|
Constant::getNullValue(ValuesTy),
|
|
|
|
getVarName(Inc, getInstrProfValuesVarPrefix()));
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
ValuesVar->setVisibility(Visibility);
|
2017-04-15 02:09:57 +02:00
|
|
|
ValuesVar->setSection(
|
|
|
|
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
|
2019-10-15 13:24:36 +02:00
|
|
|
ValuesVar->setAlignment(Align(8));
|
2019-09-17 23:10:49 +02:00
|
|
|
MaybeSetComdat(ValuesVar);
|
2016-05-22 00:55:34 +02:00
|
|
|
ValuesPtrExpr =
|
2017-01-18 01:57:48 +01:00
|
|
|
ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
|
2016-05-22 00:55:34 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create data variable.
|
2015-11-18 19:14:55 +01:00
|
|
|
auto *Int16Ty = Type::getInt16Ty(Ctx);
|
2016-05-22 00:55:34 +02:00
|
|
|
auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
|
2015-11-05 01:47:26 +01:00
|
|
|
Type *DataTypes[] = {
|
2016-06-21 04:39:08 +02:00
|
|
|
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
|
|
|
|
#include "llvm/ProfileData/InstrProfData.inc"
|
2015-11-05 01:47:26 +01:00
|
|
|
};
|
2014-12-08 19:02:35 +01:00
|
|
|
auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
|
2015-11-05 01:47:26 +01:00
|
|
|
|
2016-06-21 04:39:08 +02:00
|
|
|
Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
|
|
|
|
? ConstantExpr::getBitCast(Fn, Int8PtrTy)
|
|
|
|
: ConstantPointerNull::get(Int8PtrTy);
|
2015-11-18 19:14:55 +01:00
|
|
|
|
2016-06-21 04:39:08 +02:00
|
|
|
Constant *Int16ArrayVals[IPVK_Last + 1];
|
2015-11-18 19:14:55 +01:00
|
|
|
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
|
|
|
|
Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
Constant *DataVals[] = {
|
2016-06-21 04:39:08 +02:00
|
|
|
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
|
|
|
|
#include "llvm/ProfileData/InstrProfData.inc"
|
2015-11-05 01:47:26 +01:00
|
|
|
};
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
auto *Data = new GlobalVariable(*M, DataTy, false, Linkage,
|
2014-12-08 19:02:35 +01:00
|
|
|
ConstantStruct::get(DataTy, DataVals),
|
2015-10-22 22:32:12 +02:00
|
|
|
getVarName(Inc, getInstrProfDataVarPrefix()));
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
Data->setVisibility(Visibility);
|
2017-04-15 02:09:57 +02:00
|
|
|
Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
|
2019-10-15 13:24:36 +02:00
|
|
|
Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
|
2019-09-17 23:10:49 +02:00
|
|
|
MaybeSetComdat(Data);
|
[PGO] Use linkonce_odr linkage for __profd_ variables in comdat groups
This fixes relocations against __profd_ symbols in discarded sections,
which is PR41380.
In general, instrumentation happens very early, and optimization and
inlining happens afterwards. The counters for a function are calculated
early, and after inlining, counters for an inlined function may be
widely referenced by other functions.
For C++ inline functions of all kinds (linkonce_odr &
available_externally mainly), instr profiling wants to deduplicate these
__profc_ and __profd_ globals. Otherwise the binary would be quite
large.
I made __profd_ and __profc_ comdat in r355044, but I chose to make
__profd_ internal. At the time, I was only dealing with coverage, and in
that case, none of the instrumentation needs to reference __profd_.
However, if you use PGO, then instrumentation passes add calls to
__llvm_profile_instrument_range which reference __profd_ globals. The
solution is to make these globals externally visible by using
linkonce_odr linkage for data as was done for counters.
This is safe because PGO adds a CFG hash to the names of the data and
counter globals, so if different TUs have different globals, they will
get different data and counter arrays.
Reviewers: xur, hans
Differential Revision: https://reviews.llvm.org/D67579
llvm-svn: 372020
2019-09-16 20:49:09 +02:00
|
|
|
Data->setLinkage(Linkage);
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2015-11-18 19:14:55 +01:00
|
|
|
PD.RegionCounters = CounterPtr;
|
|
|
|
PD.DataVar = Data;
|
|
|
|
ProfileDataMap[NamePtr] = PD;
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
// Mark the data variable as used so that it isn't stripped out.
|
|
|
|
UsedVars.push_back(Data);
|
2016-02-08 19:13:49 +01:00
|
|
|
// Now that the linkage set by the FE has been passed to the data and counter
|
|
|
|
// variables, reset Name variable's linkage and visibility to private so that
|
|
|
|
// it can be removed later by the compiler.
|
|
|
|
NamePtr->setLinkage(GlobalValue::PrivateLinkage);
|
|
|
|
// Collect the referenced names to be used by emitNameData.
|
|
|
|
ReferencedNames.push_back(NamePtr);
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2015-11-05 01:47:26 +01:00
|
|
|
return CounterPtr;
|
2014-12-08 19:02:35 +01:00
|
|
|
}
|
|
|
|
|
2016-05-22 00:55:34 +02:00
|
|
|
// Statically allocate the pool of value-profile nodes for this module as a
// single zero-initialized private global placed in the vnodes section.
// Nothing is emitted when static allocation is disabled, when the target
// needs runtime registration of section ranges, or when no value sites were
// recorded in ProfileDataMap.
void InstrProfiling::emitVNodes() {
  // Static allocation is only supported on platforms that can discover the
  // named section start/end without runtime registration.
  if (!ValueProfileStaticAlloc || needsRuntimeRegistrationOfSectionRange(TT))
    return;

  // Sum the value sites of every kind over all instrumented functions.
  size_t TotalValueSites = 0;
  for (auto &Entry : ProfileDataMap) {
    auto &SitesPerKind = Entry.second.NumValueSites;
    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
      TotalValueSites += SitesPerKind[Kind];
  }
  if (TotalValueSites == 0)
    return;

  uint64_t NumVNodes = TotalValueSites * NumCountersPerValueSite;
  // Heuristic for small programs with very few total value sites.
  // The default of vp-counters-per-site was picked from the observation that
  // large apps usually have a low percentage of value sites that actually
  // carry profile data, so the average number of counters per site is low.
  // For small apps with very few sites that may not hold, so bump up the
  // number of counters in that case.
#define INSTR_PROF_MIN_VAL_COUNTS 10
  if (NumVNodes < INSTR_PROF_MIN_VAL_COUNTS) {
    const int Doubled = static_cast<int>(NumVNodes) * 2;
    NumVNodes = std::max(INSTR_PROF_MIN_VAL_COUNTS, Doubled);
  }

  // The node layout comes from the INSTR_PROF_VALUE_NODE entries of
  // InstrProfData.inc; the local is kept named `Ctx` because the expanded
  // LLVMType expressions may refer to it by that name.
  auto &Ctx = M->getContext();
  Type *VNodeTypes[] = {
#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *NodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes));
  ArrayType *NodeArrayTy = ArrayType::get(NodeTy, NumVNodes);

  auto *NodePool = new GlobalVariable(
      *M, NodeArrayTy, false, GlobalValue::PrivateLinkage,
      Constant::getNullValue(NodeArrayTy), getInstrProfVNodesVarName());
  NodePool->setSection(
      getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));

  // Record the pool in UsedVars alongside the other emitted profile globals.
  UsedVars.push_back(NodePool);
}
|
|
|
|
|
2016-02-08 19:13:49 +01:00
|
|
|
void InstrProfiling::emitNameData() {
|
|
|
|
std::string UncompressedData;
|
|
|
|
|
|
|
|
if (ReferencedNames.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
std::string CompressedNameStr;
|
2016-05-19 05:54:45 +02:00
|
|
|
if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
|
2019-12-04 19:35:14 +01:00
|
|
|
DoNameCompression)) {
|
2017-01-18 01:57:48 +01:00
|
|
|
report_fatal_error(toString(std::move(E)), false);
|
2016-05-03 18:53:17 +02:00
|
|
|
}
|
2016-02-08 19:13:49 +01:00
|
|
|
|
|
|
|
auto &Ctx = M->getContext();
|
2017-01-18 01:57:48 +01:00
|
|
|
auto *NamesVal = ConstantDataArray::getString(
|
2016-02-08 19:13:49 +01:00
|
|
|
Ctx, StringRef(CompressedNameStr), false);
|
2017-01-18 01:57:48 +01:00
|
|
|
NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
|
|
|
|
GlobalValue::PrivateLinkage, NamesVal,
|
|
|
|
getInstrProfNamesVarName());
|
2016-02-08 19:13:49 +01:00
|
|
|
NamesSize = CompressedNameStr.size();
|
2017-04-15 02:09:57 +02:00
|
|
|
NamesVar->setSection(
|
|
|
|
getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
|
[InstrProf] Implement static profdata registration
Summary:
The motivating use case is eliminating duplicate profile data registered
for the same inline function in two object files. Before this change,
users would observe multiple symbol definition errors with VC link, but
links with LLD would succeed.
Users (Mozilla) have reported that PGO works well with clang-cl and LLD,
but when using LLD without this static registration, we would get into a
"relocation against a discarded section" situation. I'm not sure what
happens in that situation, but I suspect that duplicate, unused profile
information was retained. If so, this change will reduce the size of
such binaries with LLD.
Now, Windows uses static registration and is in line with all the other
platforms.
Reviewers: davidxl, wmi, inglorion, void, calixte
Subscribers: mgorny, krytarowski, eraman, fedor.sergeev, hiraditya, #sanitizers, dmajor, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D57929
llvm-svn: 353547
2019-02-08 20:03:50 +01:00
|
|
|
// On COFF, it's important to reduce the alignment down to 1 to prevent the
|
|
|
|
// linker from inserting padding before the start of the names section or
|
|
|
|
// between names entries.
|
2019-10-15 13:24:36 +02:00
|
|
|
NamesVar->setAlignment(Align::None());
|
2016-02-08 19:13:49 +01:00
|
|
|
UsedVars.push_back(NamesVar);
|
2017-02-14 21:03:48 +01:00
|
|
|
|
|
|
|
for (auto *NamePtr : ReferencedNames)
|
|
|
|
NamePtr->eraseFromParent();
|
2016-02-08 19:13:49 +01:00
|
|
|
}
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
void InstrProfiling::emitRegistration() {
|
2019-02-07 19:16:22 +01:00
|
|
|
if (!needsRuntimeRegistrationOfSectionRange(TT))
|
2015-10-19 06:17:10 +02:00
|
|
|
return;
|
2015-10-13 20:39:48 +02:00
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
// Construct the function.
|
|
|
|
auto *VoidTy = Type::getVoidTy(M->getContext());
|
|
|
|
auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
|
2016-02-08 19:13:49 +01:00
|
|
|
auto *Int64Ty = Type::getInt64Ty(M->getContext());
|
2014-12-08 19:02:35 +01:00
|
|
|
auto *RegisterFTy = FunctionType::get(VoidTy, false);
|
|
|
|
auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
|
2015-10-23 06:22:58 +02:00
|
|
|
getInstrProfRegFuncsName(), M);
|
2016-06-14 23:01:22 +02:00
|
|
|
RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
|
2016-06-21 04:39:08 +02:00
|
|
|
if (Options.NoRedZone)
|
|
|
|
RegisterF->addFnAttr(Attribute::NoRedZone);
|
2014-12-08 19:02:35 +01:00
|
|
|
|
2015-06-04 13:45:32 +02:00
|
|
|
auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
|
2014-12-08 19:02:35 +01:00
|
|
|
auto *RuntimeRegisterF =
|
|
|
|
Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
|
2015-10-23 06:22:58 +02:00
|
|
|
getInstrProfRegFuncName(), M);
|
2014-12-08 19:02:35 +01:00
|
|
|
|
|
|
|
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
|
|
|
|
for (Value *Data : UsedVars)
|
2018-07-28 00:21:35 +02:00
|
|
|
if (Data != NamesVar && !isa<Function>(Data))
|
2016-02-08 19:13:49 +01:00
|
|
|
IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
|
|
|
|
|
|
|
|
if (NamesVar) {
|
|
|
|
Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
|
|
|
|
auto *NamesRegisterTy =
|
|
|
|
FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
|
|
|
|
auto *NamesRegisterF =
|
|
|
|
Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
|
|
|
|
getInstrProfNamesRegFuncName(), M);
|
|
|
|
IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
|
|
|
|
IRB.getInt64(NamesSize)});
|
|
|
|
}
|
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
IRB.CreateRetVoid();
|
|
|
|
}
|
|
|
|
|
2018-02-28 20:00:08 +01:00
|
|
|
bool InstrProfiling::emitRuntimeHook() {
|
2015-10-29 05:08:31 +01:00
|
|
|
// We expect the linker to be invoked with -u<hook_var> flag for linux,
|
|
|
|
// for which case there is no need to emit the user function.
|
2019-02-07 19:16:22 +01:00
|
|
|
if (TT.isOSLinux())
|
2018-02-28 20:00:08 +01:00
|
|
|
return false;
|
2015-10-29 05:08:31 +01:00
|
|
|
|
2014-12-08 19:02:35 +01:00
|
|
|
// If the module's provided its own runtime, we don't need to do anything.
|
2016-06-21 04:39:08 +02:00
|
|
|
if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
|
2018-02-28 20:00:08 +01:00
|
|
|
return false;
|
2014-12-08 19:02:35 +01:00
|
|
|
|
|
|
|
// Declare an external variable that will pull in the runtime initialization.
|
|
|
|
auto *Int32Ty = Type::getInt32Ty(M->getContext());
|
|
|
|
auto *Var =
|
|
|
|
new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
|
2015-10-23 06:22:58 +02:00
|
|
|
nullptr, getInstrProfRuntimeHookVarName());
|
2014-12-08 19:02:35 +01:00
|
|
|
|
|
|
|
// Make a function that uses it.
|
2015-10-23 06:22:58 +02:00
|
|
|
auto *User = Function::Create(FunctionType::get(Int32Ty, false),
|
|
|
|
GlobalValue::LinkOnceODRLinkage,
|
|
|
|
getInstrProfRuntimeHookVarUseFuncName(), M);
|
2014-12-08 19:02:35 +01:00
|
|
|
User->addFnAttr(Attribute::NoInline);
|
2016-06-21 04:39:08 +02:00
|
|
|
if (Options.NoRedZone)
|
|
|
|
User->addFnAttr(Attribute::NoRedZone);
|
2015-02-25 23:52:20 +01:00
|
|
|
User->setVisibility(GlobalValue::HiddenVisibility);
|
2019-02-07 19:16:22 +01:00
|
|
|
if (TT.supportsCOMDAT())
|
2016-05-24 20:47:38 +02:00
|
|
|
User->setComdat(M->getOrInsertComdat(User->getName()));
|
2014-12-08 19:02:35 +01:00
|
|
|
|
|
|
|
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
|
2019-02-01 21:44:24 +01:00
|
|
|
auto *Load = IRB.CreateLoad(Int32Ty, Var);
|
2014-12-08 19:02:35 +01:00
|
|
|
IRB.CreateRet(Load);
|
|
|
|
|
|
|
|
// Mark the user variable as used so that it isn't stripped out.
|
|
|
|
UsedVars.push_back(User);
|
2018-02-28 20:00:08 +01:00
|
|
|
return true;
|
2014-12-08 19:02:35 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void InstrProfiling::emitUses() {
|
2016-10-26 01:53:31 +02:00
|
|
|
if (!UsedVars.empty())
|
|
|
|
appendToUsed(*M, UsedVars);
|
2014-12-08 19:02:35 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void InstrProfiling::emitInitialization() {
|
2019-02-27 18:24:33 +01:00
|
|
|
// Create ProfileFileName variable. Don't don't this for the
|
|
|
|
// context-sensitive instrumentation lowering: This lowering is after
|
|
|
|
// LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
|
|
|
|
// have already create the variable before LTO/ThinLTO linking.
|
|
|
|
if (!IsCS)
|
|
|
|
createProfileFileNameVar(*M, Options.InstrProfileOutput);
|
2019-02-01 21:43:25 +01:00
|
|
|
Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
|
2016-07-22 01:19:10 +02:00
|
|
|
if (!RegisterF)
|
2016-06-21 04:39:08 +02:00
|
|
|
return;
|
2014-12-08 19:02:35 +01:00
|
|
|
|
|
|
|
// Create the initialization function.
|
|
|
|
auto *VoidTy = Type::getVoidTy(M->getContext());
|
2015-10-23 06:22:58 +02:00
|
|
|
auto *F = Function::Create(FunctionType::get(VoidTy, false),
|
|
|
|
GlobalValue::InternalLinkage,
|
|
|
|
getInstrProfInitFuncName(), M);
|
2016-06-14 23:01:22 +02:00
|
|
|
F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
|
2014-12-08 19:02:35 +01:00
|
|
|
F->addFnAttr(Attribute::NoInline);
|
2016-06-21 04:39:08 +02:00
|
|
|
if (Options.NoRedZone)
|
|
|
|
F->addFnAttr(Attribute::NoRedZone);
|
2014-12-08 19:02:35 +01:00
|
|
|
|
|
|
|
// Add the basic block and the necessary calls.
|
|
|
|
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
|
2019-02-01 21:43:25 +01:00
|
|
|
IRB.CreateCall(RegisterF, {});
|
2014-12-08 19:02:35 +01:00
|
|
|
IRB.CreateRetVoid();
|
|
|
|
|
|
|
|
appendToGlobalCtors(*M, F, 0);
|
|
|
|
}
|