mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-21 18:22:53 +01:00
[CSSPGO] Undoing the concept of dangling pseudo probe
As a follow-up to https://reviews.llvm.org/D104129, I'm cleaning up the dangling-probe-related code in both the compiler and llvm-profgen. I'm seeing a 5% size win for the pseudo_probe section for SPEC2017 and 10% for Cinder. Certain benchmarks, such as 602.gcc, have a 20% size win. No obvious difference is seen in build time for SPEC2017 and Cinder. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D104477
This commit is contained in:
parent
7afbecd70d
commit
7fbb587058
@ -698,10 +698,6 @@ public:
|
||||
/// operands in the successor blocks which refer to FromMBB to refer to this.
|
||||
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB);
|
||||
|
||||
/// move all pseudo probes in this block to the end of /c ToMBB To and tag
|
||||
/// them dangling.
|
||||
void moveAndDanglePseudoProbes(MachineBasicBlock *ToMBB);
|
||||
|
||||
/// Return true if any of the successors have probabilities attached to them.
|
||||
bool hasSuccessorProbabilities() const { return !Probs.empty(); }
|
||||
|
||||
|
@ -29,7 +29,6 @@ enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
|
||||
|
||||
enum class PseudoProbeAttributes {
|
||||
Reserved = 0x1, // Reserved for future use.
|
||||
Dangling = 0x2, // The probe is dangling.
|
||||
};
|
||||
|
||||
// The saturated distrution factor representing 100% for block probes.
|
||||
@ -84,19 +83,11 @@ struct PseudoProbe {
|
||||
// A saturated distribution factor stands for 1.0 or 100%. A pesudo probe has
|
||||
// a factor with the value ranged from 0.0 to 1.0.
|
||||
float Factor;
|
||||
|
||||
bool isDangling() const {
|
||||
return Attr & (uint32_t)PseudoProbeAttributes::Dangling;
|
||||
}
|
||||
};
|
||||
|
||||
Optional<PseudoProbe> extractProbe(const Instruction &Inst);
|
||||
|
||||
void setProbeDistributionFactor(Instruction &Inst, float Factor);
|
||||
|
||||
bool moveAndDanglePseudoProbes(BasicBlock *From, Instruction *To);
|
||||
|
||||
bool removeRedundantPseudoProbes(BasicBlock *Block);
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_IR_PSEUDOPROBE_H
|
||||
|
@ -27,7 +27,7 @@
|
||||
// TYPE (uint4)
|
||||
// 0 - block probe, 1 - indirect call, 2 - direct call
|
||||
// ATTRIBUTE (uint3)
|
||||
// 1 - reserved, 2 - dangling
|
||||
// 1 - reserved
|
||||
// ADDRESS_TYPE (uint1)
|
||||
// 0 - code address, 1 - address delta
|
||||
// CODE_ADDRESS (uint64 or ULEB128)
|
||||
|
@ -111,7 +111,7 @@ bool MergeBlockSuccessorsIntoGivenBlocks(
|
||||
/// Try to remove redundant dbg.value instructions from given basic block.
|
||||
/// Returns true if at least one instruction was removed. Remove redundant
|
||||
/// pseudo ops when RemovePseudoOp is true.
|
||||
bool RemoveRedundantDbgInstrs(BasicBlock *BB, bool RemovePseudoOp = false);
|
||||
bool RemoveRedundantDbgInstrs(BasicBlock *BB);
|
||||
|
||||
/// Replace all uses of an instruction (specified by BI) with a value, then
|
||||
/// remove and delete the original instruction.
|
||||
|
@ -1307,16 +1307,6 @@ static void salvageDebugInfoFromEmptyBlock(const TargetInstrInfo *TII,
|
||||
for (MachineBasicBlock *PredBB : MBB.predecessors())
|
||||
if (PredBB->succ_size() == 1)
|
||||
copyDebugInfoToPredecessor(TII, MBB, *PredBB);
|
||||
|
||||
// For AutoFDO, if the block is removed, we won't be able to sample it. To
|
||||
// avoid assigning a zero weight for BB, move all its pseudo probes into once
|
||||
// of its predecessors or successors and mark them dangling. This should allow
|
||||
// the counts inference a chance to get a more reasonable weight for the
|
||||
// block.
|
||||
if (!MBB.pred_empty())
|
||||
MBB.moveAndDanglePseudoProbes(*MBB.pred_begin());
|
||||
else if (!MBB.succ_empty())
|
||||
MBB.moveAndDanglePseudoProbes(*MBB.succ_begin());
|
||||
}
|
||||
|
||||
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
|
||||
|
@ -901,32 +901,6 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) {
|
||||
normalizeSuccProbs();
|
||||
}
|
||||
|
||||
/// A block emptied (i.e., with all instructions moved out of it) won't be
|
||||
/// sampled at run time. In such cases, AutoFDO will be informed of zero samples
|
||||
/// collected for the block. This is not accurate and could lead to misleading
|
||||
/// weights assigned for the block. A way to mitigate that is to treat such
|
||||
/// block as having unknown counts in the AutoFDO profile loader and allow the
|
||||
/// counts inference tool a chance to calculate a relatively reasonable weight
|
||||
/// for it. This can be done by moving all pseudo probes in the emptied block
|
||||
/// i.e, /c this, to before /c ToMBB and tag them dangling. Note that this is
|
||||
/// not needed for dead blocks which really have a zero weight. It's per
|
||||
/// transforms to decide whether to call this function or not.
|
||||
void MachineBasicBlock::moveAndDanglePseudoProbes(MachineBasicBlock *ToMBB) {
|
||||
SmallVector<MachineInstr *, 4> ToBeMoved;
|
||||
for (MachineInstr &MI : instrs()) {
|
||||
if (MI.isPseudoProbe()) {
|
||||
MI.addPseudoProbeAttribute(PseudoProbeAttributes::Dangling);
|
||||
ToBeMoved.push_back(&MI);
|
||||
}
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator I = ToMBB->getFirstTerminator();
|
||||
for (MachineInstr *MI : ToBeMoved) {
|
||||
MI->removeFromParent();
|
||||
ToMBB->insert(I, MI);
|
||||
}
|
||||
}
|
||||
|
||||
bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
|
||||
return is_contained(predecessors(), MBB);
|
||||
}
|
||||
|
@ -102,51 +102,22 @@ public:
|
||||
// Probes not surrounded by any real instructions in the same block are
|
||||
// called dangling probes. Since there's no good way to pick up a sample
|
||||
// collection point for dangling probes at compile time, they are being
|
||||
// tagged so that the profile correlation tool will not report any
|
||||
// removed so that the profile correlation tool will not report any
|
||||
// samples collected for them and it's up to the counts inference tool
|
||||
// to get them a reasonable count.
|
||||
SmallVector<MachineInstr *, 4> ToBeRemoved;
|
||||
for (MachineInstr &MI : MBB) {
|
||||
if (MI.isPseudoProbe())
|
||||
MI.addPseudoProbeAttribute(PseudoProbeAttributes::Dangling);
|
||||
ToBeRemoved.push_back(&MI);
|
||||
}
|
||||
|
||||
for (auto *MI : ToBeRemoved)
|
||||
MI->eraseFromParent();
|
||||
|
||||
Changed |= !ToBeRemoved.empty();
|
||||
}
|
||||
}
|
||||
|
||||
// Remove redundant dangling probes. Same dangling probes are redundant
|
||||
// since they all have the same semantic that is to rely on the counts
|
||||
// inference too to get reasonable count for the same original block.
|
||||
// Therefore, there's no need to keep multiple copies of them.
|
||||
auto Hash = [](const MachineInstr *MI) {
|
||||
return std::hash<uint64_t>()(MI->getOperand(0).getImm()) ^
|
||||
std::hash<uint64_t>()(MI->getOperand(1).getImm());
|
||||
};
|
||||
|
||||
auto IsEqual = [](const MachineInstr *Left, const MachineInstr *Right) {
|
||||
return Left->getOperand(0).getImm() == Right->getOperand(0).getImm() &&
|
||||
Left->getOperand(1).getImm() == Right->getOperand(1).getImm() &&
|
||||
Left->getOperand(3).getImm() == Right->getOperand(3).getImm() &&
|
||||
Left->getDebugLoc() == Right->getDebugLoc();
|
||||
};
|
||||
|
||||
SmallVector<MachineInstr *, 4> ToBeRemoved;
|
||||
std::unordered_set<MachineInstr *, decltype(Hash), decltype(IsEqual)>
|
||||
DanglingProbes(0, Hash, IsEqual);
|
||||
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
for (MachineInstr &MI : MBB) {
|
||||
if (MI.isPseudoProbe()) {
|
||||
if ((uint32_t)MI.getPseudoProbeAttribute() &
|
||||
(uint32_t)PseudoProbeAttributes::Dangling)
|
||||
if (!DanglingProbes.insert(&MI).second)
|
||||
ToBeRemoved.push_back(&MI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto *MI : ToBeRemoved)
|
||||
MI->eraseFromParent();
|
||||
|
||||
Changed |= !ToBeRemoved.empty();
|
||||
return Changed;
|
||||
}
|
||||
|
||||
|
@ -779,12 +779,6 @@ bool TailDuplicator::duplicateSimpleBB(
|
||||
assert(PredBB->succ_size() <= 1);
|
||||
}
|
||||
|
||||
// For AutoFDO, since BB is going to be removed, we won't be able to sample
|
||||
// it. To avoid assigning a zero weight for BB, move all its pseudo probes
|
||||
// into Succ and mark them dangling. This should allow the counts inference
|
||||
// a chance to get a more reasonable weight for BB.
|
||||
TailBB->moveAndDanglePseudoProbes(PredBB);
|
||||
|
||||
if (PredTBB)
|
||||
TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DL);
|
||||
|
||||
|
@ -106,65 +106,4 @@ void addPseudoProbeAttribute(PseudoProbeInst &Inst,
|
||||
if (OldAttr != NewAttr)
|
||||
Inst.replaceUsesOfWith(Inst.getAttributes(), Builder.getInt32(NewAttr));
|
||||
}
|
||||
|
||||
/// A block emptied (i.e., with all instructions moved out of it) won't be
|
||||
/// sampled at run time. In such cases, AutoFDO will be informed of zero samples
|
||||
/// collected for the block. This is not accurate and could lead to misleading
|
||||
/// weights assigned for the block. A way to mitigate that is to treat such
|
||||
/// block as having unknown counts in the AutoFDO profile loader and allow the
|
||||
/// counts inference tool a chance to calculate a relatively reasonable weight
|
||||
/// for it. This can be done by moving all pseudo probes in the emptied block
|
||||
/// i.e, /c From, to before /c To and tag them dangling. Note that this is
|
||||
/// not needed for dead blocks which really have a zero weight. It's per
|
||||
/// transforms to decide whether to call this function or not.
|
||||
bool moveAndDanglePseudoProbes(BasicBlock *From, Instruction *To) {
|
||||
SmallVector<PseudoProbeInst *, 4> ToBeMoved;
|
||||
for (auto &I : *From) {
|
||||
if (auto *II = dyn_cast<PseudoProbeInst>(&I)) {
|
||||
addPseudoProbeAttribute(*II, PseudoProbeAttributes::Dangling);
|
||||
ToBeMoved.push_back(II);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto *I : ToBeMoved)
|
||||
I->moveBefore(To);
|
||||
|
||||
return !ToBeMoved.empty();
|
||||
}
|
||||
|
||||
/// Same dangling probes in one blocks are redundant since they all have the
|
||||
/// same semantic that is to rely on the counts inference too to get reasonable
|
||||
/// count for the same original block. Therefore, there's no need to keep
|
||||
/// multiple copies of them.
|
||||
bool removeRedundantPseudoProbes(BasicBlock *Block) {
|
||||
|
||||
auto Hash = [](const PseudoProbeInst *I) {
|
||||
return std::hash<uint64_t>()(I->getFuncGuid()->getZExtValue()) ^
|
||||
std::hash<uint64_t>()(I->getIndex()->getZExtValue());
|
||||
};
|
||||
|
||||
auto IsEqual = [](const PseudoProbeInst *Left, const PseudoProbeInst *Right) {
|
||||
return Left->getFuncGuid() == Right->getFuncGuid() &&
|
||||
Left->getIndex() == Right->getIndex() &&
|
||||
Left->getAttributes() == Right->getAttributes() &&
|
||||
Left->getDebugLoc() == Right->getDebugLoc();
|
||||
};
|
||||
|
||||
SmallVector<PseudoProbeInst *, 4> ToBeRemoved;
|
||||
std::unordered_set<PseudoProbeInst *, decltype(Hash), decltype(IsEqual)>
|
||||
DanglingProbes(0, Hash, IsEqual);
|
||||
|
||||
for (auto &I : *Block) {
|
||||
if (auto *II = dyn_cast<PseudoProbeInst>(&I)) {
|
||||
if (II->getAttributes()->getZExtValue() &
|
||||
(uint32_t)PseudoProbeAttributes::Dangling)
|
||||
if (!DanglingProbes.insert(II).second)
|
||||
ToBeRemoved.push_back(II);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto *I : ToBeRemoved)
|
||||
I->eraseFromParent();
|
||||
return !ToBeRemoved.empty();
|
||||
}
|
||||
} // namespace llvm
|
||||
|
@ -555,12 +555,6 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
|
||||
if (!Probe)
|
||||
return std::error_code();
|
||||
|
||||
// This is not the dangling probe from the training pass but generated by the
|
||||
// current compilation. Ignore this since they are logically deleted and
|
||||
// should not consume any profile samples.
|
||||
if (Probe->isDangling())
|
||||
return std::error_code();
|
||||
|
||||
const FunctionSamples *FS = findFunctionSamples(Inst);
|
||||
// If none of the instruction has FunctionSample, we choose to return zero
|
||||
// value sample to indicate the BB is cold. This could happen when the
|
||||
|
@ -425,15 +425,8 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F,
|
||||
for (auto &Block : F) {
|
||||
for (auto &I : Block) {
|
||||
if (Optional<PseudoProbe> Probe = extractProbe(I)) {
|
||||
// Do not count dangling probes since they are logically deleted and the
|
||||
// current block that a dangling probe resides in doesn't reflect the
|
||||
// execution count of the probe. The original samples of the probe will
|
||||
// be distributed among the rest probes if there are any, this is
|
||||
// less-than-deal but at least we don't lose any samples.
|
||||
if (!Probe->isDangling()) {
|
||||
uint64_t Hash = computeCallStackHash(I);
|
||||
ProbeFactors[{Probe->Id, Hash}] += BBProfileCount(&Block);
|
||||
}
|
||||
uint64_t Hash = computeCallStackHash(I);
|
||||
ProbeFactors[{Probe->Id, Hash}] += BBProfileCount(&Block);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -442,14 +435,10 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F,
|
||||
for (auto &Block : F) {
|
||||
for (auto &I : Block) {
|
||||
if (Optional<PseudoProbe> Probe = extractProbe(I)) {
|
||||
// Ignore danling probes since they are logically deleted and should do
|
||||
// not consume any profile samples in the subsequent profile annotation.
|
||||
if (!Probe->isDangling()) {
|
||||
uint64_t Hash = computeCallStackHash(I);
|
||||
float Sum = ProbeFactors[{Probe->Id, Hash}];
|
||||
if (Sum != 0)
|
||||
setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
|
||||
}
|
||||
uint64_t Hash = computeCallStackHash(I);
|
||||
float Sum = ProbeFactors[{Probe->Id, Hash}];
|
||||
if (Sum != 0)
|
||||
setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -433,9 +433,8 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
|
||||
|
||||
// Jump threading may have introduced redundant debug values into BB
|
||||
// which should be removed.
|
||||
// Remove redundant pseudo probes as well.
|
||||
if (Changed)
|
||||
RemoveRedundantDbgInstrs(&BB, true);
|
||||
RemoveRedundantDbgInstrs(&BB);
|
||||
|
||||
// Stop processing BB if it's the entry or is now deleted. The following
|
||||
// routines attempt to eliminate BB and locating a suitable replacement
|
||||
@ -468,7 +467,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
|
||||
// detect and transform nested loops later.
|
||||
!LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
|
||||
TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
|
||||
RemoveRedundantDbgInstrs(Succ, true);
|
||||
RemoveRedundantDbgInstrs(Succ);
|
||||
// BB is valid for cleanup here because we passed in DTU. F remains
|
||||
// BB's parent until a DTU->getDomTree() event.
|
||||
LVI->eraseBlock(&BB);
|
||||
|
@ -429,7 +429,7 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
|
||||
return !ToBeRemoved.empty();
|
||||
}
|
||||
|
||||
bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB, bool RemovePseudoOp) {
|
||||
bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) {
|
||||
bool MadeChanges = false;
|
||||
// By using the "backward scan" strategy before the "forward scan" strategy we
|
||||
// can remove both dbg.value (2) and (3) in a situation like this:
|
||||
@ -444,8 +444,6 @@ bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB, bool RemovePseudoOp) {
|
||||
// already is described as having the value V1 at (1).
|
||||
MadeChanges |= removeRedundantDbgInstrsUsingBackwardScan(BB);
|
||||
MadeChanges |= removeRedundantDbgInstrsUsingForwardScan(BB);
|
||||
if (RemovePseudoOp)
|
||||
MadeChanges |= removeRedundantPseudoProbes(BB);
|
||||
|
||||
if (MadeChanges)
|
||||
LLVM_DEBUG(dbgs() << "Removed redundant dbg instrs from: "
|
||||
|
@ -1133,12 +1133,6 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
|
||||
for (BasicBlock *Pred : predecessors(BB))
|
||||
Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD);
|
||||
|
||||
// For AutoFDO, since BB is going to be removed, we won't be able to sample
|
||||
// it. To avoid assigning a zero weight for BB, move all its pseudo probes
|
||||
// into Succ and mark them dangling. This should allow the counts inference a
|
||||
// chance to get a more reasonable weight for BB.
|
||||
moveAndDanglePseudoProbes(BB, &*Succ->getFirstInsertionPt());
|
||||
|
||||
// Everything that jumped to BB now goes to Succ.
|
||||
BB->replaceAllUsesWith(Succ);
|
||||
if (!Succ->hasName()) Succ->takeName(BB);
|
||||
@ -2767,19 +2761,13 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
|
||||
// encode predicated DIExpressions that yield different results on different
|
||||
// code paths.
|
||||
|
||||
// A hoisted conditional probe should be treated as dangling so that it will
|
||||
// not be over-counted when the samples collected on the non-conditional path
|
||||
// are counted towards the conditional path. We leave it for the counts
|
||||
// inference algorithm to figure out a proper count for a danglng probe.
|
||||
moveAndDanglePseudoProbes(BB, InsertPt);
|
||||
|
||||
for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
|
||||
Instruction *I = &*II;
|
||||
I->dropUnknownNonDebugMetadata();
|
||||
if (I->isUsedByMetadata())
|
||||
dropDebugUsers(*I);
|
||||
if (isa<DbgInfoIntrinsic>(I)) {
|
||||
// Remove DbgInfo Intrinsics.
|
||||
if (I->isDebugOrPseudoInst()) {
|
||||
// Remove DbgInfo and pseudo probe Intrinsics.
|
||||
II = I->eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
|
@ -1325,10 +1325,6 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
|
||||
DTU->applyUpdates(Updates);
|
||||
}
|
||||
|
||||
// Here the BB is not a dead block but folded into its predecessors, so move
|
||||
// the probe and mark it as dangling.
|
||||
moveAndDanglePseudoProbes(BB, NewSI);
|
||||
|
||||
++NumFoldValueComparisonIntoPredecessors;
|
||||
return true;
|
||||
}
|
||||
@ -2394,6 +2390,11 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
|
||||
// probability for ThenBB, which is fine since the optimization here takes
|
||||
// place regardless of the branch probability.
|
||||
if (isa<PseudoProbeInst>(I)) {
|
||||
// The probe should be deleted so that it will not be over-counted when
|
||||
// the samples collected on the non-conditional path are counted towards
|
||||
// the conditional path. We leave it for the counts inference algorithm to
|
||||
// figure out a proper count for an unknown probe.
|
||||
SpeculatedDbgIntrinsics.push_back(I);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -2469,19 +2470,11 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
|
||||
SpeculatedStore->getDebugLoc());
|
||||
}
|
||||
|
||||
// A hoisted conditional probe should be treated as dangling so that it will
|
||||
// not be over-counted when the samples collected on the non-conditional path
|
||||
// are counted towards the conditional path. We leave it for the counts
|
||||
// inference algorithm to figure out a proper count for a danglng probe.
|
||||
moveAndDanglePseudoProbes(ThenBB, BI);
|
||||
|
||||
// Metadata can be dependent on the condition we are hoisting above.
|
||||
// Conservatively strip all metadata on the instruction. Drop the debug loc
|
||||
// to avoid making it appear as if the condition is a constant, which would
|
||||
// be misleading while debugging.
|
||||
for (auto &I : *ThenBB) {
|
||||
assert(!isa<PseudoProbeInst>(I) &&
|
||||
"Should not drop debug info from any pseudo probes.");
|
||||
if (!SpeculatedStoreValue || &I != SpeculatedStore)
|
||||
I.setDebugLoc(DebugLoc());
|
||||
I.dropUnknownNonDebugMetadata();
|
||||
|
@ -19,62 +19,35 @@ T:
|
||||
F:
|
||||
br label %Merge
|
||||
Merge:
|
||||
;; Check branch T and F are gone, and their probes (probe 2 and 3) are dangling.
|
||||
;; Check branch T and F are gone, and their probes (probe 2 and 3) are gone too.
|
||||
; JT-LABEL-NO: T
|
||||
; JT-LABEL-NO: F
|
||||
; JT-LABEL: Merge
|
||||
; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 2, i64 -1)
|
||||
; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 2, i64 -1)
|
||||
; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3
|
||||
; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2
|
||||
; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
|
||||
; ASM: .pseudoprobe 6699318081062747564 3 0 2
|
||||
; ASM: .pseudoprobe 6699318081062747564 2 0 2
|
||||
; ASM-NOT: .pseudoprobe 6699318081062747564 3
|
||||
; ASM-NOT: .pseudoprobe 6699318081062747564 2
|
||||
; ASM: .pseudoprobe 6699318081062747564 4 0 0
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
;; Check block T and F are gone, and their probes (probe 2 and 3) are dangling.
|
||||
;; Check block T and F are gone, and their probes (probe 2 and 3) are gone too.
|
||||
; MIR-tail: bb.0
|
||||
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
|
||||
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 2, 0, 2
|
||||
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 3, 0, 2
|
||||
; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 2
|
||||
; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 3
|
||||
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0
|
||||
|
||||
|
||||
define void @foo2() {
|
||||
bb:
|
||||
%tmp = call i32 @f1()
|
||||
%tmp1 = icmp eq i32 %tmp, 1
|
||||
br i1 %tmp1, label %bb5, label %bb8
|
||||
|
||||
bb2:
|
||||
%tmp4 = icmp ne i32 %tmp, 1
|
||||
switch i1 %tmp4, label %bb2 [
|
||||
i1 0, label %bb5
|
||||
i1 1, label %bb8
|
||||
]
|
||||
|
||||
bb5:
|
||||
;; Check the pseudo probe with id 3 only has one copy.
|
||||
; JT-COUNT-1: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 3, i32 2, i64 -1)
|
||||
; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 3, i32 2, i64 -1)
|
||||
%tmp6 = phi i1 [ %tmp1, %bb ], [ false, %bb2 ]
|
||||
br i1 %tmp6, label %bb8, label %bb7
|
||||
|
||||
bb7:
|
||||
br label %bb8
|
||||
|
||||
bb8:
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @test(i32 %a, i32 %b, i32 %c) {
|
||||
;; Check block bb1 and bb2 are gone, and their probes (probe 2 and 3) are dangling.
|
||||
;; Check block bb1 and bb2 are gone, and their probes (probe 2 and 3) are gone too.
|
||||
; SC-LABEL: @test(
|
||||
; SC-LABEL-NO: bb1
|
||||
; SC-LABEL-NO: bb2
|
||||
; SC: [[T1:%.*]] = icmp eq i32 [[B:%.*]], 0
|
||||
; SC-DAG: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 2, i32 2, i64 -1)
|
||||
; SC-DAG: call void @llvm.pseudoprobe(i64 [[#GUID3]], i64 3, i32 2, i64 -1)
|
||||
; SC-NOT: call void @llvm.pseudoprobe(i64 [[#]], i64 2
|
||||
; SC-NOT: call void @llvm.pseudoprobe(i64 [[#]], i64 3
|
||||
; SC: [[T2:%.*]] = icmp sgt i32 [[C:%.*]], 1
|
||||
; SC: [[T3:%.*]] = add i32 [[A:%.*]], 1
|
||||
; SC: [[SPEC_SELECT:%.*]] = select i1 [[T2]], i32 [[T3]], i32 [[A]]
|
||||
|
@ -2,9 +2,9 @@
|
||||
; RUN: opt < %s -simplifycfg -S -o %t
|
||||
; RUN: FileCheck %s < %t
|
||||
|
||||
; Test to make sure the dangling probe's metadata not being dropped.
|
||||
; Test to make sure the dangling probe is gone.
|
||||
; CHECK: define dso_local i32 @foo
|
||||
; CHECK: call void @llvm.pseudoprobe(i64 -4224472938262609671, i64 5, i32 2, i64 -1), !dbg ![[#]]
|
||||
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -4224472938262609671, i64 5
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define dso_local i32 @foo(i32* nocapture %marker, i32* nocapture %move_ordering, i32* nocapture %moves, i32 %num_moves) local_unnamed_addr #0 !dbg !10 {
|
||||
|
@ -1,10 +1,10 @@
|
||||
; Test after FoldValueComparisonIntoPredecessors, one dangling probe is generated
|
||||
; Test after FoldValueComparisonIntoPredecessors, one dangling probe is gone
|
||||
; RUN: opt -S -passes='pseudo-probe,simplifycfg' < %s | FileCheck %s
|
||||
|
||||
|
||||
; CHECK: if.end80: ; preds = %if.end
|
||||
; CHECK-NEXT: call void @llvm.pseudoprobe(i64 -2281696412744416103, i64 3, i32 0, i64 -1)
|
||||
; CHECK-NEXT: call void @llvm.pseudoprobe(i64 -2281696412744416103, i64 4, i32 2, i64 -1)
|
||||
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -2281696412744416103, i64 4
|
||||
|
||||
define dso_local i32 @readCBPandCoeffsFromNAL(i1 %c, i32 %x, i32 %y) local_unnamed_addr {
|
||||
;
|
||||
|
@ -1,27 +0,0 @@
|
||||
|
||||
# REQUIRES: x86-registered-target
|
||||
# Ensure llc can read and parse MIR pseudo probe operations.
|
||||
# RUN: llc -mtriple x86_64-- -run-pass=pseudo-probe-inserter %s -o - | FileCheck %s
|
||||
|
||||
# CHECK: PSEUDO_PROBE 6699318081062747564, 1, 0, 0
|
||||
# check probe 2 is moved before the test instruction.
|
||||
# CHECK: PSEUDO_PROBE 6699318081062747564, 2, 0, 0
|
||||
# CHECK: TEST32rr
|
||||
# check probe 3 is dangling.
|
||||
# CHECK: PSEUDO_PROBE 6699318081062747564, 3, 0, 2
|
||||
|
||||
name: foo
|
||||
body: |
|
||||
bb.0:
|
||||
TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags
|
||||
PSEUDO_PROBE 6699318081062747564, 1, 0, 0
|
||||
JCC_1 %bb.1, 4, implicit $eflags
|
||||
|
||||
bb.2:
|
||||
TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags
|
||||
PSEUDO_PROBE 6699318081062747564, 2, 0, 0
|
||||
|
||||
bb.1:
|
||||
PSEUDO_PROBE 6699318081062747564, 3, 0, 0
|
||||
|
||||
...
|
@ -1,41 +0,0 @@
|
||||
; REQUIRES: x86_64-linux
|
||||
; RUN: llc -pseudo-probe-for-profiling %s -filetype=asm -o - | FileCheck %s
|
||||
|
||||
declare i32 @f1()
|
||||
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
|
||||
|
||||
define void @foo2() {
|
||||
bb:
|
||||
; CHECK: .pseudoprobe 2494702099028631698 1 0 0
|
||||
call void @llvm.pseudoprobe(i64 2494702099028631698, i64 1, i32 0, i64 -1)
|
||||
%tmp = call i32 @f1()
|
||||
%tmp1 = icmp eq i32 %tmp, 1
|
||||
br i1 %tmp1, label %bb5, label %bb8
|
||||
|
||||
bb2:
|
||||
;; Check the pseudo probe with id 2 only has one copy.
|
||||
; CHECK-COUNT-1: .pseudoprobe 2494702099028631698 2 0 2
|
||||
; CHECK-NOT: .pseudoprobe 2494702099028631698 2 0 2
|
||||
call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
|
||||
%tmp4 = icmp ne i32 %tmp, 1
|
||||
call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
|
||||
switch i1 %tmp4, label %bb2 [
|
||||
i1 0, label %bb5
|
||||
i1 1, label %bb8
|
||||
]
|
||||
|
||||
bb5:
|
||||
%tmp6 = phi i1 [ %tmp1, %bb ], [ false, %bb2 ]
|
||||
call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
|
||||
br i1 %tmp6, label %bb8, label %bb7
|
||||
|
||||
bb7:
|
||||
br label %bb8
|
||||
|
||||
bb8:
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.pseudo_probe_desc = !{!0}
|
||||
|
||||
!0 = !{i64 2494702099028631698, i64 281612674956943, !"foo2", null}
|
@ -1,5 +0,0 @@
|
||||
PERF_RECORD_MMAP2 595196/595196: [0x201000(0x1000) @ 0 00:1d 224227621 1042948]: r-xp /home/inline-cs-pseudoprobe.perfbin
|
||||
|
||||
20180e
|
||||
5541f689495641d7
|
||||
0x201858/0x20180e/P/-/-/0 0x20182b/0x20184d/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -46,8 +46,6 @@ if.then: ; preds = %while.body
|
||||
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1) #6, !dbg !44
|
||||
%rem.i = urem i32 %inc8, 3, !dbg !45
|
||||
%tobool.not.i = icmp eq i32 %rem.i, 0, !dbg !45
|
||||
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 2, i32 2, i64 -1) #6, !dbg !46
|
||||
call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 3, i32 2, i64 -1) #6, !dbg !47
|
||||
%0 = sub i32 0, %s.07, !dbg !48
|
||||
%retval.0.p.i = select i1 %tobool.not.i, i32 %s.07, i32 %0, !dbg !48
|
||||
%retval.0.i = add i32 %retval.0.p.i, %inc8, !dbg !48
|
||||
|
Binary file not shown.
@ -1,5 +1,5 @@
|
||||
PERF_RECORD_MMAP2 1664112/1664112: [0x400000(0x1000) @ 0 08:11 806256818 82060973]: r-xp truncated-pseudoprobe.perfbin
|
||||
40057d
|
||||
4005b9
|
||||
40060d
|
||||
400659
|
||||
7f67469af555
|
||||
0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0 0x40059f/0x400553/P/-/-/0
|
||||
0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0 0x40062f/0x4005e3/P/-/-/0
|
||||
|
Binary file not shown.
@ -18,9 +18,9 @@
|
||||
; CHECK-PROBE-FNAME: 3: 15
|
||||
; CHECK-PROBE-FNAME: 4: 15
|
||||
; CHECK-PROBE-FNAME: 6: 15
|
||||
; CHECK-PROBE-FNAME: 8: 15 _ZL3barii.__uniq.26267048767521081047744692097241227776:15
|
||||
; CHECK-PROBE-FNAME: 8: 15 _ZL3barii.__uniq.276699478366846449772231447066107882794:15
|
||||
; CHECK-PROBE-FNAME: !CFGChecksum: 563088904013236
|
||||
; CHECK-PROBE-FNAME:[main:2 @ foo:8 @ _ZL3barii.__uniq.26267048767521081047744692097241227776]:30:15
|
||||
; CHECK-PROBE-FNAME:[main:2 @ foo:8 @ _ZL3barii.__uniq.276699478366846449772231447066107882794]:30:15
|
||||
; CHECK-PROBE-FNAME: 1: 15
|
||||
; CHECK-PROBE-FNAME: 4: 15
|
||||
; CHECK-PROBE-FNAME: !CFGChecksum: 72617220756
|
||||
|
@ -1,54 +0,0 @@
|
||||
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-dangling-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
|
||||
; RUN: FileCheck %s --input-file %t
|
||||
|
||||
; CHECK: [main:2 @ foo]:58:0
|
||||
; CHECK-NEXT: 1: 0
|
||||
; CHECK-NEXT: 2: 15
|
||||
; CHECK-NEXT: 3: 14
|
||||
; CHECK-NEXT: 4: 0
|
||||
; CHECK-NEXT: 5: 14
|
||||
; CHECK-NEXT: 6: 15
|
||||
; CHECK-NEXT: 7: 0
|
||||
; CHECK-NEXT: 9: 0
|
||||
; CHECK-NEXT: !CFGChecksum: 138950591924
|
||||
; CHECK:[main:2 @ foo:8 @ bar]:1:0
|
||||
; CHECK-NEXT: 1: 0
|
||||
; CHECK-NEXT: 4: 1
|
||||
; CHECK-NEXT: !CFGChecksum: 72617220756
|
||||
|
||||
|
||||
; CHECK-UNWINDER: Binary(inline-cs-pseudoprobe.perfbin)'s Range Counter:
|
||||
; CHECK-UNWINDER-EMPTY:
|
||||
; CHECK-UNWINDER-NEXT: (800, 82b): 14
|
||||
; CHECK-UNWINDER-NEXT: (84d, 858): 1
|
||||
|
||||
; CHECK-UNWINDER: Binary(inline-cs-pseudoprobe.perfbin)'s Branch Counter:
|
||||
; CHECK-UNWINDER-EMPTY:
|
||||
; CHECK-UNWINDER-NEXT: (82b, 800): 14
|
||||
; CHECK-UNWINDER-NEXT: (82b, 84d): 1
|
||||
; CHECK-UNWINDER-NEXT: (858, 80e): 1
|
||||
|
||||
; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
|
||||
; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
|
||||
; -g test.c -o a.out
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int bar(int x, int y) {
|
||||
if (x % 3) {
|
||||
return x - y;
|
||||
}
|
||||
return x + y;
|
||||
}
|
||||
|
||||
void foo() {
|
||||
int s, i = 0;
|
||||
while (i++ < 4000 * 4000)
|
||||
if (i % 91) s = bar(i, s); else s += 30;
|
||||
printf("sum is %d\n", s);
|
||||
}
|
||||
|
||||
int main() {
|
||||
foo();
|
||||
return 0;
|
||||
}
|
@ -11,7 +11,7 @@
|
||||
; CHECK-NEXT: 7: 0
|
||||
; CHECK-NEXT: 8: 14 bar:14
|
||||
; CHECK-NEXT: 9: 0
|
||||
; CHECK-NEXT: !CFGChecksum: 138950591924
|
||||
; CHECK-NEXT: !CFGChecksum: 563088904013236
|
||||
; CHECK:[main:2 @ foo:8 @ bar]:28:14
|
||||
; CHECK-NEXT: 1: 14
|
||||
; CHECK-NEXT: 4: 14
|
||||
|
@ -22,7 +22,7 @@
|
||||
; CHECK-NEXT: 6: 0
|
||||
; CHECK-NEXT: 7: 2 fb:2
|
||||
; CHECK-NEXT: 8: 1 fa:1
|
||||
; CHECK-NEXT: !CFGChecksum: 120515930909
|
||||
; CHECK-NEXT: !CFGChecksum: 563070469352221
|
||||
; CHECK-NEXT: !Attributes: 0
|
||||
; CHECK-NEXT:[main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4
|
||||
; CHECK-NEXT: 1: 4
|
||||
@ -31,7 +31,7 @@
|
||||
; CHECK-NEXT: 4: 0
|
||||
; CHECK-NEXT: 5: 4 fb:4
|
||||
; CHECK-NEXT: 6: 1 fa:1
|
||||
; CHECK-NEXT: !CFGChecksum: 72617220756
|
||||
; CHECK-NEXT: !CFGChecksum: 563022570642068
|
||||
|
||||
; CHECK-KEEP-COLD: [fb]:19:6
|
||||
; CHECK-KEEP-COLD-NEXT: 1: 6
|
||||
@ -40,7 +40,7 @@
|
||||
; CHECK-KEEP-COLD-NEXT: 4: 0
|
||||
; CHECK-KEEP-COLD-NEXT: 5: 4 fb:4
|
||||
; CHECK-KEEP-COLD-NEXT: 6: 3 fa:3
|
||||
; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 72617220756
|
||||
; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 563022570642068
|
||||
; CHECK-KEEP-COLD-NEXT: !Attributes: 0
|
||||
; CHECK-KEEP-COLD-NEXT:[fa]:14:4
|
||||
; CHECK-KEEP-COLD-NEXT: 1: 4
|
||||
@ -50,7 +50,7 @@
|
||||
; CHECK-KEEP-COLD-NEXT: 6: 0
|
||||
; CHECK-KEEP-COLD-NEXT: 7: 2 fb:2
|
||||
; CHECK-KEEP-COLD-NEXT: 8: 1 fa:1
|
||||
; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 120515930909
|
||||
; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 563070469352221
|
||||
|
||||
; CHECK-UNMERGED: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4
|
||||
; CHECK-UNMERGED-NEXT: 1: 4
|
||||
@ -59,7 +59,7 @@
|
||||
; CHECK-UNMERGED-NEXT: 4: 0
|
||||
; CHECK-UNMERGED-NEXT: 5: 4 fb:4
|
||||
; CHECK-UNMERGED-NEXT: 6: 1 fa:1
|
||||
; CHECK-UNMERGED-NEXT: !CFGChecksum: 72617220756
|
||||
; CHECK-UNMERGED-NEXT: !CFGChecksum: 563022570642068
|
||||
; CHECK-UNMERGED-NOT: [fa]
|
||||
; CHECK-UNMERGED-NOT: [fb]
|
||||
|
||||
@ -70,7 +70,7 @@
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 4: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 4 fb:4
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 1 fa:1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 72617220756
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563022570642068
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fb:6 @ fa]:10:3
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 3
|
||||
@ -80,7 +80,7 @@
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 8: 1 fa:1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 120515930909
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563070469352221
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:7 @ fb]:6:2
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 2
|
||||
@ -89,7 +89,7 @@
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 4: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 2 fa:2
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 72617220756
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563022570642068
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:8 @ fa]:4:1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 1
|
||||
@ -99,7 +99,7 @@
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: 8: 0
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 120515930909
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 563070469352221
|
||||
; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
|
||||
|
||||
; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
|
||||
|
@ -11,7 +11,7 @@
|
||||
; CHECK-NEXT: 7: 0
|
||||
; CHECK-NEXT: 8: 15 bar:15
|
||||
; CHECK-NEXT: 9: 0
|
||||
; CHECK-NEXT: !CFGChecksum: 138950591924
|
||||
; CHECK-NEXT: !CFGChecksum: 563088904013236
|
||||
; CHECK:[main:2 @ foo:8 @ bar]:30:15
|
||||
; CHECK-NEXT: 1: 15
|
||||
; CHECK-NEXT: 4: 15
|
||||
|
@ -4,9 +4,9 @@ PERF_RECORD_MMAP2 2854748/2854748: [0x400000(0x1000) @ 0 00:1d 123291722 526021]
|
||||
|
||||
; CHECK: Pseudo Probe Desc:
|
||||
; CHECK: GUID: 6699318081062747564 Name: foo
|
||||
; CHECK: Hash: 138950591924
|
||||
; CHECK: Hash: 563088904013236
|
||||
; CHECK: GUID: 15822663052811949562 Name: main
|
||||
; CHECK: Hash: 4294967295
|
||||
; CHECK: Hash: 281479271677951
|
||||
; CHECK: GUID: 16434608426314478903 Name: bar
|
||||
; CHECK: Hash: 72617220756
|
||||
|
||||
@ -15,16 +15,9 @@ PERF_RECORD_MMAP2 2854748/2854748: [0x400000(0x1000) @ 0 00:1d 123291722 526021]
|
||||
; CHECK: <bar>:
|
||||
|
||||
; CHECK: [Probe]: FUNC: bar Index: 1 Type: Block
|
||||
; CHECK-NEXT: [Probe]: FUNC: bar Index: 4 Type: Block
|
||||
; CHECK-NEXT: 754: imull $2863311531, %edi, %eax
|
||||
|
||||
; CHECK: [Probe]: FUNC: bar Index: 2 Type: Block Dangling
|
||||
; CHECK-NEXT: [Probe]: FUNC: bar Index: 3 Type: Block Dangling
|
||||
; CHECK-NEXT: 768: cmovbl %esi, %ecx
|
||||
|
||||
; CHECK: [Probe]: FUNC: bar Index: 4 Type: Block
|
||||
; CHECK-NEXT: 76e: popq %rbp
|
||||
|
||||
|
||||
; CHECK: <foo>:
|
||||
; CHECK: [Probe]: FUNC: foo Index: 1 Type: Block
|
||||
; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block
|
||||
@ -37,20 +30,17 @@ PERF_RECORD_MMAP2 2854748/2854748: [0x400000(0x1000) @ 0 00:1d 123291722 526021]
|
||||
; CHECK-NEXT: 783: addl $1, %ecx
|
||||
|
||||
; CHECK: [Probe]: FUNC: foo Index: 3 Type: Block
|
||||
; CHECK-NEXT: 7a9: cmpl %eax, %ecx
|
||||
; CHECK-NEXT: 78e: movl %ecx, %edx
|
||||
|
||||
; CHECK: [Probe]: FUNC: foo Index: 4 Type: Block
|
||||
; CHECK-NEXT: [Probe]: FUNC: bar Index: 1 Type: Block Inlined: @ foo:8
|
||||
; CHECK-NEXT: [Probe]: FUNC: bar Index: 4 Type: Block Inlined: @ foo:8
|
||||
; CHECK-NEXT: 7bf: addl %ecx, %edx
|
||||
|
||||
; CHECK: [Probe]: FUNC: bar Index: 2 Type: Block Dangling Inlined: @ foo:8
|
||||
; CHECK-NEXT: [Probe]: FUNC: bar Index: 3 Type: Block Dangling Inlined: @ foo:8
|
||||
; CHECK-NEXT: 7c8: cmovel %esi, %eax
|
||||
|
||||
; CHECK: [Probe]: FUNC: bar Index: 4 Type: Block Inlined: @ foo:8
|
||||
; CHECK-NEXT: 7cd: movl %eax, %esi
|
||||
; CHECK: [Probe]: FUNC: foo Index: 6 Type: Block
|
||||
; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block
|
||||
; CHECK-NEXT: 7cf: addl $1, %ecx
|
||||
|
||||
; CHECK: [Probe]: FUNC: foo Index: 7 Type: Block
|
||||
; CHECK-NEXT: 7de: movl $2098432, %edi
|
||||
@ -72,18 +62,13 @@ PERF_RECORD_MMAP2 2854748/2854748: [0x400000(0x1000) @ 0 00:1d 123291722 526021]
|
||||
; CHECK-NEXT: 803: addl $1, %ecx
|
||||
|
||||
; CHECK: [Probe]: FUNC: foo Index: 3 Type: Block Inlined: @ main:2
|
||||
; CHECK-NEXT: 829: cmpl %eax, %ecx
|
||||
; CHECK-NEXT: 80e: movl %ecx, %edx
|
||||
|
||||
; CHECK: [Probe]: FUNC: foo Index: 4 Type: Block Inlined: @ main:2
|
||||
; CHECK-NEXT: [Probe]: FUNC: bar Index: 1 Type: Block Inlined: @ main:2 @ foo:8
|
||||
; CHECK-NEXT: [Probe]: FUNC: bar Index: 4 Type: Block Inlined: @ main:2 @ foo:8
|
||||
; CHECK-NEXT: 83f: addl %ecx, %edx
|
||||
|
||||
; CHECK: [Probe]: FUNC: bar Index: 2 Type: Block Dangling Inlined: @ main:2 @ foo:8
|
||||
; CHECK-NEXT: [Probe]: FUNC: bar Index: 3 Type: Block Dangling Inlined: @ main:2 @ foo:8
|
||||
; CHECK-NEXT: 848: cmovel %esi, %eax
|
||||
|
||||
; CHECK: [Probe]: FUNC: bar Index: 4 Type: Block Inlined: @ main:2 @ foo:8
|
||||
; CHECK-NEXT: 84d: movl %eax, %esi
|
||||
; CHECK: [Probe]: FUNC: foo Index: 6 Type: Block Inlined: @ main:2
|
||||
; CHECK-NEXT: [Probe]: FUNC: foo Index: 2 Type: Block Inlined: @ main:2
|
||||
; CHECK-NEXT: 84f: addl $1, %ecx
|
||||
|
@ -9,56 +9,56 @@
|
||||
; CHECK-UNCOMPRESS: 3: 1
|
||||
; CHECK-UNCOMPRESS: 4: 1
|
||||
; CHECK-UNCOMPRESS: 7: 1 fb:1
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 563070469352221
|
||||
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1
|
||||
; CHECK-UNCOMPRESS: 1: 1
|
||||
; CHECK-UNCOMPRESS: 3: 1
|
||||
; CHECK-UNCOMPRESS: 4: 1
|
||||
; CHECK-UNCOMPRESS: 7: 1 fb:1
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 563070469352221
|
||||
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1
|
||||
; CHECK-UNCOMPRESS: 1: 1
|
||||
; CHECK-UNCOMPRESS: 3: 1
|
||||
; CHECK-UNCOMPRESS: 5: 1
|
||||
; CHECK-UNCOMPRESS: 8: 1 fa:1
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 563070469352221
|
||||
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
|
||||
; CHECK-UNCOMPRESS: 1: 1
|
||||
; CHECK-UNCOMPRESS: 3: 1
|
||||
; CHECK-UNCOMPRESS: 6: 1 fa:1
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 563022570642068
|
||||
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
|
||||
; CHECK-UNCOMPRESS: 1: 1
|
||||
; CHECK-UNCOMPRESS: 3: 1
|
||||
; CHECK-UNCOMPRESS: 6: 1 fa:1
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 563022570642068
|
||||
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
|
||||
; CHECK-UNCOMPRESS: 1: 1
|
||||
; CHECK-UNCOMPRESS: 3: 1
|
||||
; CHECK-UNCOMPRESS: 6: 1 fa:1
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 563022570642068
|
||||
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
|
||||
; CHECK-UNCOMPRESS: 1: 1
|
||||
; CHECK-UNCOMPRESS: 2: 1
|
||||
; CHECK-UNCOMPRESS: 5: 1 fb:1
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 563022570642068
|
||||
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
|
||||
; CHECK-UNCOMPRESS: 1: 1
|
||||
; CHECK-UNCOMPRESS: 2: 1
|
||||
; CHECK-UNCOMPRESS: 5: 1 fb:1
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 563022570642068
|
||||
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
|
||||
; CHECK-UNCOMPRESS: 1: 1
|
||||
; CHECK-UNCOMPRESS: 2: 1
|
||||
; CHECK-UNCOMPRESS: 5: 1 fb:1
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 563022570642068
|
||||
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb:6 @ fa]:2:1
|
||||
; CHECK-UNCOMPRESS: 1: 1
|
||||
; CHECK-UNCOMPRESS: 3: 1
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 563070469352221
|
||||
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:1:0
|
||||
; CHECK-UNCOMPRESS: 5: 1 fb:1
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
|
||||
; CHECK-UNCOMPRESS: !CFGChecksum: 563022570642068
|
||||
|
||||
|
||||
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4
|
||||
@ -67,35 +67,35 @@
|
||||
; CHECK: 3: 1
|
||||
; CHECK: 5: 4 fb:4
|
||||
; CHECK: 6: 1 fa:1
|
||||
; CHECK: !CFGChecksum: 72617220756
|
||||
; CHECK: !CFGChecksum: 563022570642068
|
||||
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:6:2
|
||||
; CHECK: 1: 2
|
||||
; CHECK: 3: 2
|
||||
; CHECK: 4: 1
|
||||
; CHECK: 7: 1 fb:1
|
||||
; CHECK: !CFGChecksum: 120515930909
|
||||
; CHECK: !CFGChecksum: 563070469352221
|
||||
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1
|
||||
; CHECK: 1: 1
|
||||
; CHECK: 3: 1
|
||||
; CHECK: 4: 1
|
||||
; CHECK: 7: 1 fb:1
|
||||
; CHECK: !CFGChecksum: 120515930909
|
||||
; CHECK: !CFGChecksum: 563070469352221
|
||||
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa]:4:1
|
||||
; CHECK: 1: 1
|
||||
; CHECK: 3: 1
|
||||
; CHECK: 5: 1
|
||||
; CHECK: 8: 1 fa:1
|
||||
; CHECK: !CFGChecksum: 120515930909
|
||||
; CHECK: !CFGChecksum: 563070469352221
|
||||
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
|
||||
; CHECK: 1: 1
|
||||
; CHECK: 3: 1
|
||||
; CHECK: 6: 1 fa:1
|
||||
; CHECK: !CFGChecksum: 72617220756
|
||||
; CHECK: !CFGChecksum: 563022570642068
|
||||
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
|
||||
; CHECK: 1: 1
|
||||
; CHECK: 3: 1
|
||||
; CHECK: 6: 1 fa:1
|
||||
; CHECK: !CFGChecksum: 72617220756
|
||||
; CHECK: !CFGChecksum: 563022570642068
|
||||
|
||||
|
||||
|
||||
|
@ -524,14 +524,11 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
|
||||
for (auto PI : ProbeCounter) {
|
||||
const PseudoProbe *Probe = PI.first;
|
||||
uint64_t Count = PI.second;
|
||||
// Ignore dangling probes since they will be reported later if needed.
|
||||
if (Probe->isDangling())
|
||||
continue;
|
||||
FunctionSamples &FunctionProfile =
|
||||
getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
|
||||
// Record the current frame and FunctionProfile whenever samples are
|
||||
// collected for non-dangling probes. This is for reporting all of the
|
||||
// dangling probes of the frame later.
|
||||
// zero count probes of the frame later.
|
||||
FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile;
|
||||
FunctionProfile.addBodySamplesForProbe(Probe->Index, Count);
|
||||
FunctionProfile.addTotalSamples(Count);
|
||||
@ -568,7 +565,6 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
|
||||
for (auto &I : FrameSamples) {
|
||||
auto *FunctionProfile = I.second;
|
||||
for (auto *Probe : I.first->getProbes()) {
|
||||
if (!Probe->isDangling())
|
||||
FunctionProfile->addBodySamplesForProbe(Probe->Index, 0);
|
||||
}
|
||||
}
|
||||
|
@ -88,12 +88,7 @@ void PseudoProbe::print(raw_ostream &OS,
|
||||
}
|
||||
OS << "Index: " << Index << " ";
|
||||
OS << "Type: " << PseudoProbeTypeStr[static_cast<uint8_t>(Type)] << " ";
|
||||
if (isDangling()) {
|
||||
OS << "Dangling ";
|
||||
}
|
||||
if (isTailCall()) {
|
||||
OS << "TailCall ";
|
||||
}
|
||||
|
||||
std::string InlineContextStr = getInlineContextStr(GUID2FuncMAP, ShowName);
|
||||
if (InlineContextStr.size()) {
|
||||
OS << "Inlined: @ ";
|
||||
@ -189,7 +184,7 @@ void PseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
|
||||
// TYPE (uint4)
|
||||
// 0 - block probe, 1 - indirect call, 2 - direct call
|
||||
// ATTRIBUTE (uint3)
|
||||
// 1 - tail call, 2 - dangling
|
||||
// 1 - reserved
|
||||
// ADDRESS_TYPE (uint1)
|
||||
// 0 - code address, 1 - address delta
|
||||
// CODE_ADDRESS (uint64 or ULEB128)
|
||||
|
@ -24,7 +24,7 @@
|
||||
namespace llvm {
|
||||
namespace sampleprof {
|
||||
|
||||
enum PseudoProbeAttributes { TAILCALL = 1, DANGLING = 2 };
|
||||
enum PseudoProbeAttributes { RESERVED = 1 };
|
||||
|
||||
// Use func GUID and index as the location info of the inline site
|
||||
using InlineSite = std::tuple<uint64_t, uint32_t>;
|
||||
@ -102,7 +102,7 @@ A pseudo probe has the format like below:
|
||||
TYPE (uint4)
|
||||
0 - block probe, 1 - indirect call, 2 - direct call
|
||||
ATTRIBUTE (uint3)
|
||||
1 - tail call, 2 - dangling
|
||||
1 - reserved
|
||||
ADDRESS_TYPE (uint1)
|
||||
0 - code address, 1 - address delta
|
||||
CODE_ADDRESS (uint64 or ULEB128)
|
||||
@ -124,15 +124,6 @@ struct PseudoProbe {
|
||||
InlineTree(Tree){};
|
||||
|
||||
bool isEntry() const { return Index == PseudoProbeFirstId; }
|
||||
|
||||
bool isDangling() const {
|
||||
return Attribute & static_cast<uint8_t>(PseudoProbeAttributes::DANGLING);
|
||||
}
|
||||
|
||||
bool isTailCall() const {
|
||||
return Attribute & static_cast<uint8_t>(PseudoProbeAttributes::TAILCALL);
|
||||
}
|
||||
|
||||
bool isBlock() const { return Type == PseudoProbeType::Block; }
|
||||
bool isIndirectCall() const { return Type == PseudoProbeType::IndirectCall; }
|
||||
bool isDirectCall() const { return Type == PseudoProbeType::DirectCall; }
|
||||
|
Loading…
Reference in New Issue
Block a user