From 4e30d875bf9abcf388a605bc799ae698e4b649ab Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Tue, 18 May 2021 16:08:38 -0700 Subject: [PATCH] [SampleFDO] New hierarchical discriminator for Flow Sensitive SampleFDO This patch implements the first part of Flow Sensitive SampleFDO (FSAFDO). It has the following changes: (1) disable the current discriminator encoding scheme, (2) new hierarchical discriminator for FSAFDO. For this patch, option "-enable-fs-discriminator=true" turns on the new functionality. Option "-enable-fs-discriminator=false" (the default) keeps the current SampleFDO behavior. When the fs-discriminator is enabled, we insert a flag variable, namely __llvm_fs_discriminator__, into the object. This symbol will be checked by the create_llvm_prof tool and used to generate a profile with FS-AFDO discriminators enabled. If this happens, for an extbinary format profile, the create_llvm_prof tool will add a flag to the profile summary section. Differential Revision: https://reviews.llvm.org/D102246 --- include/llvm/CodeGen/MIRFSDiscriminator.h | 74 +++++++ include/llvm/CodeGen/Passes.h | 7 + include/llvm/IR/DebugInfoMetadata.h | 61 +++--- include/llvm/InitializePasses.h | 1 + include/llvm/LTO/Config.h | 3 + include/llvm/Support/Discriminator.h | 73 +++++++ lib/CodeGen/CMakeLists.txt | 1 + lib/CodeGen/MIRFSDiscriminator.cpp | 139 ++++++++++++ lib/CodeGen/TargetPassConfig.cpp | 14 ++ lib/IR/DebugInfoMetadata.cpp | 7 + lib/LTO/LTOBackend.cpp | 9 +- lib/Transforms/Utils/LoopUnroll.cpp | 4 +- lib/Transforms/Utils/LoopUnrollAndJam.cpp | 4 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 8 +- test/CodeGen/X86/fsafdo_test1.ll | 60 ++++++ test/CodeGen/X86/fsafdo_test2.ll | 233 +++++++++++++++++++++ 16 files changed, 658 insertions(+), 40 deletions(-) create mode 100644 include/llvm/CodeGen/MIRFSDiscriminator.h create mode 100644 include/llvm/Support/Discriminator.h create mode 100644 lib/CodeGen/MIRFSDiscriminator.cpp create mode 100644 test/CodeGen/X86/fsafdo_test1.ll create mode 100644 
test/CodeGen/X86/fsafdo_test2.ll diff --git a/include/llvm/CodeGen/MIRFSDiscriminator.h b/include/llvm/CodeGen/MIRFSDiscriminator.h new file mode 100644 index 00000000000..cda8c8b5a54 --- /dev/null +++ b/include/llvm/CodeGen/MIRFSDiscriminator.h @@ -0,0 +1,74 @@ +//===----- MIRFSDiscriminator.h: MIR FS Discriminator Support ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the supporting functions for adding Machine level IR +// Flow Sensitive discriminators to the instruction debug information. With +// this, a cloned machine instruction in a different MachineBasicBlock will +// have its own discriminator value. This is done in a MIRAddFSDiscriminators +// pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MIRFSDISCRIMINATOR_H +#define LLVM_CODEGEN_MIRFSDISCRIMINATOR_H + +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/ProfileData/SampleProfReader.h" + +#include + +namespace llvm { 
+ +class MIRAddFSDiscriminators : public MachineFunctionPass { + MachineFunction *MF; + unsigned LowBit; + unsigned HighBit; + +public: + static char ID; + /// FS bits that will be used in this pass (numbers are 0 based and + /// inclusive). + MIRAddFSDiscriminators(unsigned LowBit = 0, unsigned HighBit = 0) + : MachineFunctionPass(ID), LowBit(LowBit), HighBit(HighBit) { + assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); + } + + /// getNumFSBBs() - Return the number of machine BBs that have FS samples. + unsigned getNumFSBBs(); + + /// getNumFSSamples() - Return the number of samples that have flow sensitive + /// values. + uint64_t getNumFSSamples(); + + /// getMachineFunction - Return the current machine function. + const MachineFunction *getMachineFunction() const { return MF; } + +private: + bool runOnMachineFunction(MachineFunction &) override; +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_MIRFSDISCRIMINATOR_H diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index b823392c111..61b22d08c58 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -165,6 +165,9 @@ namespace llvm { /// This pass perform post-ra machine sink for COPY instructions. extern char &PostRAMachineSinkingID; + /// This pass adds flow sensitive discriminators. + extern char &MIRAddFSDiscriminatorsID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// @@ -487,6 +490,10 @@ namespace llvm { /// Create IR Type Promotion pass. \see TypePromotion.cpp FunctionPass *createTypePromotionPass(); + /// Add Flow Sensitive Discriminators. + FunctionPass *createMIRAddFSDiscriminatorsPass(unsigned LowBit, + unsigned HighBit); + /// Creates MIR Debugify pass. 
\see MachineDebugify.cpp ModulePass *createDebugifyMachineModulePass(); diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index da67bd8c21c..50c01a43172 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -26,6 +26,8 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Discriminator.h" #include #include #include @@ -60,6 +62,8 @@ namespace llvm { +extern cl::opt EnableFSDiscriminator; + class DITypeRefArray { const MDTuple *N = nullptr; @@ -1576,31 +1580,6 @@ class DILocation : public MDNode { ShouldCreate); } - /// With a given unsigned int \p U, use up to 13 bits to represent it. - /// old_bit 1~5 --> new_bit 1~5 - /// old_bit 6~12 --> new_bit 7~13 - /// new_bit_6 is 0 if higher bits (7~13) are all 0 - static unsigned getPrefixEncodingFromUnsigned(unsigned U) { - U &= 0xfff; - return U > 0x1f ? (((U & 0xfe0) << 1) | (U & 0x1f) | 0x20) : U; - } - - /// Reverse transformation as getPrefixEncodingFromUnsigned. - static unsigned getUnsignedFromPrefixEncoding(unsigned U) { - if (U & 1) - return 0; - U >>= 1; - return (U & 0x20) ? (((U >> 1) & 0xfe0) | (U & 0x1f)) : (U & 0x1f); - } - - /// Returns the next component stored in discriminator. - static unsigned getNextComponentInDiscriminator(unsigned D) { - if ((D & 1) == 0) - return D >> ((D & 0x40) ? 14 : 7); - else - return D >> 1; - } - TempDILocation cloneImpl() const { // Get the raw scope/inlinedAt since it is possible to invoke this on // a DILocation containing temporary metadata. @@ -1608,14 +1587,6 @@ class DILocation : public MDNode { getRawInlinedAt(), isImplicitCode()); } - static unsigned encodeComponent(unsigned C) { - return (C == 0) ? 1U : (getPrefixEncodingFromUnsigned(C) << 1); - } - - static unsigned encodingBits(unsigned C) { - return (C == 0) ? 1 : (C > 0x1f ? 14 : 7); - } - public: // Disallow replacing operands. 
void replaceOperandWith(unsigned I, Metadata *New) = delete; @@ -1762,8 +1733,20 @@ public: static const DILocation *getMergedLocations(ArrayRef Locs); + /// Return the masked discriminator value for an input discriminator value D + /// (i.e. zero out the (B+1)-th and above bits of D, where B is 0-based). + // Example: an input of (0x1FF, 7) returns 0xFF. + static unsigned getMaskedDiscriminator(unsigned D, unsigned B) { + return (D & getN1Bits(B)); + } + + /// Return the bits used for base discriminators. + static unsigned getBaseDiscriminatorBits() { return BASE_DIS_BIT_END; } + /// Returns the base discriminator for a given encoded discriminator \p D. static unsigned getBaseDiscriminatorFromDiscriminator(unsigned D) { + if (EnableFSDiscriminator) + return getMaskedDiscriminator(D, getBaseDiscriminatorBits()); return getUnsignedFromPrefixEncoding(D); } @@ -1785,6 +1768,8 @@ public: /// Returns the duplication factor for a given encoded discriminator \p D, or /// 1 if no value or 0 is encoded. 
static unsigned getDuplicationFactorFromDiscriminator(unsigned D) { + if (EnableFSDiscriminator) + return 1; D = getNextComponentInDiscriminator(D); unsigned Ret = getUnsignedFromPrefixEncoding(D); if (Ret == 0) @@ -2226,6 +2211,14 @@ unsigned DILocation::getCopyIdentifier() const { Optional DILocation::cloneWithBaseDiscriminator(unsigned D) const { unsigned BD, DF, CI; + + if (EnableFSDiscriminator) { + BD = getBaseDiscriminator(); + if (D == BD) + return this; + return cloneWithDiscriminator(D); + } + decodeDiscriminator(getDiscriminator(), BD, DF, CI); if (D == BD) return this; @@ -2235,6 +2228,8 @@ Optional DILocation::cloneWithBaseDiscriminator(unsigned D) } Optional DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const { + assert(!EnableFSDiscriminator && "FSDiscriminator should not call this."); + DF *= getDuplicationFactor(); if (DF <= 1) return this; diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 1d1909f2cb4..8ca77d04516 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -275,6 +275,7 @@ void initializeLowerSwitchLegacyPassPass(PassRegistry &); void initializeLowerTypeTestsPass(PassRegistry&); void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &); void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &); +void initializeMIRAddFSDiscriminatorsPass(PassRegistry &); void initializeMIRCanonicalizerPass(PassRegistry &); void initializeMIRNamerPass(PassRegistry &); void initializeMIRPrintingPassPass(PassRegistry&); diff --git a/include/llvm/LTO/Config.h b/include/llvm/LTO/Config.h index f1ff4318386..5fd3c9f408f 100644 --- a/include/llvm/LTO/Config.h +++ b/include/llvm/LTO/Config.h @@ -171,6 +171,9 @@ struct Config { bool ShouldDiscardValueNames = true; DiagnosticHandlerFunction DiagHandler; + /// Add FSAFDO discriminators. 
+ bool AddFSDiscriminator = false; + /// If this field is set, LTO will write input file paths and symbol /// resolutions here in llvm-lto2 command line flag format. This can be /// used for testing and for running the LTO pipeline outside of the linker diff --git a/include/llvm/Support/Discriminator.h b/include/llvm/Support/Discriminator.h new file mode 100644 index 00000000000..3521a19a30d --- /dev/null +++ b/include/llvm/Support/Discriminator.h @@ -0,0 +1,73 @@ +//===---- llvm/Support/Discriminator.h -- Discriminator Utils ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the constants and utility functions for discriminators. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_DISCRIMINATOR_H +#define LLVM_SUPPORT_DISCRIMINATOR_H + +// Utility functions for encoding / decoding discriminators. +/// With a given unsigned int \p U, use up to 13 bits to represent it. +/// old_bit 1~5 --> new_bit 1~5 +/// old_bit 6~12 --> new_bit 7~13 +/// new_bit_6 is 0 if higher bits (7~13) are all 0 +static inline unsigned getPrefixEncodingFromUnsigned(unsigned U) { + U &= 0xfff; + return U > 0x1f ? (((U & 0xfe0) << 1) | (U & 0x1f) | 0x20) : U; +} + +/// Reverse transformation as getPrefixEncodingFromUnsigned. +static inline unsigned getUnsignedFromPrefixEncoding(unsigned U) { + if (U & 1) + return 0; + U >>= 1; + return (U & 0x20) ? (((U >> 1) & 0xfe0) | (U & 0x1f)) : (U & 0x1f); +} + +/// Returns the next component stored in discriminator. +static inline unsigned getNextComponentInDiscriminator(unsigned D) { + if ((D & 1) == 0) + return D >> ((D & 0x40) ? 
14 : 7); + else + return D >> 1; +} + +static inline unsigned encodeComponent(unsigned C) { + return (C == 0) ? 1U : (getPrefixEncodingFromUnsigned(C) << 1); +} + +static inline unsigned encodingBits(unsigned C) { + return (C == 0) ? 1 : (C > 0x1f ? 14 : 7); +} + +// Some constants used in FS Discriminators. +#define BASE_DIS_BIT_BEG 0 +#define BASE_DIS_BIT_END 7 + +#define PASS_1_DIS_BIT_BEG 8 +#define PASS_1_DIS_BIT_END 13 + +#define PASS_2_DIS_BIT_BEG 14 +#define PASS_2_DIS_BIT_END 19 + +#define PASS_3_DIS_BIT_BEG 20 +#define PASS_3_DIS_BIT_END 25 + +#define PASS_LAST_DIS_BIT_BEG 26 +#define PASS_LAST_DIS_BIT_END 31 + +// Set bits range [0 .. n] to 1. Used in FS Discriminators. +static inline unsigned getN1Bits(int N) { + if (N >= 31) + return 0xFFFFFFFF; + return (1 << (N + 1)) - 1; +} + +#endif /* LLVM_SUPPORT_DISCRIMINATOR_H */ diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index a5fb0d84929..f26afa7f212 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -106,6 +106,7 @@ add_llvm_component_library(LLVMCodeGen MachineStripDebug.cpp MachineTraceMetrics.cpp MachineVerifier.cpp + MIRFSDiscriminator.cpp MIRYamlMapping.cpp ModuloSchedule.cpp MultiHazardRecognizer.cpp diff --git a/lib/CodeGen/MIRFSDiscriminator.cpp b/lib/CodeGen/MIRFSDiscriminator.cpp new file mode 100644 index 00000000000..450a1b7310d --- /dev/null +++ b/lib/CodeGen/MIRFSDiscriminator.cpp @@ -0,0 +1,139 @@ +//===-------- MIRFSDiscriminator.cpp: Flow Sensitive Discriminator --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of a machine pass that adds the flow +// sensitive discriminator to the instruction debug information. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MIRFSDiscriminator.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "mirfs-discriminators" + +char MIRAddFSDiscriminators::ID = 0; + +INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE, + "Add MIR Flow Sensitive Discriminators", + /* cfg = */ false, /* is_analysis = */ false) + +char &llvm::MIRAddFSDiscriminatorsID = MIRAddFSDiscriminators::ID; + +FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(unsigned LowBit, + unsigned HighBit) { + return new MIRAddFSDiscriminators(LowBit, HighBit); +} + +// Compute a hash value using debug line number, and the line numbers from the +// inline stack. +static uint64_t getCallStackHash(const MachineBasicBlock &BB, + const MachineInstr &MI, + const DILocation *DIL) { + uint64_t Ret = MD5Hash(std::to_string(DIL->getLine())); + Ret ^= MD5Hash(BB.getName()); + Ret ^= MD5Hash(DIL->getScope()->getSubprogram()->getLinkageName()); + for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { + Ret ^= MD5Hash(std::to_string(DIL->getLine())); + Ret ^= MD5Hash(DIL->getScope()->getSubprogram()->getLinkageName()); + } + return Ret; +} + +// Traverse the CFG and assign FD discriminators. If two instructions +// have the same lineno and discriminator, but residing in different BBs, +// the latter instruction will get a new discriminator value. The new +// discriminator keeps the existing discriminator value but sets new bits +// b/w LowBit and HighBit. 
+bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) { + if (!EnableFSDiscriminator) + return false; + + bool Changed = false; + using LocationDiscriminator = std::tuple; + using BBSet = DenseSet; + using LocationDiscriminatorBBMap = DenseMap; + using LocationDiscriminatorCurrPassMap = + DenseMap; + + LocationDiscriminatorBBMap LDBM; + LocationDiscriminatorCurrPassMap LDCM; + + // Mask of discriminators before this pass. + unsigned BitMaskBefore = getN1Bits(LowBit); + // Mask of discriminators including this pass. + unsigned BitMaskNow = getN1Bits(HighBit); + // Mask of discriminators for bits specific to this pass. + unsigned BitMaskThisPass = BitMaskNow ^ BitMaskBefore; + unsigned NumNewD = 0; + + LLVM_DEBUG(dbgs() << "MIRAddFSDiscriminators working on Func: " + << MF.getFunction().getName() << "\n"); + for (MachineBasicBlock &BB : MF) { + for (MachineInstr &I : BB) { + const DILocation *DIL = I.getDebugLoc().get(); + if (!DIL) + continue; + unsigned LineNo = DIL->getLine(); + if (LineNo == 0) + continue; + unsigned Discriminator = DIL->getDiscriminator(); + LocationDiscriminator LD = {DIL->getFilename(), LineNo, Discriminator}; + auto &BBMap = LDBM[LD]; + auto R = BBMap.insert(&BB); + if (BBMap.size() == 1) + continue; + + unsigned DiscriminatorCurrPass; + DiscriminatorCurrPass = R.second ? 
++LDCM[LD] : LDCM[LD]; + DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit; + DiscriminatorCurrPass += getCallStackHash(BB, I, DIL); + DiscriminatorCurrPass &= BitMaskThisPass; + unsigned NewD = Discriminator | DiscriminatorCurrPass; + const auto *const NewDIL = DIL->cloneWithDiscriminator(NewD); + if (!NewDIL) { + LLVM_DEBUG(dbgs() << "Could not encode discriminator: " + << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ":" << Discriminator << " " + << I << "\n"); + continue; + } + + I.setDebugLoc(NewDIL); + NumNewD++; + LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ": add FS discriminator, from " + << Discriminator << " -> " << NewD << "\n"); + Changed = true; + } + } + + if (Changed) { + Module *M = MF.getFunction().getParent(); + const char *FSDiscriminatorVar = "__llvm_fs_discriminator__"; + if (!M->getGlobalVariable(FSDiscriminatorVar)) { + auto &Context = M->getContext(); + // Create a global variable to flag that FSDiscriminators are used. 
+ new GlobalVariable(*M, Type::getInt1Ty(Context), true, + GlobalValue::WeakAnyLinkage, + ConstantInt::getTrue(Context), FSDiscriminatorVar); + } + + LLVM_DEBUG(dbgs() << "Num of FS Discriminators: " << NumNewD << "\n"); + } + + return Changed; +} diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index f5a016e9491..ed36384efdd 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Discriminator.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/Threading.h" @@ -165,6 +166,13 @@ static cl::opt EnableGlobalISelAbort( clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2", "Disable the abort but emit a diagnostic on failure"))); +// An option that disables inserting FS-AFDO discriminators before emit. +// This is mainly for debugging and tuning purpose. +static cl::opt + FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden, + cl::desc("Do not insert FS-AFDO discriminators before " + "emit.")); + // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). @@ -334,6 +342,8 @@ struct InsertedPass { namespace llvm { +extern cl::opt EnableFSDiscriminator; + class PassConfigImpl { public: // List of passes explicitly substituted by this target. 
Normally this is @@ -1167,6 +1177,10 @@ void TargetPassConfig::addMachinePasses() { addPass(&XRayInstrumentationID); addPass(&PatchableFunctionID); + if (EnableFSDiscriminator && !FSNoFinalDiscrim) + addPass(createMIRAddFSDiscriminatorsPass(PASS_LAST_DIS_BIT_BEG, + PASS_LAST_DIS_BIT_END)); + addPreEmitPass(); if (TM->Options.EnableIPRA) diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index 1299acdc472..5e8eaacfbdc 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -23,6 +23,13 @@ using namespace llvm; +namespace llvm { +// Use FS-AFDO discriminator. +cl::opt EnableFSDiscriminator( + "enable-fs-discriminator", cl::Hidden, cl::init(false), + cl::desc("Enable adding flow sensitive discriminators")); +} // namespace llvm + const DIExpression::FragmentInfo DebugVariable::DefaultFragment = { std::numeric_limits::max(), std::numeric_limits::min()}; diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp index 35fa9bdade5..4e4ba4f3a58 100644 --- a/lib/LTO/LTOBackend.cpp +++ b/lib/LTO/LTOBackend.cpp @@ -215,10 +215,15 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, PGOOptions::SampleUse, PGOOptions::NoCSAction, true); else if (Conf.RunCSIRInstr) { PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, - PGOOptions::IRUse, PGOOptions::CSIRInstr); + PGOOptions::IRUse, PGOOptions::CSIRInstr, + Conf.AddFSDiscriminator); } else if (!Conf.CSIRProfile.empty()) { PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, - PGOOptions::IRUse, PGOOptions::CSIRUse); + PGOOptions::IRUse, PGOOptions::CSIRUse, + Conf.AddFSDiscriminator); + } else if (Conf.AddFSDiscriminator) { + PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, + PGOOptions::NoCSAction, true); } LoopAnalysisManager LAM; diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 302eea42efc..395d2af56b5 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ 
b/lib/Transforms/Utils/LoopUnroll.cpp @@ -570,7 +570,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, for (Loop *SubLoop : *L) LoopsToSimplify.insert(SubLoop); - if (Header->getParent()->isDebugInfoForProfiling()) + // When a FSDiscriminator is enabled, we don't need to add the multiply + // factors to the discriminators. + if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator) for (BasicBlock *BB : L->getBlocks()) for (Instruction &I : *BB) if (!isa(&I)) diff --git a/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/lib/Transforms/Utils/LoopUnrollAndJam.cpp index d85162f3b4b..66407db8e1a 100644 --- a/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -349,7 +349,9 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); - if (Header->getParent()->isDebugInfoForProfiling()) + // When a FSDiscriminator is enabled, we don't need to add the multiply + // factors to the discriminators. + if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator) for (BasicBlock *BB : L->getBlocks()) for (Instruction &I : *BB) if (!isa(&I)) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 7eacfe45383..5bccd7ddd29 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1047,8 +1047,11 @@ static Instruction *getDebugLocFromInstOrOperands(Instruction *I) { void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) { if (const Instruction *Inst = dyn_cast_or_null(Ptr)) { const DILocation *DIL = Inst->getDebugLoc(); + + // When a FSDiscriminator is enabled, we don't need to add the multiply + // factors to the discriminators. 
if (DIL && Inst->getFunction()->isDebugInfoForProfiling() && - !isa(Inst)) { + !isa(Inst) && !EnableFSDiscriminator) { assert(!VF.isScalable() && "scalable vectors not yet supported."); auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); @@ -1058,8 +1061,7 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) LLVM_DEBUG(dbgs() << "Failed to create new discriminator: " << DIL->getFilename() << " Line: " << DIL->getLine()); - } - else + } else B.SetCurrentDebugLocation(DIL); } else B.SetCurrentDebugLocation(DebugLoc()); diff --git a/test/CodeGen/X86/fsafdo_test1.ll b/test/CodeGen/X86/fsafdo_test1.ll new file mode 100644 index 00000000000..db87d5f84aa --- /dev/null +++ b/test/CodeGen/X86/fsafdo_test1.ll @@ -0,0 +1,60 @@ +; RUN: llc -enable-fs-discriminator < %s | FileCheck %s +; +; Check that fs-afdo discriminators are generated. +; CHECK: .loc 1 7 3 is_stmt 0 discriminator 2 # foo.c:7:3 +; Check: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5 +; CHECK: .loc 1 9 5 is_stmt 0 discriminator 3623878658 # foo.c:9:5 +; CHECK: .loc 1 7 3 is_stmt 1 discriminator 805306370 # foo.c:7:3 +; Check that variable __llvm_fs_discriminator__ is generated. 
+; CHECK: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__ +; CHECK: .section .rodata,"a",@progbits +; CHECK: .weak __llvm_fs_discriminator__ +; CHECK: __llvm_fs_discriminator__: +; CHECK: .byte 1 +; CHECK: .size __llvm_fs_discriminator__, 1 + +target triple = "x86_64-unknown-linux-gnu" + +%struct.Node = type { %struct.Node* } + +define i32 @foo(%struct.Node* readonly %node, %struct.Node* readnone %root) !dbg !6 { +entry: + %cmp = icmp eq %struct.Node* %node, %root, !dbg !8 + br i1 %cmp, label %while.end4, label %while.cond1.preheader.lr.ph, !dbg !10 + +while.cond1.preheader.lr.ph: + %tobool = icmp eq %struct.Node* %node, null + br i1 %tobool, label %while.cond1.preheader.us.preheader, label %while.body2.preheader, !dbg !11 + +while.body2.preheader: + br label %while.body2, !dbg !11 + +while.cond1.preheader.us.preheader: + br label %while.cond1.preheader.us, !dbg !10 + +while.cond1.preheader.us: + br label %while.cond1.preheader.us, !dbg !10 + +while.body2: + br label %while.body2, !dbg !11 + +while.end4: + ret i32 0, !dbg !12 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, emissionKind: LineTablesOnly) +!1 = !DIFile(filename: "foo.c", directory: "b/") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{} +!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 5, type: !7, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 7, column: 15, scope: !9) +!9 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 2) +!10 = !DILocation(line: 7, column: 3, scope: !9) +!11 = !DILocation(line: 9, column: 5, scope: !9) +!12 = !DILocation(line: 14, column: 3, scope: !6) diff --git a/test/CodeGen/X86/fsafdo_test2.ll b/test/CodeGen/X86/fsafdo_test2.ll new file mode 100644 
index 00000000000..efffbd60f74 --- /dev/null +++ b/test/CodeGen/X86/fsafdo_test2.ll @@ -0,0 +1,233 @@ +; RUN: llc -enable-fs-discriminator < %s | FileCheck %s +;; +;; C source code for the test (compiler at -O3): +;; // A test case for loop unroll. +;; +;; __attribute__((noinline)) int bar(int i){ +;; volatile int j; +;; j = i; +;; return j; +;; } +;; +;; unsigned sum; +;; __attribute__((noinline)) void work(int i){ +;; if (sum % 7) +;; sum += i; +;; else +;; sum -= i; +;; } +;; +;; __attribute__((noinline)) void foo(){ +;; int i, j; +;; for (j = 0; j < 48; j++) +;; for (i = 0; i < 4; i++) { +;; int ii = bar(i+j*48); +;; if (ii % 2) +;; work(ii*2); +;; if (ii % 4) +;; work(ii*3); +;; } +;; } +;; +;; int main() { +;; int i; +;; for (i = 0; i < 10000000; i++) { +;; foo(); +;; } +;; } +;; +;; Check that fs-afdo discriminators are generated. +; CHECK: .loc 1 23 9 is_stmt 0 discriminator 1 # unroll.c:23:9 +; CHECK: .loc 1 23 9 is_stmt 0 discriminator 1073741825 # unroll.c:23:9 +; CHECK: .loc 1 23 9 is_stmt 0 discriminator 2147483649 # unroll.c:23:9 +; CHECK: .loc 1 23 9 is_stmt 0 discriminator 268435457 # unroll.c:23:9 +;; +;; Check that variable __llvm_fs_discriminator__ is generated. 
+; CHECK: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__
+; CHECK: .section .rodata,"a",@progbits
+; CHECK: .weak __llvm_fs_discriminator__
+; CHECK: __llvm_fs_discriminator__:
+; CHECK: .byte 1
+; CHECK: .size __llvm_fs_discriminator__, 1
+; The FileCheck directives above expect, in order, a weak 1-byte object named __llvm_fs_discriminator__ in .rodata holding the value 1.
+target triple = "x86_64-unknown-linux-gnu"
+
+@sum = dso_local local_unnamed_addr global i32 0, align 4
+
+declare i32 @bar(i32 %i) #0
+declare void @work(i32 %i) #2
+; @foo: a 48-iteration loop whose body is four copies of one bar/work sequence; all copies share the same !dbg attachments (presumably unrolled code, the DIFile is "unroll.c" - confirm).
+define dso_local void @foo() #0 !dbg !29 {
+entry:
+  br label %for.cond1.preheader, !dbg !30
+
+for.cond1.preheader:                              ; loop header: %j.012 starts at 0; copy 0 calls bar(%j.012 * 48)
+  %j.012 = phi i32 [ 0, %entry ], [ %inc11, %if.end9.3 ]
+  %mul = mul nuw nsw i32 %j.012, 48
+  %call = tail call i32 @bar(i32 %mul), !dbg !32
+  %0 = and i32 %call, 1, !dbg !33
+  %tobool.not = icmp eq i32 %0, 0, !dbg !33
+  br i1 %tobool.not, label %if.end, label %if.then, !dbg !35
+
+if.then:                                          ; reached when bit 0 of %call is set: work(%call << 1)
+  %mul4 = shl nsw i32 %call, 1, !dbg !36
+  tail call void @work(i32 %mul4), !dbg !37
+  br label %if.end, !dbg !38
+
+if.end:                                           ; tests the low two bits of %call
+  %1 = and i32 %call, 3, !dbg !39
+  %tobool6.not = icmp eq i32 %1, 0, !dbg !39
+  br i1 %tobool6.not, label %if.end9, label %if.then7, !dbg !40
+
+if.then7:                                         ; reached when (%call & 3) != 0: work(%call * 3)
+  %mul8 = mul nsw i32 %call, 3, !dbg !41
+  tail call void @work(i32 %mul8), !dbg !42
+  br label %if.end9, !dbg !43
+
+if.end9:                                          ; copy 1: same sequence on bar(%mul | 1)
+  %add.1 = or i32 %mul, 1, !dbg !44
+  %call.1 = tail call i32 @bar(i32 %add.1), !dbg !32
+  %2 = and i32 %call.1, 1, !dbg !33
+  %tobool.not.1 = icmp eq i32 %2, 0, !dbg !33
+  br i1 %tobool.not.1, label %if.end.1, label %if.then.1, !dbg !35
+
+for.end12:                                        ; loop exit, branched to from %if.end9.3
+  ret void, !dbg !45
+
+if.then.1:
+  %mul4.1 = shl nsw i32 %call.1, 1, !dbg !36
+  tail call void @work(i32 %mul4.1), !dbg !37
+  br label %if.end.1, !dbg !38
+
+if.end.1:
+  %3 = and i32 %call.1, 3, !dbg !39
+  %tobool6.not.1 = icmp eq i32 %3, 0, !dbg !39
+  br i1 %tobool6.not.1, label %if.end9.1, label %if.then7.1, !dbg !40
+
+if.then7.1:
+  %mul8.1 = mul nsw i32 %call.1, 3, !dbg !41
+  tail call void @work(i32 %mul8.1), !dbg !42
+  br label %if.end9.1, !dbg !43
+
+if.end9.1:                                        ; copy 2: same sequence on bar(%mul | 2)
+  %add.2 = or i32 %mul, 2, !dbg !44
+  %call.2 = tail call i32 @bar(i32 %add.2), !dbg !32
+  %4 = and i32 %call.2, 1, !dbg !33
+  %tobool.not.2 = icmp eq i32 %4, 0, !dbg !33
+  br i1 %tobool.not.2, label %if.end.2, label %if.then.2, !dbg !35
+
+if.then.2:
+  %mul4.2 = shl nsw i32 %call.2, 1, !dbg !36
+  tail call void @work(i32 %mul4.2), !dbg !37
+  br label %if.end.2, !dbg !38
+
+if.end.2:
+  %5 = and i32 %call.2, 3, !dbg !39
+  %tobool6.not.2 = icmp eq i32 %5, 0, !dbg !39
+  br i1 %tobool6.not.2, label %if.end9.2, label %if.then7.2, !dbg !40
+
+if.then7.2:
+  %mul8.2 = mul nsw i32 %call.2, 3, !dbg !41
+  tail call void @work(i32 %mul8.2), !dbg !42
+  br label %if.end9.2, !dbg !43
+
+if.end9.2:                                        ; copy 3: same sequence on bar(%mul | 3)
+  %add.3 = or i32 %mul, 3, !dbg !44
+  %call.3 = tail call i32 @bar(i32 %add.3), !dbg !32
+  %6 = and i32 %call.3, 1, !dbg !33
+  %tobool.not.3 = icmp eq i32 %6, 0, !dbg !33
+  br i1 %tobool.not.3, label %if.end.3, label %if.then.3, !dbg !35
+
+if.then.3:
+  %mul4.3 = shl nsw i32 %call.3, 1, !dbg !36
+  tail call void @work(i32 %mul4.3), !dbg !37
+  br label %if.end.3, !dbg !38
+
+if.end.3:
+  %7 = and i32 %call.3, 3, !dbg !39
+  %tobool6.not.3 = icmp eq i32 %7, 0, !dbg !39
+  br i1 %tobool6.not.3, label %if.end9.3, label %if.then7.3, !dbg !40
+
+if.then7.3:
+  %mul8.3 = mul nsw i32 %call.3, 3, !dbg !41
+  tail call void @work(i32 %mul8.3), !dbg !42
+  br label %if.end9.3, !dbg !43
+
+if.end9.3:                                        ; latch: increments %j.012 and exits once the new value equals 48
+  %inc11 = add nuw nsw i32 %j.012, 1, !dbg !46
+  %exitcond.not = icmp eq i32 %inc11, 48, !dbg !48
+  br i1 %exitcond.not, label %for.end12, label %for.cond1.preheader, !dbg !30, !llvm.loop !49
+}
+
+; Attribute groups: #0 is used by @bar and @foo, #2 by @work; #1 and #3 are not referenced by the IR shown here.
+attributes #0 = { noinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind willreturn }
+attributes #2 = { nofree noinline norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+; Debug metadata: @foo is !29 and its !DILexicalBlockFile scopes !31, !34, !47 carry discriminators 2, 1, 3. The bar/work/main subprograms (!7, !19, !52) are not attached to any function defined in the IR shown here.
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "unroll.c", directory: "a/")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 4, column: 3, scope: !7)
+!10 = !DILocation(line: 5, column: 5, scope: !7)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"int", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 6, column: 10, scope: !7)
+!16 = !DILocation(line: 7, column: 1, scope: !7)
+!17 = !DILocation(line: 6, column: 3, scope: !18)
+!18 = !DILexicalBlockFile(scope: !7, file: !1, discriminator: 1)
+!19 = distinct !DISubprogram(name: "work", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!20 = !DILocation(line: 11, column: 7, scope: !19)
+!21 = !DILocation(line: 11, column: 11, scope: !22)
+!22 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 1)
+!23 = !DILocation(line: 11, column: 11, scope: !24)
+!24 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 2)
+!25 = !DILocation(line: 11, column: 7, scope: !26)
+!26 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 3)
+!27 = !DILocation(line: 0, scope: !22)
+!28 = !DILocation(line: 15, column: 1, scope: !19)
+!29 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !8, scopeLine: 17, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!30 = !DILocation(line: 19, column: 3, scope: !31)
+!31 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 2)
+!32 = !DILocation(line: 21, column: 16, scope: !31)
+!33 = !DILocation(line: 22, column: 14, scope: !34)
+!34 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 1)
+!35 = !DILocation(line: 22, column: 11, scope: !31)
+!36 = !DILocation(line: 23, column: 16, scope: !29)
+!37 = !DILocation(line: 23, column: 9, scope: !34)
+!38 = !DILocation(line: 23, column: 9, scope: !31)
+!39 = !DILocation(line: 24, column: 14, scope: !34)
+!40 = !DILocation(line: 24, column: 11, scope: !31)
+!41 = !DILocation(line: 25, column: 16, scope: !29)
+!42 = !DILocation(line: 25, column: 9, scope: !34)
+!43 = !DILocation(line: 25, column: 9, scope: !31)
+!44 = !DILocation(line: 21, column: 21, scope: !34)
+!45 = !DILocation(line: 27, column: 1, scope: !29)
+!46 = !DILocation(line: 19, column: 24, scope: !47)
+!47 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 3)
+!48 = !DILocation(line: 19, column: 17, scope: !34)
+!49 = distinct !{!49, !50, !51}
+!50 = !DILocation(line: 19, column: 3, scope: !29)
+!51 = !DILocation(line: 26, column: 3, scope: !29)
+!52 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 29, type: !8, scopeLine: 29, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!53 = !DILocation(line: 31, column: 3, scope: !54)
+!54 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 2)
+!55 = !DILocation(line: 32, column: 5, scope: !52)
+!56 = !DILocation(line: 31, column: 30, scope: !57)
+!57 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 3)
+!58 = !DILocation(line: 31, column: 17, scope: !59)
+!59 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 1)
+!60 = distinct !{!60, !61, !62}
+!61 = !DILocation(line: 31, column: 3, scope: !52)
+!62 = !DILocation(line: 33, column: 3, scope: !52)
+!63 = !DILocation(line: 34, column: 1, scope: !52)