diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 70194c04347..7a457744db7 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -256,6 +256,11 @@ enum GlobalValueSummarySymtabCodes { // strings in strtab. // [n * name] FS_CFI_FUNCTION_DECLS = 18, + // Per-module summary that also adds relative block frequency to callee info. + // PERMODULE_RELBF: [valueid, flags, instcount, fflags, numrefs, + // numrefs x valueid, + // n x (valueid, relblockfreq)] + FS_PERMODULE_RELBF = 19, }; enum MetadataCodes { diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index 5935da84b4c..a48571a8c5e 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -25,6 +25,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" +#include "llvm/Support/MathExtras.h" #include #include #include @@ -54,13 +55,30 @@ struct CalleeInfo { Hot = 3, Critical = 4 }; - HotnessType Hotness = HotnessType::Unknown; - CalleeInfo() = default; - explicit CalleeInfo(HotnessType Hotness) : Hotness(Hotness) {} + // The size of the bit-field might need to be adjusted if more values are + // added to HotnessType enum. + uint32_t Hotness : 3; + uint32_t RelBlockFreq : 29; + static constexpr uint64_t MaxRelBlockFreq = (1 << 29) - 1; + + CalleeInfo() + : Hotness(static_cast(HotnessType::Unknown)), RelBlockFreq(0) {} + explicit CalleeInfo(HotnessType Hotness, uint64_t RelBF) + : Hotness(static_cast(Hotness)), RelBlockFreq(RelBF) {} void updateHotness(const HotnessType OtherHotness) { - Hotness = std::max(Hotness, OtherHotness); + Hotness = std::max(Hotness, static_cast(OtherHotness)); + } + + HotnessType getHotness() const { return HotnessType(Hotness); } + + // When there are multiple edges between the same (caller, callee) pair, the + // relative block frequencies are summed up. 
+ void updateRelBlockFreq(uint64_t RBF) { + uint64_t Sum = SaturatingAdd(RelBlockFreq, RBF); + Sum = std::min(Sum, uint64_t(MaxRelBlockFreq)); + RelBlockFreq = static_cast(Sum); } }; diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index cf2fe7776dd..9293f603479 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -273,9 +273,24 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // to record the call edge to the alias in that case. Eventually // an alias summary will be created to associate the alias and // aliasee. - CallGraphEdges[Index.getOrInsertValueInfo( - cast(CalledValue))] - .updateHotness(Hotness); + auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo( + cast(CalledValue))]; + ValueInfo.updateHotness(Hotness); + // Add the relative block frequency to CalleeInfo if there is no profile + // information. + if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) { + auto BBFreq = BFI->getBlockFreq(&BB).getFrequency(); + // FIXME: This might need some scaling to prevent BBFreq values from + // being rounded down to 0. + auto EntryFreq = BFI->getEntryFreq(); + // Block frequencies can be directly set for a block and so we need to + // handle the case of entry frequency being 0. + if (EntryFreq) + BBFreq /= EntryFreq; + else + BBFreq = 0; + ValueInfo.updateRelBlockFreq(BBFreq); + } } else { // Skip inline assembly calls. 
if (CI && CI->isInlineAsm()) diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 7ffa6248879..9cf1302b030 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -743,7 +743,8 @@ private: std::vector makeRefList(ArrayRef Record); std::vector makeCallList(ArrayRef Record, bool IsOldProfileFormat, - bool HasProfile); + bool HasProfile, + bool HasRelBF); Error parseEntireSummary(unsigned ID); Error parseModuleStringTable(); @@ -5047,12 +5048,15 @@ ModuleSummaryIndexBitcodeReader::makeRefList(ArrayRef Record) { return Ret; } -std::vector ModuleSummaryIndexBitcodeReader::makeCallList( - ArrayRef Record, bool IsOldProfileFormat, bool HasProfile) { +std::vector +ModuleSummaryIndexBitcodeReader::makeCallList(ArrayRef Record, + bool IsOldProfileFormat, + bool HasProfile, bool HasRelBF) { std::vector Ret; Ret.reserve(Record.size()); for (unsigned I = 0, E = Record.size(); I != E; ++I) { CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown; + uint64_t RelBF = 0; ValueInfo Callee = getValueInfoFromValueId(Record[I]).first; if (IsOldProfileFormat) { I += 1; // Skip old callsitecount field @@ -5060,7 +5064,9 @@ std::vector ModuleSummaryIndexBitcodeReader::makeCallLi I += 1; // Skip old profilecount field } else if (HasProfile) Hotness = static_cast(Record[++I]); - Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo{Hotness}}); + else if (HasRelBF) + RelBF = Record[++I]; + Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo(Hotness, RelBF)}); } return Ret; } @@ -5139,7 +5145,11 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { // FS_PERMODULE_PROFILE: [valueid, flags, instcount, fflags, numrefs, // numrefs x valueid, // n x (valueid, hotness)] + // FS_PERMODULE_RELBF: [valueid, flags, instcount, fflags, numrefs, + // numrefs x valueid, + // n x (valueid, relblockfreq)] case bitc::FS_PERMODULE: + case bitc::FS_PERMODULE_RELBF: case 
bitc::FS_PERMODULE_PROFILE: { unsigned ValueID = Record[0]; uint64_t RawFlags = Record[1]; @@ -5165,9 +5175,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { std::vector Refs = makeRefList( ArrayRef(Record).slice(RefListStartIndex, NumRefs)); bool HasProfile = (BitCode == bitc::FS_PERMODULE_PROFILE); + bool HasRelBF = (BitCode == bitc::FS_PERMODULE_RELBF); std::vector Calls = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), - IsOldProfileFormat, HasProfile); + IsOldProfileFormat, HasProfile, HasRelBF); auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Calls), std::move(PendingTypeTests), @@ -5259,7 +5270,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { bool HasProfile = (BitCode == bitc::FS_COMBINED_PROFILE); std::vector Edges = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), - IsOldProfileFormat, HasProfile); + IsOldProfileFormat, HasProfile, false); ValueInfo VI = getValueInfoFromValueId(ValueID).first; auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 3876b5ffe35..4cb38d7e254 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -86,6 +86,9 @@ static cl::opt cl::desc("Number of metadatas above which we emit an index " "to enable lazy-loading")); +cl::opt WriteRelBFToSummary( + "write-relbf-to-summary", cl::Hidden, cl::init(false), + cl::desc("Write relative block frequency to function summary ")); namespace { /// These are manifest constants used by the bitcode writer. 
They do not need to @@ -3378,11 +3381,15 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( NameVals.push_back(getValueId(ECI.first)); if (HasProfileData) NameVals.push_back(static_cast(ECI.second.Hotness)); + else if (WriteRelBFToSummary) + NameVals.push_back(ECI.second.RelBlockFreq); } unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); unsigned Code = - (HasProfileData ? bitc::FS_PERMODULE_PROFILE : bitc::FS_PERMODULE); + (HasProfileData ? bitc::FS_PERMODULE_PROFILE + : (WriteRelBFToSummary ? bitc::FS_PERMODULE_RELBF + : bitc::FS_PERMODULE)); // Emit the finished record. Stream.EmitRecord(Code, NameVals, FSAbbrev); @@ -3448,21 +3455,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { ArrayRef{GVI.second, GVI.first}); } - // Abbrev for FS_PERMODULE. - auto Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs - // numrefs x valueid, n x (valueid) - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); - unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); - // Abbrev for FS_PERMODULE_PROFILE. - Abbv = std::make_shared(); + auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags @@ -3474,6 +3468,22 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + // Abbrev for FS_PERMODULE or FS_PERMODULE_RELBF. 
+ Abbv = std::make_shared(); + if (WriteRelBFToSummary) + Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_RELBF)); + else + Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + // numrefs x valueid, n x (valueid [, rel_block_freq]) + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + // Abbrev for FS_PERMODULE_GLOBALVAR_INIT_REFS. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS)); @@ -3675,7 +3685,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { bool HasProfileData = false; for (auto &EI : FS->calls()) { - HasProfileData |= EI.second.Hotness != CalleeInfo::HotnessType::Unknown; + HasProfileData |= + EI.second.getHotness() != CalleeInfo::HotnessType::Unknown; if (HasProfileData) break; } diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index b1eefb96454..ed85c20051e 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -269,7 +269,7 @@ static void computeImportForFunction( }; const auto NewThreshold = - Threshold * GetBonusMultiplier(Edge.second.Hotness); + Threshold * GetBonusMultiplier(Edge.second.getHotness()); auto *CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold, Summary.modulePath()); @@ -293,7 +293,8 @@ static void computeImportForFunction( return Threshold * ImportInstrFactor; }; - bool IsHotCallsite = Edge.second.Hotness == CalleeInfo::HotnessType::Hot; + bool IsHotCallsite = + Edge.second.getHotness() == CalleeInfo::HotnessType::Hot; const auto 
AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite); auto ExportModulePath = ResolvedCalleeSummary->modulePath(); diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index 7f20e136eef..fd673701943 100644 --- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -306,6 +306,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, return nullptr; STRINGIFY_CODE(FS, PERMODULE) STRINGIFY_CODE(FS, PERMODULE_PROFILE) + STRINGIFY_CODE(FS, PERMODULE_RELBF) STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS) STRINGIFY_CODE(FS, COMBINED) STRINGIFY_CODE(FS, COMBINED_PROFILE)