From 7d33c2a5f85b4d3ffd8a1b780c78f144703d4261 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Tue, 23 Feb 2021 15:50:45 -0800
Subject: [PATCH] [ThinLTO] Make cloneUsedGlobalVariables deterministic

Iterating on `SmallPtrSet` with more than 8 elements is not
deterministic. Use a SmallVector instead because `Used` is guaranteed to
contain unique elements.

While here, decrease inline element counts from 8 to 4. The number of
`llvm.used`/`llvm.compiler.used` elements is usually 0 or 1. For full
LTO/hybrid LTO, the number may be large, so we need to be careful.
According to tejohnson's analysis
https://reviews.llvm.org/D97128#2582399 , 4 is good for a large project
with WholeProgramDevirt, when available_externally vtables are placed in
the llvm.compiler.used set.

Differential Revision: https://reviews.llvm.org/D97128
---
 include/llvm/IR/Module.h                    |  6 ++++++
 lib/IR/Module.cpp                           | 15 +++++++++++++++
 lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 11 ++++-------
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h
index 3ac57b7e575..63d66c5fd63 100644
--- a/include/llvm/IR/Module.h
+++ b/include/llvm/IR/Module.h
@@ -893,6 +893,12 @@ public:
 GlobalVariable *collectUsedGlobalVariables(const Module &M,
                                            SmallPtrSetImpl<GlobalValue *> &Set,
                                            bool CompilerUsed);
+/// Given "llvm.used" or "llvm.compiler.used" as a global name, collect the
+/// initializer elements of that global in a SmallVector and return the global
+/// itself.
+GlobalVariable *collectUsedGlobalVariables(const Module &M,
+                                           SmallVectorImpl<GlobalValue *> &Vec,
+                                           bool CompilerUsed);
 
 /// An raw_ostream inserter for modules.
 inline raw_ostream &operator<<(raw_ostream &O, const Module &M) {
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index 9395b2bb849..4c244611fbb 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -658,6 +658,21 @@ VersionTuple Module::getSDKVersion() const {
   return Result;
 }
 
+GlobalVariable *llvm::collectUsedGlobalVariables(
+    const Module &M, SmallVectorImpl<GlobalValue *> &Vec, bool CompilerUsed) {
+  const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
+  GlobalVariable *GV = M.getGlobalVariable(Name);
+  if (!GV || !GV->hasInitializer())
+    return GV;
+
+  const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+  for (Value *Op : Init->operands()) {
+    GlobalValue *G = cast<GlobalValue>(Op->stripPointerCasts());
+    Vec.push_back(G);
+  }
+  return GV;
+}
+
 GlobalVariable *llvm::collectUsedGlobalVariables(
     const Module &M, SmallPtrSetImpl<GlobalValue *> &Set, bool CompilerUsed) {
   const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 2ab9dcd0991..797416f5e13 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -199,23 +199,20 @@ void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
 // values whose defs were cloned into that module.
 static void cloneUsedGlobalVariables(const Module &SrcM, Module &DestM,
                                      bool CompilerUsed) {
-  SmallPtrSet<GlobalValue *, 8> Used;
-  SmallPtrSet<GlobalValue *, 8> NewUsed;
+  SmallVector<GlobalValue *, 4> Used, NewUsed;
   // First collect those in the llvm[.compiler].used set.
   collectUsedGlobalVariables(SrcM, Used, CompilerUsed);
   // Next build a set of the equivalent values defined in DestM.
   for (auto *V : Used) {
     auto *GV = DestM.getNamedValue(V->getName());
     if (GV && !GV->isDeclaration())
-      NewUsed.insert(GV);
+      NewUsed.push_back(GV);
   }
   // Finally, add them to a llvm[.compiler].used variable in DestM.
   if (CompilerUsed)
-    appendToCompilerUsed(
-        DestM, std::vector<GlobalValue *>(NewUsed.begin(), NewUsed.end()));
+    appendToCompilerUsed(DestM, NewUsed);
   else
-    appendToUsed(DestM,
-                 std::vector<GlobalValue *>(NewUsed.begin(), NewUsed.end()));
+    appendToUsed(DestM, NewUsed);
 }
 
 // If it's possible to split M into regular and thin LTO parts, do so and write