//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the LLVM module linker. // //===----------------------------------------------------------------------===// #include "LinkDiagnosticInfo.h" #include "llvm-c/Linker.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Linker/Linker.h" #include "llvm/Support/Error.h" using namespace llvm; namespace { /// This is an implementation class for the LinkModules function, which is the /// entrypoint for this file. class ModuleLinker { IRMover &Mover; std::unique_ptr SrcM; SetVector ValuesToLink; /// For symbol clashes, prefer those from Src. unsigned Flags; /// List of global value names that should be internalized. StringSet<> Internalize; /// Function that will perform the actual internalization. The reason for a /// callback is that the linker cannot call internalizeModule without /// creating a circular dependency between IPO and the linker. std::function &)> InternalizeCallback; /// Used as the callback for lazy linking. /// The mover has just hit GV and we have to decide if it, and other members /// of the same comdat, should be linked. Every member to be linked is passed /// to Add. void addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add); bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; } bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; } bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest, const GlobalValue &Src); /// Should we have mover and linker error diag info? bool emitError(const Twine &Message) { SrcM->getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message)); return true; } bool getComdatLeader(Module &M, StringRef ComdatName, const GlobalVariable *&GVar); bool computeResultingSelectionKind(StringRef ComdatName, Comdat::SelectionKind Src, Comdat::SelectionKind Dst, Comdat::SelectionKind &Result, bool &LinkFromSrc); std::map> ComdatsChosen; bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK, bool &LinkFromSrc); // Keep track of the lazy linked global members of each comdat in source. DenseMap> LazyComdatMembers; /// Given a global in the source module, return the global in the /// destination module that is being linked to, if any. GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) { Module &DstM = Mover.getModule(); // If the source has no name it can't link. If it has local linkage, // there is no name match-up going on. if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage())) return nullptr; // Otherwise see if we have a match in the destination module's symtab. GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName()); if (!DGV) return nullptr; // If we found a global with the same name in the dest module, but it has // internal linkage, we are really not doing any linkage here. if (DGV->hasLocalLinkage()) return nullptr; // Otherwise, we do in fact link to the destination global. return DGV; } /// Drop GV if it is a member of a comdat that we are dropping. /// This can happen with COFF's largest selection kind. void dropReplacedComdat(GlobalValue &GV, const DenseSet &ReplacedDstComdats); bool linkIfNeeded(GlobalValue &GV); public: ModuleLinker(IRMover &Mover, std::unique_ptr SrcM, unsigned Flags, std::function &)> InternalizeCallback = {}) : Mover(Mover), SrcM(std::move(SrcM)), Flags(Flags), InternalizeCallback(std::move(InternalizeCallback)) {} bool run(); }; } static GlobalValue::VisibilityTypes getMinVisibility(GlobalValue::VisibilityTypes A, GlobalValue::VisibilityTypes B) { if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility) return GlobalValue::HiddenVisibility; if (A == GlobalValue::ProtectedVisibility || B == GlobalValue::ProtectedVisibility) return GlobalValue::ProtectedVisibility; return GlobalValue::DefaultVisibility; } bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName, const GlobalVariable *&GVar) { const GlobalValue *GVal = M.getNamedValue(ComdatName); if (const auto *GA = dyn_cast_or_null(GVal)) { GVal = GA->getBaseObject(); if (!GVal) // We cannot resolve the size of the aliasee yet. return emitError("Linking COMDATs named '" + ComdatName + "': COMDAT key involves incomputable alias size."); } GVar = dyn_cast_or_null(GVal); if (!GVar) return emitError( "Linking COMDATs named '" + ComdatName + "': GlobalVariable required for data dependent selection!"); return false; } bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, Comdat::SelectionKind Src, Comdat::SelectionKind Dst, Comdat::SelectionKind &Result, bool &LinkFromSrc) { Module &DstM = Mover.getModule(); // The ability to mix Comdat::SelectionKind::Any with // Comdat::SelectionKind::Largest is a behavior that comes from COFF. bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any || Dst == Comdat::SelectionKind::Largest; bool SrcAnyOrLargest = Src == Comdat::SelectionKind::Any || Src == Comdat::SelectionKind::Largest; if (DstAnyOrLargest && SrcAnyOrLargest) { if (Dst == Comdat::SelectionKind::Largest || Src == Comdat::SelectionKind::Largest) Result = Comdat::SelectionKind::Largest; else Result = Comdat::SelectionKind::Any; } else if (Src == Dst) { Result = Dst; } else { return emitError("Linking COMDATs named '" + ComdatName + "': invalid selection kinds!"); } switch (Result) { case Comdat::SelectionKind::Any: // Go with Dst. LinkFromSrc = false; break; case Comdat::SelectionKind::NoDeduplicate: { const GlobalVariable *DstGV; const GlobalVariable *SrcGV; if (getComdatLeader(DstM, ComdatName, DstGV) || getComdatLeader(*SrcM, ComdatName, SrcGV)) return true; if (SrcGV->isWeakForLinker()) { // Go with Dst. LinkFromSrc = false; } else if (DstGV->isWeakForLinker()) { // Go with Src. LinkFromSrc = true; } else { return emitError("Linking COMDATs named '" + ComdatName + "': nodeduplicate has been violated!"); } break; } case Comdat::SelectionKind::ExactMatch: case Comdat::SelectionKind::Largest: case Comdat::SelectionKind::SameSize: { const GlobalVariable *DstGV; const GlobalVariable *SrcGV; if (getComdatLeader(DstM, ComdatName, DstGV) || getComdatLeader(*SrcM, ComdatName, SrcGV)) return true; const DataLayout &DstDL = DstM.getDataLayout(); const DataLayout &SrcDL = SrcM->getDataLayout(); uint64_t DstSize = DstDL.getTypeAllocSize(DstGV->getValueType()); uint64_t SrcSize = SrcDL.getTypeAllocSize(SrcGV->getValueType()); if (Result == Comdat::SelectionKind::ExactMatch) { if (SrcGV->getInitializer() != DstGV->getInitializer()) return emitError("Linking COMDATs named '" + ComdatName + "': ExactMatch violated!"); LinkFromSrc = false; } else if (Result == Comdat::SelectionKind::Largest) { LinkFromSrc = SrcSize > DstSize; } else if (Result == Comdat::SelectionKind::SameSize) { if (SrcSize != DstSize) return emitError("Linking COMDATs named '" + ComdatName + "': SameSize violated!"); LinkFromSrc = false; } else { llvm_unreachable("unknown selection kind"); } break; } } return false; } bool ModuleLinker::getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &Result, bool &LinkFromSrc) { Module &DstM = Mover.getModule(); Comdat::SelectionKind SSK = SrcC->getSelectionKind(); StringRef ComdatName = SrcC->getName(); Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable(); Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName); if (DstCI == ComdatSymTab.end()) { // Use the comdat if it is only available in one of the modules. LinkFromSrc = true; Result = SSK; return false; } const Comdat *DstC = &DstCI->second; Comdat::SelectionKind DSK = DstC->getSelectionKind(); return computeResultingSelectionKind(ComdatName, SSK, DSK, Result, LinkFromSrc); } bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest, const GlobalValue &Src) { // Should we unconditionally use the Src? if (shouldOverrideFromSrc()) { LinkFromSrc = true; return false; } // We always have to add Src if it has appending linkage. if (Src.hasAppendingLinkage() || Dest.hasAppendingLinkage()) { LinkFromSrc = true; return false; } bool SrcIsDeclaration = Src.isDeclarationForLinker(); bool DestIsDeclaration = Dest.isDeclarationForLinker(); if (SrcIsDeclaration) { // If Src is external or if both Src & Dest are external.. Just link the // external globals, we aren't adding anything. if (Src.hasDLLImportStorageClass()) { // If one of GVs is marked as DLLImport, result should be dllimport'ed. LinkFromSrc = DestIsDeclaration; return false; } // If the Dest is weak, use the source linkage. if (Dest.hasExternalWeakLinkage()) { LinkFromSrc = true; return false; } // Link an available_externally over a declaration. LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration(); return false; } if (DestIsDeclaration) { // If Dest is external but Src is not: LinkFromSrc = true; return false; } if (Src.hasCommonLinkage()) { if (Dest.hasLinkOnceLinkage() || Dest.hasWeakLinkage()) { LinkFromSrc = true; return false; } if (!Dest.hasCommonLinkage()) { LinkFromSrc = false; return false; } const DataLayout &DL = Dest.getParent()->getDataLayout(); uint64_t DestSize = DL.getTypeAllocSize(Dest.getValueType()); uint64_t SrcSize = DL.getTypeAllocSize(Src.getValueType()); LinkFromSrc = SrcSize > DestSize; return false; } if (Src.isWeakForLinker()) { assert(!Dest.hasExternalWeakLinkage()); assert(!Dest.hasAvailableExternallyLinkage()); if (Dest.hasLinkOnceLinkage() && Src.hasWeakLinkage()) { LinkFromSrc = true; return false; } LinkFromSrc = false; return false; } if (Dest.isWeakForLinker()) { assert(Src.hasExternalLinkage()); LinkFromSrc = true; return false; } assert(!Src.hasExternalWeakLinkage()); assert(!Dest.hasExternalWeakLinkage()); assert(Dest.hasExternalLinkage() && Src.hasExternalLinkage() && "Unexpected linkage type!"); return emitError("Linking globals named '" + Src.getName() + "': symbol multiply defined!"); } bool ModuleLinker::linkIfNeeded(GlobalValue &GV) { GlobalValue *DGV = getLinkedToGlobal(&GV); if (shouldLinkOnlyNeeded()) { // Always import variables with appending linkage. if (!GV.hasAppendingLinkage()) { // Don't import globals unless they are referenced by the destination // module. if (!DGV) return false; // Don't import globals that are already defined in the destination module if (!DGV->isDeclaration()) return false; } } if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) { auto *DGVar = dyn_cast(DGV); auto *SGVar = dyn_cast(&GV); if (DGVar && SGVar) { if (DGVar->isDeclaration() && SGVar->isDeclaration() && (!DGVar->isConstant() || !SGVar->isConstant())) { DGVar->setConstant(false); SGVar->setConstant(false); } if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) { MaybeAlign Align( std::max(DGVar->getAlignment(), SGVar->getAlignment())); SGVar->setAlignment(Align); DGVar->setAlignment(Align); } } GlobalValue::VisibilityTypes Visibility = getMinVisibility(DGV->getVisibility(), GV.getVisibility()); DGV->setVisibility(Visibility); GV.setVisibility(Visibility); GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::getMinUnnamedAddr( DGV->getUnnamedAddr(), GV.getUnnamedAddr()); DGV->setUnnamedAddr(UnnamedAddr); GV.setUnnamedAddr(UnnamedAddr); } if (!DGV && !shouldOverrideFromSrc() && (GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() || GV.hasAvailableExternallyLinkage())) return false; if (GV.isDeclaration()) return false; if (const Comdat *SC = GV.getComdat()) { bool LinkFromSrc; Comdat::SelectionKind SK; std::tie(SK, LinkFromSrc) = ComdatsChosen[SC]; if (!LinkFromSrc) return false; } bool LinkFromSrc = true; if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV)) return true; if (LinkFromSrc) ValuesToLink.insert(&GV); return false; } void ModuleLinker::addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add) { // Add these to the internalize list if (!GV.hasLinkOnceLinkage() && !GV.hasAvailableExternallyLinkage() && !shouldLinkOnlyNeeded()) return; if (InternalizeCallback) Internalize.insert(GV.getName()); Add(GV); const Comdat *SC = GV.getComdat(); if (!SC) return; for (GlobalValue *GV2 : LazyComdatMembers[SC]) { GlobalValue *DGV = getLinkedToGlobal(GV2); bool LinkFromSrc = true; if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2)) return; if (!LinkFromSrc) continue; if (InternalizeCallback) Internalize.insert(GV2->getName()); Add(*GV2); } } void ModuleLinker::dropReplacedComdat( GlobalValue &GV, const DenseSet &ReplacedDstComdats) { Comdat *C = GV.getComdat(); if (!C) return; if (!ReplacedDstComdats.count(C)) return; if (GV.use_empty()) { GV.eraseFromParent(); return; } if (auto *F = dyn_cast(&GV)) { F->deleteBody(); } else if (auto *Var = dyn_cast(&GV)) { Var->setInitializer(nullptr); } else { auto &Alias = cast(GV); Module &M = *Alias.getParent(); GlobalValue *Declaration; if (auto *FTy = dyn_cast(Alias.getValueType())) { Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M); } else { Declaration = new GlobalVariable(M, Alias.getValueType(), /*isConstant*/ false, GlobalValue::ExternalLinkage, /*Initializer*/ nullptr); } Declaration->takeName(&Alias); Alias.replaceAllUsesWith(Declaration); Alias.eraseFromParent(); } } bool ModuleLinker::run() { Module &DstM = Mover.getModule(); DenseSet ReplacedDstComdats; for (const auto &SMEC : SrcM->getComdatSymbolTable()) { const Comdat &C = SMEC.getValue(); if (ComdatsChosen.count(&C)) continue; Comdat::SelectionKind SK; bool LinkFromSrc; if (getComdatResult(&C, SK, LinkFromSrc)) return true; ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc); if (!LinkFromSrc) continue; Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable(); Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(C.getName()); if (DstCI == ComdatSymTab.end()) continue; // The source comdat is replacing the dest one. const Comdat *DstC = &DstCI->second; ReplacedDstComdats.insert(DstC); } // Alias have to go first, since we are not able to find their comdats // otherwise. for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) { GlobalAlias &GV = *I++; dropReplacedComdat(GV, ReplacedDstComdats); } for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) { GlobalVariable &GV = *I++; dropReplacedComdat(GV, ReplacedDstComdats); } for (auto I = DstM.begin(), E = DstM.end(); I != E;) { Function &GV = *I++; dropReplacedComdat(GV, ReplacedDstComdats); } for (GlobalVariable &GV : SrcM->globals()) if (GV.hasLinkOnceLinkage()) if (const Comdat *SC = GV.getComdat()) LazyComdatMembers[SC].push_back(&GV); for (Function &SF : *SrcM) if (SF.hasLinkOnceLinkage()) if (const Comdat *SC = SF.getComdat()) LazyComdatMembers[SC].push_back(&SF); for (GlobalAlias &GA : SrcM->aliases()) if (GA.hasLinkOnceLinkage()) if (const Comdat *SC = GA.getComdat()) LazyComdatMembers[SC].push_back(&GA); // Insert all of the globals in src into the DstM module... without linking // initializers (which could refer to functions not yet mapped over). for (GlobalVariable &GV : SrcM->globals()) if (linkIfNeeded(GV)) return true; for (Function &SF : *SrcM) if (linkIfNeeded(SF)) return true; for (GlobalAlias &GA : SrcM->aliases()) if (linkIfNeeded(GA)) return true; for (unsigned I = 0; I < ValuesToLink.size(); ++I) { GlobalValue *GV = ValuesToLink[I]; const Comdat *SC = GV->getComdat(); if (!SC) continue; for (GlobalValue *GV2 : LazyComdatMembers[SC]) { GlobalValue *DGV = getLinkedToGlobal(GV2); bool LinkFromSrc = true; if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2)) return true; if (LinkFromSrc) ValuesToLink.insert(GV2); } } if (InternalizeCallback) { for (GlobalValue *GV : ValuesToLink) Internalize.insert(GV->getName()); } // FIXME: Propagate Errors through to the caller instead of emitting // diagnostics. bool HasErrors = false; if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(), [this](GlobalValue &GV, IRMover::ValueAdder Add) { addLazyFor(GV, Add); }, /* IsPerformingImport */ false)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message())); HasErrors = true; }); } if (HasErrors) return true; if (InternalizeCallback) InternalizeCallback(DstM, Internalize); return false; } Linker::Linker(Module &M) : Mover(M) {} bool Linker::linkInModule( std::unique_ptr Src, unsigned Flags, std::function &)> InternalizeCallback) { ModuleLinker ModLinker(Mover, std::move(Src), Flags, std::move(InternalizeCallback)); return ModLinker.run(); } //===----------------------------------------------------------------------===// // LinkModules entrypoint. //===----------------------------------------------------------------------===// /// This function links two modules together, with the resulting Dest module /// modified to be the composite of the two input modules. If an error occurs, /// true is returned and ErrorMsg (if not null) is set to indicate the problem. /// Upon failure, the Dest module could be in a modified state, and shouldn't be /// relied on to be consistent. bool Linker::linkModules( Module &Dest, std::unique_ptr Src, unsigned Flags, std::function &)> InternalizeCallback) { Linker L(Dest); return L.linkInModule(std::move(Src), Flags, std::move(InternalizeCallback)); } //===----------------------------------------------------------------------===// // C API. //===----------------------------------------------------------------------===// LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) { Module *D = unwrap(Dest); std::unique_ptr M(unwrap(Src)); return Linker::linkModules(*D, std::move(M)); }