From d8f7c22241472291c61eb195e01ebe1788aaba85 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 22 Jan 2021 17:19:38 -0800 Subject: [PATCH] Support: Remove duplicated code in {File,clang::ModulesDependency}Collector, NFC Refactor the duplicated canonicalize-path logic in `FileCollector` and `ModulesDependencyCollector` into a new utility called `PathCanonicalizer` that's shared. This popped up when tracking down a bug common to both in https://reviews.llvm.org/D95202. As drive-bys, update a few names and comments to better reflect the effect of the code, delay removal of `..`s to avoid an unnecessary extra string copy, and leave behind a couple of FIXMEs for future consideration. Differential Revision: https://reviews.llvm.org/D95279 --- include/llvm/Support/FileCollector.h | 27 ++++++-- lib/Support/FileCollector.cpp | 82 +++++++++++++++---------- unittests/Support/FileCollectorTest.cpp | 1 - 3 files changed, 74 insertions(+), 36 deletions(-) diff --git a/include/llvm/Support/FileCollector.h b/include/llvm/Support/FileCollector.h index cbb870ec22a..8ea344a347d 100644 --- a/include/llvm/Support/FileCollector.h +++ b/include/llvm/Support/FileCollector.h @@ -69,6 +69,27 @@ protected: /// as relative paths inside of the Root. class FileCollector : public FileCollectorBase { public: + /// Helper utility that encapsulates the logic for canonicalizing a virtual + /// path and a path to copy from. + class PathCanonicalizer { + public: + struct PathStorage { + SmallString<256> CopyFrom; + SmallString<256> VirtualPath; + }; + + /// Canonicalize a pair of virtual and real paths. + PathStorage canonicalize(StringRef SrcPath); + + private: + /// Replace with a (mostly) real path, or don't modify. Resolves symlinks + /// in the directory, using \a CachedDirs to avoid redundant lookups, but + /// leaves the filename as a possible symlink. + void updateWithRealPath(SmallVectorImpl &Path); + + StringMap CachedDirs; + }; + /// \p Root is the directory where collected files are will be stored. /// \p OverlayRoot is VFS mapping root. /// \p Root directory gets created in copyFiles unless it already exists. @@ -93,8 +114,6 @@ public: private: friend FileCollectorFileSystem; - bool getRealPath(StringRef SrcPath, SmallVectorImpl &Result); - void addFileToMapping(StringRef VirtualPath, StringRef RealPath) { if (sys::fs::is_directory(VirtualPath)) VFSWriter.addDirectoryMapping(VirtualPath, RealPath); @@ -119,8 +138,8 @@ protected: /// The yaml mapping writer. vfs::YAMLVFSWriter VFSWriter; - /// Caches RealPath calls when resolving symlinks. - StringMap SymlinkMap; + /// Helper utility for canonicalizing paths. + PathCanonicalizer Canonicalizer; }; } // end namespace llvm diff --git a/lib/Support/FileCollector.cpp b/lib/Support/FileCollector.cpp index cb53878b790..99482075f67 100644 --- a/lib/Support/FileCollector.cpp +++ b/lib/Support/FileCollector.cpp @@ -53,62 +53,82 @@ FileCollector::FileCollector(std::string Root, std::string OverlayRoot) : Root(std::move(Root)), OverlayRoot(std::move(OverlayRoot)) { } -bool FileCollector::getRealPath(StringRef SrcPath, - SmallVectorImpl &Result) { - SmallString<256> RealPath; - StringRef FileName = sys::path::filename(SrcPath); - std::string Directory = sys::path::parent_path(SrcPath).str(); - auto DirWithSymlink = SymlinkMap.find(Directory); +void FileCollector::PathCanonicalizer::updateWithRealPath( + SmallVectorImpl &Path) { + StringRef SrcPath(Path.begin(), Path.size()); + StringRef Filename = sys::path::filename(SrcPath); + StringRef Directory = sys::path::parent_path(SrcPath); - // Use real_path to fix any symbolic link component present in a path. - // Computing the real path is expensive, cache the search through the parent - // path Directory. - if (DirWithSymlink == SymlinkMap.end()) { - auto EC = sys::fs::real_path(Directory, RealPath); - if (EC) - return false; - SymlinkMap[Directory] = std::string(RealPath.str()); + // Use real_path to fix any symbolic link component present in the directory + // part of the path, caching the search because computing the real path is + // expensive. + SmallString<256> RealPath; + auto DirWithSymlink = CachedDirs.find(Directory); + if (DirWithSymlink == CachedDirs.end()) { + // FIXME: Should this be a call to FileSystem::getRealpath(), in some + // cases? What if there is nothing on disk? + if (sys::fs::real_path(Directory, RealPath)) + return; + CachedDirs[Directory] = std::string(RealPath.str()); } else { RealPath = DirWithSymlink->second; } - sys::path::append(RealPath, FileName); - Result.swap(RealPath); - return true; + // Finish recreating the path by appending the original filename, since we + // don't need to resolve symlinks in the filename. + // + // FIXME: If we can cope with this, maybe we can cope without calling + // getRealPath() at all when there's no ".." component. + sys::path::append(RealPath, Filename); + + // Swap to create the output. + Path.swap(RealPath); } -void FileCollector::addFileImpl(StringRef SrcPath) { +/// Make Path absolute. +static void makeAbsolute(SmallVectorImpl &Path) { // We need an absolute src path to append to the root. - SmallString<256> AbsoluteSrc = SrcPath; - sys::fs::make_absolute(AbsoluteSrc); + sys::fs::make_absolute(Path); // Canonicalize src to a native path to avoid mixed separator styles. - sys::path::native(AbsoluteSrc); + sys::path::native(Path); // Remove redundant leading "./" pieces and consecutive separators. - StringRef TrimmedAbsoluteSrc = - sys::path::remove_leading_dotslash(AbsoluteSrc); + Path.erase(Path.begin(), sys::path::remove_leading_dotslash( + StringRef(Path.begin(), Path.size())) + .begin()); +} - // Canonicalize the source path by removing "..", "." components. - SmallString<256> VirtualPath = TrimmedAbsoluteSrc; - sys::path::remove_dots(VirtualPath, /*remove_dot_dot=*/true); +FileCollector::PathCanonicalizer::PathStorage +FileCollector::PathCanonicalizer::canonicalize(StringRef SrcPath) { + PathStorage Paths; + Paths.VirtualPath = SrcPath; + makeAbsolute(Paths.VirtualPath); // If a ".." component is present after a symlink component, remove_dots may // lead to the wrong real destination path. Let the source be canonicalized // like that but make sure we always use the real path for the destination. - SmallString<256> CopyFrom; - if (!getRealPath(TrimmedAbsoluteSrc, CopyFrom)) - CopyFrom = VirtualPath; + Paths.CopyFrom = Paths.VirtualPath; + updateWithRealPath(Paths.CopyFrom); + + // Canonicalize the virtual path by removing "..", "." components. + sys::path::remove_dots(Paths.VirtualPath, /*remove_dot_dot=*/true); + + return Paths; +} + +void FileCollector::addFileImpl(StringRef SrcPath) { + PathCanonicalizer::PathStorage Paths = Canonicalizer.canonicalize(SrcPath); SmallString<256> DstPath = StringRef(Root); - sys::path::append(DstPath, sys::path::relative_path(CopyFrom)); + sys::path::append(DstPath, sys::path::relative_path(Paths.CopyFrom)); // Always map a canonical src path to its real path into the YAML, by doing // this we map different virtual src paths to the same entry in the VFS // overlay, which is a way to emulate symlink inside the VFS; this is also // needed for correctness, not doing that can lead to module redefinition // errors. - addFileToMapping(VirtualPath, DstPath); + addFileToMapping(Paths.VirtualPath, DstPath); } llvm::vfs::directory_iterator diff --git a/unittests/Support/FileCollectorTest.cpp b/unittests/Support/FileCollectorTest.cpp index 644daa374f8..0e8e4503865 100644 --- a/unittests/Support/FileCollectorTest.cpp +++ b/unittests/Support/FileCollectorTest.cpp @@ -33,7 +33,6 @@ public: using FileCollector::FileCollector; using FileCollector::Root; using FileCollector::Seen; - using FileCollector::SymlinkMap; using FileCollector::VFSWriter; bool hasSeen(StringRef fs) {