From db17649a160af149ac7fb5ad1de727b6319724ee Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 14 Jan 2019 10:56:35 +0000 Subject: [PATCH] [VFS] Allow multiple RealFileSystem instances with independent CWDs. Summary: Previously only one RealFileSystem instance was available, and its working directory is shared with the process. This doesn't work well for multithreaded programs that want to work with relative paths - the vfs::FileSystem is assumed to provide the working directory, but a thread cannot control this exclusively. The new vfs::createPhysicalFileSystem() factory copies the process's working directory initially, and then allows it to be independently modified. This implementation records the working directory path, and glues it to relative paths to provide the correct absolute path to the sys::fs:: functions. This will give different results in unusual situations (e.g. the CWD is moved). The main alternative is the use of openat(), fstatat(), etc to ask the OS to resolve paths relative to a directory handle which can be kept open. This is more robust. There are two reasons not to do this initially: 1. these functions are not available on all supported Unixes, and are somewhere between difficult and unavailable on Windows. So we need a path-based fallback anyway. 2. this would mean also adding support at the llvm::sys::fs level, which is a larger project. My clearest idea is an OS-specific `BaseDirectory` object that can be optionally passed to functions there. Eventually this could be backed by either paths or a fd where openat() is supported. This is a large project, and demonstrating here that a path-based fallback works is a useful prerequisite. There is some subtlety to the path-manipulation mechanism: - when setting the working directory, both Specified=makeAbsolute(path) and Resolved=realpath(path) are recorded. These may differ in the presence of symlinks. - getCurrentWorkingDirectory() and makeAbsolute() use Specified - this is similar to the behavior of $PWD and sys::path::current_path - IO operations like openFileForRead use Resolved. This is similar to the behavior of an openat() based implementation, that doesn't see changes in symlinks. There may still be combinations of operations and FS states that yield unhelpful behavior. This is hard to avoid with symlinks and FS abstractions :( The caching behavior of the current working directory is removed in this patch. getRealFileSystem() is now specified to link to the process CWD, so the caching is incorrect. The user who needed this so far is clangd, which will immediately switch to createPhysicalFileSystem(). Reviewers: ilya-biryukov, bkramer, labath Subscribers: ioeric, kadircet, kristina, llvm-commits Differential Revision: https://reviews.llvm.org/D56545 llvm-svn: 351050 --- include/llvm/Support/VirtualFileSystem.h | 8 ++ lib/Support/VirtualFileSystem.cpp | 101 ++++++++++++++------ unittests/Support/VirtualFileSystemTest.cpp | 80 ++++++++++++++++ 3 files changed, 160 insertions(+), 29 deletions(-) diff --git a/include/llvm/Support/VirtualFileSystem.h b/include/llvm/Support/VirtualFileSystem.h index 606a81e494e..b8871a512fe 100644 --- a/include/llvm/Support/VirtualFileSystem.h +++ b/include/llvm/Support/VirtualFileSystem.h @@ -298,8 +298,16 @@ public: /// Gets an \p vfs::FileSystem for the 'real' file system, as seen by /// the operating system. +/// The working directory is linked to the process's working directory. +/// (This is usually thread-hostile). IntrusiveRefCntPtr getRealFileSystem(); +/// Create an \p vfs::FileSystem for the 'real' file system, as seen by +/// the operating system. +/// It has its own working directory, independent of (but initially equal to) +/// that of the process. +std::unique_ptr createPhysicalFileSystem(); + /// A file system that allows overlaying one \p AbstractFileSystem on top /// of another. /// diff --git a/lib/Support/VirtualFileSystem.cpp b/lib/Support/VirtualFileSystem.cpp index 360f8d0061f..f728a6a7c60 100644 --- a/lib/Support/VirtualFileSystem.cpp +++ b/lib/Support/VirtualFileSystem.cpp @@ -228,9 +228,28 @@ std::error_code RealFile::close() { namespace { -/// The file system according to your operating system. +/// A file system according to your operating system. +/// This may be linked to the process's working directory, or maintain its own. +/// +/// Currently, its own working directory is emulated by storing the path and +/// sending absolute paths to llvm::sys::fs:: functions. +/// A more principled approach would be to push this down a level, modelling +/// the working dir as an llvm::sys::fs::WorkingDir or similar. +/// This would enable the use of openat()-style functions on some platforms. class RealFileSystem : public FileSystem { public: + explicit RealFileSystem(bool LinkCWDToProcess) { + if (!LinkCWDToProcess) { + SmallString<128> PWD, RealPWD; + if (llvm::sys::fs::current_path(PWD)) + return; // Awful, but nothing to do here. + if (auto Err = llvm::sys::fs::real_path(PWD, RealPWD)) + WD = {PWD, PWD}; + else + WD = {PWD, RealPWD}; + } + } + ErrorOr status(const Twine &Path) override; ErrorOr> openFileForRead(const Twine &Path) override; directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; @@ -242,15 +261,32 @@ public: SmallVectorImpl &Output) const override; private: - mutable std::mutex CWDMutex; - mutable std::string CWDCache; + // If this FS has its own working dir, use it to make Path absolute. + // The returned twine is safe to use as long as both Storage and Path live. + Twine adjustPath(const Twine &Path, SmallVectorImpl &Storage) const { + if (!WD) + return Path; + Path.toVector(Storage); + sys::fs::make_absolute(WD->Resolved, Storage); + return Storage; + } + + struct WorkingDirectory { + // The current working directory, without symlinks resolved. (echo $PWD). + SmallString<128> Specified; + // The current working directory, with links resolved. (readlink .). + SmallString<128> Resolved; + }; + Optional WD; }; } // namespace ErrorOr RealFileSystem::status(const Twine &Path) { + SmallString<256> Storage; sys::fs::file_status RealStatus; - if (std::error_code EC = sys::fs::status(Path, RealStatus)) + if (std::error_code EC = + sys::fs::status(adjustPath(Path, Storage), RealStatus)) return EC; return Status::copyWithNewName(RealStatus, Path.str()); } @@ -258,56 +294,61 @@ ErrorOr RealFileSystem::status(const Twine &Path) { ErrorOr> RealFileSystem::openFileForRead(const Twine &Name) { int FD; - SmallString<256> RealName; - if (std::error_code EC = - sys::fs::openFileForRead(Name, FD, sys::fs::OF_None, &RealName)) + SmallString<256> RealName, Storage; + if (std::error_code EC = sys::fs::openFileForRead( + adjustPath(Name, Storage), FD, sys::fs::OF_None, &RealName)) return EC; return std::unique_ptr(new RealFile(FD, Name.str(), RealName.str())); } llvm::ErrorOr RealFileSystem::getCurrentWorkingDirectory() const { - std::lock_guard Lock(CWDMutex); - if (!CWDCache.empty()) - return CWDCache; - SmallString<256> Dir; + if (WD) + return WD->Specified.str(); + + SmallString<128> Dir; if (std::error_code EC = llvm::sys::fs::current_path(Dir)) return EC; - CWDCache = Dir.str(); - return CWDCache; + return Dir.str(); } std::error_code RealFileSystem::setCurrentWorkingDirectory(const Twine &Path) { - // FIXME: chdir is thread hostile; on the other hand, creating the same - // behavior as chdir is complex: chdir resolves the path once, thus - // guaranteeing that all subsequent relative path operations work - // on the same path the original chdir resulted in. This makes a - // difference for example on network filesystems, where symlinks might be - // switched during runtime of the tool. Fixing this depends on having a - // file system abstraction that allows openat() style interactions. - if (auto EC = llvm::sys::fs::set_current_path(Path)) - return EC; + if (!WD) + return llvm::sys::fs::set_current_path(Path); - // Invalidate cache. - std::lock_guard Lock(CWDMutex); - CWDCache.clear(); + SmallString<128> Absolute, Resolved, Storage; + adjustPath(Path, Storage).toVector(Absolute); + bool IsDir; + if (auto Err = llvm::sys::fs::is_directory(Absolute, IsDir)) + return Err; + if (!IsDir) + return std::make_error_code(std::errc::not_a_directory); + if (auto Err = llvm::sys::fs::real_path(Absolute, Resolved)) + return Err; + WD = {Absolute, Resolved}; return std::error_code(); } std::error_code RealFileSystem::isLocal(const Twine &Path, bool &Result) { - return llvm::sys::fs::is_local(Path, Result); + SmallString<256> Storage; + return llvm::sys::fs::is_local(adjustPath(Path, Storage), Result); } std::error_code RealFileSystem::getRealPath(const Twine &Path, SmallVectorImpl &Output) const { - return llvm::sys::fs::real_path(Path, Output); + SmallString<256> Storage; + return llvm::sys::fs::real_path(adjustPath(Path, Storage), Output); } IntrusiveRefCntPtr vfs::getRealFileSystem() { - static IntrusiveRefCntPtr FS = new RealFileSystem(); + static IntrusiveRefCntPtr FS(new RealFileSystem(true)); return FS; } +std::unique_ptr vfs::createPhysicalFileSystem() { + return llvm::make_unique(false); +} + namespace { class RealFSDirIter : public llvm::vfs::detail::DirIterImpl { @@ -333,7 +374,9 @@ public: directory_iterator RealFileSystem::dir_begin(const Twine &Dir, std::error_code &EC) { - return directory_iterator(std::make_shared(Dir, EC)); + SmallString<128> Storage; + return directory_iterator( + std::make_shared(adjustPath(Dir, Storage), EC)); } //===-----------------------------------------------------------------------===/ diff --git a/unittests/Support/VirtualFileSystemTest.cpp b/unittests/Support/VirtualFileSystemTest.cpp index 7b42943f0fd..508725881bf 100644 --- a/unittests/Support/VirtualFileSystemTest.cpp +++ b/unittests/Support/VirtualFileSystemTest.cpp @@ -382,6 +382,25 @@ struct ScopedLink { } operator StringRef() { return Path.str(); } }; + +struct ScopedFile { + SmallString<128> Path; + ScopedFile(const Twine &Path, StringRef Contents) { + Path.toVector(this->Path); + std::error_code EC; + raw_fd_ostream OS(this->Path, EC); + EXPECT_FALSE(EC); + OS << Contents; + OS.flush(); + EXPECT_FALSE(OS.error()); + if (EC || OS.error()) + this->Path = ""; + } + ~ScopedFile() { + if (Path != "") + EXPECT_FALSE(llvm::sys::fs::remove(Path.str())); + } +}; } // end anonymous namespace TEST(VirtualFileSystemTest, BasicRealFSIteration) { @@ -411,6 +430,67 @@ TEST(VirtualFileSystemTest, BasicRealFSIteration) { EXPECT_EQ(vfs::directory_iterator(), I); } +TEST(VirtualFileSystemTest, MultipleWorkingDirs) { + // Our root contains a/aa, b/bb, c, where c is a link to a/. + // Run tests both in root/b/ and root/c/ (to test "normal" and symlink dirs). + // Interleave operations to show the working directories are independent. + ScopedDir Root("r", true), ADir(Root.Path + "/a"), BDir(Root.Path + "/b"); + ScopedLink C(ADir.Path, Root.Path + "/c"); + ScopedFile AA(ADir.Path + "/aa", "aaaa"), BB(BDir.Path + "/bb", "bbbb"); + std::unique_ptr BFS = vfs::createPhysicalFileSystem(), + CFS = vfs::createPhysicalFileSystem(); + + ASSERT_FALSE(BFS->setCurrentWorkingDirectory(BDir.Path)); + ASSERT_FALSE(CFS->setCurrentWorkingDirectory(C.Path)); + EXPECT_EQ(BDir.Path, *BFS->getCurrentWorkingDirectory()); + EXPECT_EQ(C.Path, *CFS->getCurrentWorkingDirectory()); + + // openFileForRead(), indirectly. + auto BBuf = BFS->getBufferForFile("bb"); + ASSERT_TRUE(BBuf); + EXPECT_EQ("bbbb", (*BBuf)->getBuffer()); + + auto ABuf = CFS->getBufferForFile("aa"); + ASSERT_TRUE(ABuf); + EXPECT_EQ("aaaa", (*ABuf)->getBuffer()); + + // status() + auto BStat = BFS->status("bb"); + ASSERT_TRUE(BStat); + EXPECT_EQ("bb", BStat->getName()); + + auto AStat = CFS->status("aa"); + ASSERT_TRUE(AStat); + EXPECT_EQ("aa", AStat->getName()); // unresolved name + + // getRealPath() + SmallString<128> BPath; + ASSERT_FALSE(BFS->getRealPath("bb", BPath)); + EXPECT_EQ(BB.Path, BPath); + + SmallString<128> APath; + ASSERT_FALSE(CFS->getRealPath("aa", APath)); + EXPECT_EQ(AA.Path, APath); // Reports resolved name. + + // dir_begin + std::error_code EC; + auto BIt = BFS->dir_begin(".", EC); + ASSERT_FALSE(EC); + ASSERT_NE(BIt, vfs::directory_iterator()); + EXPECT_EQ((BDir.Path + "/./bb").str(), BIt->path()); + BIt.increment(EC); + ASSERT_FALSE(EC); + ASSERT_EQ(BIt, vfs::directory_iterator()); + + auto CIt = CFS->dir_begin(".", EC); + ASSERT_FALSE(EC); + ASSERT_NE(CIt, vfs::directory_iterator()); + EXPECT_EQ((ADir.Path + "/./aa").str(), CIt->path()); // Partly resolved name! + CIt.increment(EC); // Because likely to read through this path. + ASSERT_FALSE(EC); + ASSERT_EQ(CIt, vfs::directory_iterator()); +} + #ifdef LLVM_ON_UNIX TEST(VirtualFileSystemTest, BrokenSymlinkRealFSIteration) { ScopedDir TestDirectory("virtual-file-system-test", /*Unique*/ true);